# Decision Theory Project - TicTacToe
*By Jelle Huibregtse and Aron Hemmes*

Below is a TicTacToe environment with an AI agent that chooses its moves based on reward.

## Setup
- Loading in some libraries
- Configuring layout

In [160]:
# Libraries
from enum import Enum
from random import random, choice
from IPython.display import display, clear_output
import ipywidgets as widgets

# Layout
# Shared widget layouts, created once and reused on every render of the board
field_layout = widgets.Layout(width='50px', height='50px')  # one square board field
wide_layout = widgets.Layout(width='158px')  # player and reset buttons
column_layout = widgets.Layout(flex_flow='column')  # stack children vertically
text_layout = widgets.Layout(white_space='pre-wrap')  # keep line breaks in the move log

## 1. Definition of the Environment

The code below defines all characteristics of a TicTacToe environment:

Environment state:
- the player is either X or O
- the opposing player (agent) is either X or O depending on the player
- X and O take turns placing their mark on an empty field until either one has won or there are no more empty fields left on the board
- a board starts out empty and can contain X and O marks

The following methods are defined in the code below:
- reset() resets the board to an empty state and then lets the agent take its turn
- step() processes a single move in the game
- render() visualises the current TicTacToe game
- change_player() switches the player between X and O and resets the board
- field_click() places the player's X or O on an empty field and then lets the agent take its turn
- check_result() sets the result to the player who has three in a row, if there is one
- get_turns() returns the number of turns that have passed
- get_turn() returns which player's turn it currently is

In [161]:
class TicTacToeEnvironment():
    """Interactive TicTacToe game between a human player and an agent.

    State kept on the instance:
    - board: list of nine marks ('X', 'O' or ' ' for an empty field);
      fields 0-2, 3-5 and 6-8 are rendered as the three rows
    - player: the mark controlled by the human ('X' or 'O')
    - result: '' while nobody has won, otherwise the winning mark
    - data: log lines describing the moves made by the agent
    - agent: callable that receives the environment and may call step()
    """

    # Field triples that count as three in a row, in the order they are
    # checked: lines through the centre, then through field 0, then field 8
    _LINES = (
        (4, 1, 7), (4, 3, 5), (4, 0, 8), (4, 2, 6),
        (0, 1, 2), (0, 3, 6),
        (8, 5, 2), (8, 7, 6),
    )

    def __init__(self, func):
        """Create an empty board with the human as 'X' and run the agent once.

        func is the agent: a callable taking this environment as its only
        argument.
        """
        self.board = [' '] * 9
        self.player = 'X'
        self.result = ''
        self.data = []

        # The agent is injected so different strategies can share this class
        self.agent = func

        # Give the agent a chance to move right away
        self.agent(self)

    def get_turns(self):
        """Return how many fields on the board are already marked."""
        return sum(1 for mark in self.board if mark != ' ')

    def get_turn(self):
        """Return the mark whose turn it is; 'X' moves on even turn counts."""
        return 'X' if self.get_turns() % 2 == 0 else 'O'

    def check_result(self):
        """Store the winning mark in self.result if a line of three exists.

        self.result is left untouched when no line is complete.
        """
        for anchor, second, third in self._LINES:
            mark = self.board[anchor]
            if mark != ' ' and mark == self.board[second] == self.board[third]:
                self.result = mark
                return

    def field_click(self, e):
        """Button handler: play the human's mark, then let the agent respond.

        The field index is read from the clicked button's tooltip, which
        render() sets to the field number.
        """
        self.step(self.player, int(e.tooltip))

        # The agent answers after every click
        self.agent(self)

    def reset(self, e):
        """Clear a used board, then run the agent and redraw.

        When the board is already empty nothing happens, unless e is the
        empty string (as passed by change_player), in which case the agent
        is still run and the board redrawn.
        """
        if self.board != [' '] * 9:
            # Start a fresh game
            self.board = [' '] * 9
            self.result = ''
            self.data = []
        elif e != '':
            # Untouched board and an ordinary reset click: nothing to do
            return

        # Execute agent code, then show the board
        self.agent(self)
        self.render()

    def change_player(self, e):
        """Button handler: swap the human's mark and restart the game."""
        self.player = 'O' if self.player == 'X' else 'X'
        self.reset('')

    def step(self, player, field, reward=1.0):
        """Place player's mark on field if that move is currently allowed.

        The move is ignored when the game already has a result, when it is
        not player's turn, or when the field is occupied. Moves by the agent
        (any mark other than self.player) are logged in self.data together
        with reward. After a successful move the result is re-checked and
        the board redrawn.
        """
        if self.result != '':
            return
        if self.get_turn() != player:
            return
        if self.board[field] != ' ':
            return

        self.board[field] = player

        # Only the agent's moves are logged
        if player != self.player:
            self.data.append('mark: {}     field: {}     reward: {}'.format(player, field, reward))

        self.check_result()
        self.render()

    def render(self):
        """Redraw the header, board, control buttons, winner and move log."""
        # Replace the previous output instead of stacking boards
        clear_output(wait=True)

        header = widgets.HTML(value='<h1>TicTacToe</h1>')

        # One button per field; the tooltip carries the field index for field_click
        cells = []
        for index in range(9):
            cell = widgets.Button(tooltip=str(index), description=self.board[index], layout=field_layout)
            cell.on_click(self.field_click)
            cells.append(cell)

        # Group the nine buttons into three rows of three
        rows = [widgets.Box(cells[start:start + 3]) for start in range(0, 9, 3)]
        grid = widgets.Box(children=rows, layout=column_layout)

        # Player select
        player_btn = widgets.Button(description='PLAYER ' + self.player, layout=wide_layout)
        player_btn.on_click(self.change_player)

        # Reset button
        reset_btn = widgets.Button(description='reset', layout=wide_layout)
        reset_btn.on_click(self.reset)

        panel = [header, grid, player_btn, reset_btn]

        # Winner text, only when there is a result
        if self.result != '':
            panel.append(widgets.HTML(value='Winner is <b>{}</b>'.format(self.result)))

        # Game panel on the left, agent move log on the right
        game_box = widgets.Box(children=panel, layout=column_layout)
        log_box = widgets.HTML(value='\n'.join(self.data), layout=text_layout)
        display(widgets.Box([game_box, log_box]))

## 2. Random Agent

An agent that places its X or O mark on a random empty field.

In [162]:
# Calculate random move for agent
def random_move(self):
    """Let the agent mark one randomly chosen empty field.

    self is the environment; the agent plays whichever mark the human
    player (self.player) does not use. Nothing happens on a full board.
    """
    agent = 'O' if self.player == 'X' else 'X'

    # Indices of all fields that are still empty
    open_fields = [index for index, mark in enumerate(self.board) if mark == ' ']

    # Pick one at random, if any is left
    if open_fields:
        self.step(agent, choice(open_fields))

# Initializing environment: the constructor stores random_move as the agent
# and calls it once right away
random_environment = TicTacToeEnvironment(random_move)

# Rendering the board so the game widgets are displayed
random_environment.render()

Box(children=(Box(children=(HTML(value='<h1>TicTacToe</h1>'), Box(children=(Box(children=(Button(description='…

## 3. Decisions based on Reward

Next is the code for the reward-based AI agent and the utilities it uses.

In [163]:
# Calculate the best move for agent based on reward
def reward_based_move(self):
    """Let the agent mark the empty field with the highest reward.

    self is the environment; the agent plays whichever mark the human
    player (self.player) does not use. Every empty field currently gets a
    placeholder reward of 0.0, so the first empty field is chosen. Nothing
    happens on a full board.
    """
    agent = 'O' if self.player == 'X' else 'X'

    # Pair every empty field with its reward as [field, reward]
    candidates = [[field, 0.0] for field, mark in enumerate(self.board) if mark == ' ']

    # The environment runs the agent after every click, including the one
    # that fills the last field; max() on an empty list raises ValueError
    if not candidates:
        return

    # Calculate reward for empty fields

    # Choose field with highest reward; max() returns the first one on ties.
    # A single step() is enough: once a move is made it is no longer the
    # agent's turn, so further attempts would be ignored anyway
    best_field, _ = max(candidates, key=lambda candidate: candidate[1])
    self.step(agent, best_field)
    

# Initializing environment: the constructor stores reward_based_move as the
# agent and calls it once right away
reward_environment = TicTacToeEnvironment(reward_based_move)

# Rendering the board so the game widgets are displayed
reward_environment.render()

Box(children=(Box(children=(HTML(value='<h1>TicTacToe</h1>'), Box(children=(Box(children=(Button(description='…