# Decision Theory Project - TicTacToe
*By Jelle Huibregtse and Aron Hemmes*

Below is a TicTacToe environment with an AI based on reward

## Setup
- Loading in some libraries
- Configuring layout

In [315]:
# Libraries
from enum import Enum
from random import random, choice
from IPython.display import display, clear_output
import ipywidgets as widgets

# Layout
# Square 50px x 50px cell used for each of the nine board buttons
field_layout = widgets.Layout(width = '50px', height = '50px')
# Width shared by the player-select button and the reset button
wide_layout = widgets.Layout(width = '158px')
# Column flex flow, used for the stack of board rows and for the overall game UI
column_layout = widgets.Layout(flex_flow = 'column')

## 1. Definition of the Environment

The code below defines all characteristics of a TicTacToe environment:

Environment state:
- the player is either X or O
- the opposing player (agent) is either X or O depending on the player
- X and O take turns placing an X or O on empty fields until either one has won or there are no more fields left on the board
- a board starts out empty and can contain X and O marks

The following methods are described in code below:
- reset() which completely resets the board to an empty state
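- step() processes the game: checks for a winner, lets the agent make its move, checks for a winner again and renders the board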
- render() visualisation of the current TicTacToe game
- change_player() the player switches between X and O and resets the board
- player_move() the player places either an X or O on an empty field
- get_turns() returns the amount of turns that have passed
- get_turn() returns which player's turn it currently is

In [316]:
class AgentType(Enum):
    """Selects which agent function TicTacToeEnvironment.step() calls."""
    # step() calls random_move() for this type
    Random = 1
    # step() calls reward_based_move() for this type
    Reward = 2

class TicTacToeEnvironment:
    """Interactive TicTacToe game between a human player and an agent.

    Attributes:
        board:  list of nine fields in row-major order, each 'X', 'O' or
                ' ' (empty).
        player: mark used by the human player, 'X' or 'O'.
        result: '' while nobody has won, otherwise the winner's mark.
        action: AgentType member that selects which agent function step()
                calls.
    """

    # Field-index triples that form three in a row. They are checked in this
    # order: lines through the centre (4), then lines through field 0, then
    # lines through field 8. Together these cover all eight possible lines.
    _WINNING_LINES = (
        (4, 1, 7), (4, 3, 5), (4, 0, 8), (4, 2, 6),
        (0, 1, 2), (0, 3, 6),
        (8, 5, 2), (8, 7, 6),
    )

    def __init__(self, action):
        """Create an empty board with the human playing 'X'.

        Args:
            action: AgentType member deciding which agent step() invokes.
        """
        self.board = [' '] * 9
        self.player = 'X'
        self.result = ''
        self.action = action

    #=========================================================
    # util for the environment
    #=========================================================

    def get_turns(self):
        """Return the number of turns that have passed (occupied fields)."""
        return sum(1 for mark in self.board if mark != ' ')

    def get_turn(self):
        """Return the mark whose turn it is: 'X' on even turn counts, else 'O'."""
        return 'X' if self.get_turns() % 2 == 0 else 'O'

    def check_result(self):
        """Store the winner's mark in ``self.result`` if a line is complete.

        ``self.result`` is left untouched when no line of three equal,
        non-empty marks exists.
        """
        board = self.board
        for first, second, third in self._WINNING_LINES:
            if board[first] != ' ' and board[first] == board[second] == board[third]:
                self.result = board[first]
                return

    #=========================================================
    # actions
    #=========================================================

    def player_move(self, e):
        """Button callback: place the player's mark on the clicked field.

        Args:
            e: the clicked widget; its ``tooltip`` holds the field index
               as a string (set in render()).

        The click is ignored when the game already has a result, when it is
        not the player's turn, or when the field is occupied.
        """
        if self.result != '' or self.get_turn() != self.player:
            return

        field = int(e.tooltip)
        if self.board[field] == ' ':
            self.board[field] = self.player
            self.step()

    def reset(self, e):
        """Clear the board and the result, then process the game again.

        Args:
            e: the clicked widget when used as a button callback, or ''
               when called from change_player().

        An already empty board is only re-processed when ``e`` is '', so
        clicking reset on an empty board does nothing.
        """
        if self.board != [' '] * 9:
            self.board = [' '] * 9
            self.result = ''
            self.step()
        elif e == '':
            self.step()

    def change_player(self, e):
        """Button callback: switch the player between 'X' and 'O' and reset.

        Args:
            e: the clicked widget (unused).
        """
        self.player = 'O' if self.player == 'X' else 'X'
        self.reset('')

    def step(self):
        """Process the game: check for a winner, let the agent move, re-render."""
        # Check if anyone has won
        self.check_result()

        # Calculate agent's move. A full board leaves nothing to play, and
        # asking the random agent to move then would make choice() fail on an
        # empty list of fields, so the agent is skipped in that case.
        if ' ' in self.board:
            if self.action == AgentType.Random:
                random_move(self.board, self.player, self.result)
            elif self.action == AgentType.Reward:
                reward_based_move(self.board, self.player, self.result)

        # Check if anyone has won
        self.check_result()

        # Render board
        self.render()

    def render(self):
        """Replace the current output with a freshly built game UI."""
        elements = []

        # Clear output
        clear_output(wait=True)

        # Add header, including the winner once there is one
        text = '<h1>TicTacToe</h1>'
        if self.result != '':
            text += ' Winner is <b>{}</b>'.format(self.result)
        elements.append(widgets.HTML(value=text))

        # Add board buttons, three per row; the tooltip carries the field
        # index that player_move() reads back on click
        rows = []
        for row_start in range(0, 9, 3):
            row_buttons = []
            for field in range(row_start, row_start + 3):
                btn = widgets.Button(tooltip=str(field), description=self.board[field], layout=field_layout)
                btn.on_click(self.player_move)
                row_buttons.append(btn)
            rows.append(widgets.Box(row_buttons))
        elements.append(widgets.Box(children=rows, layout=column_layout))

        # Add player select
        player_btn = widgets.Button(description='PLAYER ' + self.player, layout=wide_layout)
        player_btn.on_click(self.change_player)
        elements.append(player_btn)

        # Add reset button
        reset_btn = widgets.Button(description='reset', layout=wide_layout)
        reset_btn.on_click(self.reset)
        elements.append(reset_btn)

        # Display elements
        display(widgets.Box(children=elements, layout=column_layout))

## 2. Random Agent

An agent which places its own mark (X or O, whichever the player is not using) on a randomly chosen empty field

In [317]:
# Initializing environment: AgentType.Random makes the environment's step()
# hand the agent's move to random_move()
random_environment = TicTacToeEnvironment(AgentType.Random)

# Code for random move
def random_move(board, player, result):
    """Place the agent's mark on a randomly chosen empty field of ``board``.

    The agent plays 'O' when the human ``player`` is 'X' and 'X' otherwise.
    The board is modified in place and nothing is returned. No move is made
    when the game already has a result, when it is not the agent's turn, or
    when the board has no empty field left.

    Args:
        board: list of fields, each 'X', 'O' or ' ' (empty).
        player: mark of the human player.
        result: '' while the game is undecided, otherwise the winner's mark.
    """
    agent = 'O' if player == 'X' else 'X'

    if result != '':
        return

    empty_fields = [field for field, mark in enumerate(board) if mark == ' ']

    # Derive the turn from the board that was passed in rather than from a
    # global environment: X is to move whenever an even number of marks has
    # been placed.
    marks_placed = len(board) - len(empty_fields)
    turn = 'X' if marks_placed % 2 == 0 else 'O'

    # choice() raises IndexError on an empty sequence, so a full board
    # (a draw) must not reach it.
    if turn == agent and empty_fields:
        board[choice(empty_fields)] = agent


# Rendering the board: builds the header, the 3x3 button grid and the
# player/reset buttons and displays them; render() itself does not call step()
random_environment.render()

Box(children=(HTML(value='<h1>TicTacToe</h1> Winner is <b>X</b>'), Box(children=(Box(children=(Button(descript…

## 3. Decisions based on Reward

Next we have the code for the AI and the util used for it

In [318]:
# Initializing environment: AgentType.Reward makes the environment's step()
# hand the agent's move to reward_based_move()
reward_environment = TicTacToeEnvironment(AgentType.Reward)

# Calculate the best move for agent based on reward
def reward_based_move(board, player, result):
    """Entry point for the reward-based agent's move (selection not implemented).

    The agent plays 'O' when the human ``player`` is 'X' and 'X' otherwise.
    Currently the function only determines whether the agent is to move; it
    does not place a mark, so ``board`` is left unchanged and nothing is
    returned.

    Args:
        board: list of fields, each 'X', 'O' or ' ' (empty).
        player: mark of the human player.
        result: '' while the game is undecided, otherwise the winner's mark.
    """
    agent = 'O' if player == 'X' else 'X'

    # Derive the turn from the board that was passed in (there is no
    # module-level get_turn() to call): X is to move whenever an even number
    # of marks has been placed.
    marks_placed = sum(1 for mark in board if mark != ' ')
    turn = 'X' if marks_placed % 2 == 0 else 'O'

    # If there's no result and it's the ai's turn
    if result == '' and turn == agent:
        # TODO: score the empty fields and place the agent's mark on the
        # best one; only the initial reward value exists so far.
        reward = 0

#=========================================================
# rendering the board
#=========================================================
# Draws the freshly created (empty) board; render() does not call step(), so
# the agent is not invoked until one of the buttons is clicked
reward_environment.render()

Box(children=(HTML(value='<h1>TicTacToe</h1>'), Box(children=(Box(children=(Button(description=' ', layout=Lay…