# Decision Theory Project - TicTacToe
*By Jelle Huibregtse and Aron Hemmes*

Below is a TicTacToe environment with an AI based on reward

## Setup
- Loading in some libraries
- Configuring layout

In [365]:
# Libraries
from enum import Enum
from random import random, choice
from IPython.display import display, clear_output
import ipywidgets as widgets

# Layout
# Square 50x50 px cell used for each of the nine board buttons
field_layout = widgets.Layout(width='50px', height='50px')
# Wide layout shared by the player-select and reset buttons
wide_layout = widgets.Layout(width='158px')
# Stacks child widgets vertically (board rows, and the overall UI)
column_layout = widgets.Layout(flex_flow='column')

## 1. Definition of the Environment

The code below defines all characteristics of a TicTacToe environment:

Environment state:
- the player is either X or O
- the opposing player (agent) is either X or O depending on the player
- X and O take turns placing their mark on an empty field until either one has won or there are no more empty fields left on the board
- a board starts out empty and can contain X and O marks

The following methods are described in code below:
- reset() which completely resets the board to an empty state
- step() processes the game
- render() visualisation of the current TicTacToe game
- change_player() the player switches between X and O and resets the board
- field_click() handles a click on a board field: the player places either an X or O on an empty field
- get_turns() returns the number of turns that have passed
- get_turn() returns which player's turn it currently is

In [366]:
class TicTacToeEnvironment():
    """Interactive TicTacToe board displayed with ipywidgets.

    State:
        board:  list of nine fields in row-major order; each entry is
                'X', 'O' or ' ' (empty).
        player: mark used by the human player ('X' or 'O').
        result: '' while nobody has won, otherwise the winning mark.
        data:   list whose entries are printed after every applied move;
                nothing in this class appends to it.
        action: agent selector; only the string 'random' is acted upon.
    """

    # Marker for an unoccupied field
    _EMPTY = ' '

    # Index triples that form a line. The order (lines through the centre,
    # then through field 0, then through field 8) decides which line is
    # reported first if more than one is complete.
    _WIN_LINES = (
        (1, 4, 7), (3, 4, 5), (0, 4, 8), (2, 4, 6),
        (0, 1, 2), (0, 3, 6),
        (8, 5, 2), (8, 7, 6),
    )

    def __init__(self, action):
        self.board = [self._EMPTY] * 9
        self.player = 'X'
        self.result = ''
        self.data = []
        self.action = action
        self.reset('')

    # Count turns that have passed
    def get_turns(self):
        """Return the number of marks currently on the board."""
        return sum(1 for mark in self.board if mark != self._EMPTY)

    # Check which player's turn it currently is
    def get_turn(self):
        """Return 'X' when an even number of marks is placed, else 'O'."""
        if self.get_turns() % 2 == 0:
            return 'X'
        return 'O'

    # Check for three of the same marks in a row
    def check_result(self):
        """Store the winning mark in `self.result` if a line is complete.

        `self.result` is left untouched when no line is complete.
        """
        board = self.board
        for a, b, c in self._WIN_LINES:
            if board[a] != self._EMPTY and board[a] == board[b] == board[c]:
                self.result = board[a]
                return

    # Process step on field click
    def field_click(self, e):
        """Button callback: play the human move, then let the agent react.

        The field index is read from the clicked button's tooltip, which
        render() sets to str(index).
        """
        self.step(self.player, int(e.tooltip))

        # Execute agent code
        self.agent()

    # Reset board
    def reset(self, e):
        """Clear the board and start a new game.

        `e` is the clicked button when used as a widget callback, or ''
        when called from code. Passing '' forces a render even if the
        board was already empty.
        """
        was_used = self.board != [self._EMPTY] * 9

        if was_used:
            self.board = [self._EMPTY] * 9
            self.result = ''
            self.data = []

        # Execute agent code on the fresh board. Calling it before the
        # board is cleared would throw the agent's move away again.
        self.agent()

        if was_used or e == '':
            self.render()

    # Change player on player click
    def change_player(self, e):
        """Button callback: swap the human mark and restart the game."""
        if self.player == 'X':
            self.player = 'O'
        else:
            self.player = 'X'
        self.reset('')

    # Execute agent code
    def agent(self):
        """Give the agent a chance to move, depending on `self.action`.

        NOTE(review): random_move is a module-level function, not part of
        this class. It must already exist when this runs, and the
        constructor reaches this call through reset('').
        """
        if self.action == 'random':
            random_move()

    # Validate and apply a single move without touching the UI
    def _apply_move(self, player, field):
        """Place `player` on `field` if the move is legal.

        A move is legal when the game has no result yet, it is `player`'s
        turn, `field` is a valid board index and that field is empty.

        Returns True when the mark was placed, False otherwise.
        """
        if self.result != '':
            return False
        if self.get_turn() != player:
            return False
        # Reject out-of-range indices; negative ones would otherwise wrap
        if not 0 <= field < len(self.board):
            return False
        if self.board[field] != self._EMPTY:
            return False

        self.board[field] = player

        # Check if anyone has won
        self.check_result()
        return True

    # Processing the game
    def step(self, player, field):
        """Play `player`'s mark on `field` and redraw the board.

        Illegal moves (see _apply_move) are ignored without redrawing.
        """
        if not self._apply_move(player, field):
            return

        # Render board
        self.render()

        # Print data (prints nothing while self.data is empty)
        for d in self.data:
            print(d)

    # Render the board
    def render(self):
        """Replace the cell output with the current board and controls."""
        elements = []

        # Clear output
        clear_output(wait=True)

        # Add header
        text = '<h1>TicTacToe</h1>'
        if not self.result == '':
            text += ' Winner is <b>{}</b>'.format(self.result)
        elements.append(widgets.HTML(value=text))

        # Add board buttons, grouped into rows of three
        buttons = []
        rows = []
        for i in range(9):
            # The tooltip carries the field index for field_click()
            btn = widgets.Button(tooltip=str(i), description=self.board[i], layout=field_layout)
            btn.on_click(self.field_click)
            buttons.append(btn)
            if (i + 1) % 3 == 0:
                rows.append(widgets.Box(buttons))
                buttons = []
        elements.append(widgets.Box(children=rows, layout=column_layout))

        # Add player select
        player_btn = widgets.Button(description='PLAYER ' + self.player, layout=wide_layout)
        player_btn.on_click(self.change_player)

        elements.append(player_btn)

        # Add reset button
        reset_btn = widgets.Button(description='reset', layout=wide_layout)
        reset_btn.on_click(self.reset)
        elements.append(reset_btn)

        # Display elements
        display(widgets.Box(children=elements, layout=column_layout))

## 2. Random Agent

An agent which places its mark (X or O, whichever the player is not using) on a random empty field.

In [367]:
# Initializing environment
# NOTE(review): the constructor calls reset(''), which calls agent() and, for
# the action 'random', the module-level random_move() defined further down,
# so random_move must already exist when this line runs.
random_environment = TicTacToeEnvironment('random')

# Calculate random move for agent
def random_move(environment=None):
    """Let the agent play its mark on a randomly chosen empty field.

    The agent uses whichever mark the human player is not using. The move
    is handed to the environment's step(), which decides whether it is
    actually applied.

    Args:
        environment: object exposing `player`, `board` and
            `step(mark, field)`. Defaults to the notebook-level
            `random_environment`.
    """
    # Fall back to the notebook-level environment when none is given
    env = random_environment if environment is None else environment

    agent = 'O' if env.player == 'X' else 'X'

    empty_fields = [i for i, mark in enumerate(env.board) if mark == ' ']

    # Nothing to play on a full board; choice([]) would raise IndexError
    if not empty_fields:
        return

    env.step(agent, choice(empty_fields))

TypeError: list indices must be integers or slices, not str

## 3. Decisions based on Reward

Next we have the code for the reward-based AI and the utilities it uses.

In [None]:
# Initializing environment
# NOTE(review): AgentType is not defined anywhere in the visible code (only
# Enum is imported), and TicTacToeEnvironment.agent() only reacts to the
# string 'random' -- confirm what should be passed here.
reward_environment = TicTacToeEnvironment(AgentType.Reward)

# Calculate the best move for agent based on reward
def reward_based_move(board, player, result):
    """Unfinished placeholder for the reward-based agent move.

    Args:
        board: sequence of fields, each 'X', 'O' or ' ' (empty).
        player: mark used by the human player; the agent uses the other.
        result: '' while the game is undecided, otherwise the winner.

    Returns:
        None. No move is selected yet; only the "is it the agent's turn"
        check is in place.
    """
    agent = 'O' if player == 'X' else 'X'

    # Derive whose turn it is from the board itself: X moves when an even
    # number of marks is placed (same rule as TicTacToeEnvironment.get_turn).
    # There is no module-level get_turn() to call.
    marks_placed = sum(1 for mark in board if mark != ' ')
    turn = 'X' if marks_placed % 2 == 0 else 'O'

    # If there's no result and it's the ai's turn
    if result == '' and turn == agent:
        # TODO: evaluate the candidate moves; the reward is not used yet
        reward = 0

# Rendering the board
# The constructor already renders once through reset(''); this call draws
# the board and its controls again.
reward_environment.render()