Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB10

Use reinforcement learning to devise a tic-tac-toe player.

### Deadlines:

* Submission: [Dies Natalis Solis Invicti](https://en.wikipedia.org/wiki/Sol_Invictus)
* Reviews: [Befana](https://en.wikipedia.org/wiki/Befana)

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., better to commit an empty work than not to commit)

In [30]:
import numpy as np 
from itertools import combinations
from collections import namedtuple, defaultdict
from random import choice
from copy import deepcopy

In [13]:
'''
Magic Square :

2 | 7 | 6
--+---+--
9 | 5 | 1
--+---+--
4 | 3 | 8

'''

# Board cells in row-major order, each labelled with an entry of the 3x3
# magic square above. Every row, column and diagonal of that square sums to
# 15, which is what lets win() detect a full line by checking whether any
# three of a player's numbers add up to 15.
MAGIC = [2,7,6,
         9,5,1,
         4,3,8]

In [31]:
# A game position: `x` and `o` are the collections of MAGIC numbers (cells)
# claimed so far by the X player and the O player respectively.
State = namedtuple('State', ['x','o'])

In [32]:
def print_board(pos: State):
    """Print the 3x3 board described by ``pos``, followed by an empty line.

    Cells are visited in the row-major order of ``MAGIC``. A cell whose magic
    number is in ``pos.x`` is drawn as ``X``, one in ``pos.o`` as ``O``, and
    any other cell as ``.``.
    """

    def symbol(cell):
        # X takes precedence, mirroring the order of the membership checks.
        if cell in pos.x:
            return 'X'
        if cell in pos.o:
            return 'O'
        return '.'

    for row_start in range(0, 9, 3):
        row_cells = MAGIC[row_start:row_start + 3]
        print(''.join(symbol(cell) for cell in row_cells))
    print()

In [44]:
def win(elements):
    """Tell whether a player's numbers contain a complete line.

    A line is complete when some three of the given numbers add up to 15.
    Returns ``True`` in that case and ``False`` otherwise (including when
    fewer than three numbers are given).
    """
    for triple in combinations(elements, 3):
        if sum(triple) == 15:
            return True
    return False

def state_value(position: State):
    """Score a position from the X player's point of view.

    Returns:
        1 if X holds a complete line, -1 if O holds one, and 0 otherwise.
        X is tested first, so a position in which both players hold a line
        is scored 1.
    """
    if win(position.x):
        return 1
    return -1 if win(position.o) else 0

In [46]:
def random_game():
    """Play one game between two uniformly random players.

    X moves first and the players then alternate, each picking one of the
    still-free numbers 1..9 with ``choice``. The game stops as soon as the
    player who just moved holds a winning line, or when no number is left.

    Returns:
        A list holding one snapshot of the State after every move, in the
        order the moves were played.
    """
    history = []
    board = State(set(), set())
    free_cells = set(range(1, 10))
    x_to_move = True
    while free_cells:
        mover = board.x if x_to_move else board.o
        cell = choice(list(free_cells))
        mover.add(cell)
        # ``board`` keeps being mutated in place, so record an independent copy.
        history.append(deepcopy(board))
        free_cells.remove(cell)
        if win(mover):
            break
        x_to_move = not x_to_move
    return history


In [35]:
# Demo: play a single random game and print the board after every move.
plays = random_game()
for play in plays :
    print_board(play)

...
X..
...

...
XO.
...

..X
XO.
...

O.X
XO.
...

O.X
XO.
X..

O.X
XOO
X..

O.X
XOO
XX.

O.X
XOO
XXO



In [49]:
# Learned value of every state seen so far, keyed by
# (frozenset of X's numbers, frozenset of O's numbers).
# States that were never visited read as 0.0.
value_dictionary = defaultdict(float)
# Step size of the incremental update below (it plays the role of a learning
# rate, despite the name).
epsilon = 0.001

# Learn state values from 100000 games played between two random players:
# every state visited in a game is pulled towards that game's final outcome.
for steps in range(100000) :
    trajectory = random_game()
    # Outcome of the finished game: 1 if X won, -1 if O won, 0 otherwise.
    final_reward = state_value(trajectory[-1])
    for state in trajectory :
        # Sets are not hashable, so freeze them to build a dictionary key.
        hashable_state = (frozenset(state.x), frozenset(state.o))
        # Move the stored value a fraction `epsilon` of the way towards the final reward.
        value_dictionary[hashable_state] = value_dictionary[hashable_state] + epsilon*(final_reward - value_dictionary[hashable_state])

In [51]:
# The ten states with the lowest learned value; since an O win is rewarded
# with -1, these are the states that turned out best for O.
sorted(value_dictionary.items(), key=lambda e : e[1], reverse=False)[:10]

[((frozenset({3, 7}), frozenset({5})), -0.1752294217659493),
 ((frozenset({1, 4, 6, 9}), frozenset({2, 3, 5, 8})), -0.14536307757712247),
 ((frozenset({1, 9}), frozenset({5})), -0.14451364746627995),
 ((frozenset({1, 3, 4, 5}), frozenset({2, 6, 7, 9})), -0.13417463520514122),
 ((frozenset({1, 3, 8, 9}), frozenset({2, 4, 5, 6})), -0.1333079431482895),
 ((frozenset({2, 3, 5, 9}), frozenset({1, 4, 6, 8})), -0.13244038353182133),
 ((frozenset({1, 3, 7, 9}), frozenset({4, 5, 6, 8})), -0.12808954162980818),
 ((frozenset({1, 7, 8, 9}), frozenset({2, 4, 5, 6})), -0.12808954162980818),
 ((frozenset({2, 3, 7, 8}), frozenset({1, 4, 5, 9})), -0.1254685700597458),
 ((frozenset({1, 3, 7, 9}), frozenset({2, 5, 6, 8})), -0.12459316322296878)]

In [16]:
# NOTE(review): `state` is not defined in this cell; presumably it is the loop
# variable left over from a previously executed cell - confirm before relying on it.
print_board(state)

X..
XX.
OOO



In [None]:
""" 
TODO :
- Create a function that chooses the best move from the state-value dictionary (either find the closest known state and try to approach it, or evaluate all the possible next states and pick the best one)
- Create a function to test the performance of a player against a random player (playing both as X and as O)
- Search for a better alternative to Monte Carlo
""" 