In [109]:
import numpy as np
import copy
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras import regularizers
from keras.layers import Dropout
from sklearn.preprocessing import StandardScaler
from abc import ABC, abstractmethod
from random import random
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt
import os
import time
import tkinter as tk
#initialize for random seeds/states
tf.keras.backend.clear_session()
os.environ['PYTHONHASHSEED'] = '0'
rng = np.random.default_rng(12345)
np.random.seed(42)
tf.random.set_seed(42)

In [49]:
class paper_game(ABC):
    def __init__(self, start_state):
        self.state=start_state
    
    @abstractmethod
    def transition(state, action, player):
        #return next state
        pass
    
    @abstractmethod
    def reward(self, state, action, player):
        #return reward immediately after action
        pass
    
    @abstractmethod
    def policy(self, state):
        #return action
        pass
    
    @abstractmethod
    def Q_func(self, state, action):
        pass
    
    @abstractmethod
    def Q_update(self, state, action, reward_1, maxQ, player):
        pass

In [207]:
from tkinter import messagebox

class tit_tac_toe(paper_game):
#state: 3x3 array to represent board config, with 1 representing self-checker, -1 representing opponent-checker, 0 representing empty
    def __init__(self, start_state=np.zeros((3,3), dtype=np.int8), epsilon=0.9, alpha=0.5, gamma=0.8):
        self.state=start_state
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.Q = np.zeros((19683, 9))  #initialize Q matrix with state 'reference' (see ref function) and action key (see key function)    
    
    def transition(self, state, action, player):
        #action: a list of row and column index to indicate which cell is chosen
        #player: 1 or -1
        #applicable to both tic_tac_toe and Connect-4
        #return next state
        new_state = copy.deepcopy(state)
        new_state[action[0]][action[1]] = player
        return new_state
    
    def win_status(self, s):
        #s: state, 
        #return: whether win or not
        win = 3 in np.sum(s, axis=0) or 3 in np.sum(s, axis=1) or np.sum(s.diagonal())==3 or np.sum(np.fliplr(s).diagonal())==3
        return win

    def lose_status(self, s):
        #s: state, 
        #return: whether win or not
        lose = -3 in np.sum(s, axis=0) or -3 in np.sum(s, axis=1) or np.sum(s.diagonal())==-3 or np.sum(np.fliplr(s).diagonal())==-3
        return lose
    
    def ref(self, state):
        #change state matrix into vector
        ref = np.sum(np.matrix([1,3,3**2,3**3,3**4,3**5,3**6,3**7,3**8]) @ np.reshape((state + 1).flatten(),(9,1)))
        return ref
    
    def key(self, action):
        #change actions [r, c] into numbers 0-8
        return 3*action[0]+action[1]
    
    def action_list(self, state):
        act_list = np.where(state==0)
        return act_list

    def reward(self, state, action, player):
        s = player * self.transition(state, action, player)
        if self.win_status(s):
            reward = 100
        else:
            reward = 0
        return reward
    
    def Q_func(self, state, action):
        Q = self.Q[self.ref(state)][self.key(action)]
        return Q

    def Q_update(self, state, action, reward_1, maxQ, player):
        #state of board instead of current player's perspective
        #reward_1: reward of player 1 regardless of who is current player
        #maxQ: player's maxQ after another action
        #player: 1 or -1
        s = player*state
        self.Q[self.ref(s)][self.key(action)] += self.alpha*(reward_1*player + self.gamma*maxQ - self.Q[self.ref(s)][self.key(action)] )


    def random_move(self, state):
        act_list = self.action_list(state)
        i = np.random.randint(len(act_list[0]))
        action = [act_list[0][i],act_list[1][i]]
        return action

    def best_move(self, state):
        act_list = self.action_list(state)
        maxQ = -10000
        for i in range (len(act_list[0])):
            a = [act_list[0][i],act_list[1][i]]
            temp = self.Q_func(state, a)
            if temp > maxQ:
                maxQ = temp
                action = a
        return action, maxQ
    
    def policy(self, state):
        exploit = (not self.train) or rng.random()> self.epsilon or self.play
        if exploit:
            action, maxQ = self.best_move(state)
        else:
            action = self.random_move(state)
        if self.epsilon < 0.5:
            self.epsilon *= 0.9999
        else:
            self.epsilon *= 0.99999    
        return action
    
    def train(self, train_number=2000):
        self.train = True
        self.play = False
        #loop by train_number
        start_time = time.time()
        for i in range (train_number):
            if i % 10000 == 0 and i != 0:
                end_time = time.time()
                elp_time = '{:.2f}'.format(end_time - start_time)
                print(f'Training Phase, epoch {i}, elapsed time:{elp_time}')
                start_time = time.time()
                
            states =[]
            state = self.state #start state
            states.append(state)
            actions=[]
            r1=[0] #reward list at different times for player 1; for player -1: use zero sum property
            t = 0
            #choose who is X, i.e. plays first
            player = 1 #as this is self-play, we simply assume 1 always plays first and be playerX
            endgame = False
            while not endgame:
                #update his Q before action if previous has action
                s = player * state
                if t >=2:
                    action, maxQ = self.best_move(s)
                    self.Q_update(states[t-2], actions[t-2], r1[t], maxQ, player)
                    
                #action
                action = self.policy(s)
                actions.append(action)
                t += 1
                r1.append(player*self.reward(state, action, player))
                state=self.transition(state, action, player)
                states.append(state)
                #check win status and end game status, update Q for both players if end game
                endgame = self.win_status(s) or t==9
                if endgame:
                    self.Q_update(states[t-1], actions[t-1], r1[t], 0, player) #r1[t+1] is not used as there won't be t+1 when endgame is reached
                    self.Q_update(states[t-2], actions[t-2], r1[t], 0, -player)
                    
                #update player for next loop
                player *= -1
        self.last_episode = states
        
    def display_board(self, state):
        print('-------------------')
        print(state)
        print('-------------------')

    def display_episode(self):
        for i in range(len(self.last_episode)):
            print('state {} :'.format(i))
            self.display_board(self.last_episode[i])
            
    def play_game(self,starter = 'player'):
        self.play = True
        self.train = False
        state = np.zeros((3,3), dtype=np.int8)
        
        endgame = False
        t = 0
        self.display_board(state)
        while not endgame:
            if starter == 'player':
                i = int(input('enter row:\n'))
                j = int(input('enter column:\n'))
                state[i,j] = -1

            else:
                i,j = self.best_move(state)[0]
                state[i,j] = 1

            self.display_board(state)   
            starter *= -1
            t += 1
            endgame = self.win_status(state) or self.lose_status(state) or t==9
            
                    
   
    def play_gui(self):

        self.play = True
        self.train = False
        state = np.zeros((3,3), dtype=np.int8)
        
        endgame = False
        t = 0
        
        win = tk.Tk()
        win.geometry("240x380")

        def next_turn(i,j,button_id):
            nonlocal t
            state[i,j] = -1
            button[button_id].config(text = 'X',state = 'disabled')
            i,j = self.best_move(state)[0]
            
            if self.lose_status(state):
                option = messagebox.askyesno('You Won! Do you want to play again?')
                if option == True:
                    self.play_gui()
                else:
                    win.destroy()
                    return None
                
            state[i,j] = 1
            button[i * 3 + j].config(text = 'O',state = 'disabled')
            
            if self.win_status(state):
                option = messagebox.askyesno('You Lost! Do you want to play again?')
                if option == True:
                    self.play_gui()
                else:
                    win.destroy()
                    
            t += 1
            
            if t == 9:
                option = messagebox.askyesno('Draw! Do you want to play again?')
                if option == True:
                    self.play_gui()
                else:
                    win.destroy()
          
        var = []
        button = []
        game_frame = tk.Frame(win)
        for i in range(9):
            v = tk.StringVar()
            var.append(v)
            m = int(i)
            b = tk.Button(game_frame,  textvariable = v, height= 5, width=10, state = 'disabled')
            button.append(b)
            b.grid(row = i //3 + 1, column = i % 3)

        game_title = tk.Label(win,text = 'TIC TAC TOE')
        game_title.pack()
        game_frame.pack()
        button[0].config(command = lambda:next_turn(0,0,0))
        button[1].config(command = lambda:next_turn(0,1,1))
        button[2].config(command = lambda:next_turn(0,2,2))
        button[3].config(command = lambda:next_turn(1,0,3))
        button[4].config(command = lambda:next_turn(1,1,4))
        button[5].config(command = lambda:next_turn(1,2,5))
        button[6].config(command = lambda:next_turn(2,0,6))
        button[7].config(command = lambda:next_turn(2,1,7))
        button[8].config(command = lambda:next_turn(2,2,8))
        starting_player = tk.Label(win,text = 'First Player:')
        game_variable = tk.StringVar()
        game_option1 = tk.Radiobutton(win, text = 'BOT: ', variable = game_variable, value = 'BOT')
        game_option2 = tk.Radiobutton(win, text = 'Player: ', variable = game_variable, value = 'player')
        starting_player.pack()
        game_option1.pack()
        game_option2.pack()
        
        
        def initiating():
            for b in button:
                b.config(state = 'normal',bg = 'white')

        start_button = tk.Button(win,text = 'Start Game', command = initiating)
        start_button.pack()
        
        win.mainloop()
        
        
            
            
        
        
    

In [208]:
test1 = tit_tac_toe()
test1.train(1000)
# test1.display_episode()

In [107]:
test1.play_game('player')

-------------------
[[0 0 0]
 [0 0 0]
 [0 0 0]]
-------------------
enter row0
enter column0
-------------------
[[-1  0  0]
 [ 0  0  0]
 [ 0  0  0]]
-------------------
-------------------
[[-1  0  0]
 [ 0  1  0]
 [ 0  0  0]]
-------------------
enter row1
enter column2
-------------------
[[-1  0  0]
 [ 0  1 -1]
 [ 0  0  0]]
-------------------
-------------------
[[-1  1  0]
 [ 0  1 -1]
 [ 0  0  0]]
-------------------
enter row2
enter column2
-------------------
[[-1  1  0]
 [ 0  1 -1]
 [ 0  0 -1]]
-------------------
-------------------
[[-1  1  0]
 [ 0  1 -1]
 [ 0  1 -1]]
-------------------


In [108]:
test1.play_game('player')

-------------------
[[0 0 0]
 [0 0 0]
 [0 0 0]]
-------------------
enter row1
enter column1
-------------------
[[ 0  0  0]
 [ 0 -1  0]
 [ 0  0  0]]
-------------------
-------------------
[[ 1  0  0]
 [ 0 -1  0]
 [ 0  0  0]]
-------------------
enter row2
enter column2
-------------------
[[ 1  0  0]
 [ 0 -1  0]
 [ 0  0 -1]]
-------------------
-------------------
[[ 1  0  1]
 [ 0 -1  0]
 [ 0  0 -1]]
-------------------
enter row2
enter column0
-------------------
[[ 1  0  1]
 [ 0 -1  0]
 [-1  0 -1]]
-------------------
-------------------
[[ 1  1  1]
 [ 0 -1  0]
 [-1  0 -1]]
-------------------


In [206]:
test1.play_gui()

Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\tkinter\__init__.py", line 1892, in __call__
    return self.func(*args)
  File "C:\Users\esmae\AppData\Local\Temp/ipykernel_32424/629742819.py", line 234, in <lambda>
    button[0].config(command = lambda:next_turn(0,0,0))
  File "C:\Users\esmae\AppData\Local\Temp/ipykernel_32424/629742819.py", line 211, in next_turn
    t += 1
UnboundLocalError: local variable 't' referenced before assignment
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\esmae\AppData\Local\Temp/ipykernel_32424/3788592356.py", line 1, in <module>
    test1.play_gui()
  File "C:\Users\esmae\AppData\Local\Temp/ipykernel_32424/629742819.py", line 259, in play_gui
    win.mainloop()
  File "C:\ProgramData\Anaconda3\lib\tkinter\__init__.py", line 1429, in mainloop
    self.tk.mainloop(n)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2064, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File 

TypeError: object of type 'NoneType' has no len()

In [196]:
win = tk.Tk()
win.geometry("240x380")

def id_ret(i):
    print(i)
    
var = []
button = []
game_frame = tk.Frame(win)
for i in range(9):
    v = tk.StringVar()
    var.append(v)
    m = int(i)
    b = tk.Button(game_frame,  textvariable = v, height= 5, width=10, state = 'disabled')
    button.append(b)
    b.grid(row = i //3 + 1, column = i % 3)

game_title = tk.Label(win,text = 'TIC TAC TOE')
game_title.pack()
game_frame.pack()
button[0].config(command = lambda:id_ret(0))
button[1].config(command = lambda:id_ret(1))
button[2].config(command = lambda:id_ret(2))
button[3].config(command = lambda:id_ret(3))
button[4].config(command = lambda:id_ret(4))
button[5].config(command = lambda:id_ret(5))
button[6].config(command = lambda:id_ret(6))
button[7].config(command = lambda:id_ret(7))
button[8].config(command = lambda:id_ret(8))
starting_player = tk.Label(win,text = 'First Player:')
game_variable = tk.StringVar()
game_option1 = tk.Radiobutton(win, text = 'BOT: ', variable = game_variable, value = 'BOT')
game_option2 = tk.Radiobutton(win, text = 'Player: ', variable = game_variable, value = 'player')
starting_player.pack()
game_option1.pack()
game_option2.pack()
def initiating():
    for b in button:
        b.config(state = 'normal',bg = 'white')
        
start_button = tk.Button(win,text = 'Start Game', command = initiating)
start_button.pack()



    
# self.play = True
#         self.train = False
#         state = np.zeros((3,3), dtype=np.int8)
        
#         endgame = False
#         t = 0
#         self.display_board(state)
#         while not endgame:
#             if starter == 'player':
#                 i = int(input('enter row:\n'))
#                 j = int(input('enter column:\n'))
#                 state[i,j] = -1

#             else:
#                 i,j = self.best_move(state)[0]
#                 state[i,j] = 1

#             self.display_board(state)   
#             starter *= -1
#             endgame = self.win_status(state) or self.lose_status(state) or t==9
#             t += 1
win.mainloop()

In [167]:
def hi(i):
    print(i)
    
(lambda m:hi(m))(2)

2


In [197]:
tk

<module 'tkinter' from 'C:\\ProgramData\\Anaconda3\\lib\\tkinter\\__init__.py'>

In [199]:
from tkinter import messagebox