<a href="https://colab.research.google.com/github/jesung/Tic-Tac-Toe/blob/master/tic_tac_toe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
# NOTE(review): authenticate_user() presumably has to run before the
# application-default credentials are requested below - keep this order.
auth.authenticate_user()
gauth = GoogleAuth()
# Hand the application-default credentials to PyDrive.
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client the later cells use to upload/download model files.
drive = GoogleDrive(gauth)

In [0]:
import pprint
import numpy as np
import tensorflow as tf
from tensorflow import keras
import random

# Value network used to score board positions.
# Input: one flattened board of 9 cells, declared with input_shape=(1, 9), so
# callers feed arrays shaped (batch, 1, 9) and read predictions back as
# (batch, 1, 1) (see the reshape/indexing in move_sel and simulate).
# Output: a single sigmoid unit; simulate() trains it against labels of
# 1, 0 or 0.5 (draw) with a binary cross-entropy loss.
model = keras.models.Sequential([
    keras.layers.Dense(32, activation=tf.nn.relu, input_shape=(1,9)),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(16, activation=tf.nn.relu),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation=tf.nn.sigmoid)
])

# 'binary_crossentropy' is listed both as the loss and as a reported metric.
model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy','binary_crossentropy'])

class tic_tac_toe:
    """Minimal tic-tac-toe state: a 3x3 integer board plus the side to move.

    Cells hold 0 when empty, otherwise the value of ``turn`` (1 or -1) of the
    player who claimed them. Player 1 always moves first.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Empty the board and hand the move back to player 1."""
        self.board = np.zeros((3, 3), dtype=int)
        self.turn = 1

    def move(self, x, y):
        """Place the current player's mark at row ``x``, column ``y``.

        On success the turn passes to the other player. If the cell is
        already occupied nothing changes; the attempt and the current board
        are printed instead.
        """
        # TODO: occupied cells are only reported, not signalled to the caller.
        if self.board[x, y] == 0:
            self.board[x, y] = self.turn
            self.turn = -self.turn
            return

        print('Invalid move',x,y)
        print(self.board)

def win(board):
    """Classify a 3x3 board as 'Won', 'Drawn' or 'In Progress'.

    'Won' means some row, column or diagonal holds three equal non-zero
    marks (it does not say which player). Otherwise the board is 'Drawn'
    when no cell equals 0, and 'In Progress' while empty cells remain.
    """
    # Gather all eight lines: three columns, three rows, two diagonals.
    lines = []
    for k in range(3):
        lines.append((board[0, k], board[1, k], board[2, k]))
        lines.append((board[k, 0], board[k, 1], board[k, 2]))
    lines.append((board[0, 0], board[1, 1], board[2, 2]))
    lines.append((board[0, 2], board[1, 1], board[2, 0]))

    for first, second, third in lines:
        if first != 0 and first == second and second == third:
            return 'Won'

    # No winner: an empty cell means play continues, a full board is a draw.
    if 0 in board:
        return 'In Progress'
    return 'Drawn'
          
def move_gen(board_state, turn):
    """Enumerate every legal move for ``turn`` on ``board_state``.

    Returns a dict keyed by (row, col) of each empty cell, in row-major
    order. Each value is the flattened 9-cell board that results from
    writing ``turn`` into that cell. ``board_state`` itself is not modified.
    """
    def _board_after(row, col):
        # Work on a copy so the caller's board stays untouched.
        candidate = board_state.copy()
        candidate[row, col] = turn
        return candidate.flatten()

    return {
        (row, col): _board_after(row, col)
        for row in range(3)
        for col in range(3)
        if board_state[row, col] == 0
    }
  
def move_sel(move_list, turn, train=True):
    """Score every candidate move with the model and choose one.

    Args:
        move_list: dict mapping (row, col) to the flattened 9-cell board
            that results from playing there (the format move_gen returns).
        turn: side to move. Player 1 picks the highest-scored board; any
            other value picks the lowest-scored one.
        train: when truthy, roughly one call in five ignores the scores and
            picks a uniformly random legal move (exploration). When falsy,
            selection is always greedy and each candidate's score and
            coordinates are printed.

    Returns:
        A tuple ``(board, score, (row, col))``: the chosen board as a
        length-9 float array, the model's score for it, and the move
        coordinates as floats (callers cast them to int).

    Raises:
        ValueError: if ``move_list`` is empty.
    """
    if not move_list:
        raise ValueError('move_sel called with no legal moves')

    n_moves = len(move_list)
    boards = np.array(list(move_list.values())).reshape(n_moves, 9)

    # Columns 0-8: board, column 9: model score, columns 10-11: (row, col).
    tmp = np.zeros((n_moves, 12))
    tmp[:, 0:9] = boards
    # Score all candidates in one predict call instead of one call per move;
    # the model takes (batch, 1, 9) and returns (batch, 1, 1).
    tmp[:, 9] = np.asarray(model.predict(boards.reshape(n_moves, 1, 9))).reshape(n_moves)
    tmp[:, 10:12] = list(move_list.keys())

    if not train:
        for row in tmp:
            print(row[9], (row[10], row[11]))

    # Explore with probability 1/5, and only while training.
    explore = bool(train) and random.randint(1, 5) == 1
    if explore:
        j = random.randrange(n_moves)
    elif turn == 1:
        j = np.argmax(tmp[:, 9])
    else:
        j = np.argmin(tmp[:, 9])

    return tmp[j,0:9], tmp[j,9], (tmp[j,10],tmp[j,11])

def simulate(batch_size, num_round):
    """Train the model by self-play and report per-round results.

    Each round plays ``batch_size`` games on the module-level ``game``
    object, choosing moves with move_sel. Every position that was actually
    played is labelled with the game's outcome: 1.0 if player 1 won, 0.0 if
    player -1 won, 0.5 for a draw. The model is then fitted on the round's
    positions for 3 epochs.

    Args:
        batch_size: number of self-play games per round.
        num_round: number of play-then-fit rounds.

    Returns:
        Integer array of shape (num_round + 1, 3). Row 0 is an all-zero
        placeholder; row r + 1 holds round r's counts as
        [player 1 wins, player -1 wins, draws].
    """
    # Leading zero row kept so the returned shape matches earlier results.
    result_total = [[0, 0, 0]]

    for i in range(num_round):
        # Plain lists avoid re-copying a growing array on every append.
        boards = []
        labels = []
        result = [0, 0, 0]

        for _ in range(batch_size):
            game_boards = []

            while win(game.board) == 'In Progress':
                board, _score, move = move_sel(move_gen(game.board, game.turn), game.turn)
                # Only positions that are really played become training
                # rows; copy because move_sel returns a view of its buffer.
                game_boards.append(board.copy())
                game.move(int(move[0]), int(move[1]))

            #update score based on result
            if win(game.board) == 'Won':
                # The turn flips after the winning move, so turn == -1
                # means player 1 just won, which gives label 1.0.
                label = (1-game.turn)/2
                if game.turn == -1:
                    result[0] += 1
                else:
                    result[1] += 1
            else:
                label = 0.5
                result[2] += 1

            #add to training set
            boards.extend(game_boards)
            labels.extend([label] * len(game_boards))

            game.reset()

        print('Round', i)
        # Nothing to fit when no moves were played (e.g. batch_size == 0).
        if boards:
            features = np.array(boards, dtype=float).reshape(-1, 1, 9)
            targets = np.array(labels, dtype=float).reshape(-1, 1, 1)
            model.fit(features, targets, epochs=3, verbose=0)
        print(result[0],'-',result[1],'-',result[2])
        result_total.append(result)

    return np.array(result_total)

In [0]:
#initialize game
# simulate() reads this module-level `game` object directly, so it must be
# created before the call below.
game = tic_tac_toe()

#run simulation and get round results
# 400 training rounds of 250 self-play games each; `result` receives the
# per-round outcome counts that simulate() returns.
result = simulate(250,400)

In [0]:
# 2. Save Keras Model or weights on google drive

# write the full model to a local file in the Colab working directory
model.save('model.h5')    
# create a Drive file entry titled 'model.h5' and upload the local file to it
model_file = drive.CreateFile({'title' : 'model.h5'})
model_file.SetContentFile('model.h5')
model_file.Upload()

# build a handle from the uploaded file's id; as the cell's last expression
# it is echoed in the cell output, which shows the id of the uploaded file
drive.CreateFile({'id': model_file.get('id')})

GoogleDriveFile({'id': '1l6uENsaAQCPuP4UajhkTy1Bc4dM7uiav'})

In [0]:
#save the weights
# write only the weights (not the full model) to a local file
model.save_weights('model_weights.h5')
# create a Drive file entry titled 'model_weights.h5' and upload the file
weights_file = drive.CreateFile({'title' : 'model_weights.h5'})
weights_file.SetContentFile('model_weights.h5')
weights_file.Upload()
# echo a handle carrying the uploaded file's id; the reload cell below
# needs such an id to fetch the weights again
drive.CreateFile({'id': weights_file.get('id')})

GoogleDriveFile({'id': '1iD_qFRHVYm63LDtP0dCUQzKxjCv0tYzl'})

In [0]:
# 3. reload weights from google drive into the model

# use (get shareable link) to get file id
# NOTE: the id is hard-coded; replace it with the id of the weights file you
# want to restore.
last_weight_file = drive.CreateFile({'id': '1iD_qFRHVYm63LDtP0dCUQzKxjCv0tYzl'}) 
# download the Drive file's content to a local file, then load it into the
# existing `model` (which must already be built with the matching layers)
last_weight_file.GetContentFile('last_weights.mat')
model.load_weights('last_weights.mat')

In [37]:
# Manual probe: play one move on the shared `game`, then show how the model
# scores each reply. Uncomment the two lines below to start from a fresh
# board; otherwise the state left by earlier runs of this cell is reused.
#game = tic_tac_toe()
#game.reset()
# Re-running this cell once (2,2) is taken just prints 'Invalid move'.
game.move(2,2)
print(win(game.board))
print(game.board)
print(game.turn)
# train=False prints every candidate's score and coordinates and disables
# random exploration, so the returned tuple is the greedy choice.
print(move_sel(move_gen(game.board, game.turn), game.turn, train=False))


In Progress
[[ 0  0  0]
 [ 0  1  0]
 [-1  0  1]]
-1
0.5346093773841858 (0.0, 0.0)
0.9258827567100525 (0.0, 1.0)
0.9813613295555115 (0.0, 2.0)
0.770250141620636 (1.0, 0.0)
0.9399659037590027 (1.0, 2.0)
0.8763774633407593 (2.0, 1.0)
(array([-1.,  0.,  0.,  0.,  1.,  0., -1.,  0.,  1.]), 0.5346093773841858, (0.0, 0.0))
