<a href="https://colab.research.google.com/github/Umesh94kr/TIC_TAC_TOE/blob/main/TIC_TAC_TOE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import keras
import random
from keras.models import Sequential
from keras.layers import Dense
import numpy as np

In [2]:
BLANK = ' '
AI_PLAYER = 'X'
HUMAN_PLAYER = 'O'
TRAINING_EPOCHS = 80
TRAINING_EPSILON = 0.4
REWARD_WIN = 10
REWARD_LOSE = -100
REWARD_TIE = -10

In [3]:
class Player:

  @staticmethod
  def show_board(board):
    print('|'.join(board[0:3]))
    print('|'.join(board[3:6]))
    print('|'.join(board[6:9]))

In [4]:
class HumanPlayer(Player):
  def reward(self, value, board):
    pass
  def make_move(self, board):
      while True:
          try:
              self.show_board(board)
              move = input('Your next move (cell index 1-9): ')
              move = int(move) - 1  # Convert move to integer and adjust for 0-based indexing

              if move not in range(9):
                  raise ValueError

          except ValueError:
              print('Invalid move; try again.')
          else:
              return move






In [5]:
class AIPlayer(Player):
  def __init__(self, epsilon=0.4, alpha=0.3, gamma=0.9):
    # this is epsilon parameter of the model: probability of exploration
    self.EPSILON = epsilon
    # learning rate
    self.ALPHA = alpha
    # discount parameter for future reward
    self.GAMMA = gamma

    # previous move during the game
    self.q = Sequential()
    self.q.add(Dense(32, input_dim=36, activation='relu'))
    self.q.add(Dense(1, activation='relu'))
    self.q.compile(optimizer='adam', loss='mean_squared_error')
    self.move = None
    # board in previous iteration
    self.board = [' '] * 9


  def available_moves(self, board):
    return [i for i in range(9) if board[i] == ' ']

  def encode_input(self, board, action):
      # we represented the (s, a) pair with a one-dimensional array (one-hot representation)
    vector_representation = []

      # one-hot encoding for 3 states
      # [1, 0, 0] - it means the given cell has X ticker
      # [0, 1, 0] - it means the given cell has 0 ticker
      # [0, 0, 1] - it means the given cell has ' ' ticker so empty
      #   every single cell on the bpard (9 cells) has 3 values because of this representation
      # so there are 9X3=27 values
    for cell in board:
      for ticker in ['X', 'O', ' ']:
        if cell == ticker:
          vector_representation.append(1)
        else:
          vector_representation.append(0)

      # one-hot encoding of the action - array with size 9
      # [1, 0, 0, 0, 0, 0, 0, 0, 0] - it means putting X to the first cell
      # [0, 1, 0, 0, 0, 0, 0, 0, 0] - it means putting X to the second cell
    for move in range(9):
      if action == move:
        vector_representation.append(1)
      else:
        vector_representation.append(0)

    return np.array([vector_representation])




  def make_move(self, board):
      self.board = tuple(board)
      actions = self.available_moves(board)

    # Actions with epsilon probability
      if random.random() < self.EPSILON:
          self.move = random.choice(actions)
          return self.move

    # Take the actions with the highest q values
      q_values = [self.get_q(self.board, a) for a in actions]
      max_q_value = max(q_values)

    # If multiple best actions, choose one at random
      best_actions = [i for i in range(len(actions)) if q_values[i] == max_q_value]
      best_move = actions[random.choice(best_actions)]

      self.move = best_move
      return self.move

  def get_q(self, state, action):
    return self.q.predict([self.encode_input(state, action)], batch_size=1)

  def reward(self, reward, board):
    if self.move:
      prev_q = self.get_q(self.board, self.move)
      max_q_new = max([self.get_q(tuple(board) ,a) for a in self.available_moves(self.board)])

      self.q.fit(self.encode_input(self.board, self.move),
                 prev_q + self.ALPHA * ((reward + self.GAMMA * max_q_new) - prev_q),
                 epochs=3, verbose=0)

    self.move = None
    self.board = None




In [6]:
class TicTacToe:


  def __init__(self, player1, player2):
    self.player1 = player1
    self.player2 = player2
    self.first_player_turn = random.choice([True, False])
    self.board = [' '] * 9

  def play(self):
      while True:
          if self.first_player_turn:
              player = self.player1
              other_player = self.player2
              player_tickers = (AI_PLAYER, HUMAN_PLAYER)
          else:
              player = self.player2
              other_player = self.player1
              player_tickers = (HUMAN_PLAYER, AI_PLAYER)

            # Check the state of the game (win, lose, or draw)
          game_over, winner = self.is_game_over(player_tickers)

          if game_over:
              if winner == player_tickers[0]:
                  player.show_board(self.board[:])
                  print('\n%s won!' % player.__class__.__name__)
                  player.reward(REWARD_WIN, self.board[:])
                  other_player.reward(REWARD_LOSE, self.board[:])
              elif winner == player_tickers[1]:
                  player.show_board(self.board[:])
                  print('\n%s won!' % player.__class__.__name__)
                  player.reward(REWARD_WIN, self.board[:])
                  other_player.reward(REWARD_LOSE, self.board[:])
              else:
                  player.show_board(self.board[:])
                  print('Tie!')
                  player.reward(REWARD_TIE, self.board[:])
                  other_player.reward(REWARD_TIE, self.board[:])

              break

            # Next player's turn in the next iteration
          self.first_player_turn = not self.first_player_turn

            # Actual player's best move
          move = player.make_move(self.board[:])  # Use a copy of the board
          self.board[move] = player_tickers[0]


  def is_game_over(self, player_tickers):
    for player_ticker in player_tickers:
      for i in range(3):
        if self.board[3 * i + 0] == player_ticker and\
                self.board[3 * i + 1] == player_ticker and\
                self.board[3 * i + 2] == player_ticker:
            return True, player_ticker

      for j in range(3):
        if self.board[i + 0] == player_ticker and\
                self.board[i + 3] == player_ticker and\
                self.board[i + 6] == player_ticker:
            return True, player_ticker

      if self.board[0] == player_ticker and self.board[4] == player_ticker and self.board[8] == player_ticker:
        return True, player_ticker

      if self.board[2] == player_ticker and self.board[4] == player_ticker and self.board[6] == player_ticker:
        return True, player_ticker

    if self.board.count(' ') == 0:
      return True, None
    else:
      return False, None




In [7]:
if __name__ == '__main__':
  ai_player_1 = AIPlayer()
  ai_player_2 = AIPlayer()

  print('Training')
  ai_player_1.EPSILON = TRAINING_EPSILON
  ai_player_1.EPSILON = TRAINING_EPSILON

  for i in range(TRAINING_EPOCHS):
    print("Training iterations %s" % i)
    game = TicTacToe(ai_player_1, ai_player_2)
    game.play()

  print('\n Training is done')

  ai_player_1.EPSILON = 0
  human_player = HumanPlayer()
  game = TicTacToe(ai_player_1, human_player)
  game.play()


Training
Training iterations 0
X|O|X
O|O|X
X|O|O
Tie!
Training iterations 1
O| | 
O|O|X
X|X|O

AIPlayer won!
Training iterations 2
X|O|X
O|O|O
X|X| 

AIPlayer won!
Training iterations 3
X|O|X
O|O|X
X|X|O
Tie!
Training iterations 4
X|X|O
O|X|X
O|X|O
Tie!
Training iterations 5
X|O|X
O|X|X
O|X|O
Tie!
Training iterations 6
X|O|O
X|X|O
X|O|O

AIPlayer won!
Training iterations 7
X|O|O
X|O|X
X|O|O
Tie!
Training iterations 8
X|O|O
O|X|X
X|X|O
Tie!
Training iterations 9
X|O|O
X|O|X
X|O|O
Tie!
Training iterations 10
X|O|O
O|O|X
X|O|X
Tie!
Training iterations 11
O| | 
O|O|X
X|X|O

AIPlayer won!
Training iterations 12
O|X|X
X|O|O
X|O|O

AIPlayer won!
Training iterations 13
X|X|O
O|O|X
X|X|O
Tie!
Training iterations 14
X|O|X
X|O|O
X|O|O
Tie!
Training iterations 15
O|O|X
O|O|X
X|X|O

AIPlayer won!
Training iterations 16
X|X|O
O|O|X
X|O|O
Tie!
Training iterations 17
X|X|O
O|O|X
X|O|O
Tie!
Training iterations 18
X|X|X
 |O| 
X|O|O

AIPlayer won!
Training iterations 19
X|O|X
O|X|O
O|X|O
Tie!
Training it