In [1]:
# NumPy backs the board arrays; echo its version so the run environment is recorded.
import numpy as np
np.__version__

'1.23.5'

In [2]:
class TicTacToe:
  """Rules of tic-tac-toe on a NumPy board.

  A board is a ``(row_count, col_count)`` array in which 0 marks an empty
  cell and any non-zero entry is the mark of the player who moved there.
  Actions are flat cell indices in row-major order, i.e.
  ``action == row * col_count + column``.
  """

  def __init__(self):
    self.row_count = 3
    self.col_count = 3
    # Number of distinct actions: one per board cell.
    self.action_size = self.row_count * self.col_count

  def get_initial_state(self):
    """Return a fresh, empty (all-zero) board."""
    return np.zeros((self.row_count, self.col_count))

  def get_next_state(self, state, action, player):
    """Place ``player``'s mark on the cell addressed by ``action``.

    NOTE: ``state`` is modified in place and the same array is returned.
    Pass ``state.copy()`` if the previous position must be preserved.
    No legality check is done here; callers are expected to consult
    ``get_valid_moves`` first.
    """
    row = action // self.col_count
    column = action % self.col_count
    state[row, column] = player
    return state

  def get_valid_moves(self, state):
    """Return a flat ``uint8`` mask of length ``action_size``; 1 = empty cell."""
    return (state.reshape(-1) == 0).astype(np.uint8)

  def check_win(self, state, action):
    """Return True if the mark sitting on ``action`` is part of a full line.

    Args:
      state: board array after the move has been applied.
      action: flat index of the cell that was just played, or ``None`` when
        no move has been made yet.

    Returns:
      ``True`` when the row or column through the cell, or either diagonal,
      consists solely of that cell's mark; ``False`` otherwise, including
      when ``action`` is ``None`` or the addressed cell is empty.
    """
    if action is None:
      return False

    row = action // self.col_count
    column = action % self.col_count
    player = state[row, column]

    # An empty cell belongs to nobody. Without this guard ``player`` is 0 and
    # any line whose marks sum to 0 (e.g. an empty row) would satisfy
    # ``sum == 0 * n`` and be reported as a win.
    if player == 0:
      return False

    return bool(
        np.sum(state[row, :]) == player * self.col_count
        or np.sum(state[:, column]) == player * self.row_count
        or np.sum(np.diag(state)) == player * self.row_count
        # Flipping the rows turns the anti-diagonal into the main diagonal.
        or np.sum(np.diag(np.flip(state, axis=0))) == player * self.row_count
    )

  def get_value_and_terminated(self, state, action):
    """Evaluate the position reached by playing ``action``.

    Returns:
      ``(1, True)`` if the move won the game, ``(0, True)`` if the board is
      full without a win (draw), and ``(0, False)`` if play continues.
    """
    if self.check_win(state, action):
      return 1, True
    if np.sum(self.get_valid_moves(state)) == 0:
      return 0, True
    return 0, False

  def get_opponent(self, player):
    """Return the other player's mark (the negation of ``player``)."""
    return -player


In [3]:
# Single game-rules instance used by the interactive loop below.
tictactoe = TicTacToe()

In [4]:
# Interactive two-player game on the console: player 1 moves first and the
# players alternate until one of them wins or the board is full.
player = 1
state = tictactoe.get_initial_state()

while True:
  print(state)

  valid_moves = tictactoe.get_valid_moves(state)
  print("Valid moves : ", [i for i in range(tictactoe.action_size) if valid_moves[i] == 1])

  # Non-numeric input would otherwise abort the whole game with a ValueError.
  try:
    action = int(input(f"{player} : "))
  except ValueError:
    print("action not valid")
    continue

  # Range-check before indexing: an index >= action_size raises IndexError,
  # and a negative one silently wraps around to a cell counted from the end.
  if not 0 <= action < tictactoe.action_size or valid_moves[action] == 0:
    print("action not valid")
    continue

  state = tictactoe.get_next_state(state, action, player)

  value, is_terminal = tictactoe.get_value_and_terminated(state, action)

  if is_terminal:
    print(state)
    if value == 1:
      print(player, "won")
    else:
      print("draw")
    break

  # Hand the turn to the other player.
  player = tictactoe.get_opponent(player)



[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
Valid moves :  [0, 1, 2, 3, 4, 5, 6, 7, 8]
1 : 0
[[1. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
Valid moves :  [1, 2, 3, 4, 5, 6, 7, 8]
-1 : 4
[[ 1.  0.  0.]
 [ 0. -1.  0.]
 [ 0.  0.  0.]]
Valid moves :  [1, 2, 3, 5, 6, 7, 8]
1 : 1
[[ 1.  1.  0.]
 [ 0. -1.  0.]
 [ 0.  0.  0.]]
Valid moves :  [2, 3, 5, 6, 7, 8]
-1 : 2
[[ 1.  1. -1.]
 [ 0. -1.  0.]
 [ 0.  0.  0.]]
Valid moves :  [3, 5, 6, 7, 8]
1 : 7
[[ 1.  1. -1.]
 [ 0. -1.  0.]
 [ 0.  1.  0.]]
Valid moves :  [3, 5, 6, 8]
-1 : 7
action not valid
[[ 1.  1. -1.]
 [ 0. -1.  0.]
 [ 0.  1.  0.]]
Valid moves :  [3, 5, 6, 8]
-1 : 6
[[ 1.  1. -1.]
 [ 0. -1.  0.]
 [-1.  1.  0.]]
-1 won
