In [1]:
import numpy as np
import random
import json

In [11]:
class Environment():  # Tic-tac-toe board
    """Tic-tac-toe game environment backed by a 3 x 3 numpy array.

    Cell encoding:
        ' ' ~ 0
        'X' ~ 1
        'O' ~ -1
    """

    def __init__(self):
        """Set up an empty 3 x 3 board with X to move first."""
        # Board contents, using the cell encoding from the class docstring.
        self.state = np.zeros((3, 3))
        # Mark of the player whose turn it is: 1 for X, -1 for O.
        self.turn = 1

    def _board(self, state=None):
        """Return ``state`` as an array, or the current board if it is None."""
        # ``is None`` rather than truthiness: ``not ndarray`` raises ValueError
        # for any array with more than one element.
        return self.state if state is None else np.asarray(state)

    def reset(self):
        """Reset to an empty board with X to move, as after construction."""
        self.state = np.zeros((3, 3))
        self.turn = 1

    def get_state(self):
        """Return the current state (the internal array itself, not a copy)."""
        return self.state

    def set_state(self, state):
        """Set the current state.

        ``state`` is stored via ``np.asarray``, so an ndarray is kept as the
        same object while nested lists are converted to an array.
        """
        self.state = np.asarray(state)

    def display(self, state=None):
        """Print the given board (default: the current one) as a grid.

            1   2   3
          *---*---*---*
        1 | X | O | X |
          *---*---*---*
        2 |   | X | O |
          *---*---*---*
        3 | X |   | O |
          *---*---*---*
        """
        board = self._board(state)
        to_txt = {0: ' ', 1: 'X', -1: 'O'}
        separator = '  *---*---*---*'
        print('    1   2   3')
        print(separator)
        for row in range(3):
            cells = ''.join(f'{to_txt[board[row, col]]} | ' for col in range(3))
            print(f'{row+1} | {cells}')
            print(separator)

    def get_actions(self, state=None):
        """Return a list of actions that can be taken in the state, i.e.
        indices of empty spaces (row, col), in row-major order.
        """
        board = self._board(state)
        return [
            (row, col)
            for row in range(3)
            for col in range(3)
            if board[row, col] == 0
        ]

    def get_rewards(self, state=None, turn=None):
        """Return the reward for the given player at the given state.

        Args:
            state: board to evaluate; defaults to the current board.
            turn: mark of the player to check (1 for X, -1 for O); defaults
                to the player whose turn it currently is.

        Returns:
            ``turn`` (1 or -1) if that player owns a complete row, column or
            diagonal, otherwise 0. Only the given player's lines are checked.
        """
        board = self._board(state)
        if turn is None:
            turn = self.turn
        # Three identical marks in a line sum to 3 for X and -3 for O.
        goal = 3 * turn
        line_sums = np.concatenate((
            board.sum(axis=1),  # rows
            board.sum(axis=0),  # columns
            [np.trace(board), np.trace(np.fliplr(board))],  # both diagonals
        ))
        return turn if np.any(line_sums == goal) else 0

    def is_full(self):
        """Return True if the board has no empty space left."""
        return bool(np.all(self.state != 0))

    def step(self, action):
        """Update our state with an action and return:

        next state as numpy.array (the internal board, not a copy)
        reward (see ``get_rewards``)
        finished as boolean

        Only accepts valid actions on empty board spaces.

        Raises:
            ValueError: if the targeted space is already occupied.
        """
        row, col = action[0], action[1]
        if self.state[row, col] != 0:
            raise ValueError(f'Space ({row}, {col}) is already occupied')
        self.state[row, col] = self.turn
        # Score the move for the player who just made it, before the turn
        # is handed over.
        reward = self.get_rewards(self.state, self.turn)
        finished = bool(reward) or self.is_full()
        self.turn *= -1
        return self.state, reward, finished

In [12]:
# Create a game environment: empty board, X to move.
env = Environment()

In [13]:
# List the open spaces as (row, col) pairs; on the empty board that is all nine.
env.get_actions()

[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]

In [14]:
# Overwrite the board with a position where every cell holds 1 (all X).
env.set_state(np.ones((3, 3)))

In [15]:
# Inspect the stored board array.
env.get_state()

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [16]:
# Print the current board as a text grid.
env.display()

    1   2   3
  *---*---*---*
1 | X | X | X | 
  *---*---*---*
2 | X | X | X | 
  *---*---*---*
3 | X | X | X | 
  *---*---*---*
