# Playground

This is a space for experimentation.

In [21]:
import os
import json
import random
import itertools
import numpy as np
from player import Player
from typing import Callable
from datetime import datetime
from utility import int2board
from utility import board2int
from connect4 import WorldCon4
from tic_tac_toe import WorldTTT
from utility import board_to_str
from utility import str_to_int_2tuple
from strategies import StrategyMiniMax
from strategies import StrategyRandomTTT
from strategies import StrategyManualTTT
from output_handler import OutputHandler
from strategies import StrategyManualCon4
from strategies import StrategyDefaultTTT
from strategies import StrategyRandomCon4
from strategies import StrategyDefaultCon4
from strategies import StrategyTabQLearning

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Tic Tac Toe

In [2]:
world_ttt = WorldTTT(
    player1sym='X', player2sym='O',
    output_handler=OutputHandler(
        logs_folder='./__logs',
        csv_folder='./__run_results'
    )
)

In [3]:
board_test_ttt = np.array([
    [-1, 1, 1],
    [-1, 0, 0],
    [1, 0, -1]
])
board_test_ttt_int = board2int(board_test_ttt)
is_player1 = True
is_my_turn_next = True
action = ((2, 2), 1)
print(board_to_str(board_test_ttt, 'X', 'O'))

0 # X X
1 # O O
2 X O #
  0 1 2



### Test World

In [52]:
# # Test function get_actions(...)
# world_ttt.get_actions(is_player1)
# # [OK]

In [32]:
# # Test function is_valid(...)
# world_ttt.is_valid(board_test_ttt, is_player1)
# # [OK]

In [59]:
# # Test function is_legal(...)
# world_ttt.is_legal(board_test_ttt, ((0, 0), 1))
# # [OK]

In [77]:
# # Test get_next_state(...)
# next_state = world_ttt.get_next_state(board_test_ttt, action)
# print("Next State =", next_state)
# if not next_state == -1:
#     print("=", int2board(next_state, world_ttt.board.shape))
# # [OK]

In [88]:
# # Test state_eval(...)
# world_ttt.state_eval(board_test_ttt, is_my_turn_next)
# # [OK]

In [41]:
# # Test get_next_states(...)
# for s_num in world_ttt.get_next_states(
#     board_test_ttt, 
#     is_player1
# ): print(int2board(s_num[0], world_ttt.board.shape), '\n')
# # [OK]

In [1]:
# # Test get_reward(...)
# world_ttt.get_reward(board_test_ttt, action)
# # [OK]

In [96]:
# # Test is_winner(...)
# world_ttt.is_winner(board_test_ttt)
# # [OK]

In [78]:
# # Testing get_start_states(...)
# world_ttt.get_start_states(is_player1=True)
# # [OK]

In [83]:
# # Testing is_game_over(...).
# world_ttt.is_game_over(0)
# # [OK]

### Test Strategies

#### Q Learning Strategy

In [49]:
strategy_tabq = StrategyTabQLearning(
    get_reward=world_ttt.get_reward,
    is_game_over=world_ttt.is_game_over,
    get_next_states=world_ttt.get_next_states,
    get_next_state=world_ttt.get_next_state,
    get_actions=world_ttt.get_actions,
    get_start_states=world_ttt.get_start_states,
    board_shape=world_ttt.board.shape
) # Player 1 tabular Q learning strategy TTT.

In [146]:
# # Testing function get_move(...)
# strategy_tabq.get_move(
#     board=board_test_ttt,
#     is_player1=is_player1
# )
# # [OK]

In [50]:
# Testing function learn(...)

strategy_tabq.learn(
    max_episodes=1e20,
    discount_factor=0.99,
    learning_rate=0.9,
    max_seconds=1*60,
    is_player1=True
)

strategy_tabq.learn(
    max_episodes=1e20,
    discount_factor=0.99,
    learning_rate=0.9,
    max_seconds=1*60,
    is_player1=False
)

# [OK]

Learning (starting player = 1) ...
Max time reached (60 s).
All done. Episodes = 207288.
Learning (starting player = 2) ...
Max time reached (60 s).
All done. Episodes = 179518.


{'f_out': {'num_moves': 4517,
  'num_episodes': 179518,
  'stopping_condition': 'time'},
 'milliseconds': 60000.2724609375}

In [14]:
# strategy_tabq.save_qtab(folder='./__q_tables', filename='test')

In [51]:
print("known player 1 states = ", len(strategy_tabq.q_tab[1]))
print("known player 2 states = ", len(strategy_tabq.q_tab[2]))
print("known states = ", len(strategy_tabq.q_tab[1]) + len(strategy_tabq.q_tab[2]))

known player 1 states =  2422
known player 2 states =  2095
known states =  4517


In [52]:
board_test_ttt = np.array([
    [1, 1, 0],
    [-1, 0, -1],
    [-1, 0, 1]
])
board_test_ttt_int = board2int(board_test_ttt)
board_test_ttt

array([[ 1,  1,  0],
       [-1,  0, -1],
       [-1,  0,  1]])

In [53]:
world_ttt.get_reward(board=board_test_ttt, action=((2, 0), 1))

5.0

In [54]:
world_ttt.state_eval(board=board_test_ttt, is_my_turn_next=True)

0.0

In [55]:
world_ttt.get_reward(board=board_test_ttt, action=((2, 0), 1))

5.0

In [56]:
strategy_tabq.q_tab[1][board_test_ttt_int]

{((1, 2), 1): 4.85, ((2, 0), 1): 19.750999999999998, ((1, 0), 1): 4.85}

In [48]:
strategy_tabq.get_move(board=board_test_ttt, is_player1=True)

{'f_out': (2, 0), 'milliseconds': 0.0}

### Test Play

## Connect 4

In [8]:
# Create world.
world_con4 = WorldCon4(
    player1sym='R', player2sym='Y',
    output_handler=OutputHandler(
        logs_folder='./__logs',
        csv_folder='./__run_results'
    )
)

In [22]:
board_test_con4 = np.array([
    [-1, -1, -1, -1, -1, -1, -1],
    [-1, -1, -1, -1, -1, -1, -1],
    [-1, -1, -1, -1, -1, -1, -1],
    [-1, -1, -1, -1, -1, -1, -1],
    [-1, -1, -1, -1, -1, -1, -1],
    [-1, -1, -1, -1, -1, -1, -1],
])
board_test_con4_int = board2int(board_test_con4)
print(board_to_str(
    board_test_con4, 
    world_con4.player_symbols[1], 
    world_con4.player_symbols[2]
))
print(f"Next Turn = {world_con4.player_symbols[1]}")

0 # # # # # # #
1 # # # # # # #
2 # # # # # # #
3 # # # # # # #
4 # # # # # # #
5 # # # # # # #
  0 1 2 3 4 5 6

Next Turn = R


### Test World

In [44]:
# # Test can_connect4(...)
# cc4_0, cc4_1 = world_con4.can_connect4(board_test_con4)
# print("Can connect 4 [1] =", cc4_1)
# print("Can connect 4 [0] =", cc4_0)
# # [OK]

In [306]:
# # Test get_actions(...)
# print("Player 1 Actions =", world_con4.get_actions(is_player1=True))
# print("Player 2 Actions =", world_con4.get_actions(is_player1=False))
# # [OK]

In [46]:
# # Test is_legal(...)
# world_con4.is_legal(board_test_con4, action=(6, 2))
# # [OK]

In [45]:
# # Test is_valid(...)
# world_con4.is_valid(board_test_con4, is_player1=True)
# # [OK]

In [51]:
# # Test is_winner(...)
# world_con4.is_winner(board_test_con4)
# # [OK]

In [61]:
# # Test get_next_state(...)
# world_con4.get_next_state(board_test_con4, (5, 1))
# # [OK]

18737243841865286406

### Test Strategies

#### Default Strategy

In [5]:
strategy_default_con4 = StrategyDefaultCon4(can_connect4=world_con4.can_connect4)

In [39]:
# # Test get_move(...)
# strategy_default_con4.get_move(board=board_test_con4)
# # [OK]

#### Random Strategy

In [37]:
strategy_random_con4 = StrategyRandomCon4()

In [76]:
# # Test get_move(...)
# strategy_random_con4.get_move(board=board_test_con4)
# # [OK]

{'f_out': 1, 'milliseconds': 0.998779296875}

#### Minimax Strategy

In [129]:
strategy_minimax = StrategyMiniMax(
    is_game_over=world_con4.is_game_over,
    state_eval=world_con4.state_eval,
    get_next_states=world_con4.get_next_states,
    alpha_beta=True,
    depth=3
)

In [130]:
strategy_minimax.minimax(
    board=board_test_con4,
    is_max_player=True,
    actions=[],
    is_player1=True,
    depth=5,
    alpha_beta=[float('-inf'), float('inf')]
)

{'val': 15.0, 'actions': [(6, 1)]}

In [91]:
# # Test get_move(...)
# strategy_minimax.get_move(board_test_con4, is_player1=True)
# # [OK]

{'f_out': 6, 'seconds': 0.009275436401367188}

#### Q Learning Strategy

In [23]:
strategy_manual = StrategyManualCon4()

In [14]:
strategy_tabq_con4 = StrategyTabQLearning(
    get_reward=world_con4.get_reward,
    is_game_over=world_con4.is_game_over,
    get_next_state=world_con4.get_next_state,
    get_next_states=world_con4.get_next_states,
    get_actions=world_con4.get_actions,
    get_start_states=world_con4.get_start_states,
    board_shape=world_con4.board.shape,
)
strategy_tabq_con4.load_qtab("./q_tables/07042024151222con40.9alpha0.99gamma87319episodes30mins.json")

Loaded Q table from ./q_tables/07042024151222con40.9alpha0.99gamma87319episodes30mins.json.


In [18]:
# Test get_move(...)
strategy_tabq_con4.get_move(board_test_con4, is_player1=True)
# [OK]

{'f_out': 6, 'milliseconds': 0.0}

In [69]:
strategy_tabq_con4.learn(
    max_episodes=1e20,
    discount_factor=0.99,
    learning_rate=0.9,
    is_player1=True,
    max_seconds=3*60,
    start_board=board_test_con4_int
)

strategy_tabq_con4.learn(
    max_episodes=1e20,
    discount_factor=0.99,
    learning_rate=0.9,
    is_player1=False,
    max_seconds=3*60
)

Learning (starting player = 1) ...
Max time reached (180 s).
All done. Episodes = 28313.
Learning (starting player = 2) ...
Max time reached (180 s).
All done. Episodes = 25752.


{'f_out': {'num_moves': 178057,
  'num_episodes': 25752,
  'stopping_condition': 'time'},
 'milliseconds': 180005.62060546875}

In [70]:
q_tab = strategy_tabq_con4.q_tab
q_tab

{1: {152031671795894159: {(0, 1): -9.0, (1, 1): -17.919},
  152313146772604911: {(5, 1): -9.0, (1, 1): -9.0, (6, 1): 13.5},
  18599057220486354927: {(3, 1): -8.0035065,
   (1, 1): -9.0,
   (0, 1): -9.0,
   (6, 1): 13.5},
  19175517972789909503: {(4, 1): -9.0,
   (2, 1): 13.5,
   (5, 1): 13.5,
   (1, 1): -17.919,
   (3, 1): 13.5},
  19463748348941688831: {(5, 1): 13.5, (0, 1): -9.0},
  0: {(2, 1): 9.0, (6, 1): 9.0, (5, 1): 9.0},
  70368744177684: {(4, 1): 9.0, (5, 1): 9.0, (3, 1): 9.0},
  2322168557863446: {(6, 1): 9.0, (4, 1): 9.0, (5, 1): 9.0},
  2326566604376599: {(2, 1): 17.919},
  1155248071211485783: {(2, 1): 9.0, (6, 1): 9.0, (5, 1): 9.0},
  148729200660921453175: {(6, 1): 17.919, (4, 1): 9.0, (0, 1): 9.0},
  148729763615169841911: {(3, 1): 17.919, (0, 1): 9.0, (1, 1): 9.0},
  148729798799541947135: {(6, 1): 17.919,
   (1, 1): 17.919,
   (5, 1): 4.5,
   (3, 1): 4.5,
   (0, 1): 9.0},
  157953170836667255551: {(4, 1): 17.919,
   (2, 1): 17.919,
   (0, 1): 9.0,
   (3, 1): 4.5},
  15

In [15]:
for player_state in q_tab.values():
    state_actions = player_state.values()
    num_moves += len(state_actions)

dict_values([{(2, 1): -193.65}, {(0, 1): -193.65}, {(0, 1): -193.65}, {(3, 1): -193.65, (5, 1): -193.65}, {(5, 1): -193.65}, {(4, 1): -58.64999999999999, (5, 1): -193.65}, {(6, 1): -148.65}, {(2, 1): -148.65}, {(5, 1): -148.65}, {(3, 1): -147.31215}, {(1, 1): -148.65}, {(4, 1): -148.65}, {(2, 1): -193.65}, {(0, 1): -193.65, (6, 1): -193.65}, {(5, 1): -193.65, (3, 1): -58.64999999999999}, {(3, 1): -58.64999999999999}, {(4, 1): -58.64999999999999}, {(5, 1): -148.65}, {(2, 1): -148.65}, {(4, 1): -148.65}, {(6, 1): -148.65, (4, 1): -148.65}, {(0, 1): -148.65}, {(0, 1): -148.65}, {(5, 1): -148.65}, {(2, 1): -148.65}, {(1, 1): -148.65}, {(0, 1): -148.65}, {(3, 1): -58.64999999999999}, {(3, 1): -58.64999999999999, (0, 1): -193.65}, {(0, 1): -193.65}, {(6, 1): -193.65}, {(3, 1): -58.64999999999999}, {(5, 1): -193.65}, {(4, 1): -193.65}, {(4, 1): -193.65}, {(1, 1): -193.65}, {(3, 1): -49.51499999999999}])


In [276]:
# strategy_tabq_con4.learn(
#     max_episodes=1e3,
#     discount_factor=0.99, # gamma
#     learning_rate=0.9, # learning rate
#     is_player1=True,
#     start_board=board_test_con4_int
# )# # Testing function learn(...)



# strategy_tabq_con4.learn(
#     max_episodes=1e3,
#     discount_factor=0.99, # gamma
#     learning_rate=0.9, # learning rate
#     is_player1=False
# )

# # [OK]

In [278]:
# # Check values associated with a certain state.
# strategy_tabq_con4.q_tab[1][
#     board_test_con4_int
# ]
# strategy_tabq_con4.get_move(
#     board_test_con4, is_player1=is_player1
# )

In [281]:
# # Test save_qtab(...)
# strategy_tabq_con4.save_qtab(
#     name="test", 
#     folder="__q_tables"
# )
# # [OK]

In [282]:
# # Test load_qtab(...)
# strategy_tabq_con4.load_qtab(
#     src="__q_tables/test.json", 
# )
# # [OK]