# Playground

This is a space for experimentation.

In [2]:
import os
import json
import random
import itertools
import numpy as np
from player import Player
from datetime import datetime
from utility import int2board
from utility import board2int
from connect4 import WorldCon4
from tic_tac_toe import WorldTTT
from utility import board_to_str
from utility import str_to_int_2tuple
from strategies import StrategyMiniMax
from strategies import StrategyRandomTTT
from strategies import StrategyManualTTT
from output_handler import OutputHandler
from strategies import StrategyDefaultTTT
from strategies import StrategyDefaultCon4
from strategies import StrategyTabQLearning

%load_ext autoreload
%autoreload 2

## Tic Tac Toe

In [4]:
world_ttt = WorldTTT(
    board_size=(3, 3), player1sym='X', player2sym='O',
    output_handler=OutputHandler(
        logs_folder="./__logs",
        csv_folder="./__run_metrics"
    )
)

In [5]:
board_test_ttt = np.array([
    [-1, 1, 1],
    [-1, 0, 0],
    [1, 0, -1]
])
board_test_ttt_int = board2int(board_test_ttt)
is_player1 = True
is_my_turn_next = True
action = ((2, 2), 1)
print(board_to_str(board_test_ttt, 'X', 'O'))

0 # X X
1 # O O
2 X O #
  0 1 2



### Test World

In [52]:
# # Test function get_actions(...)
# world_ttt.get_actions(is_player1)
# # [OK]

In [32]:
# # Test function is_valid(...)
# world_ttt.is_valid(board_test_ttt, is_player1)
# # [OK]

In [59]:
# # Test function is_legal(...)
# world_ttt.is_legal(board_test_ttt, ((0, 0), 1))
# # [OK]

In [77]:
# # Test get_next_state(...)
# next_state = world_ttt.get_next_state(board_test_ttt, action)
# print("Next State =", next_state)
# if not next_state == -1:
#     print("=", int2board(next_state, world_ttt.board.shape))
# # [OK]

In [88]:
# # Test state_eval(...)
# world_ttt.state_eval(board_test_ttt, is_my_turn_next)
# # [OK]

In [41]:
# # Test get_next_states(...)
# for s_num in world_ttt.get_next_states(
#     board_test_ttt, 
#     is_player1
# ): print(int2board(s_num[0], world_ttt.board.shape), '\n')
# # [OK]

In [1]:
# # Test get_reward(...)
# world_ttt.get_reward(board_test_ttt, action)
# # [OK]

In [96]:
# # Test is_winner(...)
# world_ttt.is_winner(board_test_ttt)
# # [OK]

In [78]:
# # Testing get_start_states(...)
# world_ttt.get_start_states(is_player1=True)
# # [OK]

In [83]:
# # Testing is_game_over(...).
# world_ttt.is_game_over(0)
# # [OK]

### Test Strategies

#### Q Learning Strategy

In [6]:
strategy_tabq = StrategyTabQLearning(
    get_reward=world_ttt.get_reward,
    is_game_over=world_ttt.is_game_over,
    get_next_states=world_ttt.get_next_states,
    get_next_state=world_ttt.get_next_state,
    get_actions=world_ttt.get_actions,
    get_start_states=world_ttt.get_start_states,
    board_shape=world_ttt.board.shape
) # Player 1 tabular Q learning strategy TTT.

In [146]:
# # Testing function get_move(...)
# strategy_tabq.get_move(
#     board=board_test_ttt,
#     is_player1=is_player1
# )
# # [OK]

In [95]:
# # Testing function learn(...)

# strategy_tabq.learn(
#     max_episodes=1e4,
#     discount_factor=0.99, # gamma
#     learning_rate=0.9, # learning rate
#     is_player1=True
# )

# strategy_tabq.learn(
#     max_episodes=1e4,
#     discount_factor=0.99, # gamma
#     learning_rate=0.9, # learning rate
#     is_player1=False
# )

# # [OK]

In [96]:
# print("known player 1 states = ", len(strategy_tabq.q_tab[1]))
# print("known player 2 states = ", len(strategy_tabq.q_tab[2]))
# print("known states = ", len(strategy_tabq.q_tab[1]) + len(strategy_tabq.q_tab[2]))

In [97]:
# board_test_ttt = np.array([
#     [1, 1, 0],
#     [0, 0, -1],
#     [1, 1, -1]
# ])
# board_test_ttt_int = board2int(board_test_ttt)
# is_player1 = False
# is_my_turn_next = True
# action = ((1, 0), 1)
# board_test_ttt

In [98]:
# strategy_tabq.q_tab[2][board_test_ttt_int]

In [101]:
# strategy_tabq.get_move(board=board_test_ttt, is_player1=is_player1)

### Test Play

## Connect 4

In [5]:
world_con4 = WorldCon4(
    board_size=(6, 7), 
    player1sym='R', player2sym='Y',
    output_handler=OutputHandler(
        logs_folder="./__logs",
        csv_folder="./__run_metrics"
    )
)

In [6]:
board_test_con4 = np.array([
    [-1, -1, -1, -1, -1, -1, -1],
    [-1, -1, -1, -1, -1, -1, -1],
    [-1, -1,  0, -1,  1, -1, -1],
    [-1, -1,  1, -1,  0,  1,  0],
    [ 1,  1,  0,  1,  1,  0,  0],
    [ 0,  1,  0,  1,  1,  0,  0],
])
board_test_con4_int = board2int(board_test_con4)
is_player1 = True
next_player = 1
player_num = 1 if is_player1 else 2
opponent_num = player_num % 2 + 1
is_my_turn_next = True
action = ((2, 2), 1)
print(board_to_str(
    board_test_con4, 
    world_con4.player_symbols[player_num], 
    world_con4.player_symbols[opponent_num]
))
print(f"Next Turn = {world_con4.player_symbols[next_player]}")

0 # # # # # # #
1 # # # # # # #
2 # # Y # R # #
3 # # R # Y R Y
4 R R Y R R Y Y
5 Y R Y R R Y Y
  0 1 2 3 4 5 6

Next Turn = R


### Test World

In [44]:
# # Test can_connect4(...)
# cc4_0, cc4_1 = world_con4.can_connect4(board_test_con4)
# print("Can connect 4 [1] =", cc4_1)
# print("Can connect 4 [0] =", cc4_0)
# # [OK]

In [306]:
# # Test get_actions(...)
# print("Player 1 Actions =", world_con4.get_actions(is_player1=True))
# print("Player 2 Actions =", world_con4.get_actions(is_player1=False))
# # [OK]

In [46]:
# # Test is_legal(...)
# world_con4.is_legal(board_test_con4, action=(6, 2))
# # [OK]

In [45]:
# # Test is_valid(...)
# world_con4.is_valid(board_test_con4, is_player1=True)
# # [OK]

In [51]:
# # Test is_winner(...)
# world_con4.is_winner(board_test_con4)
# # [OK]

In [61]:
# # Test get_next_state(...)
# world_con4.get_next_state(board_test_con4, (5, 1))
# # [OK]

18737243841865286406

### Test Strategies

#### Default Strategy

In [37]:
strategy_default_con4 = StrategyDefaultCon4(can_connect4=world_con4.can_connect4)

In [39]:
# # Test get_move(...)
# strategy_default_con4.get_move(board=board_test_con4)
# # [OK]

#### Minimax Strategy

In [69]:
strategy_minimax = StrategyMiniMax(
    is_game_over=world_con4.is_game_over,
    state_eval=world_con4.state_eval,
    get_next_states=world_con4.get_next_states,
    alpha_beta=True, depth=1
)

In [91]:
# # Test get_move(...)
# strategy_minimax.get_move(board_test_con4, is_player1=True)
# # [OK]

{'f_out': 6, 'seconds': 0.009275436401367188}

#### Q Learning Strategy

In [7]:
strategy_tabq_con4 = StrategyTabQLearning(
    get_reward=world_con4.get_reward,
    is_game_over=world_con4.is_game_over,
    get_next_state=world_con4.get_next_state,
    get_next_states=world_con4.get_next_states,
    get_actions=world_con4.get_actions,
    get_start_states=world_con4.get_start_states,
    board_shape=world_con4.board.shape,
)

In [277]:
# # Test get_move(...)
# strategy_tabq.get_move(board_test_con4, is_player1=True)
# # [OK]

In [8]:
strategy_tabq_con4.learn(
    max_episodes=10,
    discount_factor=0.99, # gamma
    learning_rate=0.9, # learning rate
    is_player1=True,
    start_board=board_test_con4_int
)

Learning (Starting Player = 1) ...
Max episodes reached.
All done. Episodes = 10.


In [10]:
q_tab = strategy_tabq_con4.q_tab
q_tab

{1: {38251516949160132607: {(2, 1): -193.65},
  18927717448981791768575: {(0, 1): -193.65},
  18932329135000220336127: {(0, 1): -193.65},
  19522624945376240074751: {(3, 1): -193.65, (5, 1): -193.65},
  19596411921673242542079: {(5, 1): -193.65},
  19614858665746956812287: {(4, 1): -58.64999999999999, (5, 1): -193.65},
  0: {(6, 1): -148.65},
  4398046511121: {(2, 1): -148.65},
  9011597301254193: {(5, 1): -148.65},
  9020393394276407: {(3, 1): -147.31215},
  9055577766369343: {(1, 1): -148.65},
  2314898586980588607: {(4, 1): -148.65},
  2317150386794536511: {(2, 1): -193.65},
  149891102976504503999: {(0, 1): -193.65, (6, 1): -193.65},
  149891384455776182015: {(5, 1): -193.65, (3, 1): -58.64999999999999},
  149892510355683033087: {(3, 1): -58.64999999999999},
  21976041907182383398911: {(4, 1): -58.64999999999999},
  9055577766627391: {(5, 1): -148.65},
  10181477673470335: {(2, 1): -148.65},
  147584134067383445887: {(4, 1): -148.65},
  147586385867197135743: {(6, 1): -148.65, (4, 

In [15]:
for player_state in q_tab.values():
    state_actions = player_state.values()
    num_moves += len(state_actions)

dict_values([{(2, 1): -193.65}, {(0, 1): -193.65}, {(0, 1): -193.65}, {(3, 1): -193.65, (5, 1): -193.65}, {(5, 1): -193.65}, {(4, 1): -58.64999999999999, (5, 1): -193.65}, {(6, 1): -148.65}, {(2, 1): -148.65}, {(5, 1): -148.65}, {(3, 1): -147.31215}, {(1, 1): -148.65}, {(4, 1): -148.65}, {(2, 1): -193.65}, {(0, 1): -193.65, (6, 1): -193.65}, {(5, 1): -193.65, (3, 1): -58.64999999999999}, {(3, 1): -58.64999999999999}, {(4, 1): -58.64999999999999}, {(5, 1): -148.65}, {(2, 1): -148.65}, {(4, 1): -148.65}, {(6, 1): -148.65, (4, 1): -148.65}, {(0, 1): -148.65}, {(0, 1): -148.65}, {(5, 1): -148.65}, {(2, 1): -148.65}, {(1, 1): -148.65}, {(0, 1): -148.65}, {(3, 1): -58.64999999999999}, {(3, 1): -58.64999999999999, (0, 1): -193.65}, {(0, 1): -193.65}, {(6, 1): -193.65}, {(3, 1): -58.64999999999999}, {(5, 1): -193.65}, {(4, 1): -193.65}, {(4, 1): -193.65}, {(1, 1): -193.65}, {(3, 1): -49.51499999999999}])


In [276]:
# # Testing function learn(...)

# strategy_tabq_con4.learn(
#     max_episodes=1e3,
#     discount_factor=0.99, # gamma
#     learning_rate=0.9, # learning rate
#     is_player1=True,
#     start_board=board_test_con4_int
# )

# strategy_tabq_con4.learn(
#     max_episodes=1e3,
#     discount_factor=0.99, # gamma
#     learning_rate=0.9, # learning rate
#     is_player1=False
# )

# # [OK]

In [278]:
# # Check values associated with a certain state.
# strategy_tabq_con4.q_tab[1][
#     board_test_con4_int
# ]
# strategy_tabq_con4.get_move(
#     board_test_con4, is_player1=is_player1
# )

In [281]:
# # Test save_qtab(...)
# strategy_tabq_con4.save_qtab(
#     name="test", 
#     folder="__q_tables"
# )
# # [OK]

In [282]:
# # Test load_qtab(...)
# strategy_tabq_con4.load_qtab(
#     src="__q_tables/test.json", 
# )
# # [OK]