# Playground

This is a space for experimentation.

In [2]:
import random
import itertools
import numpy as np
from player import Player
from minimax import MiniMax
from datetime import datetime
from utility import int2board
from utility import board2int
from connect4 import WorldCon4
from tic_tac_toe import WorldTTT
from utility import board_to_str
from strategies import StrategyTabQLearning

%load_ext autoreload
%autoreload 2

## Tic Tac Toe

### Check World Implementation

In [22]:
# Tic-tac-toe world on a 3x3 board; player 1 is drawn as 'X', player 2 as 'O'.
world_ttt = WorldTTT(
    name="ttt",
    board_size=(3, 3),
    player1sym='X',
    player2sym='O',
)

In [34]:
# Hand-built tic-tac-toe position shared by the manual checks further down.
# Cell encoding, as rendered by board_to_str in this cell's output:
#   1 -> 'X' (player 1), 0 -> 'O' (player 2), -1 -> '#' (empty).
board_test_ttt = np.array(
    [[-1, 1, 1],
     [-1, 0, 0],
     [1, 0, -1]]
)
board_test_ttt_int = board2int(board_test_ttt)

# Arguments reused by the (currently commented-out) world checks.
is_player1 = is_my_turn_next = True
action = ((2, 2), 1)  # presumably (cell, player id) -- confirm against WorldTTT

print(board_to_str(board_test_ttt, 'X', 'O'))

# X X
# O O
X O #



In [52]:
# # Test function get_actions(...)
# world_ttt.get_actions(is_player1)
# # [OK]

In [32]:
# # Test function is_valid(...)
# world_ttt.is_valid(board_test_ttt, is_player1)
# # [OK]

In [59]:
# # Test function is_legal(...)
# world_ttt.is_legal(board_test_ttt, ((0, 0), 1))
# # [OK]

In [77]:
# # Test get_next_state(...)
# next_state = world_ttt.get_next_state(board_test_ttt, action)
# print("Next State =", next_state)
# if not next_state == -1:
#     print("=", int2board(next_state, world_ttt.board.shape))
# # [OK]

In [88]:
# # Test state_eval(...)
# world_ttt.state_eval(board_test_ttt, is_my_turn_next)
# # [OK]

In [41]:
# # Test get_next_states(...)
# for s_num in world_ttt.get_next_states(
#     board_test_ttt, 
#     is_player1
# ): print(int2board(s_num[0], world_ttt.board.shape), '\n')
# # [OK]

In [1]:
# # Test get_reward(...)
# world_ttt.get_reward(board_test_ttt, action)
# # [OK]

In [96]:
# # Test is_winner(...)
# world_ttt.is_winner(board_test_ttt)
# # [OK]

In [78]:
# # Testing get_start_states(...)
# world_ttt.get_start_states(is_player1=True)
# # [OK]

In [83]:
# # Testing is_game_over(...).
# world_ttt.is_game_over(0)
# # [OK]

### Check Q Learning Implementation

In [235]:
# Tabular Q-learning strategy for tic-tac-toe. The strategy does not hold the
# world itself; it is handed the world's bound methods plus the board shape.
strategy_tabq = StrategyTabQLearning(
    # Board geometry.
    board_shape=world_ttt.board.shape,
    # State / action enumeration.
    get_start_states=world_ttt.get_start_states,
    get_actions=world_ttt.get_actions,
    get_next_state=world_ttt.get_next_state,
    get_next_states=world_ttt.get_next_states,
    # Rewards and termination.
    get_reward=world_ttt.get_reward,
    is_game_over=world_ttt.is_game_over,
)

In [146]:
# # Testing function get_move(...)
# strategy_tabq.get_move(
#     board=board_test_ttt,
#     is_player1=is_player1
# )
# # [OK]

In [236]:
# Testing function learn(...)
#
# Train the tabular Q-learning strategy twice with identical hyperparameters:
# once with player 1 as the starting player, once with player 2.
#
# NOTE(review): the output recorded below this cell reports 100000 episodes,
# which does not match max_episodes=1e4 here -- it looks stale; re-run the
# cell to refresh it.

# Seed the global RNGs so repeated runs of this cell are comparable.
# Assumes learn(...) draws from `random` and/or `np.random` -- TODO confirm.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

for starts_as_player1 in (True, False):
    strategy_tabq.learn(
        max_episodes=1e4,
        discount_factor=0.99,  # gamma
        learning_rate=0.9,  # alpha
        is_player1=starts_as_player1
    )

Learning (Starting Player = 1) ...
Max episodes reached.
All done. Episodes = 100000.
Learning (Starting Player = 2) ...
Max episodes reached.
All done. Episodes = 100000.


In [237]:
# Number of state keys currently held in each player's Q-table, and their sum.
n_states_p1 = len(strategy_tabq.q_tab[1])
n_states_p2 = len(strategy_tabq.q_tab[2])

print("known player 1 states = ", n_states_p1)
print("known player 2 states = ", n_states_p2)
print("known states = ", n_states_p1 + n_states_p2)

known player 1 states =  2422
known player 2 states =  2092
known states =  4514


In [251]:
# Second hand-built position, used to inspect what the trained strategy
# learned. Same encoding as before: 1 -> player 1, 0 -> player 2, -1 -> empty.
# This rebinds the shared test variables from the earlier world checks.
board_test_ttt = np.array(
    [[1, 1, 0],
     [0, 0, -1],
     [1, 1, -1]]
)
board_test_ttt_int = board2int(board_test_ttt)

is_player1, is_my_turn_next = False, True
action = ((1, 0), 1)

# Bare last expression so the notebook displays the array.
board_test_ttt

array([[ 1,  1,  0],
       [ 0,  0, -1],
       [ 1,  1, -1]])

In [252]:
# Q-values stored for player 2 in the position above. The table is indexed by
# player id, then by the integer board encoding; the result maps action tuples
# (e.g. ((2, 2), 2) in the recorded output) to their learned values.
strategy_tabq.q_tab[2][board_test_ttt_int]

{((2, 2), 2): -142.015, ((1, 2), 2): -138.5}

In [250]:
# Ask the trained strategy for its move in the position above. Per the
# recorded output the result is a dict with the chosen cell under 'f_out'
# and a timing under 'seconds'.
strategy_tabq.get_move(board=board_test_ttt, is_player1=is_player1)

{'f_out': (2, 2), 'seconds': 0.0}

## Connect 4

### Check World Implementation

In [4]:
# Connect 4 world on a 6-row by 7-column board; player 1 is 'R', player 2 is 'Y'.
# NOTE(review): the variable is spelled `word_con4` (not `world_con4`); the
# name is kept as-is because later cells refer to it.
word_con4 = WorldCon4(
    name="CON4",
    board_size=(6, 7),
    player1sym='R',
    player2sym='Y',
)

In [58]:
# Hand-built Connect 4 position: an empty 6x7 board (-1 everywhere) with the
# bottom three rows of column 4 set to 1 (player 1) and the bottom three rows
# of column 5 set to 0 (player 2).
board_test_con4 = np.full((6, 7), -1)
board_test_con4[3:, 4] = 1
board_test_con4[3:, 5] = 0
board_test_con4_int = board2int(board_test_con4)

# Arguments reused by the (currently commented-out) world checks.
is_player1 = True
is_my_turn_next = True
# NOTE(review): this has the same ((i, j), player) shape as the tic-tac-toe
# action, while the commented-out Connect 4 checks pass 2-tuples such as
# (5, 1) -- possibly a leftover; confirm before using it with word_con4.
action = ((2, 2), 1)

symbol_p1 = word_con4.player_symbols[1]
symbol_p2 = word_con4.player_symbols[2]
print(board_to_str(board_test_con4, symbol_p1, symbol_p2))

# # # # # # #
# # # # # # #
# # # # # # #
# # # # R Y #
# # # # R Y #
# # # # R Y #



In [306]:
# # Test get_actions(...)
# print("Player 1 Actions =", word_con4.get_actions(is_player1=True))
# print("Player 2 Actions =", word_con4.get_actions(is_player1=False))
# # [OK]

In [46]:
# # Test is_legal(...)
# word_con4.is_legal(board_test_con4, action=(6, 2))
# # [OK]

In [45]:
# # Test is_valid(...)
# word_con4.is_valid(board_test_con4, is_player1=True)
# # [OK]

In [51]:
# # Test is_winner(...)
# word_con4.is_winner(board_test_con4)
# # [OK]

In [61]:
# # Test get_next_state(...)
# word_con4.get_next_state(board_test_con4, (5, 1))
# # [OK]

18737243841865286406