# Demo

In [1]:
from player import Player
from utility import int2board
from connect4 import WorldCon4
from tic_tac_toe import WorldTTT
from utility import board_to_str
from strategies import StrategyMiniMax
from strategies import StrategyManualTTT
from output_handler import OutputHandler
from strategies import StrategyTabQLearning
from utility import switch_player_perspective

## Connect 4 - Q Learning

In [2]:
# Set up the Connect 4 world: player 1 uses the symbol 'R', player 2 uses 'Y',
# and the output handler is given folders for its logs and CSV run results.
world = WorldCon4(
    player1sym='R',
    player2sym='Y',
    output_handler=OutputHandler(logs_folder="./__logs", csv_folder="./__run_results"),
)

In [3]:
# Define the tabular Q-learning strategy. It is connected to the Connect 4
# world through the world's bound methods and the shape of its board.
strategy_tabq = StrategyTabQLearning(
    board_shape=world.board.shape,
    get_start_states=world.get_start_states,
    get_actions=world.get_actions,
    get_next_state=world.get_next_state,
    get_next_states=world.get_next_states,
    is_game_over=world.is_game_over,
    get_reward=world.get_reward,
)

In [4]:
# View the fresh Q table before any learning has been run. At this point it
# holds one empty dict under each of the keys 1 and 2.
strategy_tabq.q_tab

{1: {}, 2: {}}

In [5]:
# Run a very short training session with is_player1=False, capped at 10
# episodes. max_seconds is presumably a wall-clock cap -- TODO confirm.
# The call is the cell's last expression, so its result dict is displayed.
strategy_tabq.learn(
    is_player1=False,
    max_episodes=10,
    max_seconds=30,
    learning_rate=0.9,
    discount_factor=0.99,
)

Learning (starting player = 2) ...
Max no. of episodes reached (10).
All done. Episodes = 10.


{'f_out': {'num_moves': 189,
  'num_episodes': 10,
  'stopping_condition': 'episodes'},
 'milliseconds': 277.423095703125}

In [6]:
# View the Q table after the short training run. Entries now map an integer
# key (presumably an encoded board -- TODO confirm) to {action: value}.
# NOTE(review): this displays the whole table, so the cell output is long.
strategy_tabq.q_tab

{1: {4398046511233: {(4, 1): 9.0},
  21990232555669: {(5, 1): 4.5},
  30786325577911: {(4, 1): 4.5},
  2282586139267767: {(6, 1): 4.5},
  74340180177212343: {(6, 1): 4.5},
  9297712217034085311: {(4, 1): -9.0},
  9585942593185864639: {(1, 1): -9.0},
  11891785602668518335: {(0, 1): 4.5},
  11892067077712337919: {(6, 1): 4.5},
  151127619518940727303167: {(5, 1): 9.0},
  151127763634128803199999: {(2, 1): -9.0},
  151128916555642000243711: {(4, 1): -9.0},
  155851283038512752753663: {(4, 1): -9.0},
  8796093022226: {(2, 1): 9.0},
  9015995347765270: {(1, 1): 9.0},
  9156732836120631: {(3, 1): 9.0},
  9191917208209983: {(6, 1): 9.0},
  9754867161893567: {(5, 1): 9.0},
  10880767068737471: {(3, 1): 9.0},
  587341519372308415: {(5, 1): 4.5},
  731456707448262591: {(6, 1): -9.0},
  9954828744573571007: {(4, 1): 13.5},
  17592186044932: {(0, 1): 9.0},
  299067162763844: {(0, 1): 9.0},
  4611985085591200325: {(2, 1): 9.0},
  4612055454335380053: {(1, 1): 4.5},
  4612196191957953141: {(6, 1): 

In [7]:
# Load a previously saved Q table from a JSON file under ./q_tables.
# NOTE(review): the file name suggests a Connect 4 run with alpha=0.9 and
# gamma=0.99 over 87319 episodes / 30 minutes -- confirm against the run logs.
strategy_tabq.load_qtab(
    "./q_tables/07042024151222con40.9alpha0.99gamma87319episodes30mins.json"
)

Loaded Q table from ./q_tables/07042024151222con40.9alpha0.99gamma87319episodes30mins.json.


In [8]:
# View the Q table again now that the saved table has been loaded.
# NOTE(review): this displays the whole table, so the cell output is long.
strategy_tabq.q_tab

{1: {0: {(0, 1): 17.919,
   (4, 1): 17.919,
   (1, 1): 17.919,
   (5, 1): 9.0,
   (2, 1): 9.0,
   (6, 1): 17.019000000000002},
  281474976710721: {(1, 1): 17.919,
   (5, 1): 9.0,
   (2, 1): 9.0,
   (0, 1): 17.919,
   (4, 1): 17.919},
  422212465074273: {(3, 1): -3.0690000000000004},
  457396837163241: {(5, 1): -3.8259, (2, 1): 13.5, (6, 1): 4.5},
  466192930185707: {(4, 1): 1.8809999999999998,
   (5, 1): -3.8259,
   (6, 1): 4.5,
   (2, 1): 13.5,
   (3, 1): 4.5,
   (0, 1): 4.5},
  483785116262895: {(3, 1): 1.8809999999999998,
   (1, 1): 9.0,
   (6, 1): 9.0,
   (4, 1): 9.0},
  4987384743634927: {(6, 1): 9.0, (4, 1): 1.8809999999999998, (2, 1): 13.5},
  77044978781710319: {(3, 1): 11.0134035,
   (0, 1): 9.0,
   (4, 1): 9.0,
   (2, 1): 13.5},
  73864021273640888303: {(2, 1): 14.85,
   (5, 1): 9.0,
   (3, 1): -9.0,
   (1, 1): -9.0,
   (4, 1): -9.0,
   (0, 1): -9.0},
  18498183626273263: {(3, 1): 9.0,
   (2, 1): 14.85,
   (4, 1): 17.128791000000003,
   (6, 1): 9.0,
   (1, 1): 1.8809999999999

In [9]:
# Examine a board: decode an integer-encoded position into a board of shape
# (6, 7), then print it as text using 'R' and 'Y' as the two player symbols.
board = int2board(10918503459800868529815035, (6, 7))
print(board_to_str(board, 'R', 'Y'))

0 R # # R # # #
1 Y R # Y # Y #
2 Y R # R # R #
3 R Y Y Y # R #
4 Y Y R R # Y Y
5 R R Y R # R Y
  0 1 2 3 4 5 6



In [10]:
# Ask the Q-learning strategy for a move on the current `board`. The board is
# passed through switch_player_perspective first and the move is requested
# with is_player1=False.
strategy_tabq.get_move(
    board=switch_player_perspective(board),
    is_player1=False,
)

{'f_out': 2, 'milliseconds': 0.0}