In [1]:
import torch
# Select the compute device. `torch.cuda.is_available` is a function and must be
# *called*: the bare function object is always truthy, which would select 'cuda'
# even on a machine without a usable GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
from agent import Agent, advanced_random_policy, random_policy, sarsa
from game import Game
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Train a single agent with `sarsa`; it is evaluated in the following cell.
# Setup: size-3 board with n_dim=2; the opponent uses `advanced_random_policy`.

agent = Agent(size=3)
random_agent = Agent(size=3, policy=advanced_random_policy)
game = Game(agent, random_agent, n_dim=2, size=3)
sarsa(
    game,
    agent,
    random_policy,
    # Learning rate and its per-step factor.
    alpha=0.8,
    alpha_factor=0.999,
    # Discount.
    gamma=0.9,
    # Exploration rate and its per-step factor.
    epsilon=1.0,
    epsilon_factor=0.999,
    # Rewards per outcome.
    r_win=5.0,
    r_lose=0.0,
    r_even=1.0,
    r_even2=1.5,
    num_episodes=10000,
)

  3%|▎         | 273/10000 [00:05<03:18, 48.98it/s]


KeyboardInterrupt: 

In [3]:
# Simulate 1000 games with the `game` object built in the training cell and
# report the tally: agent wins, agent losses, and even (drawn) games.
(win_p1, win_p2, tot_even) = game.simulate_games(1000)
print(
    'Agent won', win_p1,
    'times, lost', win_p2,
    'times and did', tot_even, 'even games',
)

Agent won 210 times, lost 589 times and did 201 even games


In [4]:
# Compare agent performance across board sizes. For every size a fresh agent is
# trained with `sarsa` against an `advanced_random_policy` opponent, then the
# outcome of 1000 simulated games is stored in `results`.
# `results` is keyed by the first dimension of the trained agent's Q array.
board_sizes = [2, 3, 4, 5, 6, 10]
results = {}
for board_size in board_sizes:
    print(f"Testing board of size {board_size}x{board_size}")
    agent = Agent(size=board_size)
    random_agent = Agent(size=board_size, policy=advanced_random_policy)
    game = Game(agent, random_agent, n_dim=2, size=board_size)
    sarsa(
        game, agent, random_policy,
        alpha=0.8, alpha_factor=0.9999,
        gamma=0.9,
        epsilon=1.0, epsilon_factor=0.9998,
        r_win=5.0, r_lose=0.0, r_even=1.0, r_even2=1.5,
        num_episodes=10000,
    )
    results[agent.q_array.shape[0]] = game.simulate_games(1000)
    # Progress report after each board size.
    print(f" current results are {results}")

  0%|          | 15/10000 [00:00<01:12, 137.33it/s]

Testing board of size 2x2


  8%|▊         | 816/10000 [00:04<00:51, 177.59it/s]


KeyboardInterrupt: 

In [None]:
# Plot the agent's win percentage against Q-table size (the keys of `results`).
# Each value in `results` is indexed as (agent wins, opponent wins, ...); the
# percentage is taken over decisive games only, so even games are excluded.
sizes = list(results)
winrates = [
    # When every simulated game was even there is no decisive game to divide by:
    # record NaN for that size instead of raising ZeroDivisionError.
    (wins / (wins + losses)) * 100 if (wins + losses) else float("nan")
    for wins, losses in ((val[0], val[1]) for val in results.values())
]
ax = sns.lineplot(x=sizes, y=winrates, marker="o")
# Q-table sizes span several orders of magnitude, hence the log x-axis.
ax.set(xlabel='Q table size', ylabel='Win %', xscale="log")
plt.show()
# TODO: early stopping?

In [None]:
# Calls `play_a_game` on whichever `game` object is currently bound in the
# kernel, i.e. the one assigned by the most recently executed cell above (after
# a full run, the last board size of the sweep).
# NOTE(review): presumably plays one game between the two agents; confirm in game.py.
game.play_a_game()