In [1]:
import torch
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
# `is_available` has to be *called*: the bare function object is always
# truthy, which would select 'cuda' even on a machine without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
from agent import Agent, advanced_random_policy, random_policy, sarsa
from game import Game
import seaborn as sns
import matplotlib.pyplot as plt

In [28]:
# Train a single agent on a 3x3 board (n_dim=2, size=3). Its opponent in
# `game` is a second Agent driven by `advanced_random_policy`.

agent = Agent(size=3)
random_agent = Agent(size=3, policy=advanced_random_policy)
game = Game(agent, random_agent, n_dim=2, size=3)

# NOTE(review): `random_policy` is handed to sarsa while the opponent agent
# uses `advanced_random_policy` -- confirm this is intended.
sarsa(
    game,
    agent,
    random_policy,
    alpha=0.8,
    alpha_factor=0.999,
    gamma=0.9,
    epsilon=1.0,
    epsilon_factor=0.999,
    r_win=5.0,
    r_lose=0.0,
    r_even=1.0,
    r_even2=1.5,
    num_episodes=10000,
)

100%|██████████| 10000/10000 [01:09<00:00, 143.71it/s]


In [3]:
# Run 1000 simulated games on the current `game` and report the tallies:
# wins for player 1, wins for player 2, and even games.
win_p1, win_p2, tot_even = game.simulate_games(1000)
print(
    'Agent won', win_p1,
    'times, lost', win_p2,
    'times and did', tot_even,
    'even games',
)

Agent won 252 times, lost 0 times and did 748 even games


In [4]:
# Train a fresh agent for each board size, then record the outcome of 1000
# simulated games for it.
board_sizes = [2, 3, 4, 5, 6, 10]
results = {}
for board_size in board_sizes:
    print(f"Testing board of size {board_size}x{board_size}")

    agent = Agent(size=board_size)
    random_agent = Agent(size=board_size, policy=advanced_random_policy)
    game = Game(agent, random_agent, n_dim=2, size=board_size)

    sarsa(
        game, agent, random_policy,
        alpha=0.8, alpha_factor=0.9999,
        gamma=0.9,
        epsilon=1.0, epsilon_factor=0.9998,
        r_win=5.0, r_lose=0.0, r_even=1.0, r_even2=1.5,
        num_episodes=10000,
    )

    # Results are keyed by the first dimension of the agent's Q array,
    # not by the board size itself.
    results[agent.q_array.shape[0]] = game.simulate_games(1000)
    print(f" current results are {results}")

100%|██████████| 10000/10000 [00:23<00:00, 421.70it/s]
 69%|██████▊   | 6874/10000 [00:48<00:21, 142.25it/s]


Testing board of size 2x2
 current results are {4: (500, 500, 0)}
Testing board of size 3x3


KeyboardInterrupt: 

In [None]:
# Plot the win rate against Q-table size (the keys of `results`).
#
# The rate is wins / (wins + losses): even games are left out of the
# denominator. Sizes for which no game was decisive (wins + losses == 0) are
# skipped, because the rate is undefined there and the division would raise
# ZeroDivisionError and abort the cell.
sizes = [key for key, val in results.items() if (val[0] + val[1]) > 0]
winrates = [
    (results[key][0] / (results[key][0] + results[key][1])) * 100
    for key in sizes
]
ax = sns.lineplot(x=sizes, y=winrates, marker="o")
ax.set(xlabel='Q table size', ylabel='Win %')
ax.set(xscale="log")
plt.show()
# Early stopping ?

In [None]:
# Play one game on whichever `game` object an earlier cell created last.
# NOTE(review): what play_a_game displays or returns is not visible in this
# notebook -- see game.Game.play_a_game.
game.play_a_game()

In [4]:
import neuralAgent as na
import torch
import numpy as np
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
# `is_available` has to be *called*: the bare function object is always
# truthy, which would select 'cuda' even on a machine without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
from game import Game

# Board with n_dim=2 and size=3; both agent slots of the Game are left as None.
game = Game(None, None, n_dim=2, size=3)
agent1 = na.Model()

# train_network hands back five values; only the first two (the model and
# `values`) are kept, the rest are discarded.
# NOTE(review): the meaning of the positional arguments 10000 and 1000 is not
# visible in this notebook -- confirm against neuralAgent.train_network.
agent1, values, _, _, _ = na.train_network(
    agent1,
    game,
    10000,
    1000,
)
print(values)

100%|██████████| 10000/10000 [00:52<00:00, 189.40it/s]


[array(0.2295402, dtype=float32), array(0.2295402, dtype=float32), array(0.22736114, dtype=float32), array(0.22736114, dtype=float32), array(0.23094377, dtype=float32), array(0.23094377, dtype=float32), array(0.24089786, dtype=float32), array(0.24089786, dtype=float32), array(0.24606462, dtype=float32), array(0.24606462, dtype=float32), array(0.23788981, dtype=float32), array(0.23788981, dtype=float32), array(0.22719891, dtype=float32), array(0.22719891, dtype=float32), array(0.2398719, dtype=float32), array(0.2398719, dtype=float32), array(0.22098902, dtype=float32), array(0.22098902, dtype=float32)]


In [6]:
# Evaluate the trained network with test_against_random (third argument: 1000)
# and print the three tallies it returns.
wins, draw, loses = na.test_against_random(agent1, game, 1000)
print(
    "Win {}, Draw {}, Loses {}".format(wins, draw, loses)
)

Win 866, Draw 64, Loses 70


In [8]:
# Same train-then-evaluate pipeline as for the 2-D board, now with n_dim=3.
game = Game(None, None, n_dim=3, size=3)
# NOTE(review): Model(3, 3) presumably mirrors the board's n_dim and size --
# confirm the parameter order in neuralAgent.Model.
agent1 = na.Model(3, 3)

# train_network hands back five values; only the model and `values` are kept.
agent1, values, _, _, _ = na.train_network(
    agent1,
    game,
    10000,
    1000,
)
print(values)

# Evaluate the freshly trained network and print the three tallies.
wins, draw, loses = na.test_against_random(agent1, game, 1000)
print(
    "Win {}, Draw {}, Loses {}".format(wins, draw, loses)
)

  8%|▊         | 837/10000 [01:13<13:27, 11.35it/s]


KeyboardInterrupt: 