In [1]:
from tournament.agents.constant import AllC, AllD
from tournament.agents.pavlov import Pavlov
from tournament.agents.q_learning.dqn import DeepQLearner
from tournament.agents.tft import TFTT, GenerousTFT, TitForTat
from tournament.environments.single import SingleRuleBasedAgentEnvironment
from tournament.environments.multiple import MultipleRuleBasedAgentEnvironment
from tournament.action import Action
from tournament.agents.agents import AGENTS
from tournament.agents.axelrod_first import (
    Davis,
    Downing,
    Feld,
    Grofman,
    Grudger,
    Joss,
    Nydegger,
    Shubik,
    SteinAndRapoport,
    TidemanAndChieruzzi,
    Tullock,
)
from tournament.agents.tft import (
    TFTT,
    TTFT,
    GenerousTFT,
    GradualTFT,
    OmegaTFT,
    TitForTat,
)
from tournament.tournament import RoundRobinTournament

import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np

# Default figure size applied to every matplotlib figure created in this notebook.
# A larger (20, 12) setting was used previously and can be swapped in here if needed.
plt.rcParams["figure.figsize"] = (10, 6)

In [2]:
class QNetwork(nn.Module):
    """Small fully connected network: two hidden layers and a two-unit output.

    Args:
        lookback: sizes the input; the first linear layer expects
            ``2 * lookback`` features after flattening.
        n1: width of both hidden layers (default 64).

    ``forward`` returns a tensor of shape ``(1, 2)`` for a single observation
    (presumably one Q-value per action -- confirm against ``DeepQLearner``).
    """

    def __init__(self, lookback, n1=64):
        super().__init__()

        in_features = 2 * lookback

        # Attribute names double as state-dict keys for saved checkpoints,
        # so they are kept exactly as they are.
        self.flatten = nn.Flatten()
        self.layer1 = nn.Linear(in_features, n1)
        self.layer2 = nn.Linear(n1, n1)
        self.layer3 = nn.Linear(n1, 2)

        # Re-initialise every weight matrix with Kaiming-uniform; the biases
        # keep whatever nn.Linear assigned at construction.
        for linear in (self.layer1, self.layer2, self.layer3):
            nn.init.kaiming_uniform_(linear.weight, mode="fan_in", nonlinearity="relu")

    def forward(self, x):
        # A leading axis of size one is always prepended, so the whole input
        # is flattened into a single row.
        # NOTE(review): an input that already carries a batch axis larger
        # than one would be flattened into that same row -- confirm callers
        # only pass single observations.
        hidden = self.flatten(x.unsqueeze(dim=0))

        # ReLU follows every layer, including the output layer, so returned
        # values are never negative.
        # NOTE(review): confirm that clamping the outputs is intended.
        for linear in (self.layer1, self.layer2, self.layer3):
            hidden = torch.relu(linear(hidden))

        return hidden

class QLearningTest(DeepQLearner):
    """Deep Q-learning agent that uses ``QNetwork`` and tries to start from saved weights.

    On construction the agent builds a fresh ``QNetwork`` and attempts to load
    a checkpoint from disk. Loading is best-effort: if it fails, the agent
    keeps its randomly initialised network, but a ``RuntimeWarning`` is issued
    so the fallback is visible instead of silent.
    """

    # Passed to QNetwork to size its input layer.
    lookback = 4
    # Presumably the exploration rate consumed by DeepQLearner -- confirm in the base class.
    epsilon = 0.2

    def __init__(self):
        # Local import: `warnings` is not among the notebook's top-level imports.
        import warnings

        super().__init__()

        self._learning_rate = 0.001
        self._discount_rate = 0.95
        self._q_network = QNetwork(self.lookback)

        checkpoint = "models/2022-03-15 00-46-08 (796.991935483871).pt"
        try:
            # map_location="cpu" lets a checkpoint saved on a GPU be read on a
            # CPU-only machine; load_state_dict then copies the values into
            # the existing parameters.
            self._q_network.load_state_dict(torch.load(checkpoint, map_location="cpu"))
        except Exception as exc:
            # Keep the best-effort behaviour (no crash when the file is missing
            # or incompatible) but make it visible. KeyboardInterrupt and
            # SystemExit are deliberately not caught.
            warnings.warn(
                f"Could not load Q-network weights from {checkpoint!r} ({exc!r}); "
                "continuing with randomly initialised weights.",
                RuntimeWarning,
                stacklevel=2,
            )


# Build the agent; its constructor attempts to load saved weights into the Q-network.
agent = QLearningTest()
# Put the Q-network into evaluation mode. `eval()` returns the module itself, so as
# the last expression of the cell its layer summary is displayed below.
agent._q_network.eval()

QNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layer1): Linear(in_features=8, out_features=64, bias=True)
  (layer2): Linear(in_features=64, out_features=64, bias=True)
  (layer3): Linear(in_features=64, out_features=2, bias=True)
)

In [4]:
# Round-robin between everything in AGENTS and the learned agent.
tournament = RoundRobinTournament(AGENTS, [agent])

scores, times = tournament.play(
    continuation_probability=0.99654,
    repetitions=100,
    jobs=12,
)

# One row per participant: (participant, rounded mean score, summed time),
# ordered from the highest rounded mean score to the lowest.
results = sorted(
    (
        (
            player,
            round(sum(scores[player]) / len(scores[player])),
            sum(times[player]),
        )
        for player in scores
    ),
    key=lambda row: row[1],
    reverse=True,
)

for player, mean_score, total_time in results:
    print(f"{player.__name__:<30} {mean_score:<20} {total_time:<20}")


100%|██████████| 961/961 [07:16<00:00,  2.20matches/s]

Borufsen                       805                  32.755993999999994  
TTFT                           795                  27.79484800000001   
TFTT                           788                  27.571765999999997  
SecondByWhiteK72R              785                  32.920328000000005  
SecondByGraaskampKatzen        784                  32.91872300000001   
SteinAndRapoport               783                  85.34950900000001   
SecondByBlackK83R              780                  39.857178           
OmegaTFT                       775                  31.17649            
Davis                          771                  44.683851000000004  
Champion                       771                  152.59199900000002  
TidemanAndChieruzzi            770                  32.120107000000004  
SecondByWeiner                 769                  79.30880799999998   
Grudger                        765                  25.809770999999998  
Leyvraz                        764                 


