In [None]:
from tournament.agents.constant import AllC, AllD
from tournament.agents.pavlov import Pavlov
from tournament.agents.q_learning.dqn import DeepQLearner
from tournament.agents.tft import TFTT, GenerousTFT, TitForTat
from tournament.environments.single import SingleRuleBasedAgentEnvironment
from tournament.environments.multiple import MultipleRuleBasedAgentEnvironment
from tournament.action import Action
from tournament.agents.agents import AGENTS
from tournament.agents.axelrod_first import (
    Davis,
    Downing,
    Feld,
    Grofman,
    Grudger,
    Joss,
    Nydegger,
    Shubik,
    SteinAndRapoport,
    TidemanAndChieruzzi,
    Tullock,
)
from tournament.agents.tft import (
    TFTT,
    TTFT,
    GenerousTFT,
    GradualTFT,
    OmegaTFT,
    TitForTat,
)
from tournament.tournament import RoundRobinTournament

import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Default matplotlib figure size for every plot in this notebook.
# Alternative for larger figures: (20, 12).
plt.rcParams["figure.figsize"] = (10, 6)

# Column buffers for the hyper-parameter sweep. one_train() appends to these
# lists and the last cell assembles them into a DataFrame.

# Agent configuration: lookback, epsilon, epsilon decay, discount rate and the
# Q-network object of the trained agent.
lb, eps, eps_dcay, dr, nw = [], [], [], [], []

# Tournament outcome: index of the agent in the score-sorted results and its
# rounded mean score.
placement, score_q_agent = [], []

# Training statistics read from the environment: last recorded loss, mean
# reward, rolling mean reward over the last 20 entries, and the share of
# cooperative actions (stored as a fraction of all counted actions).
final_loss, final_avg_reward, final_avg_rolling_reward, coop_percentage = [], [], [], []

In [None]:
# Rule-based agents handed to the training environment. For a single-opponent
# setup use SingleRuleBasedAgentEnvironment(TitForTat) instead.
training_opponents = [
    TitForTat,
    TidemanAndChieruzzi,
    Nydegger,
    Grofman,
    Shubik,
]
env = MultipleRuleBasedAgentEnvironment(training_opponents)

In [None]:
class QNetwork(nn.Module):
    """Fully connected network mapping one flattened state to two outputs.

    Architecture: Flatten -> Linear(2 * lookback, n) -> ReLU -> Linear(n, n)
    -> ReLU -> Linear(n, 2) -> ReLU.

    The layer attributes are named layer1, layer2 and layer4 (there is no
    layer3); the names are part of the state_dict keys and are kept as they are.
    """

    def __init__(self, lookback, n=32):
        """Create the layers and Kaiming-uniform initialise their weights.

        Args:
            lookback: sizes the input; the first layer expects
                ``2 * lookback`` features (presumably two values per
                remembered round -- confirm against DeepQLearner).
            n: width of both hidden layers.
        """
        super().__init__()

        self.flatten = nn.Flatten()
        self.layer1 = nn.Linear(2 * lookback, n)
        self.layer2 = nn.Linear(n, n)
        self.layer4 = nn.Linear(n, 2)

        # Same initialisation for every linear layer, applied in creation order.
        for linear in (self.layer1, self.layer2, self.layer4):
            nn.init.kaiming_uniform_(linear.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        """Run a single state through the network.

        A leading batch dimension of size 1 is added before flattening, so
        ``x`` must contain exactly ``2 * lookback`` elements in total.

        Returns:
            Tensor of shape (1, 2) with non-negative entries.
        """
        hidden = self.flatten(x.unsqueeze(0))
        # NOTE(review): the ReLU is also applied to the output layer, which
        # clamps the outputs at zero -- confirm this is intended.
        for linear in (self.layer1, self.layer2, self.layer4):
            hidden = torch.relu(linear(hidden))

        return hidden

class QLearningTest(DeepQLearner):
    """DeepQLearner whose hyper-parameters are supplied per instance.

    Used by the sweep in this notebook to build one agent per
    hyper-parameter combination.
    """

    def __init__(self, lookback, epsilon, epsilon_decay, discount_rate, n, learning_rate=0.01):
        """Configure the agent and attach a fresh QNetwork.

        Args:
            lookback: stored as ``self.lookback`` and used to size the
                Q-network input (``2 * lookback`` features).
            epsilon: stored as ``self.epsilon`` (presumably the exploration
                rate read by DeepQLearner -- confirm).
            epsilon_decay: stored as ``self._epsilon_decay``.
            discount_rate: stored as ``self._discount_rate``.
            n: hidden-layer width passed to QNetwork.
            learning_rate: stored as ``self._learning_rate``; defaults to
                0.01, the value that was previously hard-coded.
        """
        super().__init__()
        self.lookback = lookback
        self.epsilon = epsilon
        self._epsilon_decay = epsilon_decay
        self._discount_rate = discount_rate
        self._learning_rate = learning_rate
        self._q_network = QNetwork(self.lookback, n)



In [None]:

def one_train(a, b, c, d, e):
    """Train one agent, rank it in a round-robin tournament and record the run.

    Args (forwarded positionally to QLearningTest):
        a: lookback.
        b: epsilon.
        c: epsilon decay.
        d: discount rate.
        e: hidden-layer width of the Q-network.

    Side effects:
        Appends exactly one entry to each module-level result list (lb, eps,
        eps_dcay, dr, nw, placement, score_q_agent, final_loss,
        final_avg_reward, final_avg_rolling_reward, coop_percentage), so the
        lists always stay the same length. ``placement`` (0-based index in the
        score-sorted results) and ``score_q_agent`` receive ``None`` when the
        agent cannot be found in the tournament results.
    """
    agent = QLearningTest(a, b, c, d, e)
    env.train(
        trainee=agent,
        continuation_probability=1,
        limit=200,
        noise=0,
        repetitions=1,
        epochs=200,
    )
    tournament = RoundRobinTournament(AGENTS, [agent])

    scores, times = tournament.play(
        continuation_probability=0.99654, repetitions=50, jobs=4
    )

    # One row per participant: (participant, rounded mean score, total time),
    # best score first.
    results = [
        (player, round(sum(scores[player]) / len(scores[player])), sum(times[player]))
        for player in scores
    ]
    results.sort(key=lambda row: row[1], reverse=True)

    # Locate the trained agent's row. The tournament may key its scores by
    # class or by instance (not visible here), so accept either. Always record
    # exactly one value so the columns cannot get out of step with the others.
    agent_rank, agent_score = None, None
    for rank, (player, mean_score, _elapsed) in enumerate(results):
        if player is agent or player == QLearningTest:
            agent_rank, agent_score = rank, mean_score
            break
    placement.append(agent_rank)
    score_q_agent.append(agent_score)

    lb.append(agent.lookback)
    eps.append(agent.epsilon)
    eps_dcay.append(agent._epsilon_decay)
    dr.append(agent._discount_rate)
    nw.append(agent._q_network)

    # NOTE(review): env is shared by every run of the sweep; if env.train()
    # does not reset metric_history / rewards / counts, the statistics below
    # also include earlier runs -- confirm.
    window = 20
    final_loss.append(env.metric_history[-1])
    final_avg_reward.append(np.mean(env.rewards))
    final_avg_rolling_reward.append(
        np.convolve(env.rewards, np.ones(window), mode='valid')[-1] / window
    )

    # Share of cooperative actions; None when no cooperation was counted or
    # when there is nothing to divide by.
    total_actions = sum(env.counts.values())
    if total_actions and Action.COOPERATE in env.counts:
        coop_percentage.append(env.counts[Action.COOPERATE] / total_actions)
    else:
        coop_percentage.append(None)
    

In [None]:
# Values swept for each hyper-parameter.
lb_para = [1, 2, 4, 6, 8, 10]         # lookback
eps_para = [0.05, 0.1, 0.15, 0.2]     # epsilon
eps_dcay_para = [0.0, 0.002]          # epsilon decay
dr_para = [0.95, 0.99]                # discount rate
n_para = [4, 8, 16, 32, 64, 128]      # hidden-layer width

# Full Cartesian product in one_train() argument order. The network width
# varies slowest and the lookback fastest.
hyperparameter_grid = [
    (lookback, epsilon, epsilon_decay, discount_rate, width)
    for width in n_para
    for discount_rate in dr_para
    for epsilon_decay in eps_dcay_para
    for epsilon in eps_para
    for lookback in lb_para
]

for combination in hyperparameter_grid:
    one_train(*combination)


In [None]:
# Column name -> per-run values, in the order the columns appear in the CSV.
result_columns = [
    ("lookback", lb),
    ("epsilon", eps),
    ("epsilon_decay", eps_dcay),
    ("discount_rate", dr),
    ("network", nw),
    ("placement", placement),
    ("score", score_q_agent),
    ("final_loss", final_loss),
    ("avg_reward", final_avg_reward),
    ("final_rolling_reward", final_avg_rolling_reward),
    ("coop_percentage", coop_percentage),
]
dic = dict(result_columns)
print(dic)

# All columns must have the same length for the DataFrame to be built.
# NOTE(review): the file name is spelled 'fisrt'; it is kept unchanged because
# other code may read this exact path -- confirm before renaming.
df = pd.DataFrame(data=dic)
df.to_csv('fisrt_tournament_result.csv')