In [None]:
from itertools import chain
from random import random

import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm

from society.action import Action
from society.agent import Agent
from society.simulations.adaptive import AdaptiveSimulation
from society.agents.constant import AllC, AllD
from society.agents.qlearning import TabularQLearner
from society.agents.random import Random
from society.agents.tft import TitForTat
from society.visualisation.network import *

# Default (width, height) for every matplotlib figure created in this notebook;
# matplotlib interprets "figure.figsize" in inches.
plt.rcParams["figure.figsize"] = (10, 6)

In [None]:
def generate_population(communities, size):
    """Create Q-learning agents arranged on a connected caveman graph.

    Args:
        communities: Number of cliques passed to
            ``nx.connected_caveman_graph``.
        size: Number of nodes (agents) in each clique.

    Returns:
        A tuple ``(agents, weights_matrix, G)``:

        * ``agents`` -- list of ``communities * size`` fresh
          ``TabularQLearner(lookback=3)`` instances.
        * ``weights_matrix`` -- symmetric NumPy array of shape
          ``(population, population)`` holding 1.0 for every pair of nodes
          joined by an edge of ``G`` and 0.0 everywhere else.
        * ``G`` -- the graph itself, with each edge's ``"weight"`` attribute
          set to the corresponding matrix entry.
    """
    population = communities * size

    agents = [TabularQLearner(lookback=3) for _ in range(population)]

    # NOTE(review): `nx` is not imported explicitly in this notebook; it is
    # presumably re-exported by the star import from
    # society.visualisation.network -- confirm.
    G = nx.connected_caveman_graph(communities, size)

    weights_matrix = np.zeros((population, population))
    for u, v, d in G.edges(data=True):
        # Node labels are used directly as matrix indices.
        weights_matrix[u, v] = weights_matrix[v, u] = 1.0
        # Mirror the matrix entry onto the edge. The previous bare `except`
        # around this assignment could only hide genuine errors (the same
        # index expression has already succeeded on the line above), so it
        # has been removed.
        d["weight"] = weights_matrix[u, v]

    return agents, weights_matrix, G

In [None]:
def compute_policies(agents, rankings):
    """Render the greedy policy of every ranked agent as a "C"/"D" string.

    Args:
        agents: Sequence of agents, each exposing a ``_q_table`` array whose
            last axis is reduced with ``argmax`` to pick one action index
            per state.
        rankings: Iterable of entries whose first item is an index into
            ``agents`` and whose second item is that agent's score.

    Returns:
        A list of ``(policy, score)`` tuples in the same order as
        ``rankings``. ``policy`` contains one character per state, "C" where
        the arg-max action index is 0 and "D" where it is 1, with states
        visited in row-major order (last state axis varying fastest).

    The state table is flattened instead of being indexed with three
    hard-coded ``range(4)`` loops, so Q-tables built with a lookback other
    than 3 are supported; for a ``(4, 4, 4, 2)`` table the output is
    identical to the nested-loop form.
    """
    action_symbols = ("C", "D")

    policies = []
    for entry in rankings:
        greedy_actions = agents[entry[0]]._q_table.argmax(axis=-1)
        policy = "".join(action_symbols[action] for action in greedy_actions.ravel())
        policies.append((policy, entry[1]))

    return policies


def calculate_cooperativeness(history):
    """Return the fraction of entries in ``history`` equal to ``Action.COOPERATE``.

    Args:
        history: List of actions.

    Returns:
        ``history.count(Action.COOPERATE) / len(history)``, or ``0.0`` when
        ``history`` is empty. An empty history previously raised
        ``ZeroDivisionError``; 0.0 keeps such agents sortable alongside the
        others when rankings are built.
    """
    if len(history) == 0:
        return 0.0

    return history.count(Action.COOPERATE) / len(history)

In [None]:
ACTION_LABELS = ["(C, C)", "(C, D)", "(D, C)", "(D, D)"]
POPULATION = (2, 4)  # (communities, size) arguments for generate_population
ROUNDS = 20_000  # rounds played in each run
RUNS = 20  # independent runs, each starting from a freshly generated population

# One entry per run: the list of (policy, score) tuples returned by
# compute_policies, ordered by cumulative reward / by cooperativeness.
cumulative_reward_results = []
cooperativeness_results = []

for run in range(RUNS):
    # Generate a new population
    agents, weights_matrix, G = generate_population(*POPULATION)

    # Run a number of rounds
    sim = AdaptiveSimulation(agents, weights_matrix)
    sim.reset()
    for i in tqdm(range(ROUNDS), desc=f"Run {run + 1}"):
        sim.play_round()

    # Rank agents by cumulative reward.
    # sim.rewards is iterated as one collection of reward sequences per agent;
    # an empty sequence contributes 0 to that agent's total.
    cumulative_rewards_matrix = [
        [sum(r) if len(r) > 0 else 0 for r in agent] for agent in sim.rewards
    ]
    cumulative_reward_rankings = sorted(
        [(i, sum(cumulative_rewards_matrix[i])) for i in range(len(agents))],
        key=lambda x: x[1],
        reverse=True,
    )

    tqdm.write(
        f"BEST REWARD: {cumulative_reward_rankings[0][1]:<28} WORST REWARD: {cumulative_reward_rankings[-1][1]}"
    )

    cumulative_reward_results.append(
        compute_policies(agents, cumulative_reward_rankings)
    )

    # Rank agents by cooperativeness.
    # Each agent's history is a collection of action sequences that is
    # flattened into a single list before scoring.
    cooperativeness_rankings = sorted(
        [
            (i, calculate_cooperativeness(list(chain(*history))))
            for i, history in enumerate(sim.action_histories)
        ],
        key=lambda x: x[1],
        reverse=True,
    )

    tqdm.write(
        f"BEST COOPERATOR: {cooperativeness_rankings[0][1]:<24} WORST COOPERATOR: {cooperativeness_rankings[-1][1]}"
    )

    cooperativeness_results.append(compute_policies(agents, cooperativeness_rankings))

In [None]:
# For every distinct policy string, gather the 1-based position it reached in
# each run's cumulative-reward ranking.
cumulative_reward_policy_ranks = {}

for result in cumulative_reward_results:
    for rank, (policy, score) in enumerate(result):
        cumulative_reward_policy_ranks.setdefault(policy, []).append(rank + 1)

# Average position per policy across all of its appearances.
mean_cumulative_reward_policy_ranks = dict(
    (policy, np.mean(ranks))
    for policy, ranks in cumulative_reward_policy_ranks.items()
)

# Print policies from best (lowest) to worst (highest) mean rank.
for policy in sorted(
    mean_cumulative_reward_policy_ranks,
    key=mean_cumulative_reward_policy_ranks.get,
):
    print(f"{mean_cumulative_reward_policy_ranks[policy]:<24} {policy}")

In [None]:
# For every distinct policy string, gather the 1-based position it reached in
# each run's cooperativeness ranking together with the score it had there.
cooperativeness_policy_ranks = {}
cooperativeness_policy_scores = {}

for result in cooperativeness_results:
    for rank, (policy, score) in enumerate(result):
        cooperativeness_policy_ranks.setdefault(policy, []).append(rank + 1)
        cooperativeness_policy_scores.setdefault(policy, []).append(score)

# policy -> (mean rank, number of appearances)
mean_cooperativeness_policy_ranks = dict(
    (policy, (np.mean(ranks), len(ranks)))
    for policy, ranks in cooperativeness_policy_ranks.items()
)

# policy -> (mean cooperativeness score, number of appearances)
mean_cooperativeness_policy_scores = dict(
    (policy, (np.mean(scores), len(scores)))
    for policy, scores in cooperativeness_policy_scores.items()
)

# Columns: mean rank, policy, appearances, mean score -- ordered from the
# lowest mean rank to the highest.
for policy in sorted(
    mean_cooperativeness_policy_ranks,
    key=lambda name: mean_cooperativeness_policy_ranks[name][0],
):
    print(
        f"{mean_cooperativeness_policy_ranks[policy][0]:<24} {policy:<24} {mean_cooperativeness_policy_ranks[policy][1]:<8} {mean_cooperativeness_policy_scores[policy][0]}"
    )

In [None]:
# Columns: mean rank, policy, appearances, mean score -- ordered from the
# highest mean cooperativeness score to the lowest.
policies_by_mean_score = sorted(
    mean_cooperativeness_policy_scores.keys(),
    key=lambda name: mean_cooperativeness_policy_scores[name][0],
    reverse=True,
)

for policy in policies_by_mean_score:
    print(
        f"{mean_cooperativeness_policy_ranks[policy][0]:<24} {policy:<24} {mean_cooperativeness_policy_ranks[policy][1]:<8} {mean_cooperativeness_policy_scores[policy][0]}"
    )