In [1]:
import matplotlib.pyplot as plt
import numpy as np
from random import random

from society.agent import Agent, TrainableAgent
from society.simulations.weighted import WeightedNetworkSimulation
from society.strategies.gameplay.constant import AllC, AllD
from society.strategies.gameplay.random import RandomGameplayStrategy
from society.strategies.gameplay.tft import TitForTat
from society.strategies.gameplay.qlearning import TabularQLearningGameplayStrategy
from society.visualisation.network import *
from society.action import Action

from tqdm import tqdm

# Default size (width, height) applied to every figure created after this cell.
plt.rcParams.update({"figure.figsize": (10, 6)})

In [2]:
def generate_population(communities, size):
    """Create Q-learning agents arranged on a connected caveman graph.

    Args:
        communities: Number of communities, passed as the first argument to
            ``nx.connected_caveman_graph``.
        size: Number of agents per community, passed as the second argument
            to ``nx.connected_caveman_graph``.

    Returns:
        A tuple ``(agents, weights_matrix, G)`` where

        * ``agents`` is a list of ``communities * size`` ``Agent`` objects,
          each driven by its own ``TabularQLearningGameplayStrategy`` with
          ``lookback=1``;
        * ``weights_matrix`` is a square NumPy array that holds ``1.0`` for
          every pair of agents joined by an edge of ``G`` (in both
          directions) and ``0.0`` everywhere else;
        * ``G`` is the graph itself, with a ``"weight"`` attribute on every
          edge mirroring the matrix entry.
    """
    population = communities * size

    # One independent learner per node; the agent's index doubles as its
    # row/column in the weights matrix.
    agents = [
        Agent(TabularQLearningGameplayStrategy(lookback=1), i, population)
        for i in range(population)
    ]

    G = nx.connected_caveman_graph(communities, size)

    weights_matrix = np.zeros((population, population))
    for u, v, d in G.edges(data=True):
        # Edges are undirected, so the matrix is filled symmetrically.
        weights_matrix[u, v] = weights_matrix[v, u] = 1.0
        # Keep the graph's edge attribute in sync with the matrix. The entry
        # was written on the line above, so this lookup cannot fail and needs
        # no exception handler.
        d["weight"] = weights_matrix[u, v]

    return agents, weights_matrix, G

In [3]:
# Labels for the four entries of each agent's greedy policy, in the order the
# policy string is written out below.
ACTION_LABELS = ["(C, C)", "(C, D)", "(D, C)", "(D, D)"]
POPULATION = (2, 16)  # (communities, size) as unpacked into generate_population
ROUNDS = 15_000  # rounds played per run
RUNS = 20  # independent repetitions, each with a freshly generated population

# NOTE(review): no random seed is set in the visible cells, so the numbers
# printed here will differ between executions.

# One entry per run: a list of (policy string, cumulative reward) tuples,
# ordered from the best-scoring agent to the worst.
results = []

for run in range(RUNS):
    # Generate a new population
    agents, weights_matrix, G = generate_population(*POPULATION)

    # Run a number of rounds
    sim = WeightedNetworkSimulation(agents, weights_matrix)
    sim.reset()
    for i in tqdm(range(ROUNDS), desc=f"Run {run + 1}"):
        sim.play_round()

    # Total the rewards recorded in every inner entry of sim.rewards; an
    # empty entry contributes 0.
    cumulative_rewards_matrix = [
        [sum(r) if len(r) > 0 else 0 for r in agent] for agent in sim.rewards
    ]

    # Rank agents by cumulative reward, highest first
    ranked_partners = sorted(
        [(i, sum(cumulative_rewards_matrix[i])) for i in range(len(agents))],
        key=lambda x: x[1],
        reverse=True,
    )

    print(f"BEST: {ranked_partners[0][1]:<12} WORST: {ranked_partners[-1][1]}")

    # Find the greedy policy of each agent (argmax over the last axis of its
    # Q-table), paired with that agent's cumulative reward
    policies = [
        (agents[partner[0]].gameplay_strategy._q_table.argmax(axis=-1), partner[1])
        for partner in ranked_partners
    ]

    # Log the performance of each policy. The policy is spelled as one
    # character per ACTION_LABELS entry: index 0 -> 'C', index 1 -> 'D'.
    results.append([
        (
            ''.join(('C', 'D')[policy[0][i]] for i in range(len(ACTION_LABELS))),
            policy[1],
        )
        for policy in policies
    ])


Run 1: 100%|██████████| 15000/15000 [00:27<00:00, 541.81it/s]


BEST: 69573        WORST: 52335


Run 2: 100%|██████████| 15000/15000 [00:27<00:00, 541.92it/s]


BEST: 71456        WORST: 52993


Run 3: 100%|██████████| 15000/15000 [00:27<00:00, 539.53it/s]


BEST: 70363        WORST: 53373


Run 4: 100%|██████████| 15000/15000 [00:28<00:00, 533.20it/s]


BEST: 67284        WORST: 55281


Run 5: 100%|██████████| 15000/15000 [00:32<00:00, 463.53it/s]


BEST: 68681        WORST: 56277


Run 6: 100%|██████████| 15000/15000 [00:32<00:00, 459.95it/s]


BEST: 71413        WORST: 57264


Run 7: 100%|██████████| 15000/15000 [00:28<00:00, 534.76it/s]


BEST: 64959        WORST: 52450


Run 8: 100%|██████████| 15000/15000 [00:34<00:00, 438.22it/s]


BEST: 68194        WORST: 52884


Run 9: 100%|██████████| 15000/15000 [00:31<00:00, 482.44it/s]


BEST: 69905        WORST: 54630


Run 10: 100%|██████████| 15000/15000 [00:27<00:00, 549.73it/s]


BEST: 70548        WORST: 48551


Run 11: 100%|██████████| 15000/15000 [00:28<00:00, 519.61it/s]


BEST: 72668        WORST: 53311


Run 12: 100%|██████████| 15000/15000 [00:27<00:00, 536.06it/s]


BEST: 72168        WORST: 57985


Run 13: 100%|██████████| 15000/15000 [00:24<00:00, 623.49it/s]


BEST: 65459        WORST: 53102


Run 14: 100%|██████████| 15000/15000 [00:24<00:00, 600.81it/s]


BEST: 68088        WORST: 53828


Run 15: 100%|██████████| 15000/15000 [00:23<00:00, 633.24it/s]


BEST: 66530        WORST: 54626


Run 16: 100%|██████████| 15000/15000 [00:23<00:00, 641.11it/s]


BEST: 68262        WORST: 52882


Run 17: 100%|██████████| 15000/15000 [00:26<00:00, 560.54it/s]


BEST: 68306        WORST: 53571


Run 18: 100%|██████████| 15000/15000 [00:25<00:00, 577.39it/s]


BEST: 61984        WORST: 50692


Run 19: 100%|██████████| 15000/15000 [00:22<00:00, 663.81it/s]


BEST: 57882        WORST: 48951


Run 20: 100%|██████████| 15000/15000 [00:25<00:00, 589.86it/s]

BEST: 66100        WORST: 54753





In [4]:
# For every policy string, gather the ranks it achieved across all runs.
policy_ranks = {}

for result in results:
    # Each result list is ordered best-first, so the 1-based position of an
    # entry is that agent's rank within its run. The score is not needed here.
    for rank, (policy, _score) in enumerate(result, start=1):
        policy_ranks.setdefault(policy, []).append(rank)

mean_policy_ranks = {policy: np.mean(ranks) for policy, ranks in policy_ranks.items()}

print(ACTION_LABELS)

# Print policies from best (lowest) mean rank to worst. A fixed precision keeps
# the number within the 16-character column; the bare float repr could exceed
# it and push the policy string out of alignment.
for policy in sorted(mean_policy_ranks, key=lambda x: mean_policy_ranks[x]):
    print(f"{mean_policy_ranks[policy]:<16.3f} {policy}")

5.857142857142857 DCDD
7.333333333333333 CCDD
10.0             DCCD
10.0             DCCC
12.25            CCCC
13.3125          DDCD
15.661290322580646 CDDC
15.714285714285714 DDCC
16.0             DCDC
16.192307692307693 CDDD
16.3125          CDCC
17.29032258064516 DDDD
17.88            CDCD
20.692307692307693 DDDC
24.75            CCDC
29.0             CCCD
