In [1]:
import matplotlib.pyplot as plt
import numpy as np
from random import random

from society.agent import Agent, TrainableAgent
from society.simulations.weighted import WeightedNetworkSimulation
from society.strategies.gameplay.constant import AllC, AllD
from society.strategies.gameplay.random import RandomGameplayStrategy
from society.strategies.gameplay.tft import TitForTat
from society.strategies.gameplay.qlearning import TabularQLearningGameplayStrategy
from society.visualisation.network import *
from society.action import Action

from tqdm import tqdm

# Default figure size, as (width, height) in inches, for figures created after this point.
plt.rcParams["figure.figsize"] = (10, 6)

In [2]:
def generate_population(communities, size):
    """Create Q-learning agents arranged on a connected caveman graph.

    Args:
        communities: First argument forwarded to
            ``nx.connected_caveman_graph`` (the number of cliques).
        size: Second argument forwarded to ``nx.connected_caveman_graph``
            (the number of nodes per clique).

    Returns:
        A tuple ``(agents, weights_matrix, G)``:

        * ``agents`` -- list of ``communities * size`` ``Agent`` objects, each
          built with its own ``TabularQLearningGameplayStrategy(lookback=2)``,
          its index and the population size.
        * ``weights_matrix`` -- ``(population, population)`` NumPy array that
          is 1.0 at ``[u, v]`` and ``[v, u]`` for every graph edge and 0.0
          everywhere else.
        * ``G`` -- the graph, with each edge's ``"weight"`` attribute set to
          the corresponding matrix entry.
    """
    population = communities * size

    agents = [
        Agent(TabularQLearningGameplayStrategy(lookback=2), i, population)
        for i in range(population)
    ]

    # NOTE(review): `nx` is never imported explicitly in this notebook;
    # presumably it is networkx leaking in through
    # `from society.visualisation.network import *` -- confirm and import it
    # explicitly.
    G = nx.connected_caveman_graph(communities, size)

    weights_matrix = np.zeros((population, population))
    for u, v, d in G.edges(data=True):
        # Store the weight symmetrically so both directions of the edge agree.
        weights_matrix[u, v] = weights_matrix[v, u] = 1.0
        # The entry was assigned on the line above, so this lookup cannot
        # fail; the former bare `except:` fallback could only hide real errors.
        d["weight"] = weights_matrix[u, v]

    return agents, weights_matrix, G

In [3]:
# Experiment configuration.
ACTION_LABELS = ["(C, C)", "(C, D)", "(D, C)", "(D, D)"]
POPULATION = (2, 16)  # (communities, size), unpacked into generate_population
ROUNDS = 25_000  # calls to sim.play_round() per run
RUNS = 20  # number of independent populations to simulate

# One entry per run: a list of (policy_string, cumulative_reward) tuples,
# ordered from the highest-earning agent to the lowest.
results = []

for run in range(RUNS):
    # Generate a new population
    agents, weights_matrix, G = generate_population(*POPULATION)

    # Run a number of rounds
    sim = WeightedNetworkSimulation(agents, weights_matrix)
    sim.reset()
    for i in tqdm(range(ROUNDS), desc=f"Run {run + 1}"):
        sim.play_round()

    # Rank agents by cumulative reward. `sum` of an empty sequence is already
    # 0, so no explicit emptiness guard is needed.
    cumulative_rewards_matrix = [
        [sum(r) for r in agent] for agent in sim.rewards
    ]
    ranked_partners = sorted(
        [(i, sum(cumulative_rewards_matrix[i])) for i in range(len(agents))],
        key=lambda x: x[1],
        reverse=True,
    )

    print(f"BEST: {ranked_partners[0][1]:<12} WORST: {ranked_partners[-1][1]}")

    # Find the policy of each agent: the arg-max over the last axis of its
    # Q-table, paired with the agent's cumulative reward.
    policies = [
        (agents[partner[0]].gameplay_strategy._q_table.argmax(axis=-1), partner[1])
        for partner in ranked_partners
    ]

    # Log the performance of each policy. The policy is flattened into a
    # string with one 'C'/'D' character per (row, col) cell of the arg-max
    # table, rows varying slowest.
    # NOTE(review): presumably rows/cols index the joint actions of the two
    # remembered rounds (lookback=2) in ACTION_LABELS order -- confirm.
    results.append([
        (
            "".join(
                ("C", "D")[greedy_actions[row, col]]
                for row in range(len(ACTION_LABELS))
                for col in range(len(ACTION_LABELS))
            ),
            total_reward,
        )
        for greedy_actions, total_reward in policies
    ])


Run 1: 100%|██████████| 25000/25000 [00:53<00:00, 466.09it/s]


BEST: 164233       WORST: 116316


Run 2: 100%|██████████| 25000/25000 [00:51<00:00, 486.29it/s]


BEST: 153287       WORST: 114997


Run 3: 100%|██████████| 25000/25000 [00:51<00:00, 481.53it/s]


BEST: 151923       WORST: 120924


Run 4: 100%|██████████| 25000/25000 [00:51<00:00, 483.49it/s]


BEST: 166422       WORST: 123763


Run 5: 100%|██████████| 25000/25000 [00:51<00:00, 486.33it/s]


BEST: 155286       WORST: 110806


Run 6: 100%|██████████| 25000/25000 [00:54<00:00, 458.50it/s]


BEST: 159418       WORST: 119016


Run 7: 100%|██████████| 25000/25000 [00:51<00:00, 483.02it/s]


BEST: 143108       WORST: 118923


Run 8: 100%|██████████| 25000/25000 [00:53<00:00, 466.35it/s]


BEST: 158508       WORST: 117456


Run 9: 100%|██████████| 25000/25000 [00:55<00:00, 447.68it/s]


BEST: 150010       WORST: 115785


Run 10: 100%|██████████| 25000/25000 [00:58<00:00, 428.26it/s]


BEST: 147145       WORST: 117284


Run 11: 100%|██████████| 25000/25000 [00:51<00:00, 486.57it/s]


BEST: 145410       WORST: 117963


Run 12: 100%|██████████| 25000/25000 [00:59<00:00, 417.16it/s]


BEST: 154041       WORST: 110245


Run 13: 100%|██████████| 25000/25000 [00:54<00:00, 457.57it/s]


BEST: 158578       WORST: 110620


Run 14: 100%|██████████| 25000/25000 [00:54<00:00, 461.29it/s]


BEST: 146139       WORST: 117737


Run 15: 100%|██████████| 25000/25000 [00:55<00:00, 448.75it/s]


BEST: 160944       WORST: 112004


Run 16: 100%|██████████| 25000/25000 [00:53<00:00, 467.37it/s]


BEST: 156263       WORST: 112171


Run 17: 100%|██████████| 25000/25000 [00:57<00:00, 433.50it/s]


BEST: 152625       WORST: 126900


Run 18: 100%|██████████| 25000/25000 [00:52<00:00, 473.01it/s]


BEST: 171228       WORST: 114336


Run 19: 100%|██████████| 25000/25000 [00:57<00:00, 435.02it/s]


BEST: 152662       WORST: 118847


Run 20: 100%|██████████| 25000/25000 [00:52<00:00, 471.86it/s]


BEST: 165659       WORST: 118035


In [4]:
# Gather, for every distinct policy string, the 1-based ranks it achieved
# across all runs (rank 1 = highest cumulative reward within a run).
policy_ranks = {}
for run_result in results:
    for position, (policy, _score) in enumerate(run_result, start=1):
        policy_ranks.setdefault(policy, []).append(position)

# Average rank of each policy over every occurrence.
mean_policy_ranks = {
    policy: np.mean(ranks) for policy, ranks in policy_ranks.items()
}

# Print the policies from best (lowest) mean rank to worst; the sort is
# stable, so ties stay in first-seen order.
for policy, mean_rank in sorted(mean_policy_ranks.items(), key=lambda item: item[1]):
    print(f"{mean_rank:<16} {policy}")

1.0              DDDDDCDCDDDDDCDD
1.0              DCDDCDDCDDCDCCCD
1.0              DCDCCDCCDCDDCCDD
1.0              CCCDDCCCCCCDDCDD
1.0              CCDCDDCCDCCCDCDC
1.0              DDDDDDCDCDDCCDDD
1.0              DDCDDDDCDDCDDCCD
1.0              DCCDCCCDDDCCDDDD
1.0              DCCDDDCCDDCDDDDD
1.0              CDCDCDCCDDCCDDDD
1.0              CDDDCDDDDDDCDDDC
1.0              CDDCCDCDDCCDDCDD
1.0              DCDDDCCCDDDDDCDC
1.0              DCDDCDCDDDDDCCDD
1.0              DCCCDDDDDDCDDCCD
1.0              CCDDCDDCCCCDCCDC
2.0              DCDDDDDCDDDDDDDD
2.0              CDCDDDCCCCCDDCCD
2.0              DDDDCDDDCCDDDDCC
2.0              DCDDDCDCDCDDDCDC
2.0              CCCCDCCDCDDDCDDD
2.0              DCDDDDDCDDDDDDCD
2.0              CDDDDDCCDCCDCDCD
2.0              DCCDCDCDDCDCDCDC
2.0              CDDDDDDDDDCDDCCC
2.0              DCDDDCCDDCDDDCDD
2.0              DCDDDDCCCCDCDDDC
2.0              CDCDDDCCDCDDCCCC
2.0              DDCDCDDDCDDDDCDC
2.0           