In [9]:
import matplotlib.pyplot as plt
import numpy as np
from random import random

from society.agent import Agent, TrainableAgent
from society.simulations.weighted import WeightedNetworkSimulation
from society.strategies.gameplay.constant import AllC, AllD
from society.strategies.gameplay.random import RandomGameplayStrategy
from society.strategies.gameplay.tft import TitForTat
from society.strategies.gameplay.qlearning import TabularQLearningGameplayStrategy
from society.visualisation.network import *
from society.action import Action

from tqdm import tqdm

# Notebook-wide plotting configuration: default (width, height) for every
# matplotlib figure created after this cell runs.
plt.rcParams["figure.figsize"] = (10, 6)

In [10]:
def generate_population(communities, size):
    """Create Q-learning agents arranged on a connected caveman graph.

    Args:
        communities: Number of cliques, forwarded to
            ``nx.connected_caveman_graph``.
        size: Number of nodes in each clique, forwarded to
            ``nx.connected_caveman_graph``.

    Returns:
        A tuple ``(agents, weights_matrix, G)``:

        * ``agents`` -- list of ``communities * size`` ``Agent`` objects, each
          built with its own ``TabularQLearningGameplayStrategy(lookback=2)``.
        * ``weights_matrix`` -- symmetric NumPy array of shape
          ``(population, population)`` holding 1.0 for every edge of ``G`` and
          0.0 everywhere else.
        * ``G`` -- the generated graph, with each edge's ``"weight"`` attribute
          set to the corresponding matrix entry.
    """
    population = communities * size

    # NOTE(review): the second and third Agent arguments look like the agent's
    # index and the population size -- confirm against society.agent.Agent.
    agents = [
        Agent(TabularQLearningGameplayStrategy(lookback=2), i, population)
        for i in range(population)
    ]

    # NOTE(review): `nx` is not imported explicitly in the visible cells; it is
    # presumably re-exported by `from society.visualisation.network import *`.
    G = nx.connected_caveman_graph(communities, size)

    weights_matrix = np.zeros((population, population))
    for u, v, d in G.edges(data=True):
        # Mirror each undirected edge into both halves of the matrix.
        weights_matrix[u, v] = weights_matrix[v, u] = 1.0
        # The entry was written on the line above, so reading it back cannot
        # fail; the former bare `except:` fallback was unreachable.
        d["weight"] = weights_matrix[u, v]

    return agents, weights_matrix, G

In [11]:
# Experiment configuration.
ACTION_LABELS = ["(C, C)", "(C, D)", "(D, C)", "(D, D)"]
POPULATION = (2, 16)  # (communities, size), unpacked into generate_population
ROUNDS = 25_000  # calls to sim.play_round() per run
RUNS = 20  # number of independent populations to simulate

# NOTE(review): no random seed is set in the visible cells, so repeated
# executions of this cell are not expected to reproduce the same numbers.

# One entry per run: a list of (policy string, cumulative reward) pairs, ordered
# from the highest-scoring agent to the lowest.
results = []

for run in range(RUNS):
    # Generate a new population
    agents, weights_matrix, G = generate_population(*POPULATION)

    # Run a number of rounds
    sim = WeightedNetworkSimulation(agents, weights_matrix)
    sim.reset()
    for i in tqdm(range(ROUNDS), desc=f"Run {run + 1}"):
        sim.play_round()

    # Rank agents by cumulative reward. Each entry of sim.rewards is iterated as
    # a collection of per-partner reward sequences; empty sequences count as 0.
    cumulative_rewards_matrix = [
        [sum(r) if len(r) > 0 else 0 for r in agent] for agent in sim.rewards
    ]
    ranked_partners = sorted(
        [(i, sum(cumulative_rewards_matrix[i])) for i in range(len(agents))],
        key=lambda x: x[1],
        reverse=True,
    )

    print(f"BEST: {ranked_partners[0][1]:<12} WORST: {ranked_partners[-1][1]}")

    # Find the policy of each agent: the argmax over the last axis of its
    # Q-table, paired with that agent's total reward, in ranked order.
    # NOTE(review): presumably the last axis indexes actions -- confirm against
    # TabularQLearningGameplayStrategy.
    policies = [
        (agents[partner[0]].gameplay_strategy._q_table.argmax(axis=-1), partner[1])
        for partner in ranked_partners
    ]

    # Log the performance of each policy. The 2-D argmax table is flattened
    # row by row into a string, writing index 0 as "C" and index 1 as "D".
    # Both axes are walked over len(ACTION_LABELS) entries.
    n_states = len(ACTION_LABELS)
    results.append([
        (
            "".join(
                ("C", "D")[greedy_actions[i, j]]
                for i in range(n_states)
                for j in range(n_states)
            ),
            score,
        )
        for greedy_actions, score in policies
    ])


Run 1: 100%|██████████| 10000/10000 [00:18<00:00, 548.16it/s]


BEST: 68520        WORST: 51808


Run 2: 100%|██████████| 10000/10000 [00:16<00:00, 604.26it/s]


BEST: 68481        WORST: 48569


Run 3: 100%|██████████| 10000/10000 [00:17<00:00, 557.53it/s]


BEST: 68804        WORST: 49499


Run 4: 100%|██████████| 10000/10000 [00:17<00:00, 580.11it/s]


BEST: 70008        WORST: 49330


Run 5: 100%|██████████| 10000/10000 [00:17<00:00, 580.72it/s]


BEST: 66126        WORST: 51515


Run 6: 100%|██████████| 10000/10000 [00:16<00:00, 602.26it/s]


BEST: 77675        WORST: 49707


Run 7: 100%|██████████| 10000/10000 [00:17<00:00, 583.91it/s]


BEST: 63869        WORST: 51240


Run 8: 100%|██████████| 10000/10000 [00:17<00:00, 571.44it/s]


BEST: 68736        WORST: 50060


Run 9: 100%|██████████| 10000/10000 [00:18<00:00, 528.76it/s]


BEST: 68568        WORST: 49144


Run 10: 100%|██████████| 10000/10000 [00:22<00:00, 451.12it/s]


BEST: 72380        WORST: 47074


Run 11: 100%|██████████| 10000/10000 [00:22<00:00, 454.08it/s]


BEST: 66695        WORST: 49104


Run 12: 100%|██████████| 10000/10000 [00:17<00:00, 561.89it/s]


BEST: 70843        WORST: 52897


Run 13: 100%|██████████| 10000/10000 [00:16<00:00, 593.05it/s]


BEST: 71242        WORST: 51239


Run 14: 100%|██████████| 10000/10000 [00:16<00:00, 601.32it/s]


BEST: 65509        WORST: 55621


Run 15: 100%|██████████| 10000/10000 [00:16<00:00, 596.58it/s]


BEST: 71483        WORST: 52640


Run 16: 100%|██████████| 10000/10000 [00:19<00:00, 502.29it/s]


BEST: 70292        WORST: 53774


Run 17: 100%|██████████| 10000/10000 [00:20<00:00, 489.33it/s]


BEST: 64109        WORST: 54446


Run 18: 100%|██████████| 10000/10000 [00:17<00:00, 564.88it/s]


BEST: 68226        WORST: 52336


Run 19: 100%|██████████| 10000/10000 [00:16<00:00, 596.66it/s]


BEST: 70300        WORST: 51922


Run 20: 100%|██████████| 10000/10000 [00:16<00:00, 605.14it/s]

BEST: 70959        WORST: 52432





In [12]:
# Map every policy string to the list of 1-based ranks it achieved, gathered
# across all runs stored in `results`.
policy_ranks = {}

for result in results:
    for rank, (policy, score) in enumerate(result):
        policy_ranks.setdefault(policy, []).append(rank + 1)

# Average rank per policy (lower is better).
mean_policy_ranks = {}
for policy, ranks in policy_ranks.items():
    mean_policy_ranks[policy] = np.mean(ranks)

# Print policies from best to worst mean rank; ties keep first-seen order.
for policy, mean_rank in sorted(mean_policy_ranks.items(), key=lambda item: item[1]):
    print(f"{mean_rank:<16} {policy}")

1.0              DCCDCCCCDCCCDCCC
1.0              DCDCCDDCDCDCCCCC
1.0              DCCDDDCCDCCCCDCC
1.0              DDDCCCCCDCDCDDCC
1.0              DDCCDCCCDCCDCCCC
1.0              CDCCCDDCCDDCCCCC
1.0              DCDCDCCCDCDCDCCC
1.0              DDCDCCDCDCCCCCCD
1.0              DDDCCCCCDCCCCCCC
1.0              DDCCCCCCDCCCDCCC
2.0              DCCCCCCCDCCDDCCC
2.0              DCDCCCCCDCDCDCCC
2.0              DCCCCCDCCCCCDCCC
2.0              DCDCDCDCDCCDDCCC
2.0              DCDCCCCDCCDCCCCC
2.0              DCCCCCCDCCCCCCDC
2.0              DDDDCCCDCDCCCDCC
2.0              DDCDCDDCDCCCCCDC
2.0              DDCCCCCCDCCCCDDC
2.0              DDDCDDCCDCCCCCCC
2.0              DCDCCCCCDDCCCCCC
2.0              DCDDCCDCCCDCCCDD
2.5              DCCCDDCCCCCCCCCC
3.0              DCCCDCCCDCCDDDCC
3.0              DCDCDCCCCCDCDCCC
3.0              DDCCCDCCDCCCCDCC
3.0              DCDCDCCCDCCCDCCD
3.0              DCDCDDCCDCCCDCCC
3.0              DCCCCDCDDCCDCCCC
3.0           