In [1]:
import matplotlib.pyplot as plt
import numpy as np
from random import random

from society.agent import Agent, TrainableAgent
from society.simulations.weighted import WeightedNetworkSimulation
from society.strategies.gameplay.constant import AllC, AllD
from society.strategies.gameplay.random import RandomGameplayStrategy
from society.strategies.gameplay.tft import TitForTat
from society.strategies.gameplay.qlearning import TabularQLearningGameplayStrategy
from society.visualisation.network import *
from society.action import Action

from tqdm import tqdm

# Default (width, height) in inches for every matplotlib figure created in this notebook.
plt.rcParams["figure.figsize"] = (10, 6)

In [2]:
def generate_population(communities, size):
    """Create Q-learning agents laid out on a connected caveman graph.

    Args:
        communities: Number of cliques passed to ``nx.connected_caveman_graph``.
        size: Number of nodes in each clique.

    Returns:
        A tuple ``(agents, weights_matrix, G)``:

        * ``agents`` -- list of ``communities * size`` ``Agent`` objects, each
          built with its own ``TabularQLearningGameplayStrategy(lookback=2)``,
          its index and the population size.
        * ``weights_matrix`` -- square NumPy array of shape
          ``(population, population)`` holding 1.0 for every pair of nodes
          joined by an edge of ``G`` (in both directions) and 0.0 elsewhere.
        * ``G`` -- the graph itself, with each edge's ``"weight"`` attribute
          set to the matching matrix entry.
    """
    population = communities * size

    agents = [
        Agent(TabularQLearningGameplayStrategy(lookback=2), i, population)
        for i in range(population)
    ]

    # NOTE(review): `nx` is not imported explicitly in this notebook; presumably
    # it is re-exported by the star import from society.visualisation.network
    # -- confirm, or add an explicit `import networkx as nx`.
    G = nx.connected_caveman_graph(communities, size)

    weights_matrix = np.zeros((population, population))
    for u, v, d in G.edges(data=True):
        # Undirected edge: mirror the weight across the diagonal.
        weights_matrix[u, v] = weights_matrix[v, u] = 1.0
        # The entry was written on the line above, so reading it back cannot
        # fail; no exception handling is needed here.
        d["weight"] = weights_matrix[u, v]

    return agents, weights_matrix, G

In [4]:
# --- Experiment configuration ---
# Labels for the four (C/D, C/D) action pairs; only their count is used in this cell.
ACTION_LABELS = ["(C, C)", "(C, D)", "(D, C)", "(D, D)"]
POPULATION = (2, 16)  # (communities, size), unpacked into generate_population
ROUNDS = 10_000  # rounds played in every run
N_RUNS = 20  # number of independently generated populations

# One entry per run: a list of (policy string, cumulative reward) tuples,
# ordered from the highest-earning agent to the lowest.
results = []

for run in range(N_RUNS):
    # Generate a new population
    agents, weights_matrix, G = generate_population(*POPULATION)

    # Run a number of rounds
    sim = WeightedNetworkSimulation(agents, weights_matrix)
    sim.reset()
    for _round in tqdm(range(ROUNDS), desc=f"Run {run + 1}"):
        sim.play_round()

    # Rank agents by cumulative reward.
    # sum() of an empty sequence is already 0, so no emptiness guard is needed.
    cumulative_rewards_matrix = [
        [sum(rewards) for rewards in agent_rewards] for agent_rewards in sim.rewards
    ]
    ranked_partners = sorted(
        [(i, sum(cumulative_rewards_matrix[i])) for i in range(len(agents))],
        key=lambda x: x[1],
        reverse=True,
    )

    print(f"BEST: {ranked_partners[0][1]:<12} WORST: {ranked_partners[-1][1]}")

    # Find the policy of each agent: the argmax over the last axis of its Q-table,
    # paired with that agent's cumulative reward.
    # NOTE(review): this reads the private `_q_table` attribute of the strategy;
    # a public accessor would be safer if one exists.
    policies = [
        (agents[index].gameplay_strategy._q_table.argmax(axis=-1), reward)
        for index, reward in ranked_partners
    ]

    # Log the performance of each policy as a string of 'C'/'D' characters, one per
    # state, read in row-major order. Two state indices are used here; a table with
    # more state axes would need one extra index (and loop) per axis -- TODO confirm
    # against TabularQLearningGameplayStrategy.
    n_labels = len(ACTION_LABELS)
    results.append([
        (
            ''.join(
                ('C', 'D')[greedy_actions[i, j]]
                for i in range(n_labels)
                for j in range(n_labels)
            ),
            reward,
        )
        for greedy_actions, reward in policies
    ])


Run 1: 100%|██████████| 10000/10000 [00:18<00:00, 542.94it/s]


BEST: 64464        WORST: 53424


Run 2: 100%|██████████| 10000/10000 [00:17<00:00, 582.34it/s]


BEST: 63425        WORST: 56604


Run 3: 100%|██████████| 10000/10000 [00:17<00:00, 562.72it/s]


BEST: 69070        WORST: 55507


Run 4: 100%|██████████| 10000/10000 [00:20<00:00, 492.89it/s]


BEST: 67031        WORST: 53341


Run 5: 100%|██████████| 10000/10000 [00:17<00:00, 556.90it/s]


BEST: 66742        WORST: 53087


Run 6: 100%|██████████| 10000/10000 [00:18<00:00, 540.42it/s]


BEST: 69124        WORST: 53525


Run 7: 100%|██████████| 10000/10000 [00:20<00:00, 483.55it/s]


BEST: 64872        WORST: 51276


Run 8: 100%|██████████| 10000/10000 [00:21<00:00, 476.05it/s]


BEST: 65355        WORST: 51852


Run 9: 100%|██████████| 10000/10000 [00:23<00:00, 433.21it/s]


BEST: 69291        WORST: 56160


Run 10: 100%|██████████| 10000/10000 [00:19<00:00, 511.67it/s]


BEST: 66685        WORST: 55695


Run 11: 100%|██████████| 10000/10000 [00:17<00:00, 560.51it/s]


BEST: 63768        WORST: 55558


Run 12: 100%|██████████| 10000/10000 [00:19<00:00, 515.15it/s]


BEST: 65611        WORST: 51166


Run 13: 100%|██████████| 10000/10000 [00:19<00:00, 507.23it/s]


BEST: 66894        WORST: 55435


Run 14: 100%|██████████| 10000/10000 [00:19<00:00, 516.47it/s]


BEST: 69758        WORST: 54893


Run 15: 100%|██████████| 10000/10000 [00:19<00:00, 508.65it/s]


BEST: 64566        WORST: 53834


Run 16: 100%|██████████| 10000/10000 [00:20<00:00, 478.95it/s]


BEST: 69057        WORST: 51904


Run 17: 100%|██████████| 10000/10000 [00:19<00:00, 500.52it/s]


BEST: 67885        WORST: 54148


Run 18: 100%|██████████| 10000/10000 [00:19<00:00, 519.10it/s]


BEST: 68136        WORST: 52849


Run 19: 100%|██████████| 10000/10000 [00:17<00:00, 568.67it/s]


BEST: 66381        WORST: 55037


Run 20: 100%|██████████| 10000/10000 [00:20<00:00, 484.45it/s]

BEST: 65417        WORST: 54546





In [8]:
# Map every distinct policy string to the list of 1-based ranks it achieved,
# one rank per appearance across all runs in `results`.
policy_ranks = {}

for run_result in results:
    for position, (label, _score) in enumerate(run_result, start=1):
        policy_ranks.setdefault(label, []).append(position)

# Average rank per policy string (lower is better).
mean_policy_ranks = {}
for label, positions in policy_ranks.items():
    mean_policy_ranks[label] = np.mean(positions)

# Print the policies from best to worst mean rank.
for label in sorted(mean_policy_ranks, key=mean_policy_ranks.get):
    print(f"{mean_policy_ranks[label]:<16} {label}")

1.0              DCDCCCCCCCCCCCDCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCDCCDCDCCCCCCCCCCCCC
1.0              DDCCCCCCCCCCCCCCCCDCCDDCDCCCCCCCDCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCC
1.0              DCCCDCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCDCCCCCCCCCC
1.0              DCCCDCDCCCCCCCCCCCCCCCDCCCCCCCCCCCDCCCCDCCDCCCCCDCDCCCCCCCCCDCCC
1.0              DCCCCCCCDCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCDCCCCCCC
1.0              DCCCDCDCDCCCCCCCDCCCCCCCCCCCCCCCCCCDCDCCDCCDCCCCCCCCDCCCCCCDCCCC
1.0              DCCCDCCCDCCCCCCCCDCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCDDCC
1.0              DCCCCCCCDCCCCCCCDCCCCCCCCCCCCCDCDCCCCCDCCCCCCCCCDCCCCCDCDCCCCCCC
1.0              DCCCDCCCDCCDCCCCCCCCCCCCCCDCCCCCDCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCC
1.0              DCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCDCCCCCDCCCCCCC
1.0              CCCCDCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCC
1.0              DDCCCCCCDCDCCDCCCDDCDCCCCCDCDCCCCCCCCCCDDCCCCCCCDCCDCCCCCCCCDCCC
1.0             