In [1]:
from itertools import chain
from random import random

import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm

from society.action import Action
from society.agent import Agent, TrainableAgent
from society.simulations.weighted import WeightedNetworkSimulation
from society.strategies.gameplay.constant import AllC, AllD
from society.strategies.gameplay.qlearning import TabularQLearningGameplayStrategy
from society.strategies.gameplay.random import RandomGameplayStrategy
from society.strategies.gameplay.tft import TitForTat
from society.visualisation.network import *

# Default (width, height) applied to every matplotlib figure created in this notebook.
plt.rcParams["figure.figsize"] = (10, 6)

In [2]:
def generate_population(communities, size):
    """Create a population of Q-learning agents on a connected caveman graph.

    Args:
        communities: Number of cliques, forwarded to
            ``nx.connected_caveman_graph``.
        size: Number of agents per clique, forwarded to
            ``nx.connected_caveman_graph``.

    Returns:
        A tuple ``(agents, weights_matrix, G)`` where ``agents`` is a list of
        ``communities * size`` ``Agent`` objects, ``weights_matrix`` is a
        square NumPy array holding ``1.0`` for every pair of agents joined by
        an edge of ``G`` (in both directions) and ``0.0`` elsewhere, and ``G``
        is the graph itself with a ``"weight"`` attribute set on every edge.
    """
    population = communities * size

    agents = [
        Agent(TabularQLearningGameplayStrategy(lookback=2), i, population)
        for i in range(population)
    ]

    # NOTE(review): ``nx`` is not imported by name in this notebook; presumably
    # it is re-exported by the ``society.visualisation.network`` star import --
    # confirm, or import networkx explicitly.
    G = nx.connected_caveman_graph(communities, size)

    # Node labels are used directly as row/column indices into the matrix.
    weights_matrix = np.zeros((population, population))
    for u, v, d in G.edges(data=True):
        weights_matrix[u, v] = weights_matrix[v, u] = 1.0
        # Mirror the matrix entry onto the edge. The value was assigned on the
        # line above, so no fallback is needed (a bare ``except`` here would
        # only mask genuine errors).
        d["weight"] = weights_matrix[u, v]

    return agents, weights_matrix, G

In [3]:
def compute_policies(agents, rankings):
    """Turn each ranked agent's Q-table into a 16-character C/D policy string.

    Args:
        agents: Sequence of agents; each must expose
            ``gameplay_strategy._q_table`` with an ``argmax`` method.
        rankings: Iterable of entries whose element ``0`` is an index into
            ``agents`` and whose element ``1`` is that agent's score.

    Returns:
        A list, in the same order as ``rankings``, of ``(policy, score)``
        tuples. ``policy`` concatenates, row by row over a hard-coded 4 x 4
        grid of the first two Q-table axes, ``"C"`` where the argmax over the
        last axis is ``0`` and ``"D"`` where it is ``1``.
    """
    action_symbols = ("C", "D")
    state_grid = [(row, col) for row in range(4) for col in range(4)]

    # First pass: greedy action index per state for every ranked agent.
    greedy_tables = []
    for entry in rankings:
        q_table = agents[entry[0]].gameplay_strategy._q_table
        greedy_tables.append((q_table.argmax(axis=-1), entry[1]))

    # Second pass: spell each greedy table out as a string of action symbols.
    labelled = []
    for greedy, score in greedy_tables:
        symbols = [action_symbols[greedy[row, col]] for row, col in state_grid]
        labelled.append(("".join(symbols), score))

    return labelled

def calculate_cooperativeness(history):
    """Return the fraction of entries in ``history`` equal to ``Action.COOPERATE``.

    Args:
        history: A list of actions (anything supporting ``len`` and ``count``).

    Returns:
        ``history.count(Action.COOPERATE) / len(history)``, or ``0.0`` when
        ``history`` is empty.
    """
    # An agent that never acted has nothing to average; report 0.0 rather than
    # raising ZeroDivisionError so that rankings can still be built.
    if len(history) == 0:
        return 0.0

    return history.count(Action.COOPERATE) / len(history)

In [4]:
# Payoff labels; positionally these correspond to the outcome pairs
# (C, C), (C, D), (D, C), (D, D).
PAYOFF_LABELS = ["R", "S", "T", "P"]

POPULATION = (2, 32)  # (communities, size) forwarded to generate_population
ROUNDS = 10_000  # rounds played per run
RUNS = 20  # number of independent runs, each with a fresh population

# NOTE(review): no random seed is set anywhere in this notebook, so the numbers
# below will differ between executions -- confirm whether that is intended.

# One entry per run: the (policy, score) list returned by compute_policies,
# ordered from the best-ranked agent to the worst.
cumulative_reward_results = []
cooperativeness_results = []

for run in range(RUNS):
    # Generate a new population
    agents, weights_matrix, G = generate_population(*POPULATION)

    # Run a number of rounds
    sim = WeightedNetworkSimulation(agents, weights_matrix)
    sim.reset()
    for _round in tqdm(range(ROUNDS), desc=f"Run {run + 1}"):
        sim.play_round()

    # Rank agents by cumulative reward (summed over every entry of the agent's
    # row in sim.rewards), highest first.
    cumulative_rewards_matrix = [
        [sum(r) if len(r) > 0 else 0 for r in agent] for agent in sim.rewards
    ]
    cumulative_reward_rankings = sorted(
        [(i, sum(cumulative_rewards_matrix[i])) for i in range(len(agents))],
        key=lambda x: x[1],
        reverse=True,
    )

    tqdm.write(
        f"BEST REWARD: {cumulative_reward_rankings[0][1]:<28} WORST REWARD: {cumulative_reward_rankings[-1][1]}"
    )

    cumulative_reward_results.append(compute_policies(agents, cumulative_reward_rankings))

    # Rank agents by cooperativeness: flatten each agent's action histories
    # into one list and take the share of cooperative actions, highest first.
    cooperativeness_rankings = sorted(
        [
            (i, calculate_cooperativeness(list(chain.from_iterable(history))))
            for i, history in enumerate(sim.action_histories)
        ],
        key=lambda x: x[1],
        reverse=True,
    )

    tqdm.write(
        f"BEST COOPERATOR: {cooperativeness_rankings[0][1]:<24} WORST COOPERATOR: {cooperativeness_rankings[-1][1]}"
    )

    cooperativeness_results.append(compute_policies(agents, cooperativeness_rankings))


Run 1:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 75325                        WORST REWARD: 49631
BEST COOPERATOR: 0.9454235930302424       WORST COOPERATOR: 0.31669008430349255


Run 2:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 76127                        WORST REWARD: 53433
BEST COOPERATOR: 0.9483990209301164       WORST COOPERATOR: 0.38623673222999144


Run 3:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 78211                        WORST REWARD: 50439
BEST COOPERATOR: 0.948892674616695        WORST COOPERATOR: 0.27893719327814404


Run 4:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 80881                        WORST REWARD: 49649
BEST COOPERATOR: 0.9482320083786345       WORST COOPERATOR: 0.19189701626584174


Run 5:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 74016                        WORST REWARD: 48071
BEST COOPERATOR: 0.9448019304242912       WORST COOPERATOR: 0.32847007095033476


Run 6:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 68512                        WORST REWARD: 53378
BEST COOPERATOR: 0.9494615577260206       WORST COOPERATOR: 0.5491598319663933


Run 7:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 71036                        WORST REWARD: 54114
BEST COOPERATOR: 0.947268074858261        WORST COOPERATOR: 0.5515310497906473


Run 8:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 70617                        WORST REWARD: 51694
BEST COOPERATOR: 0.9478464840045915       WORST COOPERATOR: 0.4746389485782819


Run 9:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 70493                        WORST REWARD: 50912
BEST COOPERATOR: 0.9433980869736712       WORST COOPERATOR: 0.4623392222611481


Run 10:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 69564                        WORST REWARD: 54956
BEST COOPERATOR: 0.9486639594926556       WORST COOPERATOR: 0.5743256743256743


Run 11:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 84894                        WORST REWARD: 51722
BEST COOPERATOR: 0.9500725398969433       WORST COOPERATOR: 0.19099099099099098


Run 12:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 74035                        WORST REWARD: 50566
BEST COOPERATOR: 0.9494458097196449       WORST COOPERATOR: 0.37892343433344866


Run 13:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 74190                        WORST REWARD: 54306
BEST COOPERATOR: 0.9453513486362951       WORST COOPERATOR: 0.47393788819875776


Run 14:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 72091                        WORST REWARD: 52294
BEST COOPERATOR: 0.9432170348895331       WORST COOPERATOR: 0.499252094136418


Run 15:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 80307                        WORST REWARD: 50260
BEST COOPERATOR: 0.9432483381287826       WORST COOPERATOR: 0.24818064001595055


Run 16:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 72913                        WORST REWARD: 50670
BEST COOPERATOR: 0.9353873766078372       WORST COOPERATOR: 0.37704262655539456


Run 17:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 73265                        WORST REWARD: 52970
BEST COOPERATOR: 0.9483481385367801       WORST COOPERATOR: 0.427120185990094


Run 18:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 72109                        WORST REWARD: 53371
BEST COOPERATOR: 0.9475545373045124       WORST COOPERATOR: 0.4595648912228057


Run 19:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 82670                        WORST REWARD: 51636
BEST COOPERATOR: 0.946037014685174        WORST COOPERATOR: 0.251023669230001


Run 20:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 75535                        WORST REWARD: 50573
BEST COOPERATOR: 0.9488001603126096       WORST COOPERATOR: 0.4141877583946758


In [5]:
# For every distinct policy string, collect the 1-based rank it reached in each
# run's cumulative-reward ordering.
cumulative_reward_policy_ranks = {}

for run_result in cumulative_reward_results:
    for position, (policy, _score) in enumerate(run_result, start=1):
        cumulative_reward_policy_ranks.setdefault(policy, []).append(position)

mean_cumulative_reward_policy_ranks = {
    policy: np.mean(positions)
    for policy, positions in cumulative_reward_policy_ranks.items()
}

# Print the policies from the lowest (best) mean rank to the highest.
for policy, mean_rank in sorted(
    mean_cumulative_reward_policy_ranks.items(), key=lambda item: item[1]
):
    print(f"{mean_rank:<24} {policy}")

1.0                      DCDDCCCCCCDCCCDC
1.0                      DCDCCDCCCCDCCCCC
1.0                      DCDCDCDDCCDCDCCC
1.0                      DCDCDCCCDCDDCCDD
1.0                      DCDCCCDDCCDDDCDC
1.0                      DCDCCCCDCCDCCCCC
1.0                      DCDCDCCDCCDCDCCD
1.0                      DCDCCDDCCCDCDCCD
1.0                      DCDCDCCCDCDCCCDC
1.0                      DCDCDCDCCCDCDDCC
1.0                      DDDCCDCCCCDCDDCC
1.0                      DCDCCCCCCDDCDDCC
1.0                      DCDCCDCDCCDCCCCC
1.0                      DCDDCCDCCCDCCCDC
1.0                      DCDCCCDCCDDCCCCC
2.0                      DCDCCCCCDDDCDCCC
2.0                      DCDCCCCCDCDCCCDC
2.0                      DCDDCCCCCDDCCCCC
2.0                      DCCDDCCCDCCCCCCD
2.0                      DCDCCDCCDCDCCCDD
2.0                      DCDCCCDDCCDCCCCC
2.0                      DDCCCCDCDCCCCCCC
2.0                      DCDDCCCDDCDDCCCC
2.0                      DCDCCCCCC

In [6]:
# For every distinct policy string, collect the 1-based rank and the
# cooperativeness score it obtained in each run.
cooperativeness_policy_ranks = {}
cooperativeness_policy_scores = {}

for run_result in cooperativeness_results:
    for position, (policy, score) in enumerate(run_result, start=1):
        cooperativeness_policy_ranks.setdefault(policy, []).append(position)
        cooperativeness_policy_scores.setdefault(policy, []).append(score)

# Each value is a (mean, number of occurrences) pair.
mean_cooperativeness_policy_ranks = {
    policy: (np.mean(positions), len(positions))
    for policy, positions in cooperativeness_policy_ranks.items()
}

mean_cooperativeness_policy_scores = {
    policy: (np.mean(values), len(values))
    for policy, values in cooperativeness_policy_scores.items()
}

# Columns: mean rank, policy, occurrences, mean score -- ordered by mean rank,
# lowest (best) first.
for policy, (mean_rank, occurrences) in sorted(
    mean_cooperativeness_policy_ranks.items(), key=lambda item: item[1][0]
):
    mean_score = mean_cooperativeness_policy_scores[policy][0]
    print(f"{mean_rank:<24} {policy:<24} {occurrences:<8} {mean_score}")

2.0                      CCCCCCCCCCCDCDCD         1        0.9470290645769269
4.0                      CCCCCCCCCCCCCDCD         1        0.9379686094171749
6.0                      CCCCCCCCCCCDCCCD         3        0.9459667395730421
7.0                      CCCCCCCCCCCDCCDC         2        0.9455700291090428
7.0                      CCCDDCCCCCCDCCCD         1        0.9329301814183617
8.0                      CCCDCCCCCCCCCCDC         1        0.9398882570088796
9.0                      DCCCDCCDCCCDCCCC         1        0.9322379885486681
9.5                      CCCCCCCDCCCCCCCD         2        0.9427959529042521
9.666666666666666        CCCDCCCDCCCDCCCC         3        0.9406365230148847
10.0                     CCCCCCCCCCCDCDCC         1        0.9431658291457287
11.0                     CCCCDCCCCCCDCDCC         1        0.9380722406680753
11.0                     DCCCDCCDCCCCCCCC         2        0.9351167159979236
11.2                     DCCCCCCCCCCCCDCC         5        0.939

In [7]:
# Columns: mean rank, policy, occurrences, mean score -- ordered by mean
# cooperativeness score, highest first.
policies_by_mean_score = sorted(
    mean_cooperativeness_policy_scores.items(),
    key=lambda item: item[1][0],
    reverse=True,
)

for policy, (mean_score, _count) in policies_by_mean_score:
    mean_rank, occurrences = mean_cooperativeness_policy_ranks[policy]
    print(f"{mean_rank:<24} {policy:<24} {occurrences:<8} {mean_score}")

2.0                      CCCCCCCCCCCDCDCD         1        0.9470290645769269
6.0                      CCCCCCCCCCCDCCCD         3        0.9459667395730421
7.0                      CCCCCCCCCCCDCCDC         2        0.9455700291090428
10.0                     CCCCCCCCCCCDCDCC         1        0.9431658291457287
9.5                      CCCCCCCDCCCCCCCD         2        0.9427959529042521
27.0                     CCCCCCDCCCCCDCCD         1        0.9419820717131474
21.0                     CCCDCCCCCCCDCDCC         1        0.9418575509999498
21.0                     CCCCCCCCCCCCCDDC         2        0.9412533925487293
15.0                     CCCCCCCCCCCCCDCC         13       0.9412459712552195
16.3                     CCCCCCCCCCCCCCCD         10       0.9411691636129849
17.0                     CCCCCCDCCCCCCCDC         1        0.9407140353512593
9.666666666666666        CCCDCCCDCCCDCCCC         3        0.9406365230148847
14.0                     CCCCCCCDCCCDCCCC         1        0.940