In [1]:
from itertools import chain
from random import random

import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm

from society.action import Action
from society.agent import Agent, TrainableAgent
from society.simulations.weighted import WeightedNetworkSimulation
from society.strategies.gameplay.constant import AllC, AllD
from society.strategies.gameplay.qlearning import TabularQLearningGameplayStrategy
from society.strategies.gameplay.random import RandomGameplayStrategy
from society.strategies.gameplay.tft import TitForTat
from society.visualisation.network import *

# Notebook-wide plotting default: every matplotlib figure created below uses a
# 10 x 6 figure size unless a plot overrides it explicitly.
plt.rcParams["figure.figsize"] = (10, 6)

In [2]:
def generate_population(communities, size):
    """Create Q-learning agents arranged on a connected caveman graph.

    Args:
        communities: Number of cliques passed to
            ``nx.connected_caveman_graph``.
        size: Number of nodes per clique passed to
            ``nx.connected_caveman_graph``.

    Returns:
        A tuple ``(agents, weights_matrix, G)`` where

        * ``agents`` is a list of ``communities * size`` ``Agent`` objects,
          each built with its own
          ``TabularQLearningGameplayStrategy(lookback=3)``;
        * ``weights_matrix`` is a square ``numpy`` array of side
          ``communities * size`` holding ``1.0`` at ``[u, v]`` and ``[v, u]``
          for every edge ``(u, v)`` of ``G`` and ``0.0`` elsewhere;
        * ``G`` is the graph itself, with every edge's ``"weight"`` attribute
          set to the corresponding matrix entry.
    """
    population = communities * size

    agents = [
        Agent(TabularQLearningGameplayStrategy(lookback=3), i, population)
        for i in range(population)
    ]

    # NOTE(review): `nx` is not imported explicitly in this notebook; it is
    # presumably re-exported by the star import from
    # society.visualisation.network - confirm, or import networkx directly.
    G = nx.connected_caveman_graph(communities, size)

    weights_matrix = np.zeros((population, population))
    for u, v, d in G.edges(data=True):
        # Undirected graph: keep the matrix symmetric.
        weights_matrix[u, v] = weights_matrix[v, u] = 1.0
        # Mirror the weight onto the edge data. The entry was written on the
        # line above, so this lookup cannot fail and needs no fallback.
        d["weight"] = weights_matrix[u, v]

    return agents, weights_matrix, G

In [3]:
def compute_policies(agents, rankings):
    """Render each ranked agent's greedy policy as a 64-character string.

    Args:
        agents: Sequence of agents; each must expose
            ``gameplay_strategy._q_table`` with an ``argmax(axis=-1)`` method
            (presumably a numpy array whose last axis is the action axis -
            confirm against the strategy class).
        rankings: Iterable of entries whose element ``0`` is an index into
            ``agents`` and whose element ``1`` is the associated score.

    Returns:
        A list, in the same order as ``rankings``, of
        ``(policy_string, score)`` tuples. ``policy_string`` holds one letter
        per ``[i, j, k]`` cell with ``i``, ``j``, ``k`` each in ``range(4)``
        (``k`` varying fastest): ``"C"`` where the argmax is ``0`` and ``"D"``
        where it is ``1``.
    """
    letters = ("C", "D")

    # First pass: pair every ranked agent's greedy action table with its score.
    greedy_tables = []
    for entry in rankings:
        q_table = agents[entry[0]].gameplay_strategy._q_table
        greedy_tables.append((q_table.argmax(axis=-1), entry[1]))

    # Second pass: flatten each 4x4x4 table into a C/D string.
    described = []
    for table, score in greedy_tables:
        symbols = []
        for i in range(4):
            for j in range(4):
                for k in range(4):
                    symbols.append(letters[table[i, j, k]])
        described.append(("".join(symbols), score))

    return described

def calculate_cooperativeness(history):
    """Return the fraction of entries in ``history`` equal to ``Action.COOPERATE``.

    Args:
        history: A sequence of actions supporting ``count`` and ``len``
            (the caller in this notebook passes a flat list).

    Returns:
        ``count(Action.COOPERATE) / len(history)``, or ``0.0`` when
        ``history`` is empty (an agent that never played) instead of raising
        ``ZeroDivisionError``.
    """
    # Guard the division: an agent with no recorded actions would otherwise
    # abort the whole ranking with ZeroDivisionError.
    if len(history) == 0:
        return 0.0

    return history.count(Action.COOPERATE) / len(history)

In [4]:
# --- Experiment configuration -------------------------------------------------
ACTION_LABELS = ["(C, C)", "(C, D)", "(D, C)", "(D, D)"]
POPULATION = (2, 32)  # unpacked as generate_population(communities, size)
ROUNDS = 20_000  # calls to sim.play_round() per run
RUNS = 20  # number of independently generated populations

# NOTE(review): no random seed is set in the visible cells, so the figures
# printed below will presumably differ between executions - confirm whether
# the society package seeds its own generators.

# One entry per run: the list of (policy_string, score) tuples returned by
# compute_policies, ordered best-first for the respective metric.
cumulative_reward_results = []
cooperativeness_results = []

for run in range(RUNS):
    # Generate a new population
    agents, weights_matrix, G = generate_population(*POPULATION)

    # Run a number of rounds
    sim = WeightedNetworkSimulation(agents, weights_matrix)
    sim.reset()
    for i in tqdm(range(ROUNDS), desc=f"Run {run + 1}"):
        sim.play_round()

    # Rank agents by cumulative reward.
    # sim.rewards is iterated as: agent -> sequences of rewards; each inner
    # sequence is summed (sum() of an empty sequence is already 0), then the
    # per-agent row is summed again to obtain that agent's total.
    cumulative_rewards_matrix = [[sum(r) for r in agent] for agent in sim.rewards]
    cumulative_reward_rankings = sorted(
        [(i, sum(cumulative_rewards_matrix[i])) for i in range(len(agents))],
        key=lambda x: x[1],
        reverse=True,
    )

    tqdm.write(
        f"BEST REWARD: {cumulative_reward_rankings[0][1]:<28} WORST REWARD: {cumulative_reward_rankings[-1][1]}"
    )

    cumulative_reward_results.append(compute_policies(agents, cumulative_reward_rankings))

    # Rank agents by cooperativeness.
    # Each agent's entry in sim.action_histories is flattened one level with
    # chain(*history) before the share of cooperative actions is computed.
    cooperativeness_rankings = sorted(
        [
            (i, calculate_cooperativeness(list(chain(*history))))
            for i, history in enumerate(sim.action_histories)
        ],
        key=lambda x: x[1],
        reverse=True,
    )

    tqdm.write(
        f"BEST COOPERATOR: {cooperativeness_rankings[0][1]:<24} WORST COOPERATOR: {cooperativeness_rankings[-1][1]}"
    )

    cooperativeness_results.append(compute_policies(agents, cooperativeness_rankings))


Run 1:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 139310                       WORST REWARD: 111860
BEST COOPERATOR: 0.9479709418837675       WORST COOPERATOR: 0.5437295583352059


Run 2:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 140574                       WORST REWARD: 110802
BEST COOPERATOR: 0.949617220123285        WORST COOPERATOR: 0.5574617506897417


Run 3:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 135706                       WORST REWARD: 112044
BEST COOPERATOR: 0.9486491232471214       WORST COOPERATOR: 0.595757969421405


Run 4:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 140551                       WORST REWARD: 109211
BEST COOPERATOR: 0.950913955640019        WORST COOPERATOR: 0.46437602260895433


Run 5:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 137408                       WORST REWARD: 110139
BEST COOPERATOR: 0.948576859586814        WORST COOPERATOR: 0.5766357483078466


Run 6:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 138690                       WORST REWARD: 109372
BEST COOPERATOR: 0.9490515990190681       WORST COOPERATOR: 0.5818419137026531


Run 7:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 140809                       WORST REWARD: 105784
BEST COOPERATOR: 0.9450018766420618       WORST COOPERATOR: 0.5514807465273417


Run 8:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 134185                       WORST REWARD: 107565
BEST COOPERATOR: 0.9497641686007336       WORST COOPERATOR: 0.6295309699263095


Run 9:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 147188                       WORST REWARD: 111121
BEST COOPERATOR: 0.9479008346829451       WORST COOPERATOR: 0.4509740178549101


Run 10:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 132236                       WORST REWARD: 107245
BEST COOPERATOR: 0.9489604911772781       WORST COOPERATOR: 0.6190096045902457


Run 11:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 135825                       WORST REWARD: 110362
BEST COOPERATOR: 0.948629793214214        WORST COOPERATOR: 0.6278082565846635


Run 12:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 134446                       WORST REWARD: 104094
BEST COOPERATOR: 0.9446213241745844       WORST COOPERATOR: 0.6176463272057909


Run 13:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 136800                       WORST REWARD: 111076
BEST COOPERATOR: 0.9483105390185036       WORST COOPERATOR: 0.6158391547109342


Run 14:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 133070                       WORST REWARD: 112585
BEST COOPERATOR: 0.948599067715904        WORST COOPERATOR: 0.676762624115249


Run 15:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 145915                       WORST REWARD: 109472
BEST COOPERATOR: 0.9492007992007993       WORST COOPERATOR: 0.4689454253208429


Run 16:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 136283                       WORST REWARD: 112628
BEST COOPERATOR: 0.9495207988358674       WORST COOPERATOR: 0.6116938908037365


Run 17:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 127962                       WORST REWARD: 113876
BEST COOPERATOR: 0.9494760011005228       WORST COOPERATOR: 0.7684086699704661


Run 18:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 138141                       WORST REWARD: 112138
BEST COOPERATOR: 0.9493331998899092       WORST COOPERATOR: 0.6007475791480608


Run 19:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 138004                       WORST REWARD: 109183
BEST COOPERATOR: 0.948057485390384        WORST COOPERATOR: 0.6069377153988312


Run 20:   0%|          | 0/20000 [00:00<?, ?it/s]

BEST REWARD: 138585                       WORST REWARD: 111721
BEST COOPERATOR: 0.949023833989123        WORST COOPERATOR: 0.6037480921760452


In [5]:
# Map every distinct policy string to the 1-based positions it reached in the
# reward ranking, pooled over all runs.
cumulative_reward_policy_ranks = {}

for result in cumulative_reward_results:
    for rank, (policy, score) in enumerate(result):
        # enumerate() is 0-based; ranks are reported starting at 1.
        cumulative_reward_policy_ranks.setdefault(policy, []).append(rank + 1)

# Average rank per policy string.
mean_cumulative_reward_policy_ranks = {
    name: np.mean(positions)
    for name, positions in cumulative_reward_policy_ranks.items()
}

# Print policies from best (lowest) mean rank to worst.
for policy in sorted(
    mean_cumulative_reward_policy_ranks,
    key=mean_cumulative_reward_policy_ranks.get,
):
    print(f"{mean_cumulative_reward_policy_ranks[policy]:<24} {policy}")

1.0                      DCCCDCCCDCCCDCCCDCCCCDCCCCCCCCCCDCCCDCCCCCCCDCCCCCCCDCCDDCCCCCCC
1.0                      DDCDDCDCDCCCCCCCCCCCCCCCDCCCDCCCDCCCDCDCCCCCDCCCDCCCCCCCCCCCDCCC
1.0                      DCCCDCCCDCCCDCCCCCCCCCDCCCCCCDDCCCCCCCDCDCCCCCCCDCCCCCCCDCCCCCCC
1.0                      DCDCCCCCCCDDCCCCCCCCCDCCCCCCCCCCCCDCCCCCCCDCCCCCCCDCCCCCCCCCCCCC
1.0                      DCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCDCDCCCCCCDCCCCCCCCCCDCCCC
1.0                      DCCCCCCCDCDCDCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCDCCCDCCCDCCCCCCCCCCC
1.0                      DCCCDDCCDCDCCDCCCCCCCCCCCCCCCCCCDCCCDCDCDCCDCCCDCCCCCCCCCCCCCCDC
1.0                      DCCCDCCCDCCCCCCCCCCCCCCCCCCCCCDCDCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCC
1.0                      DDDCCCCCCCDCCCDCCCCCCCCCCCCCCCCCDCCCCDCCCCDCCCCCDCDCCCCDCCCCCCCC
1.0                      DCCCCCCCDCCCCCCDCCCCCCCCCCCCDCCCDCCCDCCCCCCCCCCCCCCCCCCCCCCCDCCC
1.0                      DCCCCCCCDCCCCCCCDCCCCCCCCCCCCCCCCCCCCCDCDCCCCCCCCCCCCCCCCCCCCCCC
1.0       

In [6]:
# For every distinct policy string, pool over all runs the 1-based positions
# it reached in the cooperativeness ranking and the scores it obtained.
cooperativeness_policy_ranks = {}
cooperativeness_policy_scores = {}

for result in cooperativeness_results:
    for rank, (policy, score) in enumerate(result):
        # enumerate() is 0-based; ranks are reported starting at 1.
        cooperativeness_policy_ranks.setdefault(policy, []).append(rank + 1)
        cooperativeness_policy_scores.setdefault(policy, []).append(score)

# policy -> (mean rank, number of occurrences)
mean_cooperativeness_policy_ranks = {
    name: (np.mean(positions), len(positions))
    for name, positions in cooperativeness_policy_ranks.items()
}

# policy -> (mean cooperativeness score, number of occurrences)
mean_cooperativeness_policy_scores = {
    name: (np.mean(values), len(values))
    for name, values in cooperativeness_policy_scores.items()
}

# Print policies from best (lowest) mean rank to worst:
# mean rank, policy string, occurrence count, mean score.
for policy in sorted(
    mean_cooperativeness_policy_ranks,
    key=lambda name: mean_cooperativeness_policy_ranks[name][0],
):
    print(
        f"{mean_cooperativeness_policy_ranks[policy][0]:<24} {policy:<24} "
        f"{mean_cooperativeness_policy_ranks[policy][1]:<8} "
        f"{mean_cooperativeness_policy_scores[policy][0]}"
    )

1.0                      CCCCCCCCCCCCCCCCCCCCCCCCCDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 1        0.9479709418837675
1.0                      CCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCC 1        0.949617220123285
1.0                      CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCD 1        0.9486491232471214
1.0                      CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCDCCCCCCCDCCCC 1        0.948576859586814
1.0                      CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCDCCCDCCC 1        0.9490515990190681
1.0                      CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCDDCCCCC 1        0.9450018766420618
1.0                      CCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCD 1        0.9497641686007336
1.0                      CCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCDCC 1        0.948629793214214
1.0                      CCCCCCCCCCCCCCCDCCCCCCCDCCCCCCCCCC

In [7]:
# Same table as the previous cell, but ordered from the highest mean
# cooperativeness score to the lowest:
# mean rank, policy string, occurrence count, mean score.
for policy in sorted(
    mean_cooperativeness_policy_scores,
    key=lambda name: mean_cooperativeness_policy_scores[name][0],
    reverse=True,
):
    print(
        f"{mean_cooperativeness_policy_ranks[policy][0]:<24} {policy:<24} "
        f"{mean_cooperativeness_policy_ranks[policy][1]:<8} "
        f"{mean_cooperativeness_policy_scores[policy][0]}"
    )

1.0                      CCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCD 1        0.9497641686007336
1.0                      CCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCC 1        0.949617220123285
1.0                      DCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCDCCCDCCCCCCCCCCCCC 1        0.9495207988358674
1.0                      CCCCCCCCCCCCCCDCCCCCCCCCCCCDCCCCCCCCCCCCCCCCDDCCCCCCCCCCCCCCCCCC 1        0.9494760011005228
2.0                      DCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCC 1        0.9494154676258992
1.0                      CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCDCCCC 1        0.9492007992007993
1.0                      CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCDCCCDCCC 1        0.9490515990190681
1.0                      CCCCCCCCCCCCCDDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 1        0.949023833989123
2.0                      CCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCC