In [1]:
from itertools import chain
from random import random

import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm

from society.action import Action
from society.agent import Agent, TrainableAgent
from society.simulations.weighted import WeightedNetworkSimulation
from society.strategies.gameplay.constant import AllC, AllD
from society.strategies.gameplay.qlearning import TabularQLearningGameplayStrategy
from society.strategies.gameplay.random import RandomGameplayStrategy
from society.strategies.gameplay.tft import TitForTat
from society.visualisation.network import *

# Default size (width, height) applied to every matplotlib figure created in this notebook.
plt.rcParams["figure.figsize"] = (10, 6)

In [2]:
def generate_population(communities, size):
    """Create Q-learning agents arranged on a connected caveman graph.

    Args:
        communities: Number of cliques, forwarded to
            ``nx.connected_caveman_graph``.
        size: Number of agents in each clique.

    Returns:
        A tuple ``(agents, weights_matrix, G)``:

        * ``agents`` -- list of ``communities * size`` ``Agent`` objects, each
          using a ``TabularQLearningGameplayStrategy`` with ``lookback=2``.
        * ``weights_matrix`` -- symmetric ``(population, population)`` array
          holding 1.0 for every edge of ``G`` and 0.0 elsewhere.
        * ``G`` -- the graph, with a ``"weight"`` attribute on every edge that
          mirrors the corresponding ``weights_matrix`` entry.
    """
    population = communities * size

    agents = [
        Agent(TabularQLearningGameplayStrategy(lookback=2), i, population)
        for i in range(population)
    ]

    # NOTE(review): `nx` is not imported explicitly in this notebook; it
    # presumably arrives through the star import from
    # society.visualisation.network -- confirm.
    G = nx.connected_caveman_graph(communities, size)

    weights_matrix = np.zeros((population, population))
    for u, v, d in G.edges(data=True):
        weights_matrix[u, v] = weights_matrix[v, u] = 1.0
        # The entry was written on the line above, so reading it back cannot
        # fail; the former bare `except:` fallback to 0 only risked hiding
        # unrelated errors behind an edge weight that contradicts the matrix.
        d["weight"] = weights_matrix[u, v]

    return agents, weights_matrix, G

In [3]:
def compute_policies(agents, rankings):
    """Encode each ranked agent's greedy policy as a string of C/D letters.

    For every ``(agent_index, score)`` entry in ``rankings`` the agent's
    Q-table is reduced with ``argmax`` over its last axis, and the resulting
    action indices are mapped to letters (index 0 -> ``"C"``, 1 -> ``"D"``).
    States are visited in row-major order, so a ``(4, 4, 2)`` Q-table yields
    the same 16-character string as iterating ``i`` then ``j`` over
    ``range(4)``; tables with more state axes are encoded in full.

    Args:
        agents: Sequence of agents exposing ``gameplay_strategy._q_table``.
        rankings: Iterable of entries whose first item is an index into
            ``agents`` and whose second item is the score to carry along.

    Returns:
        A list of ``(policy_string, score)`` tuples in the order of
        ``rankings``.
    """
    action_letters = ("C", "D")
    encoded = []

    for entry in rankings:
        # Greedy action per state: the index of the largest Q-value.
        greedy_actions = agents[entry[0]].gameplay_strategy._q_table.argmax(axis=-1)

        # ravel() walks the state axes in row-major order, so the encoding
        # does not depend on a hard-coded number of states or lookback depth.
        policy_string = "".join(
            action_letters[int(action)] for action in greedy_actions.ravel()
        )
        encoded.append((policy_string, entry[1]))

    return encoded

def calculate_cooperativeness(history):
    """Return the fraction of entries in ``history`` equal to ``Action.COOPERATE``.

    Args:
        history: Sequence of actions supporting ``len`` and ``count``.

    Returns:
        ``count(Action.COOPERATE) / len(history)``, or ``0.0`` when
        ``history`` is empty (previously a ``ZeroDivisionError``), so agents
        that never played still receive a sortable score.
    """
    total = len(history)
    if total == 0:
        return 0.0

    return history.count(Action.COOPERATE) / total

In [4]:
ACTION_LABELS = ["(C, C)", "(C, D)", "(D, C)", "(D, D)"]
POPULATION = (2, 16)  # (communities, agents per community) for generate_population
ROUNDS = 10_000  # rounds played per run
RUNS = 20  # independent repetitions, each with a freshly generated population

# One entry per run: a list of (policy_string, score) tuples, best first.
cumulative_reward_results = []
cooperativeness_results = []

# NOTE(review): no random seed is set in the visible cells, so the numbers
# printed below will presumably differ between executions.
for run in range(RUNS):
    # Generate a new population
    agents, weights_matrix, G = generate_population(*POPULATION)

    # Run a number of rounds
    sim = WeightedNetworkSimulation(agents, weights_matrix)
    sim.reset()
    for _ in tqdm(range(ROUNDS), desc=f"Run {run + 1}"):
        sim.play_round()

    # Rank agents by cumulative reward.
    # sum() of an empty sequence is already 0, so no emptiness guard is needed.
    cumulative_rewards_matrix = [
        [sum(r) for r in agent] for agent in sim.rewards
    ]
    cumulative_reward_rankings = sorted(
        [(i, sum(cumulative_rewards_matrix[i])) for i in range(len(agents))],
        key=lambda x: x[1],
        reverse=True,
    )

    tqdm.write(
        f"BEST REWARD: {cumulative_reward_rankings[0][1]:<28} WORST REWARD: {cumulative_reward_rankings[-1][1]}"
    )

    cumulative_reward_results.append(compute_policies(agents, cumulative_reward_rankings))

    # Rank agents by cooperativeness: each agent's per-partner histories are
    # flattened into a single list before scoring.
    cooperativeness_rankings = sorted(
        [
            (i, calculate_cooperativeness(list(chain(*history))))
            for i, history in enumerate(sim.action_histories)
        ],
        key=lambda x: x[1],
        reverse=True,
    )

    tqdm.write(
        f"BEST COOPERATOR: {cooperativeness_rankings[0][1]:<24} WORST COOPERATOR: {cooperativeness_rankings[-1][1]}"
    )

    cooperativeness_results.append(compute_policies(agents, cooperativeness_rankings))


Run 1:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 75787                        WORST REWARD: 48019
BEST COOPERATOR: 0.9500378501135504       WORST COOPERATOR: 0.31609824003181863


Run 2:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 75785                        WORST REWARD: 53101
BEST COOPERATOR: 0.949314004659591        WORST COOPERATOR: 0.4549115132233048


Run 3:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 67274                        WORST REWARD: 54349
BEST COOPERATOR: 0.9515390609779856       WORST COOPERATOR: 0.6053941082802548


Run 4:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 69602                        WORST REWARD: 50534
BEST COOPERATOR: 0.9497198318991394       WORST COOPERATOR: 0.5083458219213477


Run 5:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 70566                        WORST REWARD: 48171
BEST COOPERATOR: 0.9472820410802208       WORST COOPERATOR: 0.5263237139272271


Run 6:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 69069                        WORST REWARD: 48931
BEST COOPERATOR: 0.9524854165627961       WORST COOPERATOR: 0.54622336055671


Run 7:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 71193                        WORST REWARD: 50373
BEST COOPERATOR: 0.9489519606859894       WORST COOPERATOR: 0.4236236236236236


Run 8:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 60290                        WORST REWARD: 55740
BEST COOPERATOR: 0.9494323013054569       WORST COOPERATOR: 0.8614024572969733


Run 9:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 68756                        WORST REWARD: 53300
BEST COOPERATOR: 0.9484171231880545       WORST COOPERATOR: 0.5569868666029286


Run 10:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 68016                        WORST REWARD: 51332
BEST COOPERATOR: 0.9502321825156471       WORST COOPERATOR: 0.520692874183176


Run 11:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 70045                        WORST REWARD: 50507
BEST COOPERATOR: 0.9471562919838782       WORST COOPERATOR: 0.42284589892294944


Run 12:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 66817                        WORST REWARD: 53736
BEST COOPERATOR: 0.9517241379310345       WORST COOPERATOR: 0.6151121008148186


Run 13:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 77917                        WORST REWARD: 50252
BEST COOPERATOR: 0.9467842776135317       WORST COOPERATOR: 0.3679537064751073


Run 14:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 69111                        WORST REWARD: 52827
BEST COOPERATOR: 0.9452791317019246       WORST COOPERATOR: 0.5744712764361782


Run 15:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 73002                        WORST REWARD: 51427
BEST COOPERATOR: 0.9481530143008742       WORST COOPERATOR: 0.5737786392326425


Run 16:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 69679                        WORST REWARD: 53008
BEST COOPERATOR: 0.952222892778113        WORST COOPERATOR: 0.5950591300861896


Run 17:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 78716                        WORST REWARD: 49356
BEST COOPERATOR: 0.9340236094437775       WORST COOPERATOR: 0.2920946626384693


Run 18:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 64643                        WORST REWARD: 50453
BEST COOPERATOR: 0.9499542851643328       WORST COOPERATOR: 0.6292563061030039


Run 19:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 73321                        WORST REWARD: 54293
BEST COOPERATOR: 0.9493803195460654       WORST COOPERATOR: 0.5058705469141997


Run 20:   0%|          | 0/10000 [00:00<?, ?it/s]

BEST REWARD: 73276                        WORST REWARD: 48476
BEST COOPERATOR: 0.9480383348307877       WORST COOPERATOR: 0.46974149524331327


In [5]:
# Map every distinct policy string to the 1-based ranks it achieved across runs.
cumulative_reward_policy_ranks = {}

for run_ranking in cumulative_reward_results:
    for position, (policy_string, _) in enumerate(run_ranking, start=1):
        cumulative_reward_policy_ranks.setdefault(policy_string, []).append(position)

# Average rank per policy string.
mean_cumulative_reward_policy_ranks = {
    policy_string: np.mean(positions)
    for policy_string, positions in cumulative_reward_policy_ranks.items()
}

# Print policies from best (lowest) mean rank to worst.
for policy_string, mean_rank in sorted(
    mean_cumulative_reward_policy_ranks.items(), key=lambda item: item[1]
):
    print(f"{mean_rank:<24} {policy_string}")

1.0                      DCDCDCCCDCDCDCCD
1.0                      DCDCCCDCCCDCCCCC
1.0                      DDCCCCDCCDCCCDDC
1.0                      CDDCCCDCCCDCCCDC
1.0                      DCDCDCCDCCDCCCCC
1.0                      DCCCDCCCDDCCDCCC
1.0                      DCDDCCDCCDDCCDCC
1.0                      DCCCDCCCDCCCCDCD
1.0                      DCDCCDDCDCDDCDCC
1.0                      DCCDCCCCDCCDCCCD
1.0                      DCCDCCCDDCCDCCCC
1.0                      DCCCCDCCDCCCCCDC
1.0                      DCDCCDCCCCDCDCCC
1.0                      DDCDCDDCDDCDCCCC
1.0                      DDDCCCCCDCDCCCCC
1.3333333333333333       DCDCCCCCCCDCDCCC
2.0                      DCCDDCCCDCCCCCCC
2.0                      DCDCDDCCCCCCCCCC
2.0                      DCCCDDCCDDCCCCDC
2.0                      DDCDCCCCDCCCCCDC
2.0                      CCCCCCDCCCCCCCDC
2.0                      DCDCCCCCDCCCCCCC
2.0                      DDCCCCCCDCCCDCCC
2.0                      DCCCDCCCD

In [10]:
# Per policy string: the 1-based ranks and the cooperativeness scores it
# obtained across all runs.
cooperativeness_policy_ranks = {}
cooperativeness_policy_scores = {}

for run_ranking in cooperativeness_results:
    for position, (policy_string, coop_score) in enumerate(run_ranking, start=1):
        cooperativeness_policy_ranks.setdefault(policy_string, []).append(position)
        cooperativeness_policy_scores.setdefault(policy_string, []).append(coop_score)

# Each value is (mean, number of occurrences).
mean_cooperativeness_policy_ranks = {
    policy_string: (np.mean(positions), len(positions))
    for policy_string, positions in cooperativeness_policy_ranks.items()
}

mean_cooperativeness_policy_scores = {
    policy_string: (np.mean(coop_scores), len(coop_scores))
    for policy_string, coop_scores in cooperativeness_policy_scores.items()
}

# Columns: mean rank, policy, occurrences, mean score -- ordered by mean rank.
for policy_string, (mean_rank, occurrences) in sorted(
    mean_cooperativeness_policy_ranks.items(), key=lambda item: item[1][0]
):
    mean_score = mean_cooperativeness_policy_scores[policy_string][0]
    print(f"{mean_rank:<24} {policy_string:<24} {occurrences:<8} {mean_score}")

1.0                      CCCCCCCCCCCDCCCD         1        0.9515390609779856
2.0                      CCCDCDCCCCCCCCCC         1        0.9332371523804786
3.0                      DCCCCCCCCCCCCDCC         1        0.9454783221003923
4.0                      CCCCCCCCCCCCDDDC         1        0.9475975975975977
4.0                      CCCCCCDDCCCCCCCC         1        0.9420949902407287
5.0                      CCCDCCCCCCCCCDCC         2        0.9393387470705339
6.0                      CCCCCCCDCCCCCDCD         1        0.9438725004949515
6.0                      CCCCCCCDCCCCCCCD         2        0.9469223410858749
6.0                      CCCCCCCCCCCCCDCD         1        0.9478416372758433
6.142857142857143        CCCCCCCDCCCCCCCC         7        0.9413036882477629
7.0                      CCCCCCCDCCCCDCCC         1        0.945045045045045
7.0                      CCCCCCCDCCCDDCCC         1        0.9413794828189866
7.5                      CCCCCCCDCDCCCCCC         2        0.9442

In [12]:
# Same columns as the rank-ordered listing, but ordered by mean
# cooperativeness score, highest first.
for policy_string, (mean_score, _) in sorted(
    mean_cooperativeness_policy_scores.items(),
    key=lambda item: item[1][0],
    reverse=True,
):
    mean_rank, occurrences = mean_cooperativeness_policy_ranks[policy_string]
    print(f"{mean_rank:<24} {policy_string:<24} {occurrences:<8} {mean_score}")

1.0                      CCCCCCCCCCCDCCCD         1        0.9515390609779856
6.0                      CCCCCCCCCCCCCDCD         1        0.9478416372758433
4.0                      CCCCCCCCCCCCDDDC         1        0.9475975975975977
6.0                      CCCCCCCDCCCCCCCD         2        0.9469223410858749
8.0                      CCCCCCCDCCCDCCCC         1        0.9463244455519562
3.0                      DCCCCCCCCCCCCDCC         1        0.9454783221003923
7.0                      CCCCCCCDCCCCDCCC         1        0.945045045045045
8.0                      CCCCCCCCCCCCCDDC         1        0.9448196508079444
7.583333333333333        CCCCCCCCCCCCCCDC         12       0.9447882169573066
9.0                      CCCDCCCDCCCCCCCC         2        0.9447717533831559
7.5                      CCCCCCCDCDCCCCCC         2        0.9442542265038196
8.0                      CCCCCCDDCCCCCDCC         1        0.9440380761523046
6.0                      CCCCCCCDCCCCCDCD         1        0.9438