In [1]:
from itertools import chain
from random import random

import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm

from society.action import Action
from society.agent import Agent, TrainableAgent
from society.simulations.weighted import WeightedNetworkSimulation
from society.strategies.gameplay.constant import AllC, AllD
from society.strategies.gameplay.qlearning import TabularQLearningGameplayStrategy
from society.strategies.gameplay.random import RandomGameplayStrategy
from society.strategies.gameplay.tft import TitForTat
from society.visualisation.network import *

# Default size (width, height in inches) for matplotlib figures created in this notebook.
plt.rcParams["figure.figsize"] = (10, 6)

In [2]:
def generate_population(communities, size):
    """Build a population of Q-learning agents on a connected caveman graph.

    Args:
        communities: Number of cliques in the graph.
        size: Number of nodes (agents) in each clique.

    Returns:
        A tuple ``(agents, weights_matrix, G)`` where

        * ``agents`` is a list of ``communities * size`` ``Agent`` objects, each
          given its own ``TabularQLearningGameplayStrategy(lookback=1)``, its
          index, and the population size;
        * ``weights_matrix`` is a square NumPy array holding 1.0 at ``[u, v]``
          and ``[v, u]`` for every edge ``(u, v)`` of ``G`` and 0.0 elsewhere;
        * ``G`` is the graph itself, with each edge's ``"weight"`` attribute
          set to the matching matrix entry.
    """
    population = communities * size

    agents = [
        Agent(TabularQLearningGameplayStrategy(lookback=1), i, population)
        for i in range(population)
    ]

    # NOTE(review): `nx` is not imported explicitly in this notebook; it
    # presumably arrives through the star import from
    # society.visualisation.network — confirm, or import networkx directly.
    G = nx.connected_caveman_graph(communities, size)

    weights_matrix = np.zeros((population, population))
    for u, v, d in G.edges(data=True):
        # Undirected relationship: mirror the weight across the diagonal.
        weights_matrix[u, v] = weights_matrix[v, u] = 1.0
        # The entry was assigned on the line above, so this lookup cannot fail;
        # no exception handler is needed (a bare `except` here would only hide
        # genuine errors and silently record a weight of 0).
        d["weight"] = weights_matrix[u, v]

    return agents, weights_matrix, G

In [3]:
def compute_policies(agents, rankings):
    """Pair every ranked agent's greedy policy string with its ranking score.

    Args:
        agents: Sequence of agents; each must expose
            ``gameplay_strategy._q_table`` as an array supporting
            ``argmax(axis=-1)``.
        rankings: Iterable of entries whose element 0 is an index into
            ``agents`` and whose element 1 is the score to carry through.

    Returns:
        A list, in the same order as ``rankings``, of
        ``(policy_string, score)`` tuples. ``policy_string`` has four
        characters: for each of the first four entries of the argmax result,
        ``"C"`` when the best action index is 0 and ``"D"`` when it is 1.
    """
    symbols = ("C", "D")

    # First pass: reduce each ranked agent's Q-table to its best action index
    # per row, keeping the score alongside.
    greedy_choices = []
    for entry in rankings:
        q_table = agents[entry[0]].gameplay_strategy._q_table
        greedy_choices.append((q_table.argmax(axis=-1), entry[1]))

    # Second pass: render the four best-action indices as a compact string.
    labelled = []
    for best_actions, score in greedy_choices:
        letters = [symbols[best_actions[state]] for state in range(4)]
        labelled.append(("".join(letters), score))

    return labelled

def calculate_cooperativeness(history):
    """Return the fraction of actions in ``history`` that are cooperative.

    Args:
        history: List of actions (compared against ``Action.COOPERATE``).

    Returns:
        ``count(Action.COOPERATE) / len(history)`` as a float, or ``0.0`` when
        ``history`` is empty (an agent that never played has no cooperative
        moves, and dividing by zero would abort the whole ranking).
    """
    # Guard first so an empty history never reaches the division below.
    if not history:
        return 0.0

    return history.count(Action.COOPERATE) / len(history)

In [4]:
# Labels for the four two-action pairs; nothing in this cell reads them.
# NOTE(review): presumably these name the four states a lookback=1 policy
# string is indexed by, in order — confirm against the strategy implementation.
ACTION_LABELS = ["(C, C)", "(C, D)", "(D, C)", "(D, D)"]
# (communities, size) arguments, unpacked into generate_population below.
POPULATION = (2, 16)
# Number of sim.play_round() calls made in each run.
ROUNDS = 5_000

# One entry per run: the list of (policy string, score) tuples returned by
# compute_policies, ordered by cumulative reward and by cooperativeness
# respectively (best first).
cumulative_reward_results = []
cooperativeness_results = []

# 20 runs, each on a freshly generated population.
# NOTE(review): no random seed is set in the visible cells, so the printed
# figures will presumably differ between executions — confirm whether the
# library seeds itself.
for run in range(20):
    # Generate a new population
    agents, weights_matrix, G = generate_population(*POPULATION)

    # Run a number of rounds
    sim = WeightedNetworkSimulation(agents, weights_matrix)
    sim.reset()
    for i in tqdm(range(ROUNDS), desc=f"Run {run + 1}"):
        sim.play_round()

    # Rank agents by cumulative reward
    # sim.rewards is iterated as one entry per agent, each containing reward
    # sequences (presumably one per partner — confirm); every sequence is
    # summed, with empty sequences counting as 0.
    cumulative_rewards_matrix = [
        [sum(r) if len(r) > 0 else 0 for r in agent] for agent in sim.rewards
    ]
    # (agent index, total reward) pairs, highest total first.
    cumulative_reward_rankings = sorted(
        [(i, sum(cumulative_rewards_matrix[i])) for i in range(len(agents))],
        key=lambda x: x[1],
        reverse=True,
    )

    tqdm.write(
        f"BEST REWARD: {cumulative_reward_rankings[0][1]:<29} WORST REWARD: {cumulative_reward_rankings[-1][1]}"
    )

    cumulative_reward_results.append(compute_policies(agents, cumulative_reward_rankings))

    # Rank agents by cooperativeness
    # Each agent's entry in sim.action_histories is flattened into a single
    # list of actions before the cooperative fraction is computed; the result
    # is (agent index, fraction) pairs, most cooperative first.
    cooperativeness_rankings = sorted(
        [
            (i, calculate_cooperativeness(list(chain(*history))))
            for i, history in enumerate(sim.action_histories)
        ],
        key=lambda x: x[1],
        reverse=True,
    )

    tqdm.write(
        f"BEST COOPERATOR: {cooperativeness_rankings[0][1]:<24} WORST COOPERATOR: {cooperativeness_rankings[-1][1]}"
    )

    cooperativeness_results.append(compute_policies(agents, cooperativeness_rankings))


Run 1:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 31861        WORST REWARD: 21571
BEST COOPERATOR: 0.9414936102236422 WORST COOPERATOR: 0.3239009600808489


Run 2:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 38028        WORST REWARD: 22477
BEST COOPERATOR: 0.88789888106092 WORST COOPERATOR: 0.1438877359422868


Run 3:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 36455        WORST REWARD: 24651
BEST COOPERATOR: 0.9138002918490723 WORST COOPERATOR: 0.43788045105278917


Run 4:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 34671        WORST REWARD: 24800
BEST COOPERATOR: 0.8450351053159478 WORST COOPERATOR: 0.3683556108175319


Run 5:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 37560        WORST REWARD: 25570
BEST COOPERATOR: 0.8969793958791759 WORST COOPERATOR: 0.4115246815286624


Run 6:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 34269        WORST REWARD: 19041
BEST COOPERATOR: 0.7836519315013939 WORST COOPERATOR: 0.15921982286794706


Run 7:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 36351        WORST REWARD: 23686
BEST COOPERATOR: 0.9410708944062219 WORST COOPERATOR: 0.33122933573790203


Run 8:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 39821        WORST REWARD: 23105
BEST COOPERATOR: 0.8570280317492214 WORST COOPERATOR: 0.18087546116262837


Run 9:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 35527        WORST REWARD: 23872
BEST COOPERATOR: 0.871295006962403 WORST COOPERATOR: 0.39988021561189857


Run 10:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 36756        WORST REWARD: 22620
BEST COOPERATOR: 0.8461767922839813 WORST COOPERATOR: 0.18033117932148626


Run 11:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 37065        WORST REWARD: 23144
BEST COOPERATOR: 0.893483962073835 WORST COOPERATOR: 0.23601348098218586


Run 12:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 34513        WORST REWARD: 22207
BEST COOPERATOR: 0.818894472361809 WORST COOPERATOR: 0.22021479474054


Run 13:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 39467        WORST REWARD: 21768
BEST COOPERATOR: 0.8016873348241512 WORST COOPERATOR: 0.11804164997997597


Run 14:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 37636        WORST REWARD: 23190
BEST COOPERATOR: 0.8364408535322146 WORST COOPERATOR: 0.1819647355163728


Run 15:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 37374        WORST REWARD: 18971
BEST COOPERATOR: 0.8553578568572571 WORST COOPERATOR: 0.25272007912957467


Run 16:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 37029        WORST REWARD: 19891
BEST COOPERATOR: 0.82         WORST COOPERATOR: 0.16913643331630046


Run 17:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 37813        WORST REWARD: 23681
BEST COOPERATOR: 0.9312908086388749 WORST COOPERATOR: 0.39755199522340534


Run 18:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 38213        WORST REWARD: 22777
BEST COOPERATOR: 0.8354289190833503 WORST COOPERATOR: 0.14799919565654535


Run 19:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 31938        WORST REWARD: 22969
BEST COOPERATOR: 0.8753634813997794 WORST COOPERATOR: 0.4015280989242988


Run 20:   0%|          | 0/5000 [00:00<?, ?it/s]

BEST REWARD: 40056        WORST REWARD: 22072
BEST COOPERATOR: 0.8451121874073342 WORST COOPERATOR: 0.12261806130903065


In [5]:
# Echo the label list so it is visible next to the policy tables that follow.
# NOTE(review): presumably the four characters of each policy string line up
# with these labels in order — confirm.
ACTION_LABELS

['(C, C)', '(C, D)', '(D, C)', '(D, D)']

In [6]:
# For each policy string, gather the 1-based positions it occupied in every
# run's reward-ordered result list (position 1 is the first entry of a run).
cumulative_reward_policy_ranks = {}

for result in cumulative_reward_results:
    for rank, (policy, score) in enumerate(result):
        cumulative_reward_policy_ranks.setdefault(policy, []).append(rank + 1)

# Mean position per policy across all runs.
mean_cumulative_reward_policy_ranks = {
    policy: np.mean(ranks)
    for policy, ranks in cumulative_reward_policy_ranks.items()
}

# Print policies from lowest to highest mean position.
for policy in sorted(
    mean_cumulative_reward_policy_ranks,
    key=mean_cumulative_reward_policy_ranks.get,
):
    print(f"{mean_cumulative_reward_policy_ranks[policy]:<24} {policy}")

3.5                      CCDD
7.907407407407407        DCDD
9.8                      DCDC
12.733333333333333       DCCD
14.636363636363637       CCDC
14.816326530612244       DCCC
15.55                    CDDC
16.441860465116278       DDDC
17.333333333333332       CCCC
17.70731707317073        CDCC
18.0                     CCCD
18.031007751937985       DDDD
18.575221238938052       DDCC
18.818181818181817       DDCD
19.2                     CDCD
19.24137931034483        CDDD


In [7]:
# For each policy string, gather the 1-based positions it occupied in every
# run's cooperativeness-ordered result list (position 1 is the first entry).
cooperativeness_policy_ranks = {}

for result in cooperativeness_results:
    for rank, (policy, score) in enumerate(result):
        cooperativeness_policy_ranks.setdefault(policy, []).append(rank + 1)

# Mean position per policy across all runs.
mean_cooperativeness_policy_ranks = {
    policy: np.mean(ranks)
    for policy, ranks in cooperativeness_policy_ranks.items()
}

# Print policies from lowest to highest mean position.
for policy in sorted(
    mean_cooperativeness_policy_ranks,
    key=mean_cooperativeness_policy_ranks.get,
):
    print(f"{mean_cooperativeness_policy_ranks[policy]:<24} {policy}")

9.28048780487805         CDCC
11.16326530612245        DCCC
11.866666666666667       CCCC
12.902654867256636       DDCC
13.5                     CCCD
15.35                    CDDC
16.6                     DCDC
16.976744186046513       DDDC
16.977272727272727       DDCD
18.333333333333332       CDCD
19.06896551724138        CDDD
19.8                     DCCD
20.410852713178294       DDDD
20.90909090909091        CCDC
27.425925925925927       DCDD
30.0                     CCDD
