In [1]:
import sys
sys.path.append("../")

from SALib.sample import sobol
from network_model import *
from tqdm import tqdm
import numpy as np
import pickle
import lzma
import os


import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
# Reset every rc setting to matplotlib's defaults first, then layer the
# notebook-wide figure style on top.
plt.rcParams.update(plt.rcParamsDefault)
_FIGURE_STYLE = {
    "font.size": 15,
    "xtick.labelsize": 14,
    "ytick.labelsize": 14,
    "axes.labelsize": 16,
    "legend.fontsize": 13.5,
    "figure.dpi": 100,
    "figure.figsize": (8, 6),
    "axes.grid": True,
    "axes.axisbelow": True,
    "grid.alpha": 0.3,
    "mathtext.fontset": "cm",
}
plt.rcParams.update(_FIGURE_STYLE)

# Switch for LaTeX text rendering; the LaTeX preamble is only set when enabled.
USE_TEX = False
plt.rc("text", usetex=USE_TEX)
if USE_TEX:
    plt.rc("text.latex", preamble=r"""
     \usepackage{times}
     \usepackage{mathptmx}""")
plt.rc("font", family="serif")

In [2]:
def gini(W):
    """Return the Gini coefficient of the values in ``W``.

    The result equals the mean-absolute-difference definition
    ``sum_i sum_j |W_i - W_j| / (2 * n**2 * mean(W))``, but it is evaluated
    through the equivalent sorted-rank identity, which costs O(n log n)
    instead of a Python-level O(n^2) double loop.

    Parameters
    ----------
    W : array-like
        One-dimensional collection of numeric values. Plain Python sequences
        are accepted as well as NumPy arrays.

    Returns
    -------
    float
        The Gini coefficient. ``nan`` is returned when ``W`` is empty or its
        values sum to zero, because the coefficient is undefined there.
    """
    values = np.sort(np.asarray(W, dtype=float).ravel())
    n = values.size
    total = values.sum()
    if n == 0 or total == 0:
        # Undefined (0/0); return nan explicitly rather than via a warning.
        return np.nan
    # For ascending x_(1..n):
    #   sum_i sum_j |x_i - x_j| = 2 * sum_k (2k - n - 1) * x_(k)
    # Dividing by 2 * n**2 * mean(x) = 2 * n * sum(x) gives the line below.
    ranks = np.arange(1, n + 1)
    return np.sum((2 * ranks - n - 1) * values) / (n * total)

In [3]:
def category_1(data):
    """Return the indices of entries whose rows never increase.

    Parameters
    ----------
    data : sequence of 2-D arrays (or one 3-D array)
        ``data[i]`` holds one row per trajectory; adjacent columns are
        compared with each other.
        NOTE(review): columns are presumably time steps - confirm against
        the code that writes the data files.

    Returns
    -------
    list of int
        Every index ``i`` for which no row of ``data[i]`` has a positive
        difference between two adjacent columns. Flat stretches count as
        non-increasing. Rows with a single column have no differences and
        therefore qualify (the previous implementation raised ``ValueError``
        on them).
    """
    decreasing = []
    for i, d in enumerate(data):
        # Column-to-column differences of all rows at once.
        steps = np.diff(np.asarray(d), axis=-1)
        if not np.any(steps > 0):
            decreasing.append(i)
    return decreasing


def category_2(data, ignore):
    """Return the indices of entries whose rows never exceed their first value.

    Parameters
    ----------
    data : sequence of 2-D arrays (or one 3-D array)
        ``data[i]`` holds one row per trajectory.
    ignore : iterable of int
        Indices that must be skipped (already assigned to another category).

    Returns
    -------
    list of int
        Every index ``i`` not in ``ignore`` for which the maximum of each row
        of ``data[i]`` equals that row's first-column value.
    """
    skip = set(ignore)  # constant-time membership instead of scanning a list
    increase_less_initial = []
    for i, d in enumerate(data):
        if i in skip:
            continue
        if np.all(np.max(d, axis=1) == d[:, 0]):
            increase_less_initial.append(i)
    return increase_less_initial


def category_3(data, ignore, threshold=0.1):
    """Return the indices of entries whose final values all stay below a cutoff.

    Parameters
    ----------
    data : sequence of 2-D arrays (or one 3-D array)
        ``data[i]`` holds one row per trajectory; only the last column is
        inspected.
    ignore : iterable of int
        Indices that must be skipped (already assigned to another category).
    threshold : float, optional
        Strict upper bound for the largest last-column value. Defaults to
        ``0.1``, the value that used to be hard-coded.

    Returns
    -------
    list of int
        Every index ``i`` not in ``ignore`` for which the maximum of the last
        column of ``data[i]`` is strictly less than ``threshold``.
    """
    skip = set(ignore)  # constant-time membership instead of scanning a list
    surpass_end_poor = []
    for i, d in enumerate(data):
        if i not in skip and np.max(d[:, -1]) < threshold:
            surpass_end_poor.append(i)
    return surpass_end_poor


def category_4(data, ignore):
    """Select entries in which every row ends strictly below where it started.

    ``data[idx]`` is read as a 2-D array; its last column is compared
    element-wise with its first column. Indices listed in ``ignore`` are
    skipped. The qualifying indices are returned as a list in ascending order.
    """
    return [
        idx
        for idx in range(len(data))
        if idx not in ignore and np.all(data[idx].T[-1] < data[idx].T[0])
    ]


def category_5(data, ignore):
    """Collect every index of ``data`` that does not appear in ``ignore``.

    This is the catch-all category: the contents of ``data`` are not
    inspected, only its length. Indices are returned in ascending order.
    """
    return [idx for idx in range(len(data)) if idx not in ignore]

In [4]:
# Experiment configuration: three parallel lists. Position k in each list
# refers to the same experiment (the analysis loops index all of them with
# the same dir_idx).

# Directories whose files are LZMA-compressed pickles, one per file.
directories = ["../data/sobol/concat_W_arrays", 
               "../data/sobol/concat_W_arrays_random", 
               "../data/sobol/concat_W_arrays_cpt",
               "../data/sobol/concat_W_arrays_cpt_random/"]

# Label used for console output and as the key of each experiment in RESULTS.
labels = ["MPT (Holme-Kim)", 
          "MPT (Random)", 
          "CPT (Holme-Kim)",
          "CPT (Random)"]

# Pickled community mapping for each experiment; only the community-level
# analysis reads these. The two files are shared between entries 0/2 and 1/3.
communities_file = ["../augmented_communities.pickle", 
                    "../augmented_communities_random.pickle",
                    "../augmented_communities.pickle",
                    "../augmented_communities_random.pickle"]

# Analysis at agent level

In [8]:
# Agent-level category counts for every experiment directory.
#
# The Gini cutoffs are fitted once, on the first directory processed, and
# reused for all later directories (presumably so the three Gini bins are
# comparable across experiments).
gini_cutoffs = None
RESULTS = {}

for dir_idx, D in enumerate(directories):

    print(f"Running analysis for {labels[dir_idx]}...")

    # Keys 0-3: per-file sizes of categories 1-4.
    # Keys 4-6: per-file number of category-5 entries in each Gini bin.
    category_counts = {i:[] for i in range(7)}
    gini_coefficients = []

    # os.listdir returns entries in arbitrary order; sorting makes the order
    # of the per-file lists reproducible between runs and machines.
    for f in tqdm(sorted(os.listdir(D))):

        # Close the compressed file deterministically instead of leaking the
        # handle. NOTE: unpickling executes arbitrary code - only load files
        # from a trusted source.
        with lzma.open(os.path.join(D, f)) as fh:
            data = pickle.load(fh)

        # Each category only considers entries not claimed by an earlier one.
        cat1 = category_1(data)
        cat2 = category_2(data, cat1)
        cat3 = category_3(data, cat1+cat2)
        cat4 = category_4(data, cat1+cat2+cat3)
        cat5 = category_5(data, cat1+cat2+cat3+cat4)

        # The five categories must partition the entries of this file.
        assert len(cat1+cat2+cat3+cat4+cat5) == len(data)

        # Gini coefficient of the last column, for category-5 entries only.
        gini_coefficients.append([gini(data[i].T[-1]) for i in cat5])

        for i,cat in enumerate([cat1, cat2, cat3, cat4]):
            category_counts[i].append(len(cat))

    if gini_cutoffs is None:
        # Split the sorted coefficients into three equally sized groups and
        # use the largest value of each group as that bin's upper bound.
        concatenated_gini_coefficients = np.concatenate(gini_coefficients)
        sorted_coeffs = np.sort(concatenated_gini_coefficients)
        split_arrays = np.array_split(sorted_coeffs, 3)
        if any(len(arr) == 0 for arr in split_arrays):
            raise ValueError("Need at least 3 category-5 Gini coefficients "
                             "to derive the Gini cutoffs")
        gini_cutoffs = [arr[-1] for arr in split_arrays]

    n_bins = len(gini_cutoffs)
    for rep in gini_coefficients:
        # Index of the first cutoff that is >= the coefficient.
        bin_idx = np.searchsorted(gini_cutoffs, rep, side="left")
        # Cutoffs come from the first directory only, so later directories can
        # exceed the top cutoff; keep those in the last bin rather than
        # silently dropping them from the counts.
        bin_idx = np.minimum(bin_idx, n_bins - 1)
        bin_counts = np.bincount(bin_idx, minlength=n_bins)
        for i in range(n_bins):
            category_counts[4+i].append(int(bin_counts[i]))

    RESULTS[labels[dir_idx]] = category_counts
    print("--- RESULTS ---")
    print(category_counts)
    print()

Running analysis for CPT (Random)...


100%|██████████| 10/10 [1:31:19<00:00, 547.92s/it]

--- RESULTS ---
{0: [1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024], 1: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 2: [6968, 6341, 5670, 6300, 6278, 6033, 6956, 6687, 6621, 6510], 3: [91, 272, 276, 87, 74, 136, 159, 142, 117, 95], 4: [543, 558, 568, 595, 500, 474, 495, 571, 507, 558], 5: [305, 729, 843, 535, 439, 760, 352, 343, 387, 676], 6: [285, 292, 835, 675, 901, 789, 230, 449, 560, 353]}






In [None]:
# Serialise the current RESULTS mapping into the working directory.
with open("category_count_results_individual.pickle", mode="wb") as results_file:
    pickle.dump(RESULTS, results_file)

# Analysis at community level

In [10]:
# Community-level category counts for every experiment directory.
#
# The Gini cutoffs are fitted once, on the first directory processed, and
# reused for all later directories (presumably so the three Gini bins are
# comparable across experiments).
gini_cutoffs = None
RESULTS = {}

for dir_idx, D in enumerate(directories):
    
    # NOTE: unpickling executes arbitrary code - only load trusted files.
    with open(communities_file[dir_idx], "rb") as f:
        communities = pickle.load(f) 
    # The largest key is left out.
    # NOTE(review): presumably that key is not a regular community - confirm.
    # The keys are also used as row indices below, so they are assumed to be
    # the integers 0..K-1 - TODO confirm.
    community_keys = sorted(communities.keys())[:-1]
    
    print(f"Running analysis for {labels[dir_idx]}...")
    
    # Keys 0-3: per-file sizes of categories 1-4.
    # Keys 4-6: per-file number of category-5 entries in each Gini bin.
    category_counts = {i:[] for i in range(7)}
    gini_coefficients = []
    
    # os.listdir returns entries in arbitrary order; sorting makes the order
    # of the per-file lists reproducible between runs and machines.
    for f in tqdm(sorted(os.listdir(D))):
        # Close the compressed file deterministically instead of leaking the
        # handle.
        with lzma.open(os.path.join(D, f)) as fh:
            data = pickle.load(fh)

        # Replace the rows of every entry by per-community means: row c of
        # entry i is the mean over the rows listed in communities[c].
        data_communities = np.zeros((data.shape[0], len(community_keys), data.shape[2]))
        for c in community_keys:
            for i in range(len(data)):
                data_communities[i][c] = np.mean(data[i][communities[c]], axis=0)
        
        data = data_communities

        # Each category only considers entries not claimed by an earlier one.
        cat1 = category_1(data)
        cat2 = category_2(data, cat1)
        cat3 = category_3(data, cat1+cat2)
        cat4 = category_4(data, cat1+cat2+cat3)
        cat5 = category_5(data, cat1+cat2+cat3+cat4)
        
        # The five categories must partition the entries of this file.
        assert len(cat1+cat2+cat3+cat4+cat5) == len(data)

        # Gini coefficient of the last column, for category-5 entries only.
        gini_coefficients.append([gini(data[i].T[-1]) for i in cat5])

        for i,cat in enumerate([cat1, cat2, cat3, cat4]):
            category_counts[i].append(len(cat))

    if gini_cutoffs is None:
        # Split the sorted coefficients into three equally sized groups and
        # use the largest value of each group as that bin's upper bound.
        concatenated_gini_coefficients = np.concatenate(gini_coefficients)
        sorted_coeffs = np.sort(concatenated_gini_coefficients)
        split_arrays = np.array_split(sorted_coeffs, 3)
        if any(len(arr) == 0 for arr in split_arrays):
            raise ValueError("Need at least 3 category-5 Gini coefficients "
                             "to derive the Gini cutoffs")
        gini_cutoffs = [arr[-1] for arr in split_arrays]

    n_bins = len(gini_cutoffs)
    for rep in gini_coefficients:
        # Index of the first cutoff that is >= the coefficient.
        bin_idx = np.searchsorted(gini_cutoffs, rep, side="left")
        # Cutoffs come from the first directory only, so later directories can
        # exceed the top cutoff; keep those in the last bin rather than
        # silently dropping them from the counts.
        bin_idx = np.minimum(bin_idx, n_bins - 1)
        bin_counts = np.bincount(bin_idx, minlength=n_bins)
        for i in range(n_bins):
            category_counts[4+i].append(int(bin_counts[i]))

    RESULTS[labels[dir_idx]] = category_counts
    print("--- RESULTS ---")
    print(category_counts)
    print()

Running analysis for CPT (Random)...


100%|██████████| 10/10 [21:41<00:00, 130.18s/it]

--- RESULTS ---
{0: [1395, 1276, 1290, 1361, 1425, 1357, 1372, 1526, 1353, 1613], 1: [599, 421, 203, 340, 407, 399, 533, 286, 658, 825], 2: [6051, 5890, 5336, 5702, 5544, 5382, 6183, 5959, 5703, 5152], 3: [278, 642, 558, 454, 648, 475, 531, 394, 384, 363], 4: [401, 417, 393, 475, 417, 341, 352, 432, 346, 390], 5: [285, 370, 594, 389, 274, 563, 185, 322, 397, 585], 6: [207, 200, 842, 495, 501, 699, 60, 297, 375, 288]}






In [296]:
# Serialise the current RESULTS mapping into the working directory.
with open("category_count_results_communities.pickle", mode="wb") as results_file:
    pickle.dump(RESULTS, results_file)