In [15]:
import pandas as pd
import numpy as np
import json
from scipy import stats
import tqdm


In [16]:

def steady_state_coevolving(model, name, max_iterations=100000, nsteady=1000, sensibility=0.00001, node_status=True,
                     progress_bar=False):
        """
        Execute a bunch of model iterations

        :param max_iterations: the maximum number of iterations to execute
        :param nsteady: number of required stable states
        :param sensibility: sensibility check for a steady state
        :param node_status: if the incremental node status has to be returned.
        :param progress_bar: whether to display a progress bar, default False

        :return: a list containing for each iteration a dictionary {"iteration": iteration_id, "status": dictionary_node_to_status}
        """

        print(type(model.graph.graph))

        system_status = []
        steady_it = 0
        for it in tqdm.tqdm(range(0, max_iterations), disable=not progress_bar):
            its = model.iteration(node_status)

            if it > 0:
                old = np.array(list(system_status[-1]['status'].values()))
                actual = np.array(list(its['status'].values()))
                # res = np.abs(old - actual)
                if np.all((its['max_diff'] < sensibility)):
                    steady_it += 1
                else:
                    steady_it = 0

                if it % 500 == 0:
                    G = model.graph.graph
                    nx.write_edgelist(G, f"snapshotGraphs/edgelist {name} {it}.csv", delimiter=",")
                    with open(f"snapshotGraphs/opinions {name} {it}.txt", "w") as opfile:
                        for op in list(actual):
                            opfile.write(str(op)+"\n")

            system_status.append(its)
            if steady_it == nsteady:
                G = model.graph.graph
                nx.write_edgelist(G, f"snapshotGraphs/edgelist {name} {it}.csv", delimiter=",")
                with open(f"snapshotGraphs/opinions {name} {it}.txt", "w") as opfile:
                    for op in list(actual):
                        opfile.write(str(op)+"\n")
                        
                return system_status[:-nsteady]

        return system_status

# Save snapshots of graph during evolution

In [17]:
import sys
sys.path.append("/home/pansanella/mydata/GitHub/local_packages/")
sys.path.append("/data1/users/pansanella/mydata/GitHub/local_packages/")
sys.path.append("/data1/users/pansanella/mydata/GitHub/local_packages/netdspatch_local/")

import json
import networkx as nx
import ndlib_local.ndlib.models.ModelConfig as mc
import ndlib_local.ndlib.models.opinions as op
import warnings
warnings.filterwarnings("ignore")

graphname = "ba"
p = 5
n = 250
graph = nx.barabasi_albert_graph(n, p)
nruns = 1
max_it = 100000
pr = 0.5
e = 0.3
g = 1.0

final_opinions = dict()
final_iterations = dict()
name = f"rewiring {graphname}{p} pr{pr} e{e} g{g} mi{max_it}"

model = op.AdaptiveAlgorithmicBiasModel(graph)
# Model configuration
config = mc.Configuration()
config.add_model_parameter("epsilon", e)
config.add_model_parameter("gamma", g)
config.add_model_parameter("p", pr)
model.set_initial_status(config)

steady_status = steady_state_coevolving(model=model, name=name, max_iterations=max_it, nsteady=1000, sensibility=0.00001, node_status=True, progress_bar=True)


                

<class 'networkx.classes.graph.Graph'>


  0%|          | 500/100000 [00:01<05:06, 324.74it/s]


FileNotFoundError: [Errno 2] No such file or directory: 'snapshotsGraphs/edgelist rewiring ba5 pr0.5 e0.3 g1.0 mi100000 500.csv'

In [3]:
def nclusters(data, threshold):
    data = [float(el) for el in data]
    data = sorted(data)
    start = data[0]
    max_val = start + threshold
    c = (start, max_val)
    cluster = dict()
    for i in data:
        if i <= max_val:
            if c in cluster.keys():
                cluster[c] += 1
            else:
                cluster[c] = 1
        else:
            max_val = i + threshold
            c = (i, max_val)
            cluster[c] = 1
    #ora ho il dizionario con i cluster di una run
    C_num = len(data)**2
    C_den = 0
    for k in cluster.keys():
        C_den += cluster[k]*cluster[k]
    C = C_num / C_den
    return C

In [4]:
def entropy(opinions, n, nbins):
    bincounts, bin_edges = np.histogram(opinions, bins = np.linspace(0, 1, nbins))
    probabilities = bincounts/n
    entr = stats.entropy(probabilities)
    return entr

def nodeperc_withininterval(opinions, center, width):
    sortedops = sorted(opinions)
    sortedopsarr = np.array(sortedops)
    withinrange = np.where(np.logical_and(sortedopsarr>=(center-width), sortedopsarr<=(center+width)))
    withinrange = withinrange[0]
    perc = len(withinrange) / len(sortedops)
    return perc

def average_opinions(opinions):
    a = np.array(opinions)
    return np.average(a)

def std_opinions(opinions):
    a = np.array(opinions)
    return np.std(a)

def median_opinions(opinions):
    a = np.array(opinions)
    return np.median(a)

maxentr = entropy(np.random.uniform(0.0, 1.0, 250), 250, 11)


In [5]:
model = "rewiring"
gr = 0.0
finalfile = f"{model} aggregate results.csv"

In [6]:
#create one aggregate file
import os
aggrfile = open("aggregate/{}".format(finalfile), "w+")
header = "model,graph,n,p,pr,eps,gam,max_it\n"
aggrfile.write(header)
aggrfile.close()
aggrfile = open("aggregate/{}".format(finalfile), "a+")
for graph in ['er', 'ba']:
    if graph == "er":
        p = 0.1
    else:
        p=5
    for pr in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]:
        for eps in [0.2, 0.3, 0.4]:
            for gam in [0.0, 0.4, 0.8, 1.2, 1.6]:
                name = f"{model} {graph}{p} pr{pr} e{eps} g{gam} mi100000"
                s = f"{model},{graph},250,{p},{pr},{eps},{gam},100000\n"
                aggrfile.write(s)
aggrfile.close()

In [7]:
results = pd.read_csv(f"aggregate/{finalfile}")
results

Unnamed: 0,model,graph,n,p,pr,eps,gam,max_it
0,rewiring,er,250,0.1,0.0,0.2,0.0,100000
1,rewiring,er,250,0.1,0.0,0.2,0.4,100000
2,rewiring,er,250,0.1,0.0,0.2,0.8,100000
3,rewiring,er,250,0.1,0.0,0.2,1.2,100000
4,rewiring,er,250,0.1,0.0,0.2,1.6,100000
...,...,...,...,...,...,...,...,...
175,rewiring,ba,250,5.0,0.5,0.4,0.0,100000
176,rewiring,ba,250,5.0,0.5,0.4,0.4,100000
177,rewiring,ba,250,5.0,0.5,0.4,0.8,100000
178,rewiring,ba,250,5.0,0.5,0.4,1.2,100000


In [8]:
def nclusters(data, threshold):
    data = [float(el) for el in data]
    data = sorted(data)
    start = data[0]
    max_val = start + threshold
    c = (start, max_val)
    cluster = dict()
    for i in data:
        if i <= max_val:
            if c in cluster.keys():
                cluster[c] += 1
            else:
                cluster[c] = 1
        else:
            max_val = i + threshold
            c = (i, max_val)
            cluster[c] = 1
    #ora ho il dizionario con i cluster di una run
    C_num = len(data)**2
    C_den = 0
    for k in cluster.keys():
        C_den += cluster[k]*cluster[k]
    C = C_num / C_den
    return C

In [9]:
avg = []
std = []
for index, row in results.iterrows():
    eps = row['eps']
    gam = row['gam']
    pr = row['pr']
    graph = row['graph']
    if graph == "ba":
        p = int(row['p'])
    else:
        p = row['p']
    name = f'{model} {graph}{p} pr{pr} e{eps} g{gam} mi100000'
    print(f'doing {name}')
    jsonfile = open(f'finals/final_iterations {name}.json')
    data = json.load(jsonfile)
    finalitarr = []
    for nr in data.keys():
        finalitarr.append(data[nr])
    finalitarr = np.array(finalitarr)
    avg.append(np.average(finalitarr))
    std.append(np.std(finalitarr))
results["avg_nit"] = avg
results["std_nit"] = std

doing rewiring er0.1 pr0.0 e0.2 g0.0 mi100000


FileNotFoundError: [Errno 2] No such file or directory: 'finals/final_iterations rewiring er0.1 pr0.0 e0.2 g0.0 mi100000.json'

In [11]:
avg01 = []
std01 = []
avg001=[]
std001=[]
avg00001=[]
std00001=[]
for index, row in results.iterrows():
    eps = row['eps']
    gam = row['gam']
    pr = row['pr']
    graph = row['graph']
    if graph == "ba":
        p = int(row['p'])
    else:
        p = row['p']
    name = f'{model} {graph}{p} pr{pr} e{eps} g{gam} mi100000'
    print(f'doing {name}')
    jsonfile = open(f'aggregate/final_opinions {name}.json')
    data = json.load(jsonfile)
    ncarray01 = []
    ncarray001 = []
    ncarray00001 = []
    for nr in data.keys():
        finalops = list(data[nr])
        nc01 = nclusters(finalops, 0.1)
        nc001 = nclusters(finalops, 0.01)
        nc00001 = nclusters(finalops, 0.00001)
        ncarray01.append(nc01)
        ncarray001.append(nc001)
        ncarray00001.append(nc00001)
    ncarray01 = np.array(ncarray01)
    ncarray001 = np.array(ncarray001)
    ncarray00001 = np.array(ncarray00001)
    avg01.append(np.average(ncarray01))
    std01.append(np.std(ncarray01))
    avg001.append(np.average(ncarray001))
    std001.append(np.std(ncarray001))
    avg00001.append(np.average(ncarray00001))
    std00001.append(np.std(ncarray00001))
results["avg_ncluster_01"] = avg01
results["std_ncluster_01"] = std01
results["avg_ncluster_001"] = avg001
results["std_ncluster_001"] = std001
results["avg_ncluster_00001"] = avg00001
results["std_ncluster_00001"] = std00001

doing rewiring er0.1 pr0.0 e0.2 g0.0 mi100000
doing rewiring er0.1 pr0.0 e0.2 g0.4 mi100000
doing rewiring er0.1 pr0.0 e0.2 g0.8 mi100000
doing rewiring er0.1 pr0.0 e0.2 g1.2 mi100000
doing rewiring er0.1 pr0.0 e0.2 g1.6 mi100000
doing rewiring er0.1 pr0.0 e0.3 g0.0 mi100000
doing rewiring er0.1 pr0.0 e0.3 g0.4 mi100000
doing rewiring er0.1 pr0.0 e0.3 g0.8 mi100000
doing rewiring er0.1 pr0.0 e0.3 g1.2 mi100000
doing rewiring er0.1 pr0.0 e0.3 g1.6 mi100000
doing rewiring er0.1 pr0.0 e0.4 g0.0 mi100000
doing rewiring er0.1 pr0.0 e0.4 g0.4 mi100000
doing rewiring er0.1 pr0.0 e0.4 g0.8 mi100000
doing rewiring er0.1 pr0.0 e0.4 g1.2 mi100000
doing rewiring er0.1 pr0.0 e0.4 g1.6 mi100000
doing rewiring er0.1 pr0.1 e0.2 g0.0 mi100000
doing rewiring er0.1 pr0.1 e0.2 g0.4 mi100000
doing rewiring er0.1 pr0.1 e0.2 g0.8 mi100000
doing rewiring er0.1 pr0.1 e0.2 g1.2 mi100000
doing rewiring er0.1 pr0.1 e0.2 g1.6 mi100000
doing rewiring er0.1 pr0.1 e0.3 g0.0 mi100000
doing rewiring er0.1 pr0.1 e0.3 g0

In [12]:
results.to_csv(f"aggregate/{finalfile}")

In [13]:
results = pd.read_csv(f"aggregate/{finalfile}", index_col=[0])
results.head()

Unnamed: 0,model,graph,n,p,pr,eps,gam,max_it,avg_ncluster_01,std_ncluster_01,avg_ncluster_001,std_ncluster_001,avg_ncluster_00001,std_ncluster_00001
0,rewiring,er,250,0.1,0.0,0.2,0.0,100000,2.170596,0.476778,2.170636,0.476767,2.202778,0.512769
1,rewiring,er,250,0.1,0.0,0.2,0.4,100000,2.413687,0.525845,2.413696,0.525832,2.542178,0.619739
2,rewiring,er,250,0.1,0.0,0.2,0.8,100000,2.228691,0.386793,2.229085,0.386658,2.364724,0.382978
3,rewiring,er,250,0.1,0.0,0.2,1.2,100000,2.579699,0.44525,2.645225,0.471204,2.742667,0.55548
4,rewiring,er,250,0.1,0.0,0.2,1.6,100000,7.85471,0.402084,32.45624,5.06105,64.245012,10.079154


In [14]:
averageops = []
averagestdops = []
averagemedianops = []

for index, row in results.iterrows():
    eps = row['eps']
    gam = row['gam']
    pr = row['pr']
    graph = row['graph']
    if graph == "ba":
        p = int(row['p'])
    else:
        p = row['p']
    name = f'{model} {graph}{p} pr{pr} e{eps} g{gam} mi100000'
    print(f'doing {name}')
    jsonfile = open(f'aggregate/final_opinions {name}.json')
    data = json.load(jsonfile)
    avgarray = []
    stdarray = []
    medarray = []
    for nr in data.keys():
        finalops = list(data[nr])
        avg = average_opinions(finalops)
        med = median_opinions(finalops)
        std = std_opinions(finalops)
        avgarray.append(avg)
        medarray.append(med)
        stdarray.append(std)
    avgarray = np.array(avgarray)
    medarray = np.array(medarray)
    stdarray = np.array(stdarray)
    averageops.append(np.average(avgarray))
    averagestdops.append(np.average(stdarray))
    averagemedianops.append(np.average(medarray))
results["avg_mean_opinion"] = averageops
results["avg_std_opinions"] = averagestdops
results["avg_median_opinion"] = averagemedianops

doing rewiring er0.1 pr0.0 e0.2 g0.0 mi100000
doing rewiring er0.1 pr0.0 e0.2 g0.4 mi100000
doing rewiring er0.1 pr0.0 e0.2 g0.8 mi100000
doing rewiring er0.1 pr0.0 e0.2 g1.2 mi100000
doing rewiring er0.1 pr0.0 e0.2 g1.6 mi100000
doing rewiring er0.1 pr0.0 e0.3 g0.0 mi100000
doing rewiring er0.1 pr0.0 e0.3 g0.4 mi100000
doing rewiring er0.1 pr0.0 e0.3 g0.8 mi100000
doing rewiring er0.1 pr0.0 e0.3 g1.2 mi100000
doing rewiring er0.1 pr0.0 e0.3 g1.6 mi100000
doing rewiring er0.1 pr0.0 e0.4 g0.0 mi100000
doing rewiring er0.1 pr0.0 e0.4 g0.4 mi100000
doing rewiring er0.1 pr0.0 e0.4 g0.8 mi100000
doing rewiring er0.1 pr0.0 e0.4 g1.2 mi100000
doing rewiring er0.1 pr0.0 e0.4 g1.6 mi100000
doing rewiring er0.1 pr0.1 e0.2 g0.0 mi100000
doing rewiring er0.1 pr0.1 e0.2 g0.4 mi100000
doing rewiring er0.1 pr0.1 e0.2 g0.8 mi100000
doing rewiring er0.1 pr0.1 e0.2 g1.2 mi100000
doing rewiring er0.1 pr0.1 e0.2 g1.6 mi100000
doing rewiring er0.1 pr0.1 e0.3 g0.0 mi100000
doing rewiring er0.1 pr0.1 e0.3 g0

In [15]:
results.to_csv(f"aggregate/{finalfile}")

In [16]:
results = pd.read_csv(f"aggregate/{finalfile}", index_col=[0])
results.head()

Unnamed: 0,model,graph,n,p,pr,eps,gam,max_it,avg_ncluster_01,std_ncluster_01,avg_ncluster_001,std_ncluster_001,avg_ncluster_00001,std_ncluster_00001,avg_mean_opinion,avg_std_opinions,avg_median_opinion
0,rewiring,er,250,0.1,0.0,0.2,0.0,100000,2.170596,0.476778,2.170636,0.476767,2.202778,0.512769,0.493026,0.235189,0.443834
1,rewiring,er,250,0.1,0.0,0.2,0.4,100000,2.413687,0.525845,2.413696,0.525832,2.542178,0.619739,0.503692,0.240937,0.455
2,rewiring,er,250,0.1,0.0,0.2,0.8,100000,2.228691,0.386793,2.229085,0.386658,2.364724,0.382978,0.506752,0.237929,0.496806
3,rewiring,er,250,0.1,0.0,0.2,1.2,100000,2.579699,0.44525,2.645225,0.471204,2.742667,0.55548,0.500606,0.249831,0.546255
4,rewiring,er,250,0.1,0.0,0.2,1.6,100000,7.85471,0.402084,32.45624,5.06105,64.245012,10.079154,0.49288,0.277877,0.486494


In [17]:
values_avg = []
values_std = []

for index, row in results.iterrows():
    eps = row['eps']
    gam = row['gam']
    pr = row['pr']
    graph = row['graph']
    if graph == "ba":
        p = int(row['p'])
    else:
        p = row['p']
    name = f'{model} {graph}{p} pr{pr} e{eps} g{gam} mi100000'
    print(f'doing {name}')
    jsonfile = open(f'aggregate/final_opinions {name}.json')
    data = json.load(jsonfile)
    entrarray = []
    for nr in data.keys():
        finalops = list(data[nr])
        entr = entropy(finalops, 100, 11)
        entrarray.append(entr)
    entrarray = np.array(entrarray)
    values_avg.append(np.average(entrarray))
    values_std.append(np.std(entrarray))

results["10B_avg_entr"] = values_avg
results["10B_std_entr"] = values_std

results['10B_avg_norm_entr'] = results['10B_avg_entr'].apply(lambda x: x/maxentr)

doing rewiring er0.1 pr0.0 e0.2 g0.0 mi100000
doing rewiring er0.1 pr0.0 e0.2 g0.4 mi100000
doing rewiring er0.1 pr0.0 e0.2 g0.8 mi100000
doing rewiring er0.1 pr0.0 e0.2 g1.2 mi100000
doing rewiring er0.1 pr0.0 e0.2 g1.6 mi100000
doing rewiring er0.1 pr0.0 e0.3 g0.0 mi100000
doing rewiring er0.1 pr0.0 e0.3 g0.4 mi100000
doing rewiring er0.1 pr0.0 e0.3 g0.8 mi100000
doing rewiring er0.1 pr0.0 e0.3 g1.2 mi100000
doing rewiring er0.1 pr0.0 e0.3 g1.6 mi100000
doing rewiring er0.1 pr0.0 e0.4 g0.0 mi100000
doing rewiring er0.1 pr0.0 e0.4 g0.4 mi100000
doing rewiring er0.1 pr0.0 e0.4 g0.8 mi100000
doing rewiring er0.1 pr0.0 e0.4 g1.2 mi100000
doing rewiring er0.1 pr0.0 e0.4 g1.6 mi100000
doing rewiring er0.1 pr0.1 e0.2 g0.0 mi100000
doing rewiring er0.1 pr0.1 e0.2 g0.4 mi100000
doing rewiring er0.1 pr0.1 e0.2 g0.8 mi100000
doing rewiring er0.1 pr0.1 e0.2 g1.2 mi100000
doing rewiring er0.1 pr0.1 e0.2 g1.6 mi100000
doing rewiring er0.1 pr0.1 e0.3 g0.0 mi100000
doing rewiring er0.1 pr0.1 e0.3 g0

In [18]:
results.head()

Unnamed: 0,model,graph,n,p,pr,eps,gam,max_it,avg_ncluster_01,std_ncluster_01,avg_ncluster_001,std_ncluster_001,avg_ncluster_00001,std_ncluster_00001,avg_mean_opinion,avg_std_opinions,avg_median_opinion,10B_avg_entr,10B_std_entr,10B_avg_norm_entr
0,rewiring,er,250,0.1,0.0,0.2,0.0,100000,2.170596,0.476778,2.170636,0.476767,2.202778,0.512769,0.493026,0.235189,0.443834,0.78904,0.192599,0.344689
1,rewiring,er,250,0.1,0.0,0.2,0.4,100000,2.413687,0.525845,2.413696,0.525832,2.542178,0.619739,0.503692,0.240937,0.455,0.886833,0.20117,0.38741
2,rewiring,er,250,0.1,0.0,0.2,0.8,100000,2.228691,0.386793,2.229085,0.386658,2.364724,0.382978,0.506752,0.237929,0.496806,0.830325,0.156175,0.362724
3,rewiring,er,250,0.1,0.0,0.2,1.2,100000,2.579699,0.44525,2.645225,0.471204,2.742667,0.55548,0.500606,0.249831,0.546255,1.008177,0.179297,0.440418
4,rewiring,er,250,0.1,0.0,0.2,1.6,100000,7.85471,0.402084,32.45624,5.06105,64.245012,10.079154,0.49288,0.277877,0.486494,2.220175,0.038018,0.969876


In [19]:
results.to_csv("aggregate/{}".format(finalfile))
results = pd.read_csv("aggregate/{}".format(finalfile), index_col=[0])

In [20]:
results.head()

Unnamed: 0,model,graph,n,p,pr,eps,gam,max_it,avg_ncluster_01,std_ncluster_01,avg_ncluster_001,std_ncluster_001,avg_ncluster_00001,std_ncluster_00001,avg_mean_opinion,avg_std_opinions,avg_median_opinion,10B_avg_entr,10B_std_entr,10B_avg_norm_entr
0,rewiring,er,250,0.1,0.0,0.2,0.0,100000,2.170596,0.476778,2.170636,0.476767,2.202778,0.512769,0.493026,0.235189,0.443834,0.78904,0.192599,0.344689
1,rewiring,er,250,0.1,0.0,0.2,0.4,100000,2.413687,0.525845,2.413696,0.525832,2.542178,0.619739,0.503692,0.240937,0.455,0.886833,0.20117,0.38741
2,rewiring,er,250,0.1,0.0,0.2,0.8,100000,2.228691,0.386793,2.229085,0.386658,2.364724,0.382978,0.506752,0.237929,0.496806,0.830325,0.156175,0.362724
3,rewiring,er,250,0.1,0.0,0.2,1.2,100000,2.579699,0.44525,2.645225,0.471204,2.742667,0.55548,0.500606,0.249831,0.546255,1.008177,0.179297,0.440418
4,rewiring,er,250,0.1,0.0,0.2,1.6,100000,7.85471,0.402084,32.45624,5.06105,64.245012,10.079154,0.49288,0.277877,0.486494,2.220175,0.038018,0.969876


Unused functions

# def nclusters_meanshift(data, bw):
#     ops = {i: data[i] for i in range(len(data))}
#     sorted_ops = sorted(ops.items(), key = lambda kv:(kv[1], kv[0]))
#     A=np.array([el[1] for el in sorted_ops]).reshape(-1,1)
#     clustering = MeanShift(bandwidth=bw).fit(A)
#     lbls = clustering.labels_
#     labels = np.arange(len(sorted_ops))
#     for i in range(len(labels)):
#         cl = lbls[i]
#         labels[sorted_ops[i][0]]=cl
#     cluster_participation_dict = {}
#     for l in labels:
#         if l not in cluster_participation_dict:
#             cluster_participation_dict[l] = 1
#         else:
#             cluster_participation_dict[l] += 1
#     #computing effective number of clusters using function explained in the paper
#     C_num = 0
#     C_den = 0
#     for k in cluster_participation_dict:
#         C_num += cluster_participation_dict[k]
#         C_den += ((cluster_participation_dict[k])**2)
#     C_num = (C_num**2)
#     C = C_num/C_den
#     return C