In [None]:
# import the libraries.
import networkx as nx
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import powerlaw
import seaborn as sns
import random
import datetime
import csv
import dynetx as dn
from networkx.algorithms import approximation
from tqdm import tqdm

warnings.filterwarnings('ignore')

In [None]:
%%time
# Import the replies, removing the ones related to rConBot and the self-replies.
# Assumed column layout (TODO confirm against the data-collection script):
#   row[0] = author of the reply
#   row[1] = author of the comment/submission being replied to
#   row[2] = integer time of the reply, in seconds

n1_list = []    # author of the reply
n2_list = []    # author of the comment/submission to which the response is addressed
time_list = []  # time of the reply, converted from seconds to days

# NOTE(review): machine-specific absolute path; the notebook only runs where this file exists.
name_file=r"C:\Users\Alessandro Batignani\Desktop\sna-2023-2023_batignani_fattorini_iannello\data_collection\conspiracy_2m_final_2.csv"

# newline='' is the opening mode the csv module documents for files handed to
# csv.reader; the file handle keeps its own name instead of being shadowed by
# the reader object.
with open(name_file, newline='') as csv_file:
    for row in csv.reader(csv_file):
        if not row:
            # csv.reader yields an empty list for a blank line; indexing it
            # would raise IndexError.
            continue
        author, target = row[0], row[1]
        if author == 'rConBot' or target == 'rConBot' or author == target:
            continue
        n1_list.append(author)
        n2_list.append(target)
        # Change the time scale from seconds to days (rounded to the nearest integer).
        time_list.append(round(int(row[2]) / 86400))
            

In [None]:
# Time rescaling: shift the day indices so that the earliest one becomes 1.
# The array is built as float, so the rescaled timestamps are floats
# (1.0, 2.0, ...).
first_day = min(time_list)
supp_time = np.asarray(time_list, dtype=float) - first_day + 1
time_list = list(supp_time)

In [None]:
# First and last rescaled timestamp; globally we count 60 timestamps.
(min(time_list), max(time_list))

In [None]:
# Conversion to a pandas DataFrame (one row per reply), so that the
# interactions can be ordered by increasing time.

d = dict(node1=n1_list, node2=n2_list, timestamp=time_list)
df = pd.DataFrame(d)

In [None]:
# Array of (node1, node2, timestamp) rows, ordered by increasing timestamp.

replies_by_time = df.sort_values(by='timestamp')
reply_matrix = replies_by_time.to_numpy()

In [None]:
# Largest timestamp found in the third column of reply_matrix.
max_timestamp = max(row[2] for row in reply_matrix)

**For the analysis we exclude the first and last timestamps (t=1 and t=60): every per-timestamp loop below runs over t = 2, …, 59.**

## Links as instantaneous interactions

In [None]:
%%time

# Empty undirected dynamic graph.
g = dn.DynGraph(edge_removal=True)

# Insert each reply as an instantaneous interaction between its two nodes;
# the stored timestamps are floats, so they are cast to int first.
for u, v, t in reply_matrix:
    g.add_interaction(u=u, v=v, t=int(t))

### number of interactions for each timestamp

In [None]:
%%time

# Number of interactions reported by g at each timestamp in 2..59.
edges_list = [g.number_of_interactions(t=day) for day in range(2, 60)]

In [None]:
# Number of interactions per timestamp; the x axis holds the 58 timestamps 2..59.
fig, ax = plt.subplots()
ax.errorbar(np.linspace(2, 59, 58), edges_list, marker='o', linestyle='--')
ax.set_xlabel('timestamp', fontsize=13)
ax.set_ylabel('number of interactions', fontsize=15)
fig.tight_layout()
# fig.savefig('number_interactions.png')  # uncomment to export the figure
plt.show()

### total interactions for each timestamp

In [None]:
# Rows of df are counted one by one, so:
#   (u,v,t),(u,v,t) are counted as 2 different interactions
#   (u,v,t),(v,u,t) are counted as 2 different interactions

total_interaction_list = [
    len(df.loc[df['timestamp'] == day]) for day in range(2, 60)
]

In [None]:
# Total number of reply rows per timestamp; the x axis holds the 58 timestamps 2..59.
fig, ax = plt.subplots()
ax.errorbar(np.linspace(2, 59, 58), total_interaction_list, marker='o', linestyle='--')
ax.set_xlabel('timestamp', fontsize=13)
ax.set_ylabel('total interactions', fontsize=15)
fig.tight_layout()
# fig.savefig('total_interactions.png')  # uncomment to export the figure
plt.show()

### nodes for each timestamp

In [None]:
%%time
# Number of nodes in the time slice of g at each timestamp in 2..59.
nodes_int = [g.time_slice(day).number_of_nodes() for day in range(2, 60)]

In [None]:
# Number of nodes per timestamp; the x axis holds the 58 timestamps 2..59.
fig, ax = plt.subplots()
ax.scatter(np.linspace(2, 59, 58), nodes_int)
ax.set_xlabel('timestamp', fontsize=13)
ax.set_ylabel('N', fontsize=17)
fig.tight_layout()
plt.show()

# Links as relations

### graph construction

In [None]:
%%time

# Empty undirected dynamic graph.
G = dn.DynGraph(edge_removal=True)

# Insertion of the interactions between nodes. Every interaction starts at its
# own timestamp and is given the same end value, max_timestamp + 1
# (presumably so that a link, once created, persists through the last
# timestamp -- confirm against the dynetx add_interaction documentation).
end_time = int(max_timestamp+1)
for u, v, t in reply_matrix:
    G.add_interaction(u=u, v=v, t=int(t), e=end_time)

## nodes and edges analysis

In [None]:
# Node and edge counts of the time slice of G at each timestamp in 2..59.
# Each snapshot is sliced once and queried for both counts.
tot_nodes, tot_edges = [], []

for t in tqdm(range(2, 60), desc ="loop "):
    snapshot = G.time_slice(t)
    n_nodes = snapshot.number_of_nodes()
    n_edges = snapshot.number_of_edges()
    tot_nodes.append(n_nodes)
    tot_edges.append(n_edges)
    


In [None]:
# Plot of N (number of nodes) against the 58 timestamps 2..59.

fig, ax = plt.subplots()
ax.scatter(np.linspace(2, 59, 58), tot_nodes)
ax.set_xlabel('timestamp', fontsize=13)
ax.set_ylabel('N', fontsize=17)
fig.tight_layout()
# fig.savefig('N_relation.png')  # uncomment to export the figure
plt.show()

In [None]:
# Plot of E (number of edges) against the 58 timestamps 2..59.

fig, ax = plt.subplots()
ax.scatter(np.linspace(2, 59, 58), tot_edges)
ax.set_xlabel('timestamp', fontsize=13)
ax.set_ylabel('E', fontsize=17)
fig.tight_layout()
# fig.savefig('E_relation.png')  # uncomment to export the figure
plt.show()

## average degree analysis

In [None]:
%%time

# Average degree <k> of the time slice of G at each timestamp in 2..59:
# the sum of the node degrees divided by the number of nodes.
K_avg = []

for t in tqdm(range(2, 60), desc='loop'):
    degree_by_node = G.time_slice(t).degree()
    total_degree = sum(degree_by_node.values())
    K_avg.append(total_degree / len(degree_by_node))

In [None]:
# Plot of the average degree <k> against the 58 timestamps 2..59.

fig, ax = plt.subplots()
ax.scatter(np.linspace(2, 59, 58), K_avg)
ax.set_xlabel('timestamp', fontsize=13)
ax.set_ylabel(r'$ \langle k \rangle$ ', fontsize=17)
fig.tight_layout()
# fig.savefig('avg_degree_relation.png')  # uncomment to export the figure
plt.show()

## density analysis

In [None]:
%%time
# Value of G.snapshot_density(t) for each timestamp t in 2..59.
density_list = [
    G.snapshot_density(t) for t in tqdm(range(2, 60), desc='loop')
]

In [None]:
# Plot of d (snapshot density) against the 58 timestamps 2..59.
fig, ax = plt.subplots()
ax.scatter(np.linspace(2, 59, 58), density_list)
ax.set_xlabel('timestamp', fontsize=13)
ax.set_ylabel('d', fontsize=17)
fig.tight_layout()
# fig.savefig('density_relation.png')  # uncomment to export the figure
plt.show()

## clustering coefficient analysis

In [None]:
%%time

# Average clustering coefficient of the time slice of G at each timestamp in 2..59.
avg_clustering = [
    nx.average_clustering(G.time_slice(t))
    for t in tqdm(range(2, 60), desc='loop')
]

In [None]:
# Plot of the average clustering coefficient <C> against the 58 timestamps 2..59.

fig, ax = plt.subplots()
ax.scatter(np.linspace(2, 59, 58), avg_clustering)
ax.set_xlabel('timestamp', fontsize=13)
ax.set_ylabel(r'$\langle C \rangle$ ', fontsize=17)
fig.tight_layout()
# fig.savefig('avg_clustering_relation.png')  # uncomment to export the figure
plt.show()

## diameter analysis
The diameter is computed on the giant (largest connected) component of each snapshot.

In [None]:
%%time
# Approximate diameter of the giant (largest) connected component of the
# snapshot of G at each timestamp in 2..59.
# approximation.diameter returns a lower bound on the true diameter.
diameter_giant_comp = []

for t in tqdm(range(2, 60), desc='loop'):

    # Slice once and reuse the snapshot, so that the components and the
    # induced subgraph both refer to the graph as it is at time t.
    snapshot = G.time_slice(t)

    # connected_components() yields the components in no particular size
    # order, so the largest one has to be selected explicitly instead of
    # taking the first.
    giant_nodes = max(nx.connected_components(snapshot), key=len)

    # Induce the subgraph on the snapshot, not on the full dynamic graph G:
    # G also holds the edges recorded at other timestamps, which would
    # shorten the paths and distort the diameter at time t.
    G_giant_comp = snapshot.subgraph(giant_nodes)
    diameter_giant_comp.append(approximation.diameter(G_giant_comp))

In [None]:
# Plot of the diameter of the giant component against the 58 timestamps 2..59.

fig, ax = plt.subplots()
ax.scatter(np.linspace(2, 59, 58), diameter_giant_comp)
ax.set_xlabel('timestamp')
ax.set_ylabel('diameter of the giant component')
fig.tight_layout()
# fig.savefig('diameter_giant_comp_relation.png')  # uncomment to export the figure
plt.show()

## number of connected components

In [None]:
%%time

# Number of connected components of the time slice of G at each timestamp in 2..59.
components_list = [
    nx.number_connected_components(G.time_slice(t))
    for t in tqdm(range(2, 60), desc='loop')
]

In [None]:
# Plot of the number of connected components against the 58 timestamps 2..59.

fig, ax = plt.subplots()
ax.errorbar(np.linspace(2, 59, 58), components_list, marker='o', linestyle='--')
ax.set_xlabel('timestamp', fontsize=13)
ax.set_ylabel('# of connected components', fontsize=15)
fig.tight_layout()
# fig.savefig('connected_components_relation.png')  # uncomment to export the figure
plt.show()