In [None]:
import os

import networkx as nx
import pandas as pd
import seaborn as sns
import numpy as np
import scipy

import matplotlib.pyplot as plt

import json
import powerlaw
import imageio
import warnings
import pickle

from fa2 import ForceAtlas2

# create number for each group to allow use of colormap
from itertools import count

In [None]:
%run Utilyties.ipynb

In [None]:
def percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``, formatted like ``"12.34%"``.

    Both arguments are converted with ``float()``; the value is rounded to two
    decimals before the ``%`` sign is appended. ``whole == 0`` raises
    ``ZeroDivisionError``.
    """
    ratio = 100 * float(part) / float(whole)
    return f"{round(ratio, 2)}%"

In [None]:
def Average(lst):
    """Return the arithmetic mean of the sized collection ``lst``.

    An empty collection raises ``ZeroDivisionError``.
    """
    total = sum(lst)
    count = len(lst)
    return total / count

In [None]:
# Prefix prepended to the data paths built in this notebook.
path = ""
# read_data is not defined in this notebook — presumably it comes from Utilyties.ipynb (loaded via %run above).
data = read_data(path+'../data/data_complete.json')

# Build the network G from the loaded records (init_network is likewise defined outside this notebook).
G = init_network(data)

In [None]:
# G0 is whatever giant_component returns for G — judging by the name, its largest
# connected component (helper defined outside this notebook; confirm in Utilyties.ipynb).
G0 = giant_component(G)

# Macro

## ForceAtlas2 

In [None]:
# ForceAtlas2 layout engine configuration; this instance is used further down to
# compute the node positions of G0.
forceatlas2 = ForceAtlas2(
                # Behavior alternatives
                outboundAttractionDistribution=False,  # Dissuade hubs
                linLogMode=False,  # NOT IMPLEMENTED
                adjustSizes=False,  # Prevent overlap (NOT IMPLEMENTED)
                edgeWeightInfluence=1.0,

                # Performance
                jitterTolerance=1.0,  # Tolerance
                barnesHutOptimize=True,
                barnesHutTheta=1.2,
                multiThreaded=False,  # NOT IMPLEMENTED

                # Tuning
                scalingRatio=2.0,
                strongGravityMode=False,
                gravity=1.0,

                # Log
                verbose=True)

In [None]:
### to get the positions use the file pos.pkl ###

# Attention! This process takes a lot of time
positions = forceatlas2.forceatlas2_networkx_layout(G0, pos=None, iterations=1000)

# Save the positions to pos.pkl under the `path` prefix. The context manager
# guarantees the file is flushed and closed even if pickling fails.
with open(os.path.join(path, "pos.pkl"), "wb") as a_file:
    pickle.dump(positions, a_file)

In [None]:
# Load the node positions from pos.pkl.
# os.path.join avoids the stray leading "/" that path+"/pos.pkl" produced when
# `path` is empty (it pointed at the filesystem root rather than at the pos.pkl
# written by the layout cell).
# NOTE: pickle.load can execute arbitrary code embedded in the file — only load
# a pos.pkl that you generated yourself.
with open(os.path.join(path, "pos.pkl"), "rb") as a_file:
    positions = pickle.load(a_file)

In [None]:
from matplotlib.pyplot import figure
sns.set_style(style='white')
figure(figsize=(10, 10))

# Nodes are coloured by their 'classification' attribute.
# `with_labels` is not a parameter of draw_networkx_nodes (it belongs to
# nx.draw / nx.draw_networkx) and recent networkx versions reject it with a
# TypeError; this function never draws labels, so the argument is simply dropped.
nx.draw_networkx_nodes(G0, positions, node_size=2,
                       node_color=[G0.nodes[node]['classification'] for node in G0.nodes()],
                       cmap='coolwarm', alpha=0.4)
nx.draw_networkx_edges(G0, positions, edge_color="grey", alpha=0.2)

# Straight line y = -1.1*x + 80 drawn over the layout
x = np.linspace(-3000, 3000, 5)
plt.plot(x, -1.1*x+80, linestyle='-', color='black')
plt.axis('off')
plt.show()


In [None]:
# Assign the "echo_chamber" attribute ('blue' or 'red') to every positioned node,
# depending on which side of the line y = -1.1*x + 80 its layout position falls:
# strictly above the line -> 'blue', on or below it -> 'red'.
for node_id, coords in positions.items():
    above_line = coords[1] + 1.1*coords[0] - 80 > 0
    G0.nodes[node_id]['echo_chamber'] = 'blue' if above_line else 'red'

In [None]:
# Count how many nodes of G0 were assigned to each echo chamber.
chamber_labels = [attrs['echo_chamber'] for _, attrs in G0.nodes(data=True)]
n_red = chamber_labels.count('red')
n_blue = chamber_labels.count('blue')

print(f"Echo chambers dimensions:\n\nRed\t{n_red}\nBlue\t{n_blue}")

In [None]:
# Build one induced subgraph of G0 per echo chamber: H_red holds the nodes tagged
# 'red' (on or below the line, subgraph R), H_blue those tagged 'blue' (above it, subgraph B).
H_red, H_blue = (
    G0.subgraph([node for node, attrs in G0.nodes(data=True) if attrs['echo_chamber'] == colour])
    for colour in ('red', 'blue')
)

In [None]:
# Apply giant_component (defined outside this notebook) to each echo-chamber subgraph.
H0_red = giant_component(H_red) #Subgraph R
H0_blue = giant_component(H_blue) #Subgraph B

## Partition analysis

In [None]:
# Share of supporters, opponents and neutral users among the nodes of a graph
def partitions_profiling(g):
    """Print the opinion breakdown of the nodes of ``g``.

    Nodes with ``cont_classification <= -0.5`` are counted as supporters, nodes
    with ``cont_classification >= 0.5`` as opponents, and all remaining nodes as
    neutral or unclassifiable. Each group is printed as a percentage of all
    nodes, followed by the mean ``cont_classification`` rounded to two decimals.
    Nothing is returned.
    """
    total = g.number_of_nodes()
    opinions = [attrs['cont_classification'] for _, attrs in g.nodes(data=True)]

    pros = sum(1 for value in opinions if value <= -0.5)
    cons = sum(1 for value in opinions if value >= 0.5)
    neutrals = total - pros - cons
    average = Average(opinions)

    print(f"N. supporters: {percentage(pros, total)}\n"+
          f"N. opponents: {percentage(cons, total)}\n"+
          f"N. neutrals or unclassifiable: {percentage(neutrals, total)}\n"+
          f"Average opinion: {round(average,2)}")

In [None]:
# For each echo chamber: number of nodes of the full subgraph (H_*) that are not
# part of the graph giant_component returned for it (H0_*).
print(f"Disconnected nodes in the EC Red: {H_red.number_of_nodes() - H0_red.number_of_nodes()}")
print(f"Disconnected nodes in the EC Blue: {H_blue.number_of_nodes() - H0_blue.number_of_nodes()}")

In [None]:
# network_info is defined outside this notebook (presumably in Utilyties.ipynb).
network_info(H0_red, 'Echo chamber - Red') #Subgraph R

In [None]:
# Opinion breakdown (supporters / opponents / neutrals) of the Red echo chamber
partitions_profiling(H0_red)

In [None]:
# network_info is defined outside this notebook (presumably in Utilyties.ipynb).
network_info(H0_blue, 'Echo chamber - Blue') #Subgraph B

In [None]:
# Opinion breakdown (supporters / opponents / neutrals) of the Blue echo chamber
partitions_profiling(H0_blue)

In [None]:
# Edges of G0 that belong to neither H0_red nor H0_blue, as a count and as a share of all edges of G0.
# NOTE(review): edges of H_red / H_blue that were dropped when reducing to H0_red / H0_blue
# are also counted here, so this can overstate the actual red-blue crossings — confirm intent.
n_total_edges = G0.number_of_edges()
n_cross_edges = n_total_edges - (H0_red.number_of_edges()+H0_blue.number_of_edges())
print(f"Number of edges between the two echo chambers: {n_cross_edges} ({round(((n_cross_edges/n_total_edges)*100), 2)}%)")

### Hubs analysis 

In [None]:
from operator import itemgetter, attrgetter

# Node degrees of the full network and of the two echo chambers
node_degree, node_degree_r, node_degree_b = G.degree(), H0_red.degree(), H0_blue.degree()

# (node, degree) pairs ranked from the highest to the lowest degree
hubs, hubs_r, hubs_b = (
    sorted(degrees, key=itemgetter(1), reverse=True)
    for degrees in (node_degree, node_degree_r, node_degree_b)
)

# 'cont_classification' value of every node, keyed by node
C, C_r, C_b = (
    nx.get_node_attributes(graph, 'cont_classification')
    for graph in (G, H0_red, H0_blue)
)

In [None]:
# Share of nodes treated as hubs, expressed per thousand nodes
n = 2

# Number of hub nodes: n per thousand of the nodes of G, rounded to the nearest integer
npm = int(round(G.number_of_nodes() * n / 1000))

In [None]:
# Running sums of the hub degrees per echo chamber (divided by the counts below).
average_degree_red = 0
average_degree_blue = 0

# Number of hubs found in each echo chamber.
# NOTE: this rebinds n_red / n_blue, which held the echo-chamber sizes earlier in the notebook.
n_red = 0
n_blue = 0

print(f"\nList of {int(npm)} hubs ({n}%° of nodes of the entire network sorted by degree) \n")
print("Rank\tEC\tUsername\tC\u1D64\tDegree")

i=1
for h in hubs[:npm]: # the top n per thousand of the nodes, by degree
    if H0_red.has_node(h[0]):
        ec = "R"
        average_degree_red += h[1]
        n_red += 1
    elif H0_blue.has_node(h[0]):
        ec = "B"
        average_degree_blue += h[1]
        n_blue += 1
    else:
        # The hub belongs to neither echo chamber (the ranking is built on the
        # full network G): list it, but do not attribute it to Blue by default.
        ec = "-"
    print(f"{i}.\t{ec}\t{h[0]} \t{round(C[h[0]],1)} \t\t{h[1]}")
    i+=1

# Guard against an echo chamber without hubs, which used to raise ZeroDivisionError.
mean_degree_red = round(average_degree_red/n_red,1) if n_red else "n/a"
mean_degree_blue = round(average_degree_blue/n_blue,1) if n_blue else "n/a"

print(f"\nAverage hubs degree in the EC Red\t{mean_degree_red}")
print(f"Average hubs degree in the EC Blue\t{mean_degree_blue}")

In [None]:
# Grow the network snapshot by snapshot and track, at every step, the node counts
# per opinion class and the reach of the hubs' ego-networks.
#
# NOTE(review): this cell depends on names that are not assigned before it in the
# notebook: `snapshots` is only assigned in the "Micro" section further down, while
# `ranges`, `build_network`, `real_hubs`, `real_hub_red` and `real_hub_blue` are not
# defined anywhere in the visible notebook (presumably they come from Utilyties.ipynb).
# NOTE: `G` is rebound to a fresh empty graph here, replacing the full network.
G = nx.Graph()
c = nx.Graph()       # union of the ego-networks of all hubs seen so far

c_red = nx.Graph()   # union of the ego-networks of the hubs listed in real_hub_red
c_blue = nx.Graph()  # union of the ego-networks of the hubs listed in real_hub_blue

n_nodes = []
n_nodes_favore = []
n_nodes_contro = []
n_nodes_neutri = []

r_hubs = 0
b_hubs = 0
n_hubs = 0

l_r_hubs = []
l_b_hubs = []
l_n_hubs = []

perc_net = []
r_perc_ego = []
b_perc_ego = []

time = 1


def _share(part, whole):
    """Return part/whole as a percentage rounded to 2 decimals (0.0 when whole is 0)."""
    return round(part/whole*100, 2) if whole else 0.0


# The opinion classes come from G0 and do not change inside the loop, so the
# node lists are built once instead of at every iteration.
nodes_contro = [x for x,y in G0.nodes(data=True) if y['cont_classification']>=0.5]
nodes_favore = [x for x,y in G0.nodes(data=True) if y['cont_classification']<=-0.5]
nodes_neutri = [x for x,y in G0.nodes(data=True) if -0.5 < y['cont_classification'] < 0.5]

for snap, dates in zip(snapshots, ranges):

    print(f"--- Network dal: {dates[0]} al {dates[-1]} (Tempo {time})--- \n")
    build_network(snap, G)

    # Node counts of the current network, overall and per opinion class
    n_nodes.append(G.number_of_nodes())
    n_nodes_contro.append(G.subgraph(nodes_contro).number_of_nodes())
    n_nodes_favore.append(G.subgraph(nodes_favore).number_of_nodes())
    n_nodes_neutri.append(G.subgraph(nodes_neutri).number_of_nodes())

    # Merge the ego-network of every hub already present in the current network
    for node in real_hubs:
        if G.has_node(node):
            n_hubs += 1
            node_ego = nx.ego_graph(G, node)
            c = nx.compose(c, node_ego)
            if node in real_hub_red:
                c_red = nx.compose(c_red, node_ego)
                r_hubs += 1
            elif node in real_hub_blue:
                c_blue = nx.compose(c_blue, node_ego)
                b_hubs += 1

    l_r_hubs.append(r_hubs)
    l_b_hubs.append(b_hubs)
    l_n_hubs.append(n_hubs)

    n_all = G.number_of_nodes()
    n_ego = c.number_of_nodes()
    n_ego_red = c_red.number_of_nodes()
    n_ego_blue = c_blue.number_of_nodes()
    # Inclusion-exclusion: nodes reached from both the R hubs and the B hubs
    n_ego_both = -n_ego + n_ego_red + n_ego_blue

    # _share returns 0.0 instead of raising when no hub has appeared yet or the network is empty
    perc_net.append(_share(n_ego, n_all))
    r_perc_ego.append(_share(n_ego_red, n_ego))
    b_perc_ego.append(_share(n_ego_blue, n_ego))

    # The two "subgraph R/B" lines previously lacked their closing parenthesis.
    print(f"Hubs: {n_hubs}" +
          f"\nNodes connected to the hubs: {n_ego} ({perc_net[-1]}% of the network)" + # total number of nodes in the union of the hubs' ego-networks
          f"\nHubs in the subgraph R: {r_hubs}" +
          f"\n\tNodes connected to the hubs in the subgraph R: {n_ego_red} ({_share(n_ego_red, n_all)}% of the network)" +
          f"\nHubs in the subgraph B: {b_hubs}" +
          f"\n\tNodes connected to the hubs in the subgraph B: {n_ego_blue} ({_share(n_ego_blue, n_all)}% of the network)" +
          f"\nNodes connected to both hubs in subgraph R and B: {n_ego_both} ({_share(n_ego_both, n_all)}% of the network)\n\n")

    # The hub counters are per snapshot; the ego-network unions keep accumulating
    r_hubs = 0
    b_hubs = 0
    n_hubs = 0
    time += 1

In [None]:
# One column per processed snapshot, named T1..Tn. Deriving the names from the
# data keeps the table valid for any number of snapshots (seven snapshots still
# yield T1..T7); the hard-coded list made the DataFrame constructor fail otherwise.
columns = [f"T{t}" for t in range(1, len(n_nodes) + 1)]
df_net_growth = pd.DataFrame([n_nodes_favore, n_nodes_contro, n_nodes],
                             columns=columns,
                             index=['pro', 'cons', 'all'])
df_net_growth

In [None]:
# Two-panel figure: on the left, the stacked number of nodes per opinion class;
# on the right, the share of the network reached by the hubs (line) together
# with the number of hubs per subgraph (bars).

# Tick labels, one per x position
# NOTE(review): seven labels are hard-coded — presumably one match per snapshot; confirm
# that the plotted lists always have seven entries.
labels = ["Turkey - Italy", 
          "Italy - Switzerland", 
          "Italy - Wales", 
          "Italy - Austria", 
          "Belgium - Italy", 
          "Italy - Spain", 
          "Italy - England"]

x = np.arange(len(labels))  # the label locations
width = 0.35  # the width of the bars

sns.set(rc={'figure.figsize':(17,4.8), "lines.linewidth": 0.9, "grid.linewidth": 0.4}, font_scale=1.3)
sns.set_style("whitegrid")

fig, ax = plt.subplots(1, 2, sharex=True)

# Left panel: stacked areas of the pro / cons / neutral node counts
y=[n_nodes_favore, n_nodes_contro, n_nodes_neutri]
ax[0].stackplot(x,y, 
                colors = ['#3480b9', '#b1182b', '#d4d4d452'],  alpha=.7,  
                labels=['pros','cons', 'neutral'], 
                linewidth=2)
ax[0].set_ylabel('Number of nodes', rotation=90, labelpad=20)
ax[0].set_xticks(x)
ax[0].legend(loc='upper left')
ax[0].set_xticklabels(labels, rotation=20)

# Right panel, primary axis: values of perc_net plotted against their list index
ax[1].plot(perc_net,
         linestyle='-',
         marker='o', color='black', linewidth=2.0)


# Right panel, secondary y axis: side-by-side bars with the hub counts of subgraph R and B
ax2 = ax[1].twinx()
ax2.bar(x - width/2, l_r_hubs, width, color = '#b1182b', alpha=.7, label=r'$\in$ Subgraph R')
ax2.bar(x + width/2, l_b_hubs, width, color = '#3480b9', alpha=.7, label=r'$\in$ Subgraph B')

ax2.set_ylabel('Number of hubs', rotation=-90, labelpad=20)
ax2.legend(loc='upper left')

# move ax in front
ax[1].set_zorder(1)
ax[1].patch.set_visible(False)

ax[1].set_ylabel('Percentage of nodes (%)', labelpad=20)
ax[1].set_xticks(x)
ax[1].yaxis.grid(False)

# NOTE(review): ax[1] is despined twice with different settings; the second call
# presumably overrides the first — confirm which of the two is intended.
sns.despine(top=True, right=True, left=False, bottom=True, offset=10, trim=True, ax = ax[0])
sns.despine(top=True, right=False, left=False, bottom=True, offset=10, trim=True, ax = ax2)
sns.despine(top=True, right=False, left=False, bottom=True, offset=10, trim=True, ax = ax[1])
sns.despine(offset=10, trim=True, ax = ax[1])

ax[1].set_xticklabels(labels, rotation=20)

fig.tight_layout()
plt.show()

# Micro

In order to make our research more consistent, we choose to carry out this analysis on the users who wrote at least two tweets.

In [None]:
# Split the dataset into snapshots according to `ranges` (get_snapshot is defined
# outside this notebook).
# NOTE(review): `snapshots` is already iterated by the snapshot loop of the Macro
# section above, so on a fresh kernel that loop runs before this assignment;
# `ranges` is not assigned anywhere in the visible notebook — presumably it comes
# from Utilyties.ipynb. Confirm the intended execution order.
snapshots = get_snapshot(path+"../data/data_complete.json", ranges)

In [None]:
df = pd.DataFrame(data)

# Number of rows (tweets) per user
n_users = df.user.value_counts()
# Users who authored at least 2 tweets, with the per-user mean of the numeric columns.
# numeric_only=True is required on pandas >= 2.0, where mean() raises on
# non-numeric columns instead of silently dropping them.
user_list_occ = df[df.user.isin(n_users.index[n_users.gt(1)])].groupby('user').mean(numeric_only=True)

In [None]:
def discretize_opinion(G):
    """Replace every node's ``cont_classification`` with -1, 0 or 1, in place.

    Values strictly between -0.5 and 0.5 become 0, values >= 0.5 become 1 and
    every other value becomes -1. The graph is modified directly; nothing is
    returned.
    """
    for node in G.nodes():
        attrs = G.nodes[node]
        value = attrs['cont_classification']
        if -0.5 < value < 0.5:
            attrs['cont_classification'] = 0
        elif value >= 0.5:
            attrs['cont_classification'] = 1
        else:
            attrs['cont_classification'] = -1

In [None]:
def get_ass_mix(G):
    """Return the attribute assortativity coefficient of ``G`` on the discretised opinion.

    ``G`` is modified in place: its ``cont_classification`` values are first
    discretised by ``discretize_opinion``, so pass a copy to keep the original
    values.
    """
    # discretise the opinion into -1 (supporters), 1 (opponents), 0 (neutral / unclassifiable)
    discretize_opinion(G)
    coefficient = nx.attribute_assortativity_coefficient(G, "cont_classification")
    return coefficient

In [None]:
# Study of the neighbours' mean opinion C_N(u) against the opinion C_u of each user
def neighbors_av_opinion(G, T, lista_ass_mix):
    """Plot the joint density of users' opinions and their neighbours' mean opinion.

    Only nodes listed in the module-level ``user_list_occ.index`` are analysed.
    For each of them the mean ``cont_classification`` of its neighbours is stored
    in the node attribute ``neighbours_opinion`` (a node of degree 0 gets its own
    opinion). The function then draws a 2D KDE of opinion vs. neighbours' opinion,
    prints the Pearson correlation with its p-value, and annotates the plot with
    the correlation, the assortativity coefficient and the time step.

    Parameters
    ----------
    G : graph whose nodes carry a ``cont_classification`` attribute; its
        ``neighbours_opinion`` attribute is (re)written in place.
    T : zero-based time step; shown on the plot as ``t = T + 1``.
    lista_ass_mix : list to which the assortativity coefficient of this step is
        appended.

    Returns
    -------
    None
    """
    # `import scipy` alone does not guarantee that the stats submodule is loaded.
    import scipy.stats

    nx.set_node_attributes(G, 0, "neighbours_opinion") # initialise the attribute on every node

    # Compute the neighbours' mean opinion of every tracked user and collect
    # [own opinion, neighbours' opinion] per node in a single pass.
    n_class = {}
    for node in G.nodes():
        if node not in user_list_occ.index:
            continue
        own_opinion = G.nodes[node]['cont_classification']
        degree = G.degree[node]
        if degree != 0:
            neighbours_total = sum(G.nodes[nb]["cont_classification"] for nb in G.neighbors(node))
            neighbours_opinion = neighbours_total/degree
        else:
            # isolated node: fall back to its own opinion
            neighbours_opinion = own_opinion
        G.nodes[node]['neighbours_opinion'] = neighbours_opinion
        n_class[node] = [own_opinion, neighbours_opinion]

    # one row per tracked user
    df_class = pd.DataFrame.from_dict(n_class, orient='index',
                           columns=['opinion','neighbours_opinion'])

    # discretised opinion label
    df_class["class"] = np.where(
        df_class["opinion"] >= 0.5, 'contro',
        np.where(df_class["opinion"] <= -0.5, 'pro', 'neutro')
    )

    ### PLOT ###
    plt.figure(figsize=(6,6))
    ax = plt.axes()
    ax.set_facecolor("black")

    ax = sns.kdeplot(data = df_class,  x = "opinion", y = "neighbours_opinion",
                fill=True, thresh=0, levels=100, cmap="CMRmap") #ax=axes[T]) # or mako
    ax.set(xlim=(-3, 3), ylim=(-3, 3))
    ax.set_xlabel('$C_{u}$', fontsize = 26)
    ax.set_ylabel('$C_{N(u)}$', fontsize = 26)

    # Pearson coefficient and its p-value
    r, p = scipy.stats.pearsonr(df_class['opinion'], df_class['neighbours_opinion'])
    print(f"Pearson coefficient: {round(r,2)}\tp-value: {round(p,4)} \n")

    # Assortative mixing, computed on a copy because get_ass_mix discretises the
    # opinions of the graph it receives.
    G_tmp = G.copy()
    assortativity = get_ass_mix(G_tmp)
    lista_ass_mix.append(assortativity)

    ax.tick_params(axis='both', labelsize=16)

    tempo = T+1
    ax.text(x=0.04, y=0.96, transform = ax.transAxes, s="$ρ = %.2f$" % r,\
        fontsize=26, verticalalignment='top', horizontalalignment='left', color='white')
    # Use the value just computed rather than lista_ass_mix[T], which is only
    # correct when the list happens to be index-aligned with T.
    ax.text(x=0.04, y=0.84, transform = ax.transAxes, s="$r = %.2f$" % assortativity,\
        fontsize=26, verticalalignment='top', horizontalalignment='left', color='white')
    ax.text(x=0.80, y=0.13, transform = ax.transAxes, s=r"t = %.0f" % tempo,\
        fontsize=26, verticalalignment='top', horizontalalignment='left', color='white')

    # create file name and append it to a list
    #filename = f'Density_2tweets_{T}.png'
    #filenames.append(filename)

    # save frame
    #plt.savefig(filename, transparent=True)
    #plt.close()

    plt.show()

In [None]:
# Rebuild the network snapshot by snapshot, starting from an empty graph, and run
# the neighbours-opinion analysis after each snapshot has been added.
# NOTE: `G` is rebound to a fresh empty graph here.
G=nx.Graph()
T = 0  # zero-based time step passed to neighbors_av_opinion
lista_ass_mix = []  # receives one assortativity coefficient per time step
filenames = []   # only referenced by the commented-out frame-saving code in neighbors_av_opinion

for snap in snapshots:
    # build_network is defined outside this notebook; presumably it adds the snapshot to G in place
    build_network(snap, G)
    neighbors_av_opinion(G, T, lista_ass_mix)
    T += 1  