In [12]:
# import libraries

import csv
import networkx as nx
import matplotlib
import matplotlib.pyplot as plt
%matplotlib tk



In [13]:
# function for computing betweeness centrality for finding the influencer 

def btc_node(graph):
    
    btc = nx.betweenness_centrality(graph)
    node_size = []
    
    for i in graph.nodes():
        ns = btc[i]*1000+10
        node_size.append(ns)
    
    return node_size

In [14]:
# function for computing page rank for finding the influencer 

def pg_node(graph):
    
    pg = nx.pagerank(graph)
    node_size = []
    
    for i in graph.nodes():
        node_size.append(pg[i]*10000+50)
    
    return node_size

In [15]:
# Function to get the weigth for the Island Model

def compute_weights(graph_w):
    wgt = []
    for frm, to, edg_data in graph_w.edges(data=True):
        wgt.append(edg_data['weight'])
    return wgt

In [16]:
# function to sort components for Island model

def deg_sort(graph):
    dg_graph = dict(nx.degree(graph))
    graph_sort = sorted(dg_graph.items(),key=lambda uv:(-uv[1], uv[0]))
    return graph_sort

In [17]:
# function to remove weight for Island Model

def weight_remove(graph, weight=1):
    """
    Remove edges with weights less then a threshold parameter ("weight")
    """
    graph_r = nx.Graph()
    for frm, to, edg_data in graph.edges(data=True):
        if edg_data['weight'] > weight:
            graph_r.add_edge(frm,to, attr_dict=edg_data)
    return graph_r

In [18]:
# Read Twitter Retweet File 

graph_file = csv.reader(open('Data/higgs-retweet_network.edgelist','rt'), delimiter=' ')

In [19]:
# Create an empty graph

retweet_graph = nx.Graph()

In [20]:
# Create a retweet graph between users

for details in graph_file:
    retweet_graph.add_edge(details[0],details[1],weight=int(details[2]))

In [21]:
# Total Number of Nodes in Grpah

len(retweet_graph.nodes())

256491

In [54]:
# Plot Weight Distribution of the largest componenet in retweet

weights = compute_weights(retweet_graph) 
plt.xlabel('Weight', fontsize=20)
plt.ylabel('Number of Nodes', fontsize=20)

plt.title('Re-tweet Weight Distribution Of Original Graph', fontsize=20)
plt.tight_layout()
hist=plt.hist(weights,100)

In [55]:
# Total Number of Connected Components in Retweeted Graph

retweet_subgraphs=list(nx.connected_component_subgraphs(retweet_graph))
retweet_subgraphs.sort(key=len, reverse=True)
print('Total Number  of Components', len(retweet_subgraphs))

Total Number  of Components 13199


In [56]:
# Get the largest component 

retweet_lgst_component = retweet_subgraphs[0]

In [57]:
# len of largest component

len(retweet_lgst_component)

223833

In [58]:
# Plot Weight Distribution of the largest componenet in retweet

weights = compute_weights(retweet_lgst_component) 
plt.xlabel('Weight', fontsize=20)
plt.ylabel('Number of Nodes', fontsize=20)
plt.title('Retweet Weight Distribution of LWCC', fontsize=20)
plt.tight_layout()
hist=plt.hist(weights,100)

In [90]:
# removes edges to find the core componen20
retweet_lgst_component_trim = weight_remove(retweet_lgst_component, weight=15)

In [91]:
# Extract the connected components of 'retweet_lgst_component_trim' graph

retweet_lgst_component_trim_sub_components = list(nx.connected_component_subgraphs(retweet_lgst_component_trim))

In [85]:
# sort the sub largest trim component

retweet_lgst_component_trim_sub_components.sort(key=len, reverse=True)

In [86]:
# Get largest component of retweet_lgst_component_trim_compenents

final_graph = retweet_lgst_component_trim_sub_components[0]
len(final_graph)

72

In [87]:
nx.draw(final_graph, with_labels=False)
plt.tight_layout()
plt.savefig('.pn10g', format='PNG',dpi=300)
fig = plt.show()



In [88]:
# Get positon of the nodes

node_positinon = nx.spring_layout(final_graph)

In [80]:
# compute Betweeness centrality score

node_sz_btc = btc_node(final_graph)
nx.draw(final_graph, pos=node_positinon, node_size=node_sz_btc, with_labels=True, font_size=9)
plt.savefig('18.png', format='PNG',dpi=300)
fig = plt.show()

In [89]:
# compute page rank score

node_sz_pg = pg_node(final_graph)
nx.draw_networkx(final_graph,pos=node_positinon,node_size=node_sz_pg,with_labels=True)
plt.tight_layout()
plt.savefig('10.png', format='PNG',dpi=300)
fig = plt.show()

In [82]:
max(node_sz_pg)

4806.766887836359

In [None]:
import networkx as nx
import ndlib.models.ModelConfig as mc
import ndlib.models.epidemics.IndependentCascadesModel as ids


# Model selection
model = ids.IndependentCascadesModel(final_graph)

# Model Configuration
config = mc.Configuration()
config.add_model_parameter('percentage_infected', 0.25)

# Setting the edge parameters
threshold = 0.9
for e in final_graph.edges():
    config.add_edge_configuration("threshold", e, threshold)

model.set_initial_status(config)

# Simulation execution
iterations = model.iteration_bunch(20)
trends = model.build_trends(iterations)

In [None]:
from bokeh.io import output_notebook, show
from ndlib.viz.bokeh.DiffusionTrend import DiffusionTrend

viz = DiffusionTrend(model, trends)
p = viz.plot(width=800, height=800)
show(p)