In [49]:
import networkx as nx
import powerlaw as pl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from collections import Counter 
from networkx.algorithms.community.quality import modularity

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [50]:
G = nx.read_edgelist('LFM1.csv', delimiter=',')

In [158]:
print(G)
G.degree['7237']

Graph with 7624 nodes and 27806 edges


216

In [52]:
# import networkx as nx
# import EoN
# import matplotlib.pyplot as plt

# # Parameters
# beta = 0.2  # transmission rate
# gamma = 0.01  # recovery rate
# initial_infected = [1] #starting point 
# G = nx.convert_node_labels_to_integers(G, first_label=0)

# # Run the SIR model on the network
# t, S, I, R = EoN.fast_SIR(G, beta, gamma, initial_infecteds = initial_infected)

# # Plot the results
# plt.plot(t, S, label='Susceptible')
# plt.plot(t, I, label='Infected')
# plt.plot(t, R, label='Recovered')
# plt.xlabel('Time')
# plt.ylabel('Number of nodes')
# plt.title('SIR Model on Network')
# plt.legend()
# plt.show()


Now we are going to do community discovery. We are going to use networkx module **community**.

In [53]:
# Run Label Propagation algorithm
# import community
communities_lp = nx.algorithms.community.label_propagation.label_propagation_communities(G)
communities_louvain = nx.algorithms.community.louvain_communities(G)
communities_greedy_mod = nx.algorithms.community.greedy_modularity_communities(G)

# Convert communities to a list for better visualization
communities_list_lp = [list(community) for community in communities_lp]
communities_list_louvain = [list(community) for community in communities_louvain]
communities_list_gm = [list(community) for community in communities_greedy_mod]

# Print the detected communities
# print("Detected Communities:")
# for i, community in enumerate(communities_list):
#     print(f"Community {i + 1}: {community}")


In [54]:
print(len(communities_list_lp))
print(len(communities_list_louvain))
print(len(communities_list_gm))

modularity_lp = modularity(G, communities_list_lp)
modularity_louvain = modularity(G, communities_list_louvain) #the chosen one
modularity_gm = modularity(G, communities_list_gm)
print(modularity_lp)
print(modularity_louvain)
print(modularity_gm)
#print modularity

650
28
44
0.7522556757225006
0.8157813971297904
0.7955830887901616


In [87]:
# lengths_com = [(len(c)/7624)*100 for c in communities_list_louvain]
lengths_com = [len(c) for c in communities_list_louvain]
print(sorted(lengths_com, reverse=True))

[1122, 1016, 929, 861, 666, 663, 500, 466, 271, 240, 197, 155, 126, 97, 70, 64, 60, 42, 13, 12, 11, 11, 7, 7, 5, 5, 4, 4]


In [92]:
min([len(c) for c in communities_list_louvain])

4

We chose louvain communities algorithm, because it gives highest modularity and a stable number of communities (read more about it). 

We saw there are some communities on the bottom that only contain 5-4 nodes, so we will ignore them (why? - think of good reason). 

So we take the top 16 communities, each containing more than 1% of the nodes in the network.

From those we choose the biggest, smallest and middle one and see how the contagion spreads when we infect the nodes in them with highes degree/betweenness centrality. That should give us 3 different models. For each of them we will try several combinations of infection and recovery rate.

We want to also see a random case in which we randomly choose a node/3 nodes from the network, regardless of community.

OUR RESEARCH QUESTION: **Is the speed of the spread influenced by the size of the community associated with the initial node?**

In [93]:
#limitation: not 100% likely that people will be in a single disjoint community; in future we are going to extend this to overlapping communities.

In [107]:
sorted_communities = sorted(communities_list_louvain, key = len, reverse = True)
result_dict = {i: sorted_communities[i] for i in range(1,len(sorted_communities))}
merged_list = [item for sublist in sorted_communities[14:] for item in sublist]

# Remove the first 16 key-value pairs
for i in range(14,len(sorted_communities)):
    result_dict.pop(i, None)

# Add the merged list as the 17th key-value pair
result_dict[14] = merged_list

# Print the resulting dictionary
print(len(result_dict[13]))

97


In [166]:
A = result_dict[1]
B = result_dict[13]
C = result_dict[14]

In [159]:
# the largest community
A_node_degree = {}
for node in A:
    A_node_degree[node] = G.degree(node)


found_key_A = next((key for key, value in A_node_degree.items() if value == max(A_node_degree.values())), None)
found_key_A

'7237'

In [165]:
# the smalest community
B_node_degree = {}
for node in B:
    B_node_degree[node] = G.degree(node)


found_key_B = next((key for key, value in B_node_degree.items() if value == max(B_node_degree.values())), None)
found_key_B


'3571'

In [164]:
# without community
C_node_degree = {}
for node in C:
    C_node_degree[node] = G.degree(node)


found_key_C = next((key for key, value in C_node_degree.items() if value == max(C_node_degree.values())), None)
found_key_C


'5851'