In [62]:
import networkx as nx
from diffusion_models import Weighted_Cascade, Trivalency_Model, Independent_Cascade, Decreasing_Cascade, linear_threshold, general_threshold
from influence_max_algorithms import MIA
import matplotlib.pyplot as plt
import numpy as np
import tqdm as tqdm
import time as time


In [63]:
# Load the 2020 Proud Boys network from its GML file.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where that exact directory layout exists.
pb2020_gml_path = '/Users/davidguzman/Documents/GitHub/Network-Science-final-Maximization-/Proud_Boys_Datsets/PB2020.gml'
pb2020 = nx.read_gml(pb2020_gml_path)

In [64]:
# Invert edge direction for PB2020.
# The reversal is guarded by a graph-level flag so that re-running this cell
# does not flip the edges back to their original direction. A graph freshly
# loaded from disk carries no flag, so it is reversed exactly once.
if not pb2020.graph.get('edges_reversed', False):
    pb2020 = pb2020.reverse()
    pb2020.graph['edges_reversed'] = True

In [92]:
# Report the size of the network: node count first, then edge count.
node_count = pb2020.number_of_nodes()
edge_count = pb2020.number_of_edges()
print("Number of nodes: ", node_count)
print("Number of edges: ", edge_count)

Number of nodes:  13217
Number of edges:  14477


# Measuring the spread of the baseline, influence maximization centralities

## Import and initialization of diffusion models

In [66]:
# Independent Cascade model for the 2020 network: construct it with default
# settings, then fit it on the (edge-reversed) pb2020 graph.
# NOTE(review): what `fit` computes or stores is defined in diffusion_models — confirm there.
ICM_model_2020 = Independent_Cascade()
ICM_model_2020.fit(pb2020)

In [91]:
# Decreasing Cascade model for the 2020 network: construct it with default
# settings, then fit it on the pb2020 graph.
# NOTE(review): what `fit` computes or stores is defined in diffusion_models — confirm there.
decreasing_cascade_model_2020 = Decreasing_Cascade()
decreasing_cascade_model_2020.fit(pb2020)

In [68]:
# Weighted Cascade model for the 2020 network: construct it with default
# settings, then fit it on the pb2020 graph.
# NOTE(review): what `fit` computes or stores is defined in diffusion_models — confirm there.
weighted_cascade_model_2020 = Weighted_Cascade()
weighted_cascade_model_2020.fit(pb2020)

In [69]:
# Trivalency model for the 2020 network: construct it with default settings,
# then fit it on the pb2020 graph.
# NOTE(review): what `fit` computes or stores is defined in diffusion_models — confirm there.
trivalency_model_2020 = Trivalency_Model()
trivalency_model_2020.fit(pb2020)

In [70]:
# Linear Threshold model for the 2020 network: construct it with default
# settings, then fit it on the pb2020 graph.
# The `fit` call is the last expression of the cell, so its return value
# (a networkx DiGraph, per the recorded output) is echoed below the cell.
# NOTE(review): presumably `fit` returns the graph it annotated — confirm in diffusion_models.
linear_threshold_model_2020 = linear_threshold()
linear_threshold_model_2020.fit(pb2020)

<networkx.classes.digraph.DiGraph at 0x16a173e80>

In [71]:
# General Threshold model for the 2020 network: construct it with default
# settings, then fit it on the pb2020 graph.
# The `fit` call is the last expression of the cell, so its return value
# (a networkx DiGraph, per the recorded output) is echoed below the cell.
# NOTE(review): the recorded output shows the same object address as the linear
# threshold fit — verify the two models do not overwrite each other's graph attributes.
general_threshold_model_2020 = general_threshold()
general_threshold_model_2020.fit(pb2020)

<networkx.classes.digraph.DiGraph at 0x16a173e80>

In [72]:
# Number of top-ranked nodes taken from each centrality ranking as the seed set
# for the diffusion runs below (used as `ranking[:seed_size]`).
seed_size = 30

## K-core centralities

In [73]:
# Compute the core number of every node in pb2020 ...
k_core_centralities = nx.core_number(pb2020)
# ... and store it on each node under the 'k_core_centralities' attribute.
nx.set_node_attributes(
    pb2020,
    values=k_core_centralities,
    name='k_core_centralities',
)

In [74]:
# Give every node a 'followers' attribute: nodes that already have one keep
# their value, nodes that lack it get 0.
for _, node_attrs in pb2020.nodes(data=True):
    node_attrs.setdefault('followers', 0)

Since there are only 4 levels or k-shells in this case, in an effort to improve diffusion we sort first by shell and then by follower count (or some other attribute)

In [75]:
def _core_then_followers(node_item):
    """Sort key for a (node, attrs) pair: core number first, follower count second."""
    _, attrs = node_item
    return attrs['k_core_centralities'], attrs['followers']


# Rank all nodes in descending order of (core number, followers).
sorted_nodes = sorted(
    pb2020.nodes(data=True),
    key=_core_then_followers,
    reverse=True,
)

In [76]:
# Keep only the node label (first element of each (node, attrs) tuple) for the
# whole ranking; the top `seed_size` entries are sliced off where the seeds are used.
seed_k_core = [node_name for node_name, _ in sorted_nodes]

In [77]:
# Measure how far the top `seed_size` seeds of the k-core/followers ranking
# spread under each fitted diffusion model.
top_seeds = seed_k_core[:seed_size]

# Cascade-type models, each run with 100 Monte Carlo simulations.
# Every entry pairs the report prefix with the model to evaluate.
monte_carlo_runs = [
    ('total nodes reached ICM: ', ICM_model_2020),
    ('total nodes reached WCM: ', weighted_cascade_model_2020),
    ('total nodes reached Trivalency: ', trivalency_model_2020),
    ('total nodes reached DCM: ', decreasing_cascade_model_2020),
]
for report_prefix, cascade_model in monte_carlo_runs:
    # Each model receives its own copy of the seed list, as with a per-call slice.
    activate_nums = cascade_model.monte_carlo_diffusion_all(list(top_seeds), num_simulations=100)
    print(activate_nums)
    print(report_prefix + str(sum(activate_nums)))
    print()

# Threshold models: the element at index 1 of the returned value is summed.
# NOTE(review): the meaning of the 0.001 argument is defined in diffusion_models — confirm there.
activate_nums = linear_threshold_model_2020.diffusion_all(list(top_seeds), 0.001)
#print(activate_nums)
print('total nodes reached Linear Threshold: ' + str(sum(activate_nums[1])))

activate_nums = general_threshold_model_2020.diffusion_all(list(top_seeds))
#print(activate_nums)
print('total nodes reached General Threshold: ' + str(sum(activate_nums[1])))

[3.00000e+01 1.49714e+03 3.84700e+01 3.29800e+01 1.02000e+00]
total nodes reached ICM: 1599.6100000000001
[3.00000e+01 1.45827e+03 1.11000e+00 9.70000e-01]
total nodes reached WCM: 1490.35
[3.000e+01 5.423e+01 5.000e-02]
total nodes reached Trivalency: 84.27999999999999
[3.0000e+01 1.4972e+03 4.0980e+01 3.0060e+01 3.5000e-01]
total nodes reached DCM: 1598.59
total nodes reached Linear Threshold: 1768
total nodes reached General Threshold: 1697


In [78]:
# Display the seed set actually used above: the first `seed_size` labels of the ranking.
seed_k_core[:seed_size]

['realDonaldTrump',
 'IvankaTrump',
 'joerogan',
 'TuckerCarlson',
 'benshapiro',
 'RealCandaceO',
 'RealJamesWoods',
 'jordanbpeterson',
 'PrisonPlanet',
 'Cernovich',
 'PAMsLOvE',
 'CassandraRules',
 'laurenacooley',
 'mike29401',
 'principe_giovan',
 'MoralDK',
 'Deplorable_Bass',
 'ProudBoysGBG',
 'ColoradoPOYB',
 'DetroitProudBoy',
 'enrique_tarrio',
 'Premises187',
 'proudboy_',
 'elonmusk',
 'TheNotoriousMMA',
 'DonaldJTrumpJr',
 'PressSec',
 'seanhannity',
 'tedcruz',
 'IngrahamAngle']

It is also possible to sort by out-degree, which should better reflect how much influence each account has on the local network (as opposed to the follower count, which reflects all of Twitter).

In [79]:
# Rank all nodes in descending order of (core number, out-degree in pb2020).
# Out-degrees are looked up once per node from a precomputed mapping.
out_degree_by_node = dict(pb2020.out_degree())
sorted_nodes = sorted(
    pb2020.nodes(data=True),
    key=lambda node_item: (
        node_item[1]['k_core_centralities'],
        out_degree_by_node[node_item[0]],
    ),
    reverse=True,
)

In [80]:
# Keep only the node label (first element of each (node, attrs) tuple) for the
# whole ranking; the top `seed_size` entries are sliced off where the seeds are used.
seed_k_core = [node_name for node_name, _ in sorted_nodes]

In [81]:
# Measure how far the top `seed_size` seeds of the k-core/out-degree ranking
# spread under each fitted diffusion model.
top_seeds = seed_k_core[:seed_size]

# Cascade-type models, each run with 100 Monte Carlo simulations.
# Every entry pairs the report prefix with the model to evaluate.
monte_carlo_runs = [
    ('total nodes reached ICM: ', ICM_model_2020),
    ('total nodes reached WCM: ', weighted_cascade_model_2020),
    ('total nodes reached Trivalency: ', trivalency_model_2020),
    ('total nodes reached DCM: ', decreasing_cascade_model_2020),
]
for report_prefix, cascade_model in monte_carlo_runs:
    # Each model receives its own copy of the seed list, as with a per-call slice.
    activate_nums = cascade_model.monte_carlo_diffusion_all(list(top_seeds), num_simulations=100)
    print(activate_nums)
    print(report_prefix + str(sum(activate_nums)))
    print()

# Threshold models: the element at index 1 of the returned value is summed.
# NOTE(review): the meaning of the 0.001 argument is defined in diffusion_models — confirm there.
activate_nums = linear_threshold_model_2020.diffusion_all(list(top_seeds), 0.001)
#print(activate_nums)
print('total nodes reached Linear Threshold: ' + str(sum(activate_nums[1])))

activate_nums = general_threshold_model_2020.diffusion_all(list(top_seeds))
#print(activate_nums)
print('total nodes reached General Threshold: ' + str(sum(activate_nums[1])))

[3.00000e+01 1.66931e+03 2.35300e+01 5.00000e-02]
total nodes reached ICM: 1722.8899999999999
[3.00000e+01 1.60899e+03 7.00000e-01]
total nodes reached WCM: 1639.69
[30.   60.7   0.17]
total nodes reached Trivalency: 90.87
[3.00000e+01 1.66936e+03 2.29300e+01 4.00000e-02]
total nodes reached DCM: 1722.33
total nodes reached Linear Threshold: 1776
total nodes reached General Threshold: 1775


## non-backtracking centrality

In [82]:
from nbc import non_backtracking_centrality

In [83]:
# Integer-labelled copy of pb2020, passed to non_backtracking_centrality below.
# NOTE(review): later cells pair list(pb2020.nodes()) with the centrality output
# positionally, which assumes integer i here corresponds to the i-th node of
# pb2020.nodes() (networkx's default relabel ordering) — confirm.
pb2020_int = nx.convert_node_labels_to_integers(pb2020)

In [84]:
# Collect the original node labels of pb2020, in the graph's iteration order.
node_labels = list(pb2020)

In [85]:
# Calculate the non-backtracking centrality on the integer-labelled graph.
# NOTE(review): the result is later zipped with the node labels, so it is
# presumably one value per node ordered by integer id — confirm in nbc.
non_backtrack_centralities = non_backtracking_centrality(pb2020_int)

In [86]:
# Map each original node label to its centrality value by pairing the two
# sequences position by position.
non_backtrack_centralities_dict = {
    label: centrality
    for label, centrality in zip(node_labels, non_backtrack_centralities)
}

In [87]:
# Store each node's non-backtracking centrality on pb2020 under the
# 'non_backtrack_centralities' attribute, using the label -> value mapping.
nx.set_node_attributes(
    pb2020,
    values=non_backtrack_centralities_dict,
    name='non_backtrack_centralities',
)


In [88]:
# Build (node, centrality) pairs from the stored node attribute, ordered from
# highest to lowest centrality, and show the ten highest.
name_nbc_pairs = sorted(
    (
        (node, node_attrs['non_backtrack_centralities'])
        for node, node_attrs in pb2020.nodes(data=True)
    ),
    key=lambda pair: pair[1],
    reverse=True,
)
name_nbc_pairs[:10]

[('Premises187', 0.3178476412084301),
 ('principe_giovan', 0.10539135548376868),
 ('SnarkieVlogs', 0.02520947219021082),
 ('Minximusmaximus', 0.025209472190210816),
 ('FeedTheOcelot', 0.025209472190210816),
 ('bikerheadz', 0.025209472190210816),
 ('RightInTheDesu', 0.025209472190210816),
 ('Ace_blue_', 0.025209472190210816),
 ('DriftInnovation', 0.025209472190210816),
 ('FreeMotorLegal', 0.025209472190210816)]

In [89]:
# Strip the centrality values, keeping the node labels in ranked order.
# Both names refer to the same list.
seed_nbc = [node_name for node_name, _ in name_nbc_pairs]
name_nbc_pairs_labels = seed_nbc


In [90]:
# Measure how far the top `seed_size` seeds of the non-backtracking centrality
# ranking spread under each fitted diffusion model.
top_seeds = seed_nbc[:seed_size]

# Cascade-type models, each run with 100 Monte Carlo simulations.
# Every entry pairs the report prefix with the model to evaluate.
monte_carlo_runs = [
    ('total nodes reached ICM: ', ICM_model_2020),
    ('total nodes reached WCM: ', weighted_cascade_model_2020),
    ('total nodes reached Trivalency: ', trivalency_model_2020),
    ('total nodes reached DCM: ', decreasing_cascade_model_2020),
]
for report_prefix, cascade_model in monte_carlo_runs:
    # Each model receives its own copy of the seed list, as with a per-call slice.
    activate_nums = cascade_model.monte_carlo_diffusion_all(list(top_seeds), num_simulations=100)
    print(activate_nums)
    print(report_prefix + str(sum(activate_nums)))
    print()

# Threshold models: the element at index 1 of the returned value is summed.
# NOTE(review): the meaning of the 0.001 argument is defined in diffusion_models — confirm there.
activate_nums = linear_threshold_model_2020.diffusion_all(list(top_seeds), 0.001)
#print(activate_nums)
print('total nodes reached Linear Threshold: ' + str(sum(activate_nums[1])))

activate_nums = general_threshold_model_2020.diffusion_all(list(top_seeds))
#print(activate_nums)
print('total nodes reached General Threshold: ' + str(sum(activate_nums[1])))

[3.0000e+01 9.0400e+02 4.1500e+00 1.5835e+02 8.6000e-01 4.1560e+01
 9.2000e-01 9.2700e+00]
total nodes reached ICM: 1149.11
[3.0000e+01 8.6703e+02 2.0000e-01 6.4200e+00]
total nodes reached WCM: 903.65
[30.   33.85  0.05  0.09]
total nodes reached Trivalency: 63.99
[3.0000e+01 9.0400e+02 4.4400e+00 1.6252e+02 9.1000e-01 5.0620e+01
 2.9000e-01 1.1150e+01 6.0000e-02 7.2000e-01]
total nodes reached DCM: 1164.71
total nodes reached Linear Threshold: 1910
total nodes reached General Threshold: 1374


## collective influence centrality 

There is an open-source implementation available at https://github.com/nravindranath10/collective-influence, but it takes more than 80 minutes to run on the dataset at hand.

For this reason, it is not considered here.

# Saving network with centralities as attributes to gexf files (compatible with Gephi for visualizations)

In [34]:
# Export the graph, including the node attributes set above, to a GEXF file
# in the current working directory.
# NOTE(review): the file name mentions only k-core centralities, but by this
# point the graph also carries 'followers' and 'non_backtrack_centralities'.
nx.write_gexf(pb2020, "k_core_centralities.gexf")