In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import networkx as nx
import utilities as util


['c:\\Users\\Vince\\OneDrive\\Documenten\\School\\TU\\2223\\modelling-networks-and-data-analysis\\project_higgs\\higgs-twitter-network-science\\src', 'c:\\Users\\Vince\\AppData\\Local\\Programs\\Python\\Python39\\python39.zip', 'c:\\Users\\Vince\\AppData\\Local\\Programs\\Python\\Python39\\DLLs', 'c:\\Users\\Vince\\AppData\\Local\\Programs\\Python\\Python39\\lib', 'c:\\Users\\Vince\\AppData\\Local\\Programs\\Python\\Python39', '', 'c:\\Users\\Vince\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages', 'c:\\users\\vince\\projects\\fantoom\\emmel\\basy-synths', 'c:\\users\\vince\\projects\\fantoom\\emmel\\evairy', 'c:\\Users\\Vince\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\win32', 'c:\\Users\\Vince\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\win32\\lib', 'c:\\Users\\Vince\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\Pythonwin', 'c:\\Users\\Vince\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\IPy

In [2]:
# Locations of the raw edge lists, relative to the notebook's working directory.
# Each file is parsed as space-separated `source target [weight]` rows.

# interaction layers
reply_file_path = "../network-data/higgs-reply_network.edgelist"
mention_file_path = "../network-data/higgs-mention_network.edgelist"
retweet_file_path = "../network-data/higgs-retweet_network.edgelist"

# following/followers layer
social_file_path = "../network-data/higgs-social_network.edgelist"

In [3]:
def extract_nodes(df):
    """
    Return the set of node ids that occur in an edge-list dataframe.

    df must provide a 'source' and a 'target' column; every distinct value
    found in either column ends up in the returned set.
    """
    sources = set(df['source'].unique())
    targets = set(df['target'].unique())
    return sources | targets

def load_nodes(file_path):
    """
    Read a space-separated edge list and return the set of its node ids.

    The file is parsed into a dataframe (columns 'source', 'target',
    'weight') because set operations on dataframe columns are much faster
    than going through a graph object. A one-line summary with the node
    count is printed as a side effect.
    """
    edges = pd.read_csv(file_path, sep=' ', names=['source', 'target', 'weight'])
    nodes = extract_nodes(edges)
    print(f"loaded {file_path} has {len(nodes)} nodes")
    return nodes


In [4]:
# Nodes that take part in every interaction mechanism (reply, mention, retweet).
# Each layer is folded into the running intersection as soon as it is loaded,
# so its full node set is not kept around afterwards.

reply_mention_retweet_nodes = load_nodes(reply_file_path)
for layer_file_path in (mention_file_path, retweet_file_path):
    reply_mention_retweet_nodes = reply_mention_retweet_nodes.intersection(
        load_nodes(layer_file_path)
    )
del layer_file_path

print(f"intersected network has {len(reply_mention_retweet_nodes)} nodes")

loaded ../network-data/higgs-reply_network.edgelist has 38918 nodes
loaded ../network-data/higgs-mention_network.edgelist has 116408 nodes
loaded ../network-data/higgs-retweet_network.edgelist has 256491 nodes
intersected network has 21346 nodes


In [5]:
def largest_connected_component(G):
    """
    Return the largest connected component of an undirected graph.

    G must be undirected: nx.connected_components is not supported for
    directed networks, so convert first (e.g. with G.to_undirected()).

    The result is the subgraph of G induced by the nodes of its largest
    component. When several components share the maximum size, the first one
    yielded by networkx is used. A graph without nodes gives an empty
    subgraph instead of raising.
    """
    # One max() pass is enough; there is no need to sort every component just
    # to keep the first. default=() covers the graph-with-no-nodes case.
    largest_nodes = max(nx.connected_components(G), key=len, default=())
    return G.subgraph(largest_nodes)

In [18]:
def build_network(df, directed = False):
    """
    Build a networkx graph from an edge-list dataframe.

    Only the 'source' and 'target' columns are used; any other column (such
    as 'weight') is not copied onto the edges. A DiGraph is created when
    `directed` is truthy, a plain Graph otherwise.
    """
    graph_type = nx.DiGraph if directed else nx.Graph
    G = graph_type()
    G.add_edges_from(zip(df["source"], df["target"]))
    return G

In [19]:
def extract_bidirectional_subnetwork(G):
    """
    Create an undirected sub network from a directed network, keeping only
    the reciprocated edges.

    An edge u - v is part of the result only when both

    u -> v and v -> u

    are in the directed network. In the followers/following sense, this
    extracts the sub network of users who follow each other.

    Nodes without any reciprocated edge do not appear in the result.
    """
    # Only the first two items of each edge tuple (the endpoints) are used,
    # so edge tuples carrying an extra trailing item are handled as well.
    reciprocated = (
        (edge[0], edge[1])
        for edge in G.edges
        if G.has_edge(edge[1], edge[0])
    )

    sub_G = nx.Graph()
    sub_G.add_edges_from(reciprocated)
    return sub_G

In [20]:
def load_subnetwork(file_path, sub_nodes, directed = False):
    """
    Load an edge list from file and keep only the edges inside `sub_nodes`.

    The file is read as space-separated 'source', 'target', 'weight' columns.
    An edge survives only when both of its endpoints are in `sub_nodes`.
    The remaining edges are handed to build_network, which returns a DiGraph
    when `directed` is truthy and a Graph otherwise.
    """
    edges = pd.read_csv(file_path, delimiter = ' ', names=['source', 'target', 'weight'])

    source_kept = edges['source'].isin(sub_nodes)
    target_kept = edges['target'].isin(sub_nodes)

    return build_network(edges[source_kept & target_kept], directed)

In [21]:
def get_friendships(G):
    """
    Return the undirected network of mutual ("friend") relations in G.

    G is expected to be a directed graph: an edge u - v is kept only when
    both u -> v and v -> u are present. If an undirected graph is passed,
    every edge is trivially reciprocated and all edges are returned.
    """
    # Do not convert G with to_undirected() before the check: that collapses
    # u -> v and v -> u into one undirected edge, so every edge would look
    # reciprocated and the result would simply be the whole network.
    return extract_bidirectional_subnetwork(G)

def get_lcc(G, directed = False):
    """
    Return the largest connected component of G, ignoring edge direction.

    G is first converted with to_undirected(), so for a directed input the
    component is determined on the undirected version of the graph.
    The `directed` argument is accepted but currently not used.
    """
    undirected_G = G.to_undirected()
    return largest_connected_component(undirected_G)

In [22]:
# following/followers is a directed network, so build it as a DiGraph;
# loading it undirected would discard who follows whom.
social_network = load_subnetwork(social_file_path, reply_mention_retweet_nodes, directed=True)
# get_lcc drops the direction again, so this is a component of the undirected graph
lcc_social_network = get_lcc(social_network)

friends_network = get_friendships(social_network)

# free the large intermediates as soon as they are no longer needed
del social_network
lcc_friends_network = get_lcc(friends_network)
del friends_network

nx.write_edgelist(lcc_friends_network, '../output/higgs-friends-lcc.edgelist')
nx.write_edgelist(lcc_social_network, '../output/higgs-social-lcc.edgelist')

In [None]:
# Restrict every interaction layer to the nodes of an LCC and export it.
# NOTE: load_subnetwork is called with its default directed=False and
# build_network only uses the source/target columns, so the exported layers
# are undirected and carry no edge weights.

# --- layers on the social LCC ---
social_reply_network = load_subnetwork(reply_file_path, lcc_social_network.nodes)
social_mention_network = load_subnetwork(mention_file_path, lcc_social_network.nodes)
social_retweet_network = load_subnetwork(retweet_file_path, lcc_social_network.nodes)

nx.write_edgelist(social_reply_network, '../output/higgs-social-reply.edgelist')
nx.write_edgelist(social_mention_network, '../output/higgs-social-mention.edgelist')
nx.write_edgelist(social_retweet_network, '../output/higgs-social-retweet.edgelist')

# --- layers on the friends LCC ---
friends_reply_network = load_subnetwork(reply_file_path, lcc_friends_network.nodes)
friends_mention_network = load_subnetwork(mention_file_path, lcc_friends_network.nodes)
friends_retweet_network = load_subnetwork(retweet_file_path, lcc_friends_network.nodes)

nx.write_edgelist(friends_reply_network, '../output/higgs-friends-reply.edgelist')
nx.write_edgelist(friends_mention_network, '../output/higgs-friends-mention.edgelist')
nx.write_edgelist(friends_retweet_network, '../output/higgs-friends-retweet.edgelist')