# Network Measures

## Path

In [None]:
# Root directory of the project; the loading cells build their parquet and CSV paths from it.
SNA_PROJECT_PATH = "/home/sna_bros/SNA_Project"

## Imports

In [None]:
import pandas as pd
import os
import ijson
from typing import List, Any, Dict
from tqdm.notebook import tqdm
import networkx as nx
import networkit as nk
import seaborn as sns
from matplotlib import pyplot as plt
import nx_cugraph as nxcg
import gravis as gv

In [None]:
# NOTE(review): this cell runs after the imports cell, where networkx and nx_cugraph are
# already loaded. The nx-cugraph docs ask for this variable to be set before networkx is
# imported — confirm it still takes effect here, or move this cell above the imports.
%env NX_CUGRAPH_AUTOCONFIG=True

## Loading the Network

In [None]:
# Read the eight edge-chunk parquet files and stack them into one edge table.
df_list = [
    pd.read_parquet(f"{SNA_PROJECT_PATH}/edge_chunks/edge_chunks/edge_chunk_{chunk_no}.parquet")
    for chunk_no in range(8)
]

# ignore_index=True gives the combined table a fresh 0..N-1 row index.
final_df = pd.concat(df_list, ignore_index=True)
print(final_df.shape)
final_df.head(100)

In [None]:
# Split the edge table by relation type.
following_df = final_df[final_df['relation'].eq('following')]
followers_df = final_df[final_df['relation'].eq('followers')]

# 'following' rows become directed edges source_id -> target_id.
following_graph = nx.from_pandas_edgelist(
    following_df, source='source_id', target='target_id', create_using=nx.DiGraph()
)
# 'followers' rows are read with the columns swapped, so each edge runs target_id -> source_id
# (presumably to orient it follower -> followed — confirm against the dataset).
followers_graph = nx.from_pandas_edgelist(
    followers_df, source='target_id', target='source_id', create_using=nx.DiGraph()
)

# Union of the nodes and edges of the two directed graphs.
full_graph = nx.compose(following_graph, followers_graph)

In [None]:
# Print networkx's one-line description of the merged graph.
print(full_graph)

In [None]:
# Convert the merged graph into an nx-cugraph graph object.
# NOTE(review): full_graph_gpu is not referenced by any later cell visible here.
full_graph_gpu = nxcg.from_networkx(full_graph)

In [None]:
# NetworKit copy of the merged graph.
full_graph_nk = nk.nxadapter.nx2nk(full_graph, data=True)
# Map each networkx node id to its position in full_graph's node iteration order.
idmap = {node_id: position for position, node_id in enumerate(full_graph.nodes())}

In [None]:
# Load the label table and attach each label to its graph node as the 'label' attribute.
labels = pd.read_csv(f"{SNA_PROJECT_PATH}/label.csv")
label_by_id = labels.set_index('id')['label']
label_dict = label_by_id.to_dict()
nx.set_node_attributes(full_graph, values=label_dict, name='label')

In [None]:
# Tag every edge with "<source label>_<target label>", built from its endpoints' 'label' attributes.
nodes = full_graph.nodes()
edge_dict = {
    (src, dst): nodes[src]['label'] + '_' + nodes[dst]['label']
    for src, dst in full_graph.edges()
}

nx.set_edge_attributes(full_graph, edge_dict, 'edge_label')

### Creating a human-node-only graph

In [None]:
# Keep only the nodes labelled 'human'; subgraph() retains the edges among them.
human_nodes = [
    node for node, attrs in full_graph.nodes(data=True) if attrs['label'] == 'human'
]
human_graph = full_graph.subgraph(human_nodes)
print(human_graph)

### Creating a human-human graph

In [None]:
# Keep only the edges tagged 'human_human', together with the nodes they touch.
human_human_edges = [
    (src, dst)
    for src, dst, attrs in full_graph.edges(data=True)
    if attrs['edge_label'] == 'human_human'
]
human_edge_graph = full_graph.edge_subgraph(human_human_edges)
print(human_edge_graph)

### Number of humans that rely on bots for connections

In [None]:
# Human nodes that take part in no human-human edge.
isolated_humans = human_graph.number_of_nodes() - human_edge_graph.number_of_nodes()
print(f"A total of {isolated_humans} is connected only to Bots")

## Computing Measures of the whole graph

### Weakly connected components

In [None]:
# The nx-cugraph call (nxcg.weakly_connected_components) is left disabled; plain networkx is used.
weak_df = nx.weakly_connected_components(full_graph)
# Component sizes, largest first. weak_df is a generator and is exhausted by this pass.
weak_sizes = sorted((len(component) for component in weak_df), reverse=True)
print(weak_sizes)

### Strongly connected components

In [None]:
# Strongly connected components: groups of nodes that can all reach one another along
# directed edges. The call must be the *strongly* connected variant; the weakly connected
# one would only repeat the sizes printed for the weak components.
strong_df = nx.strongly_connected_components(full_graph)
# Component sizes, largest first (sorted() consumes the generator).
print([
    len(c)
    for c in sorted(strong_df, key=len, reverse=True)
])

### Are these networks scale-free?

In [None]:
import powerlaw


def power_law_check(network):
    """Fit a power law to the degree sequence of *network* and print the result.

    Prints the fitted exponent (alpha) and lower cutoff (xmin), followed by the
    log-likelihood ratio and p-value from comparing the power-law fit with an
    exponential fit. Nothing is returned.

    Args:
        network: graph object whose ``degree()`` yields ``(node, degree)`` pairs.
    """
    degree_sequence = list(dict(network.degree()).values())

    fitted = powerlaw.Fit(degree_sequence)
    best_power_law = fitted.power_law
    print(f"alpha (power-law exponent): {best_power_law.alpha}")
    print(f"xmin (starting point of the power law): {best_power_law.xmin}")

    # NOTE(review): per the powerlaw docs a positive ratio favours the first
    # distribution named ('power_law') — confirm before interpreting the sign.
    loglik_ratio, p_value = fitted.distribution_compare('power_law', 'exponential')
    print(f"Loglikelihood ratio: {loglik_ratio}, p-value: {p_value}")

In [None]:
# Scale-free check on the merged graph.
print("Verifying if the complete network is scale-free")
power_law_check(full_graph)

In [None]:
# Scale-free check on the graph built from the edges tagged 'human_human'.
print("Verifying if the network containing only human-human connections is scale-free")
power_law_check(human_edge_graph)

In [None]:
# Scale-free check on the subgraph of nodes labelled 'human'.
print("Verifying if the network containing only human users is scale-free")
power_law_check(human_graph)

### Network density

In [None]:
# Density of the merged directed graph; for directed graphs networkx uses edges / (nodes * (nodes - 1)).
print(f"Density of the complete network is: {nx.density(full_graph)}")

In [None]:
# Density of the graph built from the edges tagged 'human_human'.
print(f"Density of the human-human connection network is: {nx.density(human_edge_graph)}")

In [None]:
# Density of the subgraph of nodes labelled 'human'.
print(f"Density of the human user network is: {nx.density(human_graph)}")

### How much do bots affect the clustering coefficient?

In [None]:
# Edge direction is dropped first: to_undirected() builds an undirected copy of the merged
# graph, and the average clustering coefficient is computed on that copy.
print(f"Average clustering coefficient in complete network: {nx.average_clustering(full_graph.to_undirected())}" )

In [None]:
# Average clustering on an undirected copy of the graph built from 'human_human' edges.
print(f"Average clustering coefficient in the human-human connection network: {nx.average_clustering(human_edge_graph.to_undirected())}" )

In [None]:
# Average clustering on an undirected copy of the subgraph of nodes labelled 'human'.
print(f"Average clustering in the human user network: {nx.average_clustering(human_graph.to_undirected())}" )