In [None]:
!pip install pelote
!pip install ipysigma

In [None]:
import networkx as nx
from ipysigma import Sigma, SigmaGrid
from pelote import remove_leaves, graph_to_edges_dataframe, remove_nodes
from random import random
import csv

In [None]:
### Read the edge list CSV row by row and build a weighted directed graph.
# newline='' is what the csv module documentation asks for, so that quoted
# fields containing line breaks are parsed correctly.
with open('edgelist_v5.csv', newline='') as f:
    g = nx.DiGraph()
    for row in csv.DictReader(f):
        # Cut off before 2020: compares the first four characters of 'datetime'
        # as a string (assumes the value starts with a four-digit year -- TODO confirm).
        if row['datetime'][:4] < '2020':
            continue
        source, target = row['source'], row['target']
        # Ignore self-loops.
        if source == target:
            continue
        # channel_category is True when the node name contains 'basis'.
        g.add_node(source, channel_category='basis' in source)
        g.add_node(target, channel_category='basis' in target)
        # Repeated (source, target) rows are collapsed into one edge whose
        # weight counts how many rows were seen.
        if g.has_edge(source, target):
            g[source][target]['weight'] += 1
        else:
            g.add_edge(source, target, weight=1)
remove_leaves(g) # pelote helper; per the author's note it finds nodes of degree 1 and drops them
#remove_nodes(g, lambda n, a: ('channel_category' in a)) # this to drop node if not in a; but it made graph unreadable
# Number of nodes and number of edges left in the graph.
g.order(), g.size()

In [None]:
# Histogram of the edge weights, taken from the 'weight' column of the edges dataframe.
graph_to_edges_dataframe(g)["weight"].plot.hist()

In [None]:
# The (source, target, attributes) triple of the edge with the largest weight.
max(g.edges(data=True), key=lambda edge: edge[2]['weight'])

In [None]:
from collections import Counter
# Accumulate the weight of every (source, target) pair ...
weights = Counter()
for u, v, w in g.edges(data='weight'):
    weights[u, v] += w

# ... and show the ten heaviest pairs.
weights.most_common(10)

**TODO:** perhaps merge nodes based on a shared string in their names (but not on "basis")?

In [None]:
# Alternative: render a single graph instead of the grid
#Sigma(g, node_size=g.degree, node_color=lambda n: random() > 0.5)

In [None]:
# Two views of the same graph, with nodes sized by in-degree:
#   1. coloured by the 'louvain' node metric requested from ipysigma,
#   2. coloured by the boolean 'channel_category' attribute
#      (True -> red, False -> light blue).
SigmaGrid(
    g,
    node_size=g.in_degree,
    node_zindex='channel_category',
    views=[
        dict(node_metrics=['louvain'], node_color='louvain'),
        dict(
            node_color='channel_category',
            node_color_palette={True: "red", False: "lightblue"},
        ),
    ],
)

Check out ipysigma on GitHub: [medialab/ipysigma](https://github.com/medialab/ipysigma)

**METRICS**

- **In-degree** (use this): the number of incoming edges of a node.
- **Weighted degree**: the sum of the weights of all of a node's edges.
- **Degree centrality**: the normalised version of degree -> relative importance of a node for the network.
- **PageRank** (better than betweenness): recursive importance -> a node is important if important nodes send links to it.

In [None]:
import pandas as pd
def _ranked_frame(scores, column):
    """Turn a node -> score mapping into a two-column frame sorted by score, highest first."""
    frame = pd.DataFrame(scores.items(), columns=['Node', column])
    return frame.sort_values(by=column, ascending=False)


# Raw per-node scores for each metric.
degree_dict = dict(g.degree())
in_degree_dict = dict(g.in_degree())
page_rank_dict = nx.pagerank(g)
degree_centrality_dict = nx.degree_centrality(g)
betweenness_centrality_dict = nx.betweenness_centrality(g)

# One ranked table per metric.
degree_df = _ranked_frame(degree_dict, 'Degree')
in_degree_df = _ranked_frame(in_degree_dict, 'In-Degree')
page_rank_df = _ranked_frame(page_rank_dict, 'PageRank')
degree_centrality_df = _ranked_frame(degree_centrality_dict, 'Degree Centrality')
betweenness_centrality_df = _ranked_frame(betweenness_centrality_dict, 'Betweenness Centrality')

# The twenty highest-scoring nodes of each table.
top_20_degree = degree_df.head(20)
top_20_in_degree = in_degree_df.head(20)
top_20_page_rank = page_rank_df.head(20)
top_20_degree_centrality = degree_centrality_df.head(20)
top_20_betweenness_centrality = betweenness_centrality_df.head(20)

In [None]:
# Print each top-20 table under its heading, followed by a blank line.
ranking_reports = [
    ("Top 20 Nodes by Degree:\n", top_20_degree),
    ("Top 20 Nodes by In-Degree:\n", top_20_in_degree),
    ("Top 20 Nodes by PageRank:\n", top_20_page_rank),
    ("Top 20 Nodes by Degree Centrality:\n", top_20_degree_centrality),
    ("Top 20 Nodes by Betweenness Centrality:\n", top_20_betweenness_centrality),
]
for heading, table in ranking_reports:
    print(heading, table, "\n")

In [None]:
# Emit every top-20 table as LaTeX source, without the index column.
for table in (
    top_20_degree,
    top_20_in_degree,
    top_20_page_rank,
    top_20_degree_centrality,
    top_20_betweenness_centrality,
):
    print(table.to_latex(index=False))

In [None]:
import networkx as nx

# Assuming 'g' is your directed graph and communities are stored in node attribute 'louvain'
def count_community_sizes(graph, attribute='louvain'):
    """Count how many nodes belong to each community.

    Args:
        graph: Object whose ``nodes(data=True)`` yields ``(node, attributes)``
            pairs, where ``attributes`` is a dict (e.g. a networkx graph).
        attribute: Name of the node attribute holding the community id.
            Defaults to ``'louvain'``.

    Returns:
        dict mapping each community id to the number of nodes carrying it.
        Nodes that lack the attribute are counted under the key ``None``.

    NOTE(review): confirm that the community attribute is actually stored on
    the graph's nodes before calling this; if it is not, every node ends up
    under ``None``.
    """
    community_sizes = {}

    for _node, data in graph.nodes(data=True):
        # A missing attribute yields None, which becomes its own bucket.
        community_id = data.get(attribute)
        community_sizes[community_id] = community_sizes.get(community_id, 0) + 1

    return community_sizes


In [None]:
# Count the nodes per community on g and report one line per community.
community_sizes = count_community_sizes(g)
for community in community_sizes:
    size = community_sizes[community]
    print(f"Community {community}: {size} nodes")