In [1]:
import gzip
import networkx as nx

# Read the gzipped Twitter edge list into a directed graph with integer node ids
with gzip.open('twitter_combined.txt.gz', mode='rt') as edge_file:
    G_directed = nx.parse_edgelist(
        edge_file,
        create_using=nx.DiGraph(),
        nodetype=int,
    )

# Count how many times each triad type occurs in the directed graph
triadic_census = nx.triadic_census(G_directed)

# Report one "type: count" line per triad type, in the census' own order
for name in triadic_census:
    print(f"{name}: {triadic_census[name]}")


003: 89468907704504
012: 74184310421
102: 34528784533
021D: 18501168
021U: 89375507
021C: 19227090
111D: 30179774
111U: 19700828
030T: 3400791
030C: 23165
201: 13861389
120D: 2089310
120U: 2473737
120C: 546214
210: 2730985
300: 1818304


In [2]:
import gzip
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
# Select the non-interactive Agg backend: the figures produced later in this
# notebook are written to PNG files with savefig rather than shown on screen.
plt.switch_backend('Agg')

In [3]:
# Reload the Twitter edge list as a directed graph.
# nodetype=int converts node ids to integers, consistent with the first load of
# this file earlier in the notebook; without it parse_edgelist keeps every id as
# a string, so the same node would be keyed differently between the two graphs.
with gzip.open('twitter_combined.txt.gz', 'rt') as f:
    G_directed = nx.parse_edgelist(f, nodetype=int, create_using=nx.DiGraph())

In [4]:
# Restrict the analysis to the largest connected component (ignoring edge
# direction) for faster processing.
# Weakly connected components of the DiGraph are the connected components of
# its undirected version, so they can be found without first building a
# throw-away undirected copy of the whole graph.
largest_cc = max(nx.weakly_connected_components(G_directed), key=len)

# Only the induced subgraph on that component is converted to an undirected graph.
G_undirected = G_directed.subgraph(largest_cc).to_undirected()

In [None]:
# Per-node centrality scores, all computed on the undirected component.
# Betweenness is called with k=100 and a fixed seed (7) rather than the defaults
# (presumably a sampled estimate to keep run time manageable - confirm in the
# networkx docs before changing either value).
betweenness_centrality = nx.betweenness_centrality(
    G_undirected,
    k=100,
    normalized=True,
    seed=7,
)
closeness_centrality = nx.closeness_centrality(G_undirected)
degree_centrality = nx.degree_centrality(G_undirected)

In [None]:
# Pair every centrality result with the title used for its histogram,
# then split the pairs into the two parallel lists the plotting loop zips over.
titled_measures = (
    ('Degree Centrality', degree_centrality),
    ('Closeness Centrality', closeness_centrality),
    ('Betweenness Centrality', betweenness_centrality),
)
titles = [title for title, _ in titled_measures]
centrality_measures = [measure for _, measure in titled_measures]

In [None]:
# Write one histogram PNG per centrality measure; the file name is the title
# with spaces replaced by underscores.
for centrality, title in zip(centrality_measures, titles):
    # Materialise the dict view first: a plain list is accepted by every
    # matplotlib version and matches how the values are passed to numpy when
    # the means are printed.
    scores = list(centrality.values())

    # Explicit figure/axes objects instead of the implicit pyplot state.
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.hist(scores, bins=100)
    ax.set_title(title)
    # Label the axes so each saved image can be read on its own.
    ax.set_xlabel(title)
    ax.set_ylabel('Number of nodes')
    fig.savefig(f"{title.replace(' ', '_')}.png")
    # Close this specific figure so figures do not accumulate across iterations.
    plt.close(fig)

In [None]:
# Summarise each measure by its mean value instead of inspecting the top nodes
for label, scores in (
    ("Mean Degree Centrality:", degree_centrality),
    ("Mean Closeness Centrality:", closeness_centrality),
    ("Mean Betweenness Centrality:", betweenness_centrality),
):
    print(label, np.mean(list(scores.values())))