In [8]:
import os
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd

In [5]:
# --- Build the ego network -------------------------------------------------
# NOTE(review): SNAP ego-network files are conventionally named after the
# ego's node id, which would make the ego of '926981.edges' 926981 rather
# than the value below -- confirm which id is intended.
chosen_ego = 549089429
PATH = 'twitter/926981.edges'

# Node ids are kept as strings, exactly as they appear in the edge file.
edges = []
nodes_in_network = set()

with open(PATH, 'r') as f:
    for line in f:
        parts = line.strip().split()
        # Lines that do not hold exactly two tokens (e.g. blank lines) are skipped.
        if len(parts) == 2:
            node1, node2 = parts
            edges.append((node1, node2))
            nodes_in_network.update(parts)

# Create directed graph
G = nx.DiGraph()
G.add_edges_from(edges)

# Use the string form of the ego id as its node key so it has the same type as
# every id read from the file. With an int key, an ego id that also occurs in
# the edge list would end up as two distinct nodes in the graph.
ego_node = str(chosen_ego)
G.add_node(ego_node)

# Connect ego to all other nodes (ego follows everyone); never to itself.
G.add_edges_from((ego_node, node) for node in nodes_in_network if node != ego_node)

# --- Centrality measures ---------------------------------------------------
degree_cent = nx.degree_centrality(G)
betweenness_cent = nx.betweenness_centrality(G)
closeness_cent = nx.closeness_centrality(G)

# --- Quick stats -----------------------------------------------------------
n_nodes = G.number_of_nodes()  # >= 1 because the ego is always added

print("\nNetwork stats:")
print(f"Number of nodes: {n_nodes}")
print(f"Number of edges: {G.number_of_edges()}")
print(f"Density: {nx.density(G):.4f}")
print(f"Average Degree: {sum(dict(G.degree()).values()) / n_nodes}")
print(f"Average In-Degree: {sum(dict(G.in_degree()).values()) / n_nodes}")
print(f"Average Out-Degree: {sum(dict(G.out_degree()).values()) / n_nodes}")
print(f"avg_degree_centrality: {sum(degree_cent.values()) / len(degree_cent)}")
print(f"max_degree_centrality: {max(degree_cent.values())}")
print(f"avg_betweenness: {sum(betweenness_cent.values()) / len(betweenness_cent)}")
print(f"max_betweenness: {max(betweenness_cent.values())}")
print(f"avg_closeness: {sum(closeness_cent.values()) / len(closeness_cent)}")
print(f"max_closeness: {max(closeness_cent.values())}")


Network stats:
Number of nodes: 203
Number of edges: 5822
Density: 0.1420
Average Degree: 57.35960591133005
Average In-Degree: 28.679802955665025
Average Out-Degree: 28.679802955665025
avg_degree_centrality: 0.2839584451055943
max_degree_centrality: 1.0742574257425743
avg_betweenness: 0.005638781656270178
max_betweenness: 0.08964284271353158
avg_closeness: 0.4316100003672475
max_closeness: 0.70271988288323


In [None]:
# File paths -- every input file shares one dataset stem, so switching to a
# different ego network only requires editing DATASET_STEM.
DATASET_STEM = 'twitter/926981'

edges_path = f'{DATASET_STEM}.edges'
feat_path = f'{DATASET_STEM}.feat'
featnames_path = f'{DATASET_STEM}.featnames'
egofeat_path = f'{DATASET_STEM}.egofeat'

# Id used for the ego node.
# NOTE(review): this differs from the id in DATASET_STEM -- confirm it is intended.
chosen_ego = 549089429

In [None]:
# Load the three space-separated, header-less feature files.
feat = pd.read_csv(feat_path, sep=" ", header=None)
featnames = pd.read_csv(featnames_path, sep=" ", header=None)
egofeat = pd.read_csv(egofeat_path, sep=" ", header=None)

# Column 1 of the featnames table supplies the label for each feature column.
feature_labels = featnames[1].tolist()

# Per-node table: a leading node-id column followed by one column per feature.
feat.columns = ['node', *feature_labels]

# Ego table: feature columns only (no node-id column).
egofeat.columns = list(feature_labels)

# Keep only the features that are set (== 1) on the ego's single row.
ego_row = egofeat.iloc[0]
ego_features = ego_row[ego_row == 1]
print("Ego node features:")
print(ego_features)

Ego node features:
#MorrisseyonMars    1
@Coudal             1
@bailey             1
@biorhythmist       1
@czeins             1
@daveshumka         1
@edcasey            1
@fireland           1
@grahamclark        1
@hodgman            1
@hotdogsladies      1
@jessiechar:        1
@johnroderick       1
@kasper_hauser      1
@kwmurphy           1
@lianamaeby         1
@lonelysandwich     1
@matthewbaldwin     1
@mrgan              1
@paulandstorm       1
@robbaedeker        1
@shanecyr           1
@texburgher         1
@ylnt:              1
Name: 0, dtype: int64


In [None]:
feature_name = '#winning'

# Nodes that have this feature (ids as stored in the `feat` table)
nodes_with_feature = feat[feat[feature_name] == 1]['node'].tolist()

# `edges` holds node ids as strings (they come from str.split on the edge
# file), whereas pandas infers a numeric dtype for the all-digit id column of
# `feat`. Comparing the two directly never matches, so both sides are
# normalised to str here; a set also makes each membership test O(1).
feature_node_ids = {str(node_id) for node_id in nodes_with_feature}

# Count edges whose two endpoints both carry the feature
internal_edges = [
    (u, v)
    for u, v in edges
    if str(u) in feature_node_ids and str(v) in feature_node_ids
]
# NOTE: before the ids were normalised this count was always 0 regardless of
# the data, so a 0 from an earlier run does not show that these users are
# unconnected -- re-run the cell before drawing conclusions.
print(f"Number of edges among nodes with {feature_name}: {len(internal_edges)}")


Number of edges among nodes with #winning: 0
