In [1]:
import pandas as pd
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities

# Cargar los archivos CSV con el delimitador y la codificación correctos
df_20191004 = pd.read_csv('./data/tweets_20191004.csv', encoding='iso-8859-3', encoding_errors='replace', sep=';')
df_20191019 = pd.read_csv('./data/tweets_20191019.csv', encoding='iso-8859-3', encoding_errors='replace', sep=';')
df_20191103 = pd.read_csv('./data/tweets_20191103.csv', encoding='iso-8859-3', encoding_errors='replace', sep=';')

# Unir los DataFrames
df_combined = pd.concat([df_20191004, df_20191019, df_20191103])

# Crear un grafo dirigido
G = nx.DiGraph()

# Agregar nodos y aristas al grafo basados en menciones
for index, row in df_combined.iterrows():
    user_id = row['user_id']
    tweet_text = row['text']
    mentions = [word[1:] for word in tweet_text.split() if word.startswith('@')]
    
    for mention in mentions:
        G.add_edge(user_id, mention)

# Encontrar comunidades usando el algoritmo de modularidad
communities = greedy_modularity_communities(G)

# Asignar la comunidad a cada nodo
community_dict = {}
for i, community in enumerate(communities):
    for node in community:
        community_dict[node] = i

# Agregar la comunidad como atributo del nodo
nx.set_node_attributes(G, community_dict, 'community')

# Calcular centralidad de grado
degree_centrality = nx.degree_centrality(G)
nx.set_node_attributes(G, degree_centrality, 'degree_centrality')

# Calcular centralidad de intermediación
betweenness_centrality = nx.betweenness_centrality(G)
nx.set_node_attributes(G, betweenness_centrality, 'betweenness_centrality')

# Calcular centralidad de cercanía
closeness_centrality = nx.closeness_centrality(G)
nx.set_node_attributes(G, closeness_centrality, 'closeness_centrality')

# Guardar el grafo con las comunidades y medidas de centralidad
graphml_path = "./data/tweets_graph_with_centrality_and_communities.graphml"
nx.write_graphml(G, graphml_path)

# Análisis de las comunidades
community_sizes = {i: len(list(c)) for i, c in enumerate(communities)}
largest_communities = sorted(community_sizes.items(), key=lambda x: x[1], reverse=True)

# Identificar los nodos con mayor centralidad de grado
top_degree_centrality = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[:5]

# Identificar los nodos con mayor centralidad de intermediación
top_betweenness_centrality = sorted(betweenness_centrality.items(), key=lambda x: x[1], reverse=True)[:5]

# Identificar los nodos con mayor centralidad de cercanía
top_closeness_centrality = sorted(closeness_centrality.items(), key=lambda x: x[1], reverse=True)[:5]

largest_communities, top_degree_centrality, top_betweenness_centrality, top_closeness_centrality


([(0, 831),
  (1, 369),
  (2, 232),
  (3, 217),
  (4, 146),
  (5, 108),
  (6, 104),
  (7, 104),
  (8, 98),
  (9, 84),
  (10, 76),
  (11, 70),
  (12, 64),
  (13, 54),
  (14, 53),
  (15, 45),
  (16, 44),
  (17, 43),
  (18, 36),
  (19, 30),
  (20, 29),
  (21, 26),
  (22, 24),
  (23, 22),
  (24, 17),
  (25, 17),
  (26, 17),
  (27, 15),
  (28, 15),
  (29, 15),
  (30, 14),
  (31, 14),
  (32, 13),
  (33, 13),
  (34, 13),
  (35, 13),
  (36, 13),
  (37, 11),
  (38, 11),
  (39, 11),
  (40, 11),
  (41, 11),
  (42, 11),
  (43, 10),
  (44, 10),
  (45, 10),
  (46, 10),
  (47, 10),
  (48, 9),
  (49, 9),
  (50, 9),
  (51, 9),
  (52, 8),
  (53, 8),
  (54, 8),
  (55, 8),
  (56, 8),
  (57, 7),
  (58, 7),
  (59, 7),
  (60, 7),
  (61, 7),
  (62, 7),
  (63, 7),
  (64, 7),
  (65, 7),
  (66, 7),
  (67, 7),
  (68, 7),
  (69, 7),
  (70, 7),
  (71, 7),
  (72, 7),
  (73, 7),
  (74, 6),
  (75, 6),
  (76, 6),
  (77, 6),
  (78, 6),
  (79, 6),
  (80, 6),
  (81, 6),
  (82, 6),
  (83, 6),
  (84, 6),
  (85, 6),
  (86, 6

In [2]:
# Analizar el tamaño de cada comunidad
community_sizes = {i: len(list(c)) for i, c in enumerate(communities)}
largest_communities = sorted(community_sizes.items(), key=lambda x: x[1], reverse=True)

# Mostrar las 5 comunidades más grandes
largest_communities[:5]


[(0, 831), (1, 369), (2, 232), (3, 217), (4, 146)]