# Análisis de redes sociales

In [None]:
%pip install pandas networkx matplotlib praw python-dotenv

In [None]:
import os, pandas as pd, networkx as nx, matplotlib.pyplot as plt
from dotenv import load_dotenv; load_dotenv()

import praw
# Reddit API client. Credentials are read from the environment (populated by
# load_dotenv above); Reddit asks for OAuth plus a descriptive user agent.
_reddit_credentials = {
    "client_id": os.environ.get("REDDIT_CLIENT_ID"),
    "client_secret": os.environ.get("REDDIT_CLIENT_SECRET"),
}
reddit = praw.Reddit(
    user_agent="sds-mod3-practico/1.0 (by u/TU_USUARIO)",
    **_reddit_credentials,
)


## Descargar hilos y comentarios (últimos N por subreddit)

In [None]:
SUBS = ["bolivia", "argentina", "peru"]
N_SUBMISSIONS = 60  # tune to the API quota / classroom limits

# One row per comment: who wrote it and, for replies, whose comment it answers.
rows = []
for s in SUBS:
    for post in reddit.subreddit(s).new(limit=N_SUBMISSIONS):
        # Per the PRAW docs, limit=0 drops the "load more comments" placeholders
        # without fetching them, so list() below yields only real comments.
        post.comments.replace_more(limit=0)
        for c in post.comments.list():
            parent_author = None
            if not c.is_root:
                try:
                    parent = c.parent()
                    # A deleted parent has author None; str(None) would create a
                    # fake user called "None", so use the same "[deleted]"
                    # placeholder as the "author" column.
                    parent_author = str(parent.author) if parent.author else "[deleted]"
                except Exception:
                    # Best effort: if the parent cannot be resolved, record the
                    # reply with no target rather than aborting the download.
                    parent_author = None
            rows.append({
                "subreddit": s,
                "post_id": post.id,
                "comment_id": c.id,
                "author": str(c.author) if c.author else "[deleted]",
                "reply_to": parent_author,
                "is_root": c.is_root,
            })

# Naming the columns keeps them present even when no comments were fetched.
df = pd.DataFrame(
    rows,
    columns=["subreddit", "post_id", "comment_id", "author", "reply_to", "is_root"],
)
df.head()


## Construir grafo dirigido (responde→a_quién)

In [None]:
# Keep only replies where both the replier and the replied-to user are known,
# then count how many times each (author, reply_to) pair occurs as "w".
replies = df.query(
    "~is_root and author != '[deleted]' "
    "and reply_to.notnull() and reply_to != '[deleted]'"
)
edges = replies.groupby(["author", "reply_to"]).size().reset_index(name="w")

# Directed graph: an edge author -> reply_to means "author replied to reply_to".
G = nx.DiGraph()
# add_weighted_edges_from stores the third tuple item under "weight" unless told
# otherwise. The cells below read the edge attribute "w", so name it explicitly;
# without this the reply counts are silently ignored.
G.add_weighted_edges_from(
    edges[["author", "reply_to", "w"]].itertuples(index=False, name=None),
    weight="w",
)
len(G), G.number_of_edges()


## Centralidades = influencia/difusión

In [None]:
# In-degree centrality: local prestige from receiving replies (unweighted).
deg_in = nx.in_degree_centrality(G)

# Betweenness finds conversation "bridges" via shortest paths. NetworkX treats
# the weight as a distance, whereas "w" is a reply count (tie strength), so the
# inverse count is used: more replies means a shorter path.
for _, _, data in G.edges(data=True):
    data["dist"] = 1.0 / data.get("w", 1)
btw = nx.betweenness_centrality(G, weight="dist", normalized=True)

# PageRank: global prestige; here a larger "w" correctly means a stronger link.
pr = nx.pagerank(G, weight="w")

# One row per user, most influential first (PageRank, ties broken by in-degree).
users = list(G.nodes())
rank = pd.DataFrame({
    "usuario": users,
    "in_degree": [deg_in[n] for n in users],
    "betweenness": [btw[n] for n in users],
    "pagerank": [pr[n] for n in users],
}).sort_values(["pagerank", "in_degree"], ascending=False)
rank.head(15)


## Comunidades (polarización estructural)

In [None]:
# Collapse the directed graph into an undirected one for modularity (the simple
# option): the two directions of a pair are merged and their counts summed.
UG = nx.Graph()
for src, dst, attrs in G.edges(data=True):
    reply_count = attrs.get("w", 1)
    if not UG.has_edge(src, dst):
        UG.add_edge(src, dst, weight=reply_count)
    else:
        UG[src][dst]["weight"] += reply_count

from networkx.algorithms.community import louvain_communities
from networkx.algorithms.community.quality import modularity

# Louvain community detection; the fixed seed makes the partition reproducible.
coms = louvain_communities(UG, weight="weight", seed=42)
# Newman-Girvan modularity Q of that partition.
Q = modularity(UG, coms, weight="weight")
# Show Q together with the sizes of the first five communities returned.
Q, list(map(len, coms[:5]))


## Visualización rápida

plt.figure(figsize=(9, 7))
pos = nx.spring_layout(UG, seed=7, weight="weight")

# Build the user -> PageRank lookup once; re-indexing `rank` for every node
# inside the comprehension made this step quadratic in the number of users.
pagerank_by_user = dict(zip(rank["usuario"], rank["pagerank"]))
# Node area scales with PageRank; nodes missing from `rank` get a fixed size.
sizes = [
    3000 * (pagerank_by_user[n] + 1e-3) if n in pagerank_by_user else 200
    for n in UG.nodes()
]
# Square root keeps very heavy edges from dominating the drawing.
widths = [attrs["weight"] ** 0.5 for _, _, attrs in UG.edges(data=True)]

nx.draw_networkx(UG, pos, with_labels=False, node_size=sizes, width=widths)
plt.title("Red de respuestas (Reddit) — últimos hilos")
plt.axis("off")
plt.tight_layout()
