In [None]:
#Realizado por Margarita Mamani
import praw

import os

# Create the Reddit API client used by every later cell.
# Credentials are looked up in environment variables so real secrets never have
# to be typed into (and saved with) the notebook. When a variable is not set,
# the original placeholder string is used, so the cell behaves as it did before.
reddit = praw.Reddit(
    client_id=os.environ.get("REDDIT_CLIENT_ID", "CLIENTID"),
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET", "CLIENTSECRET"),
    username=os.environ.get("REDDIT_USERNAME", "USERNAME"),
    password=os.environ.get("REDDIT_PASSWORD", "PASSWORD"),
    user_agent=os.environ.get("REDDIT_USER_AGENT", "USERAGENT"),
)

# Printing the current user acts as a quick check that the login worked.
print("Autenticado como:", reddit.user.me())


In [None]:
import pandas as pd
from datetime import datetime

# --- Configuration ---
subreddits = ["politics", "PoliticalDiscussion", "worldnews"]  # names without the "r/" prefix
mode = "top"   # name of the subreddit listing method to call, e.g. "top" or "hot"
posts_per_subreddit = 20  # used both as the request limit and for the expected row count

# --- Collection ---
# Build a plain list of dicts and create the DataFrame once at the end.
rows = []
for name in subreddits:
    sr = reddit.subreddit(name)
    # Look up the listing method by name (sr.top / sr.hot) and cap the number of posts.
    listing = getattr(sr, mode)(limit=posts_per_subreddit)
    for s in listing:
        rows.append({
            "subreddit": name,
            "title": s.title,
            "score": s.score,
            "num_comments": s.num_comments,
            "id": s.id,
            "url": s.url
        })

# Passing `columns` fixes the column order and keeps the schema even if `rows` is empty.
df = pd.DataFrame(rows, columns=["subreddit", "title", "score", "num_comments", "id", "url"])
# The expected total is an upper bound: a listing may yield fewer posts than requested.
print(f"Total filas: {len(df)} (deberían ser {len(subreddits)*posts_per_subreddit})")
df.head()


In [None]:
# Export the posts table to a CSV whose name embeds the listing mode and a
# minute-resolution timestamp (YYYYMMDD_HHMM).
current_time = datetime.now()
stamp = current_time.strftime("%Y%m%d_%H%M")
out_path = f"reddit_posts_{mode}_{stamp}.csv"

# Write without the DataFrame index, encoded as UTF-8.
df.to_csv(out_path, encoding="utf-8", index=False)

print("Guardado en:", out_path)


In [None]:
# Collect up to `max_comments_per_post` usable top-level comments for every post in `df`.
max_comments_per_post = 5
placeholder_bodies = ("[deleted]", "[removed]")  # bodies that carry no real text

comments_data = []

for pid in df["id"].astype(str):
    submission = reddit.submission(id=pid)
    submission.comment_sort = "top"  # request comments in "top" order
    # Per the PRAW docs, limit=0 drops the "load more comments" placeholders
    # instead of fetching them, leaving only real comments in the forest.
    submission.comments.replace_more(limit=0)

    # Filter while iterating and stop once enough valid comments are found.
    # Slicing the first five and filtering afterwards would return fewer than
    # five whenever a deleted/removed comment sits among them.
    kept = 0
    for c in submission.comments:
        if kept >= max_comments_per_post:
            break
        if not hasattr(c, "body") or c.body.lower() in placeholder_bodies:
            continue
        comments_data.append({
            "post_id": pid,
            "body": c.body,
            "score": c.score
        })
        kept += 1

# Passing `columns` keeps the schema stable even when no comments were collected.
df_comments = pd.DataFrame(comments_data, columns=["post_id", "body", "score"])
print("Comentarios recolectados:", len(df_comments))
df_comments.head()


In [None]:
# Debug check: report whether a global named `df_posts` exists in this kernel.
# NOTE(review): the cells visible here store the posts table as `df`, not
# `df_posts` — confirm which name is intended.
has_df_posts = "df_posts" in globals()
print("¿Existe df_posts?", has_df_posts)

In [None]:
import pandas as pd

# Make sure both join keys are strings so the merge compares like with like.
df["id"] = df["id"].astype(str)                # posts
df_comments["post_id"] = df_comments["post_id"].astype(str)  # comments

# 1) Save the two base tables.
df.to_csv("reddit_posts.csv", index=False, encoding="utf-8")
df_comments.to_csv("reddit_comments.csv", index=False, encoding="utf-8")

# 2) Build the combined table that links every comment to its post.
# The post key is renamed to "post_id" *before* merging so both sides share one
# key column. Merging with left_on/right_on keeps both key columns, and renaming
# "id" afterwards would produce two columns called "post_id" in the output.
# how="left" keeps every comment; validate="many_to_one" raises if a post id
# appears more than once in `df`.
combined = df_comments.merge(
    df[["id", "subreddit", "title", "url"]].rename(columns={"id": "post_id"}),
    on="post_id", how="left", validate="many_to_one"
)[["post_id", "subreddit", "title", "body", "score", "url"]]

combined.to_csv("reddit_comments_joined.csv", index=False, encoding="utf-8")
print("Guardados: reddit_posts.csv, reddit_comments.csv, reddit_comments_joined.csv")
combined.head()
