In [4]:
import praw
import asyncio
import asyncpraw
import pandas as pd
from dotenv import load_dotenv
import os

In [None]:
# Load the Reddit API credentials from a dotenv file.
# The file location can be overridden with the REDDIT_ENV_FILE environment
# variable so the notebook is not tied to one machine; when that variable is
# unset, the original absolute path is used unchanged.
env_file = os.getenv(
    "REDDIT_ENV_FILE",
    r"C:\Users\kurai\OneDrive\Desktop\Programming Projects\SAN-PDX Word Embeddings\SAN_PDX_word_embeddings\config.env",
)
load_dotenv(env_file)

client_id = os.getenv("REDDIT_CLIENT_ID")
client_secret = os.getenv("REDDIT_CLIENT_SECRET")
user_agent = os.getenv("REDDIT_USER_AGENT")

# Fail fast, naming the variables that are missing, instead of passing None
# on to the Reddit client. REDDIT_CLIENT_SECRET is deliberately not checked:
# NOTE(review): PRAW documents client_secret=None for installed-app
# credentials, so an unset secret may be intentional -- confirm for this app.
missing_settings = [
    name
    for name, value in (
        ("REDDIT_CLIENT_ID", client_id),
        ("REDDIT_USER_AGENT", user_agent),
    )
    if value is None
]
if missing_settings:
    raise RuntimeError(
        f"Missing Reddit settings {missing_settings}; checked the process "
        f"environment and the dotenv file at {env_file!r}."
    )

# Authenticate with Reddit (synchronous client)
reddit = praw.Reddit(
    client_id=client_id,
    client_secret=client_secret,
    user_agent=user_agent
)

In [None]:
# Scrape settings read by the coroutines defined later in this cell
subreddits = ["Portland", "SanDiego"]
post_limit = 500        # posts requested per subreddit
comments_limit = 1000   # comment cap passed to fetch_comments per subreddit
output_file = "reddit_data.csv"

# Async Reddit client, built from the credentials loaded in the previous cell
reddit = asyncpraw.Reddit(
    user_agent=user_agent,
    client_secret=client_secret,
    client_id=client_id,
)

async def fetch_posts(subreddit_name, limit=500):
    """Collect up to `limit` submissions from a subreddit's "new" listing.

    Args:
        subreddit_name: Name of the subreddit, without the ``r/`` prefix.
        limit: Maximum number of posts to return.

    Returns:
        A list of dicts, one per post, with the keys ``id``, ``title``,
        ``score``, ``created_utc``, ``selftext``, ``num_comments`` and
        ``subreddit`` (the latter is the `subreddit_name` argument).
    """
    print(f"Fetching posts from r/{subreddit_name}...")
    sub = await reddit.subreddit(subreddit_name)
    collected = []

    # The listing is requested without an API-side limit; the cap is enforced
    # here by leaving the loop once enough posts have been gathered.
    async for submission in sub.new(limit=None):
        if len(collected) >= limit:
            break
        record = {
            "id": submission.id,
            "title": submission.title,
            "score": submission.score,
            "created_utc": submission.created_utc,
            "selftext": submission.selftext,
            "num_comments": submission.num_comments,
            "subreddit": subreddit_name,
        }
        collected.append(record)

    print(f"Fetched {len(collected)} posts from r/{subreddit_name}.")
    return collected

async def fetch_comments(post_ids, limit=500):
    """Fetch comments for the given submissions, capped at `limit` in total.

    Submissions are processed in the order given. As soon as `limit` comments
    have been collected, no further submissions are requested, so the cap also
    bounds the number of API round-trips.

    Args:
        post_ids: Iterable of Reddit submission IDs.
        limit: Maximum number of comments to collect across all submissions
            combined (not per submission).

    Returns:
        A list of dicts, one per comment, with the keys ``post_id``,
        ``comment_id``, ``body``, ``score`` and ``created_utc``.
    """
    print("Fetching comments...")
    comments = []

    for post_id in post_ids:
        # Check the cap before touching the API: without this, every remaining
        # submission would still be fetched even though nothing more is kept.
        if len(comments) >= limit:
            break

        submission = await reddit.submission(id=post_id)
        await submission.comments.replace_more(limit=0)  # Avoid "load more" comments

        for comment in submission.comments.list():
            if len(comments) >= limit:
                break
            comments.append({
                "post_id": post_id,
                "comment_id": comment.id,
                "body": comment.body,
                "score": comment.score,
                "created_utc": comment.created_utc,
            })

    print(f"Fetched {len(comments)} comments.")
    return comments

async def main():
    """Scrape every configured subreddit and write the results to two CSVs.

    For each name in `subreddits`, fetches up to `post_limit` posts and then
    up to `comments_limit` comments belonging to those posts. All posts go to
    ``reddit_posts_async.csv`` and all comments to
    ``reddit_comments_async.csv`` in the current working directory.
    """
    post_rows, comment_rows = [], []

    for name in subreddits:
        # Posts first: their IDs drive the comment lookup for this subreddit.
        batch = await fetch_posts(name, limit=post_limit)
        post_rows += batch

        ids = [row["id"] for row in batch]
        comment_rows += await fetch_comments(ids, limit=comments_limit)

    # Save to CSV
    print("Saving data to CSV...")
    posts_df = pd.DataFrame(post_rows)
    comments_df = pd.DataFrame(comment_rows)

    for frame, path in (
        (posts_df, "reddit_posts_async.csv"),
        (comments_df, "reddit_comments_async.csv"),
    ):
        frame.to_csv(path, index=False)

# Run the async main loop
await main()



Fetching posts from r/Portland...
Fetched 500 posts from r/Portland.
Fetching comments...
Fetched 1000 comments.
Fetching posts from r/SanDiego...
Fetched 500 posts from r/SanDiego.
Fetching comments...
Fetched 1000 comments.
Saving data to CSV...


In [None]:
# Reload the CSV files written by the scraping cell
comments_df = pd.read_csv("reddit_comments_async.csv")
posts_df = pd.read_csv("reddit_posts_async.csv")

# Preview the first rows of the comments frame, then of the posts frame
print(comments_df.head(), posts_df.head(), sep="\n")

   post_id comment_id                                               body  \
0  1npqy22    ng1h2re  did no one else see the post of the person who...   
1  1npqy22    ng1g38e                      Op what’s the current status?   
2  1npqy22    ng1gn9v  Oh sweet girl. I can help with transport OP, p...   
3  1npqy22    ng1gwds                                    Current status?   
4  1npqy22    ng1jac6  Get this dog some help! Wtf?! It's bleeding an...   

   score   created_utc  
0     14  1.758758e+09  
1      7  1.758758e+09  
2      9  1.758758e+09  
3      5  1.758758e+09  
4      5  1.758759e+09  
        id                                              title  score  \
0  1npqy22  Found dog downtown unable to locate owner, wha...     55   
1  1nprjjp  After Tense Discussion, East Portland Councilo...      2   
2  1npp5wr  Portland discloses gaping hole in homeless she...     42   
3  1npo1hg  Portland mayor plans to start citing homeless ...    292   
4  1npnt12                   Surv