# Get Reddit posts using the PRAW API

### Imports

In [35]:
import os, time, pytz
import tqdm
import pandas as pd
from dotenv import load_dotenv
from datetime import datetime, timedelta
import praw

In [36]:
# Load variables from a local .env file into the process environment so the
# os.getenv(...) lookups in the client cell can see them (presumably this is
# where the REDDIT_* credentials live — confirm).
load_dotenv()

True

### Client and config

In [46]:
# Root folder for everything this notebook writes. The original location stays
# the default; set REDDIT_PROJ_PATH (in the environment or in .env) to run the
# notebook on another machine without editing this cell. An empty value falls
# back to the default as well.
PROJ_PATH = os.getenv("REDDIT_PROJ_PATH") or "D:/Projects/reddit_sentiment/"
# Later cells build paths from PROJ_PATH, so guarantee a trailing separator.
if not PROJ_PATH.endswith(("/", "\\")):
    PROJ_PATH += "/"

In [47]:
# Build the PRAW client from credentials held in environment variables.
# A variable that is not set is passed through to PRAW as None.
reddit = praw.Reddit(
    user_agent=os.environ.get("REDDIT_USER_AGENT"),
    client_secret=os.environ.get("REDDIT_CLIENT_SECRET"),
    client_id=os.environ.get("REDDIT_CLIENT_ID"),
)

In [48]:
# --- Config ---
# True: replace the CSV written for today's date; False: append to it if it exists.
overwrite = True
# None, or a phrase such as "M3 chip".
keyword = None
# Subreddit to pull posts from (name without the "r/" prefix).
subreddit_name = "apple"

### Request posts

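To also load each post's top-level comments, iterate through `submission.comments` inside the post loop (the `isinstance` check keeps only actual `Comment` objects):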

```
for comment in submission.comments:
    if isinstance(comment, praw.models.Comment):
        records.append({
            "subreddit": subreddit_name,
            "created_at": datetime.fromtimestamp(comment.created_utc, tz=ct),
            "retrieved_at": retrieved_at,
            "type": "comment",
            "text": comment.body,
            "score": comment.score,
            "num_comments": None
        })
```

In [49]:
# 📅 Local time setup (US Central Time)
ct = pytz.timezone("US/Central")
# A single retrieval timestamp shared by every row collected in this run.
retrieved_at = datetime.now(ct)

# Maximum number of submissions to request. It also sizes the progress bar, so
# the bar stops short of 100% when the listing returns fewer items.
POST_LIMIT = 1000

records = []
subreddit = reddit.subreddit(subreddit_name)

# Honour the `keyword` setting: when it is set, search the subreddit for it
# (sorted by top, same one-year window); otherwise take the plain top listing.
if keyword:
    submissions = subreddit.search(
        keyword, sort="top", time_filter="year", limit=POST_LIMIT
    )
else:
    submissions = subreddit.top(time_filter="year", limit=POST_LIMIT)

for submission in tqdm.tqdm(submissions, total=POST_LIMIT, desc="Processing posts"):
    # Link posts have an empty selftext; join only the non-empty parts so the
    # stored text does not end in dangling blank lines.
    text_parts = [part for part in (submission.title, submission.selftext) if part]

    # Add post
    records.append({
        "subreddit": subreddit_name,
        # created_utc is an epoch timestamp; render it in Central time.
        "created_at": datetime.fromtimestamp(submission.created_utc, tz=ct),
        "retrieved_at": retrieved_at,
        "type": "post",
        "text": "\n\n".join(text_parts),
        "score": submission.score,
        "num_comments": submission.num_comments
    })

Processing posts: 100%|████████████████████████████████████████████████████████████| 1000/1000 [00:11<00:00, 88.71it/s]


In [50]:
# Turn the collected row dicts into a table, one row per record.
df = pd.DataFrame(records)

# Sanity check: earliest and latest creation time among the fetched rows.
created_summary = df["created_at"].describe()
created_summary.loc[["min", "max"]]

min    2024-04-09 09:37:51-05:00
max    2024-12-19 13:32:28-06:00
Name: created_at, dtype: object

In [51]:
# Spot-check the text of one randomly chosen row.
sampled_text = df["text"].sample(n=1)
print(sampled_text.iloc[0])

Revealed: Complete iPhone 16 Pro and iPhone 16 Pro Max Dimensions




### Persist

In [52]:
# Save locally: replace the day's file when `overwrite` is set, otherwise
# append this run's rows to the file if it already exists.
folder = os.path.join(PROJ_PATH, "reddit_data")
os.makedirs(folder, exist_ok=True)
# One file per subreddit per retrieval date.
filename = f"{folder}/{subreddit_name}_{retrieved_at.strftime('%Y-%m-%d')}.csv"

# Build the frame to write under its own name instead of rebinding `df`:
# re-running this cell in append mode would otherwise concatenate the rows
# that were already merged (and saved) by the previous run.
df_to_save = df
if (not overwrite) and os.path.exists(filename):
    existing = pd.read_csv(filename)
    df_to_save = pd.concat([existing, df], ignore_index=True)

df_to_save.to_csv(filename, index=False)
print(f"Saved {len(df_to_save)} entries to {filename}")

Saved 1000 entries to D:/Projects/reddit_sentiment/reddit_data/apple_2025-04-12.csv


### Rate Limits

In [53]:
# Extract and format rate limit info
limits = reddit.auth.limits
used = limits['used']
remaining = limits['remaining']
reset_ts = limits['reset_timestamp']
current_ts = time.time()

print("🔄 Reddit API Rate Limit Info")
print(f"Requests used:      {used}")
print(f"Requests remaining: {remaining}")

if reset_ts is None:
    # PRAW documents these values as None until a response carrying rate limit
    # data has been received; without this guard the subtraction below raises
    # TypeError.
    print("Resets at:          unknown (no rate limit data received yet)")
else:
    seconds_until_reset = round(reset_ts - current_ts)

    # fromtimestamp() without a tz renders in the machine's local timezone.
    reset_time = datetime.fromtimestamp(reset_ts).strftime('%Y-%m-%d %H:%M:%S')
    print(f"Resets at:          {reset_time} ({seconds_until_reset} seconds from now)")

🔄 Reddit API Rate Limit Info
Requests used:      30
Requests remaining: 970.0
Resets at:          2025-04-12 11:30:00 (276 seconds from now)
