In [None]:
# Milestone 1 – Reddit API Extraction & PostgreSQL Load

In [None]:
!pip install praw psycopg2-binary sqlalchemy python-dotenv

Collecting praw
  Downloading praw-7.8.1-py3-none-any.whl.metadata (9.4 kB)
Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting prawcore<3,>=2.4 (from praw)
  Downloading prawcore-2.4.0-py3-none-any.whl.metadata (5.0 kB)
Collecting update_checker>=0.18 (from praw)
  Downloading update_checker-0.18.0-py3-none-any.whl.metadata (2.3 kB)
Downloading praw-7.8.1-py3-none-any.whl (189 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.3/189.3 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Downloading p

In [None]:
# Setting Reddit credentials
# Each variable is only filled with its placeholder when it is not already
# defined, so real credentials exported to the kernel's environment are never
# overwritten by the dummy values below.
import os

os.environ.setdefault("REDDIT_CLIENT_ID", "YOUR_CLIENT_ID")
os.environ.setdefault("REDDIT_CLIENT_SECRET", "YOUR_CLIENT_SECRET")
os.environ.setdefault("REDDIT_USER_AGENT", "YOUR_USER_AGENT")
os.environ.setdefault("REDDIT_USERNAME", "YOUR_USERNAME")
os.environ.setdefault("REDDIT_PASSWORD", "YOUR_PASSWORD")

In [None]:
import praw  # COMMENT: Reddit API wrapper

In [None]:
# COMMENT: Build the authenticated Reddit client (script-based app) from the
# REDDIT_* environment variables; a missing variable raises KeyError.
credential_fields = ("client_id", "client_secret", "user_agent", "username", "password")
reddit = praw.Reddit(
    **{field: os.environ[f"REDDIT_{field.upper()}"] for field in credential_fields}
)

# COMMENT: Show the client's read_only flag as a quick confirmation
print("Read-only mode:", reddit.read_only)

Read-only mode: False


In [None]:
# COMMENT: Smoke-test the Reddit client: request a single hot submission from
# r/snapchat and print its title
hot_listing = reddit.subreddit("snapchat").hot(limit=1)
for submission in hot_listing:
    print("Reddit connection successful! Example post title:", submission.title)

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Reddit connection successful! Example post title: Wednesday Megathread


In [None]:
import pandas as pd
from datetime import datetime, timezone

# COMMENT: Extract up to 100 hot posts from r/snapchat, one dict per post
posts = []
for post in reddit.subreddit("snapchat").hot(limit=100):
    posts.append({
        "id": post.id,
        "title": post.title,
        # COMMENT: A missing author is kept as a real missing value instead of
        # the literal string "None" (presumably deleted accounts - verify
        # against the PRAW Submission docs)
        "author": str(post.author) if post.author is not None else None,
        "score": post.score,
        "num_comments": post.num_comments,
        # COMMENT: Convert the epoch explicitly in UTC. fromtimestamp() without
        # tz uses the machine's local timezone, which only matches the column
        # name on UTC hosts. tzinfo is dropped afterwards so the column stays
        # timezone-naive, as before.
        "created_utc": datetime.fromtimestamp(
            post.created_utc, tz=timezone.utc
        ).replace(tzinfo=None),
        "selftext": post.selftext,
        "permalink": f"https://www.reddit.com{post.permalink}"
    })

# COMMENT: Convert list of dictionaries into a pandas DataFrame
df_reddit = pd.DataFrame(posts)

# COMMENT: Display first few rows to confirm structure
df_reddit.head()

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



Unnamed: 0,id,title,author,score,num_comments,created_utc,selftext,permalink
0,1jv2381,Wednesday Megathread,AutoModerator,2,0,2025-04-09 10:01:16,"This megathread is for updates, general app t...",https://www.reddit.com/r/snapchat/comments/1jv...
1,1jyv6rc,Monday Megathread - Let Freedom Reign (This Po...,AutoModerator,3,16,2025-04-14 10:02:13,\n\nConsider this thread The Purge in subred...,https://www.reddit.com/r/snapchat/comments/1jy...
2,1jyxh19,31 [F4A] Lets be friends!!,Sumbeech1330,43,1,2025-04-14 12:20:26,I need more snap buddies! \nI'm a concert lovi...,https://www.reddit.com/r/snapchat/comments/1jy...
3,1jz0ppf,26 [F4M] looking for cool people!,Fickle_Employee_5694,17,1,2025-04-14 14:49:00,hey there! a little about me. Live the US. I l...,https://www.reddit.com/r/snapchat/comments/1jz...
4,1jz4tu5,33[m4f] good vibes only 🔥,ProductRed_92,2,1,2025-04-14 17:37:09,\nHey!! Looking for a daily bestie to chat wit...,https://www.reddit.com/r/snapchat/comments/1jz...


In [None]:
# COMMENT: Imports for PostgreSQL connection
import os

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# COMMENT: Sample of the Reddit post data collected earlier.
# NOTE(review): this hardcoded frame - not the `df_reddit` built by the
# extraction cell - is what gets written to the database; swap it in if the
# full extract is meant to be loaded.
reddit_df = pd.DataFrame([
    {
        "id": "1jv2381",
        "title": "Wednesday Megathread",
        "author": "AutoModerator",
        "score": 2,
        "num_comments": 0,
        "created_utc": "2025-04-09 10:01:16",
        "selftext": "This megathread is for updates, general app talk...",
        "permalink": "https://www.reddit.com/r/snapchat/comments/1jv2381"
    }
    # Add more sample rows here
]).assign(
    # COMMENT: Parse the timestamp text so the column is loaded as a datetime
    # rather than as plain text
    created_utc=lambda frame: pd.to_datetime(frame["created_utc"])
)

# COMMENT: Connection settings come from PG_* environment variables when they
# are set; otherwise the placeholders below apply and must be replaced with
# real values before running locally. Never commit real credentials.
pg_user = os.environ.get("PG_USER", "YOUR_USERNAME")
pg_password = os.environ.get("PG_PASSWORD", "YOUR_PASSWORD")
pg_host = os.environ.get("PG_HOST", "YOUR_HOST")
pg_port = os.environ.get("PG_PORT", "5432")
pg_database = os.environ.get("PG_DATABASE", "YOUR_DB_NAME")

# COMMENT: Target location. The data is written to the "public" schema; change
# pg_schema here (the schema must already exist) if another one is wanted.
pg_schema = "public"
pg_table = "reddit_posts"

# COMMENT: URL.create escapes special characters (e.g. "@", "/", ":") in the
# user name and password, which a hand-formatted URL string does not.
# Rendering back to text keeps db_url a plain string.
db_url = URL.create(
    drivername="postgresql+psycopg2",
    username=pg_user,
    password=pg_password,
    host=pg_host,
    port=int(pg_port),
    database=pg_database,
).render_as_string(hide_password=False)
engine = create_engine(db_url)

# COMMENT: Load DataFrame into PostgreSQL, replacing the table if it exists
reddit_df.to_sql(pg_table, engine, schema=pg_schema, if_exists="replace", index=False)

# COMMENT: Report the location that was actually written
print(f"Data loaded to PostgreSQL - {pg_schema}.{pg_table}")

✅ Data loaded to PostgreSQL - raw.reddit_posts
