In [1]:
# Import libs
import os
import pandas as pd
import time
import praw
from dotenv import load_dotenv
import re

# Load env vars for the Reddit API: load_dotenv() copies the key/value pairs
# from a .env file into the process environment, which is where the
# `client_id` and `client_secret` credentials are read from when the praw
# client is created.
load_dotenv()

True

In [2]:
# Build the PRAW client used for every request in this notebook. The app
# credentials are pulled from the environment so they never appear in the
# notebook itself; a missing variable is passed through as None.
reddit = praw.Reddit(
    user_agent='genalphaslang',
    client_secret=os.environ.get('client_secret'),
    client_id=os.environ.get('client_id'),
)

In [3]:
# One subreddit handle per university, created in a single pass; the tuple of
# display names is kept in the same order as the target variables.
uoft_subreddit, ubc_subreddit, mcgill_subreddit = (
    reddit.subreddit(display_name) for display_name in ('UofT', 'UBC', 'mcgill')
)

In [None]:
# Function to get the top submissions (by upvotes) from a subreddit
def get_submissions(subreddit, limit=1000):
    """Collect the top-scoring submissions of a subreddit as a list of records.

    Parameters
    ----------
    subreddit
        Object exposing ``top(limit=...)`` that yields submissions (in this
        notebook, the handles returned by ``reddit.subreddit(...)``).
    limit : int or None, default 1000
        Value forwarded to ``subreddit.top``. The default keeps the original
        behaviour of requesting the top 1000 submissions; pass a small number
        for a quick trial run.

    Returns
    -------
    list of dict
        One dict per submission, in the order yielded by ``subreddit.top``,
        with the keys ``date_created``, ``title``, ``description``,
        ``comments`` (list of comment bodies), ``upvotes`` and ``flair``.
        The list can be passed straight to ``pd.DataFrame``.
    """
    all_submissions = []
    for submission in subreddit.top(limit=limit):
        # limit=0 asks PRAW to discard the "load more comments" placeholders
        # instead of expanding them, so every entry of comments.list() below
        # has a .body attribute.
        submission.comments.replace_more(limit=0)
        all_submissions.append({
            'date_created': submission.created_utc,
            'title': submission.title,
            'description': submission.selftext,
            'comments': [comment.body for comment in submission.comments.list()],
            'upvotes': submission.score,
            'flair': submission.link_flair_text,
        })
    return all_submissions

In [None]:
# Collecting for uoft: fetch the records, then derive the final columns in one
# chained step instead of mutating the frame afterwards.
uoft_df = pd.DataFrame(get_submissions(uoft_subreddit)).assign(
    # date_created arrives as epoch seconds; convert it to datetime values
    date_created=lambda frame: pd.to_datetime(frame['date_created'], unit='s'),
    # constant label so rows stay attributable after the frames are combined
    school='uoft',
)

In [None]:
# Collecting for ubc: fetch the records, then derive the final columns in one
# chained step instead of mutating the frame afterwards.
ubc_df = pd.DataFrame(get_submissions(ubc_subreddit)).assign(
    # date_created arrives as epoch seconds; convert it to datetime values
    date_created=lambda frame: pd.to_datetime(frame['date_created'], unit='s'),
    # constant label so rows stay attributable after the frames are combined
    school='ubc',
)

In [None]:
# Collecting for mcgill: fetch the records, then derive the final columns in
# one chained step instead of mutating the frame afterwards.
mcgill_df = pd.DataFrame(get_submissions(mcgill_subreddit)).assign(
    # date_created arrives as epoch seconds; convert it to datetime values
    date_created=lambda frame: pd.to_datetime(frame['date_created'], unit='s'),
    # constant label so rows stay attributable after the frames are combined
    school='mcgill',
)

In [None]:
# Stack the three per-school frames into a single table with a fresh
# 0..n-1 index, then write it to all_posts.csv without the index column.
all_posts = pd.concat(objs=(uoft_df, ubc_df, mcgill_df), ignore_index=True)
all_posts.to_csv(path_or_buf="all_posts.csv", index=False)