In [2]:
# imports 
import pandas as pd
import requests
import time

## Function to Pull Submission Data from a Subreddit

In [3]:
def pull_subreddit(subreddit, size, until):
    url = 'https://api.pushshift.io/reddit/search/submission'
    
    params = {
    'subreddit' : subreddit,
    'size' : size,
    'until': until
    }
    
    res = requests.get(url, params)
    
    if res.status_code == 200:
        data = res.json()
        posts = data['data']
        df = pd.DataFrame(posts)
        return df[['subreddit', 'title','created_utc', 'is_video', 'num_comments']]
    
    else:
        return 'Connection Failure'

In [4]:
pd.set_option('display.max_colwidth', None)

## Building /r/Cycling DataFrames

In [5]:
cycling1 = pull_subreddit('cycling', 1000, 1682463716)
cycling1

Unnamed: 0,subreddit,title,created_utc,is_video,num_comments
0,cycling,Women cyclists. What do you think when the men in the group take a nature stop on the side of the road?,1682463592,False,0
1,cycling,Inaccurate Shimano Power Meter,1682463280,False,0
2,cycling,Never been on a road bike before and WOW,1682459187,False,0
3,cycling,2012 Fuji Supreme LTD Team for 600? Good deal?,1682458960,False,0
4,cycling,bike making weird sound when biking in 1st,1682456616,False,0
...,...,...,...,...,...
995,cycling,Bike cleaning &amp; maintenance,1681222836,False,0
996,cycling,New season. New sores. Need new bibs that are not crazy expensive? Any recommendations?,1681217422,False,0
997,cycling,Garmin 540 / 840 (opt. Solar) released,1681217371,False,0
998,cycling,Gravel bike upgrade,1681217152,False,0


In [6]:
cycling2 = pull_subreddit('cycling', 1000, 1681216721)
cycling2

Unnamed: 0,subreddit,title,created_utc,is_video,num_comments
0,cycling,New Journeyer Sora 700c tire recommendation?,1681216565,False,0
1,cycling,Chain Suck,1681214820,False,0
2,cycling,Tubeless tape + tires,1681214797,False,0
3,cycling,Should I buy a new bike?,1681212164,False,0
4,cycling,Bike from 7 speed to 1x10,1681210522,False,0
...,...,...,...,...,...
994,cycling,Wouldn't triple crank work great even these days?,1679580068,False,0
995,cycling,Shimano FC-RS510 compatibility with Shimano Ultegra 6800 and Shimano R7000,1679579846,False,1
996,cycling,Instructions... for Garneau Urban Cycling Shoes,1679579157,False,0
997,cycling,I'm selling my bike but not sure about the price,1679578978,False,0


In [7]:
cycling3 = pull_subreddit('cycling', 1000, 1679578843)
cycling3

Unnamed: 0,subreddit,title,created_utc,is_video,num_comments
0,cycling,Deda alanera DCR or Vision Metron 6D? This is for a 2020 Cannondale Supersix Evo Gen3.,1679578690,False,0
1,cycling,Which bike computer ?,1679578424,False,0
2,cycling,Suplements,1679574879,False,0
3,cycling,Anyone use MapMyRun? It seems way too liberal with calories burnt!,1679572899,False,0
4,cycling,Power pedals query.,1679571734,False,0
...,...,...,...,...,...
995,cycling,Heart rate training zones,1677799731,False,0
996,cycling,Storing bike vertically on trainer,1677799606,False,0
997,cycling,Fit system is rubbing my head,1677798973,False,0
998,cycling,Why do folks hate cyclists?,1677796669,False,0


In [8]:
cycling_df = pd.concat([cycling1, cycling2, cycling3])
cycling_df

Unnamed: 0,subreddit,title,created_utc,is_video,num_comments
0,cycling,Women cyclists. What do you think when the men in the group take a nature stop on the side of the road?,1682463592,False,0
1,cycling,Inaccurate Shimano Power Meter,1682463280,False,0
2,cycling,Never been on a road bike before and WOW,1682459187,False,0
3,cycling,2012 Fuji Supreme LTD Team for 600? Good deal?,1682458960,False,0
4,cycling,bike making weird sound when biking in 1st,1682456616,False,0
...,...,...,...,...,...
995,cycling,Heart rate training zones,1677799731,False,0
996,cycling,Storing bike vertically on trainer,1677799606,False,0
997,cycling,Fit system is rubbing my head,1677798973,False,0
998,cycling,Why do folks hate cyclists?,1677796669,False,0


In [9]:
cycling_df.to_csv('../data/cycling.csv')

## Building /r/Running DataFrames

In [10]:
running1 = pull_subreddit('running', 1000, 1682463716)
running1

Unnamed: 0,subreddit,title,created_utc,is_video,num_comments
0,running,Looking to run a faster half marathon,1682463029,False,1
1,running,Crahses after half marathon over 1 week ago,1682462783,False,1
2,running,WHOOP thoughts?,1682462057,False,1
3,running,Do any of you run alot but also smoke cigarettes?,1682461439,False,1
4,running,Disrupted Training Advice Needed,1682459720,False,1
...,...,...,...,...,...
995,running,How to free up my running form?,1681271679,False,1
996,running,Exercise Induced Rhinitis?,1681271360,False,0
997,running,What's your favorite post-race meal?,1681271317,False,1
998,running,CRAMP/SIDE STITCH,1681270775,False,1


In [11]:
running2 = pull_subreddit('running', 1000, 1681269922)
running2

Unnamed: 0,subreddit,title,created_utc,is_video,num_comments
0,running,"Novice runner, doing C25K, hit a wall post run - what happened?",1681266132,False,1
1,running,Getting used to road running again/quad strain/struggling a bit,1681265938,False,1
2,running,When is running best?,1681264476,False,1
3,running,Any tips on breaking 4:30?,1681263842,False,1
4,running,How do you choose your pace?,1681262794,False,1
...,...,...,...,...,...
995,running,Have you ever noticed a change in perceived musical tempo while running? Academic Survey,1680022543,False,1
996,running,Help!,1680018749,False,1
997,running,Injury 6 weeks out of first marathon,1680018441,False,1
998,running,overpronation CAUSED by shoes ?,1680017221,False,1


In [12]:
running3 = pull_subreddit('running', 1000, 1680016242)
running3

Unnamed: 0,subreddit,title,created_utc,is_video,num_comments
0,running,Is my watch accurate?,1680015282,False,1
1,running,Rasselbocks Backyard ultra race report from The Running Channel.,1680015189,False,1
2,running,Question,1680015074,False,1
3,running,Best running substitute when Achilles issues forcing me switch training modality,1680014976,False,1
4,running,Couch to 1.5 mile in a month possible?,1680014433,False,1
...,...,...,...,...,...
994,running,Grandma's Marathon 2023,1678899332,False,1
995,running,"I’ve just started running, how many times a week should I be going on runs",1678897479,False,1
996,running,Running Instagram Profiles,1678895309,False,1
997,running,What are the latest justifications for why super shoes aren't cheating?,1678894757,False,1


In [13]:
running_df = pd.concat([running1, running2, running3])
running_df

Unnamed: 0,subreddit,title,created_utc,is_video,num_comments
0,running,Looking to run a faster half marathon,1682463029,False,1
1,running,Crahses after half marathon over 1 week ago,1682462783,False,1
2,running,WHOOP thoughts?,1682462057,False,1
3,running,Do any of you run alot but also smoke cigarettes?,1682461439,False,1
4,running,Disrupted Training Advice Needed,1682459720,False,1
...,...,...,...,...,...
994,running,Grandma's Marathon 2023,1678899332,False,1
995,running,"I’ve just started running, how many times a week should I be going on runs",1678897479,False,1
996,running,Running Instagram Profiles,1678895309,False,1
997,running,What are the latest justifications for why super shoes aren't cheating?,1678894757,False,1


In [14]:
running_df.to_csv('../data/running.csv')

## Concatenate into one large dataframe

In [15]:
combined_df = pd.concat([cycling_df, running_df])
combined_df

Unnamed: 0,subreddit,title,created_utc,is_video,num_comments
0,cycling,Women cyclists. What do you think when the men in the group take a nature stop on the side of the road?,1682463592,False,0
1,cycling,Inaccurate Shimano Power Meter,1682463280,False,0
2,cycling,Never been on a road bike before and WOW,1682459187,False,0
3,cycling,2012 Fuji Supreme LTD Team for 600? Good deal?,1682458960,False,0
4,cycling,bike making weird sound when biking in 1st,1682456616,False,0
...,...,...,...,...,...
994,running,Grandma's Marathon 2023,1678899332,False,1
995,running,"I’ve just started running, how many times a week should I be going on runs",1678897479,False,1
996,running,Running Instagram Profiles,1678895309,False,1
997,running,What are the latest justifications for why super shoes aren't cheating?,1678894757,False,1


In [16]:
combined_df.to_csv('../data/combined.csv')