### Load Libraries 

In [1]:
import pandas as pd
import datetime as dt
import time
import requests

### Automating Multiple Pull Requests

In [38]:
def query_pushshift(subreddit, kind = 'submission', day_window = 30, n = 10):
    SUBFIELDS = ['title', 'selftext', 'subreddit', 'created_utc', 'author', 'num_comments', 'score', 'is_self']
    
    # establish base url and stem
    BASE_URL = f"https://api.pushshift.io/reddit/search/{kind}" # also known as the "API endpoint" 
    stem = f"{BASE_URL}?subreddit={subreddit}&size=500" # always pulling max of 500
    
    # instantiate empty list for temp storage
    posts = []
    
    # implement for loop with `time.sleep(2)`
    for i in range(1, n + 1):
        URL = "{}&after={}d".format(stem, day_window * i)
        print("Querying from: " + URL)
        response = requests.get(URL)
        assert response.status_code == 200
        mine = response.json()['data']
        df = pd.DataFrame.from_dict(mine)
        posts.append(df)
        time.sleep(2)
    
    # pd.concat storage list
    full = pd.concat(posts, sort=False)
    
    # if submission
    if kind == "submission":
        # select desired columns
        full = full[SUBFIELDS]
        # drop duplicates
        full.drop_duplicates(inplace = True)
        # select `is_self` == True
        full = full.loc[full['is_self'] == True]

    # create `timestamp` column
    full['timestamp'] = full["created_utc"].map(dt.date.fromtimestamp)
    
    print("Query Complete!")    
    return full 

In [30]:
results = query_pushshift("movies")

Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=movies&size=500&after=30d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=movies&size=500&after=60d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=movies&size=500&after=90d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=movies&size=500&after=120d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=movies&size=500&after=150d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=movies&size=500&after=180d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=movies&size=500&after=210d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=movies&size=500&after=240d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=movies&size=500&after=270d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=movies&siz

In [45]:
data_movies = results

In [46]:
data_movies.shape

(2723, 9)

In [47]:
data_movies.head()

Unnamed: 0,title,selftext,subreddit,created_utc,author,num_comments,score,is_self,timestamp
3,Need help to find a name of cyberpunk movie I ...,[removed],movies,1584584017,slvrbat,0,1,True,2020-03-18
4,my coronavirus movie picks,pls comment if you want too add films or categ...,movies,1584584059,Bobatron1010,13,2,True,2020-03-18
5,Need Help deciding a movie to watch,[removed],movies,1584584341,Shidoitsukasan,0,1,True,2020-03-18
6,Need help deciding what to watch,[removed],movies,1584584378,Shidoitsukasan,0,1,True,2020-03-18
15,Would anyone else like to see Arnold in a film...,[removed],movies,1584585786,sirjackiechiles,0,1,True,2020-03-18


In [48]:
data_movies.to_csv("./Data/data_movies.csv", index = False)

In [24]:
data_music = query_pushshift("music")

Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=music&size=500&after=30d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=music&size=500&after=60d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=music&size=500&after=90d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=music&size=500&after=120d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=music&size=500&after=150d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=music&size=500&after=180d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=music&size=500&after=210d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=music&size=500&after=240d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=music&size=500&after=270d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=music&size=500&afte

In [49]:
data_music.shape

(2014, 9)

In [50]:
data_music.head()

Unnamed: 0,title,selftext,subreddit,created_utc,author,num_comments,score,is_self,timestamp
4,Suggest me an album about race or class?,Working on a school project. I need a music al...,Music,1584583496,_drcomicbooknerd_,7,1,True,2020-03-18
8,Copyright question for a yoga studio teaching ...,My girlfriend’s yoga studio is currently shut ...,Music,1584583659,Trap_daddy1,7,2,True,2020-03-18
16,BONNAROO TICKETS FOR SALE!!!,[removed],Music,1584584469,graceolivia13,2,1,True,2020-03-18
33,Hey I don’t mean to be annoying but would appr...,[removed],Music,1584585518,bjoshu27,2,1,True,2020-03-18
43,I found this band on Spotify and they only get...,[https://open.spotify.com/artist/2lA4Wznl9Bwd5...,Music,1584586165,aidandorigo,0,1,True,2020-03-18


In [51]:
data_music.to_csv("./Data/data_music.csv", index = False)

In [39]:
data_books = query_pushshift("books")

Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=books&size=500&after=30d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=books&size=500&after=60d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=books&size=500&after=90d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=books&size=500&after=120d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=books&size=500&after=150d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=books&size=500&after=180d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=books&size=500&after=210d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=books&size=500&after=240d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=books&size=500&after=270d
Querying from: https://api.pushshift.io/reddit/search/submission?subreddit=books&size=500&afte

In [52]:
data_books.shape

(3995, 9)

In [53]:
data_books.head()

Unnamed: 0,title,selftext,subreddit,created_utc,author,num_comments,score,is_self,timestamp
1,"My wife, the media specialist of course, makes...",[removed],books,1584583917,aWal956,0,1,True,2020-03-18
3,The Count of Monte Cristo,[removed],books,1584584877,selim_challie,2,1,True,2020-03-18
5,Seamless carbon steel pipe stocks. Delivery wi...,[removed],books,1584586867,fengling130724,2,1,True,2020-03-18
7,Inter-workings and affairs of the Elite,[removed],books,1584588944,Lilly_G169,8,1,True,2020-03-18
8,Just finished reading Pride and Prejudice by J...,And as silly as it may sound it caused a minor...,books,1584590181,Red_inSaree,12,1,True,2020-03-18


In [54]:
data_books.to_csv("./Data/data_books.csv", index = False)