In [26]:
import requests
import pandas as pd
from datetime import datetime
# time.sleep() is used to pause between API requests so the server is not overloaded.
import time

In [27]:
def scrap_posts(subreddit, n_posts, before=1640908800):
    """Scrape submissions from a subreddit through the Pushshift search API.

    Sends up to ``n_posts`` paginated requests. Each request asks for up to
    100 submissions created before a moving cutoff; after every page the
    cutoff is moved to the ``created_utc`` of the last submission received,
    so the next request continues further back in time.

    Parameters
    ----------
    subreddit : str
        Subreddit name, without the ``r/`` prefix.
    n_posts : int
        Number of requests (pages) to send -- NOT the number of posts.
        At most ``100 * n_posts`` submissions are returned.
    before : int, optional
        Unix timestamp used as the initial cutoff. Defaults to 1640908800
        (2021-12-31 00:00:00 UTC), the value previously hard-coded here.

    Returns
    -------
    pandas.DataFrame
        One row per submission as returned by the API, plus a ``created``
        column holding ``created_utc`` converted with
        ``datetime.fromtimestamp`` (i.e. local time). The frame is empty
        when nothing could be retrieved.

    Notes
    -----
    Scraping stops early when a request returns a non-200 status or an
    empty page. Requests use a 30 second timeout, so a stalled connection
    raises ``requests.exceptions.Timeout`` instead of hanging forever.
    """
    url = 'https://api.pushshift.io/reddit/search/submission'
    posts = []
    res = None  # stays None when n_posts == 0, so the status print is skipped

    for _ in range(n_posts):
        params = {
            'subreddit': subreddit,
            'size': 100,
            'before': before,
        }

        res = requests.get(url, params=params, timeout=30)
        # Pause after every request so the API is not hammered.
        time.sleep(0.5)

        if res.status_code != 200:
            print(f'Error Code {res.status_code}, {res.reason}')
            break

        page = res.json().get('data') or []
        if not page:
            # No older submissions are available; indexing page[-1] would fail.
            break

        posts.extend(page)
        # Continue from the oldest submission seen so far.
        before = page[-1]['created_utc']

    if res is not None:
        print(f"r/{subreddit} - Code:{res.status_code}, Status:{res.reason}")

    # Create the dataframe for the scraped posts.
    df = pd.DataFrame(posts)
    if df.empty:
        # Nothing retrieved (failed first request, empty subreddit, or
        # n_posts == 0): there is no 'created_utc' column to convert.
        print("Scrapped 0 posts")
        print()
        return df

    df['created'] = df['created_utc'].apply(datetime.fromtimestamp)

    # Results arrive newest first, so row 0 is the latest post and the
    # final row is the oldest one.
    latest_post_stamped = datetime.fromtimestamp(int(df['created_utc'].iloc[0]))
    last_post_stamped = datetime.fromtimestamp(int(df['created_utc'].iloc[-1]))

    print(f"Scrapped {df.shape[0]} posts from {latest_post_stamped} to {last_post_stamped}")
    print()

    return df

In [28]:
# Send 15 requests (up to 100 submissions each) against r/lanadelrey.
ldr = scrap_posts(subreddit='lanadelrey', n_posts=15)

r/lanadelrey - Code:200, Status:OK
Scrapped 1498 posts from 2021-12-30 17:52:33 to 2021-11-24 02:10:21



In [29]:
# Inspect which fields came back for each scraped submission.
ldr.columns

Index(['all_awardings', 'allow_live_comments', 'author',
       'author_flair_css_class', 'author_flair_richtext', 'author_flair_text',
       'author_flair_type', 'author_fullname', 'author_is_blocked',
       'author_patreon_flair', 'author_premium', 'awarders', 'can_mod_post',
       'contest_mode', 'created_utc', 'domain', 'full_link', 'gildings', 'id',
       'is_created_from_ads_ui', 'is_crosspostable', 'is_meta',
       'is_original_content', 'is_reddit_media_domain', 'is_robot_indexable',
       'is_self', 'is_video', 'link_flair_background_color',
       'link_flair_richtext', 'link_flair_template_id', 'link_flair_text',
       'link_flair_text_color', 'link_flair_type', 'locked', 'media_only',
       'no_follow', 'num_comments', 'num_crossposts', 'over_18',
       'parent_whitelist_status', 'permalink', 'pinned', 'post_hint',
       'preview', 'pwls', 'retrieved_on', 'score', 'selftext', 'send_replies',
       'spoiler', 'stickied', 'subreddit', 'subreddit_id',
       'subred

In [30]:
# (rows, columns) of the scraped r/lanadelrey frame.
ldr.shape

(1498, 84)

In [31]:
# Preview the first five scraped submissions.
ldr.head(n=5)

Unnamed: 0,all_awardings,allow_live_comments,author,author_flair_css_class,author_flair_richtext,author_flair_text,author_flair_type,author_fullname,author_is_blocked,author_patreon_flair,...,media,media_embed,secure_media,secure_media_embed,crosspost_parent,crosspost_parent_list,author_cakeday,edited,banned_by,created
0,[],False,samandrewny,,[],,text,t2_9sp67sdb,False,False,...,,,,,,,,,,2021-12-30 17:52:33
1,[],False,artisticphangirl,,[],,text,t2_4ezoh67f,False,False,...,,,,,,,,,,2021-12-30 17:24:14
2,[],False,ambriebat,,[],,text,t2_4itrzwbz,False,False,...,,,,,,,,,,2021-12-30 16:05:20
3,[],False,user2113311,,[],,text,t2_5o85vp93,False,False,...,,,,,,,,,,2021-12-30 15:54:50
4,[],False,BLIGATORY,,[],,text,t2_bgmgkrte,False,False,...,,,,,,,,,,2021-12-30 15:50:06


In [32]:
# Peek at the body text of the first five submissions.
ldr['selftext'].head(5)

0                                                     
1    I really wanted to buy one and I finally saved...
2    i was lucky to find it for 56$ at a local reco...
3                                            [removed]
4    Hi everyone! We are a community-focused music ...
Name: selftext, dtype: object

In [33]:
# Persist the r/lanadelrey scrape as CSV, leaving out the row index.
ldr.to_csv(path_or_buf='lana.csv', index=False)

In [34]:
# Send 15 requests (up to 100 submissions each) against r/metallica.
metal = scrap_posts(subreddit='metallica', n_posts=15)

r/metallica - Code:200, Status:OK
Scrapped 1498 posts from 2021-12-30 17:18:53 to 2021-12-02 12:55:42



In [35]:
# (rows, columns) of the scraped r/metallica frame.
metal.shape

(1498, 83)

In [36]:
# Preview the first five scraped submissions.
metal.head(n=5)

Unnamed: 0,all_awardings,allow_live_comments,author,author_flair_css_class,author_flair_richtext,author_flair_text,author_flair_type,author_fullname,author_is_blocked,author_patreon_flair,...,removed_by_category,gallery_data,media_metadata,author_flair_background_color,crosspost_parent,crosspost_parent_list,author_cakeday,distinguished,edited,created
0,[],False,THuitema35,,[],,text,t2_41woh4be,False,False,...,,,,,,,,,,2021-12-30 17:18:53
1,[],False,Only-Fruit7339,,[],,text,t2_8mwr4tw0,False,False,...,,,,,,,,,,2021-12-30 16:49:20
2,[],False,metallicarow,,[],,text,t2_1dst8n8t,False,False,...,,,,,,,,,,2021-12-30 16:31:30
3,[],False,HomemadeTopHat69,,[],,text,t2_g1qlvryc,False,False,...,,,,,,,,,,2021-12-30 16:23:34
4,[],False,hero_oftheray,2.0,"[{'e': 'text', 't': 'Kill 'Em All'}]",Kill 'Em All,richtext,t2_gxewmx65,False,False,...,,,,,,,,,,2021-12-30 15:58:50


In [37]:
# Peek at the body text of the first five submissions.
metal['selftext'].head(5)

0    I’ve recently started jamming with my friends,...
1                                                     
2                                                     
3    1. The Frayed Ends Of Sanity \n2. Sweet Amber ...
4                                                     
Name: selftext, dtype: object

In [38]:
# Persist the r/metallica scrape as CSV, leaving out the row index.
metal.to_csv(path_or_buf='metall.csv', index=False)