In [2]:
# Import libraries

import datetime
import time
from json import load, dump, JSONDecodeError
from typing import List, Dict
from urllib.parse import urlencode

import pandas as pd
import praw
import requests

In [3]:
# Read config (client id, secret, user agent) and create Reddit object

CONFIG_PATH = 'config.json'

# Open the config inside a context manager so the file handle is closed as
# soon as the JSON has been parsed, instead of being left to the garbage collector.
with open(CONFIG_PATH) as config_file:
    config = load(config_file)

# Credentials are read from the config file rather than hardcoded in the notebook.
reddit = praw.Reddit(
    client_id=config['reddit_client_id'],
    client_secret=config['reddit_client_secret'],
    user_agent=config['reddit_user_agent'],
)

print("Read only?:", "Yes" if reddit.read_only else "No")

Read only?: Yes


Version 7.5.0 of praw is outdated. Version 7.6.0 was released Tuesday May 10, 2022.


In [29]:
# Example PRAW (check the 5 hottest posts in r/politics and print title, date, url, score, author)

posts = reddit.subreddit("politics").hot(limit=5)

for i, post in enumerate(posts):
    # `created_utc` is the documented Unix timestamp of a submission;
    # fromtimestamp() renders that instant in the machine's local time zone.
    created_at = datetime.datetime.fromtimestamp(post.created_utc)
    print("Post {}: \n\t Title: {} \n\t Date: {} \n\t Url: {} \n\t Score: {} \n\t Author: {}".format(
        i, post.title, created_at, post.url, post.score, post.author))

Post 0: 
	 Title: Discussion Thread: House Jan 6 Public Hearings, Day 1 - 06/09/2022 at 8 pm ET 
	 Date: 2022-06-10 00:58:17 
	 Url: https://www.reddit.com/r/politics/comments/v8tbox/discussion_thread_house_jan_6_public_hearings_day/ 
	 Score: 6299 
	 Author: PoliticsModeratorBot
Post 1: 
	 Title: The January 6 hearings showed why it’s reasonable to call Trump a fascist 
	 Date: 2022-06-11 01:37:55 
	 Url: https://www.vox.com/policy-and-politics/2022/6/10/23162442/january-6-committee-hearing-june-10-trump-fascist 
	 Score: 7365 
	 Author: coffeespeaking
Post 2: 
	 Title: Nearly 20M watched Jan. 6 hearing: Nielsen 
	 Date: 2022-06-10 22:41:48 
	 Url: https://thehill.com/blogs/pundits-blog/media/3519284-nearly-20m-watched-jan-6-hearing-nielsen/ 
	 Score: 46011 
	 Author: h2oape
Post 3: 
	 Title: AOC Asked Gaetz, Boebert, and Greene If They Asked for Pardons After January 6 
	 Date: 2022-06-11 05:33:07 
	 Url: https://www.businessinsider.com/aoc-matt-gaetz-lauren-boebert-marjorie-taylor-g

In [12]:
# Get url and search query

# Hand-written example of a Pushshift comment-search URL: endpoint, then the
# search/filter parameters, then the page size.
example_url = "".join([
    "https://api.pushshift.io/reddit/search/comment/",
    "?q=gun+control&subreddit=conservative&before=21d&after=31d",
    "&size=100",
])

def get_url(query: str,
            subreddit: str,
            before: datetime.datetime,
            after: datetime.datetime,
            size=500):
    """Build a Pushshift comment-search URL for one query and time window.

    Args:
        query: Search phrase. Runs of whitespace separate the terms; every
            other character is percent-encoded so it cannot break the URL.
        subreddit: Subreddit to restrict the search to. A falsy value
            (``None`` or ``""``) leaves the ``subreddit`` parameter out.
        before: Sent as the ``before`` parameter, in whole epoch seconds.
        after: Sent as the ``after`` parameter, in whole epoch seconds.
        size: Sent as the ``size`` parameter.

    Returns:
        The complete request URL as a string.
    """
    # Collapse whitespace first so "gun   laws" still becomes "gun+laws".
    params = [("q", " ".join(query.split()))]

    if subreddit:
        params.append(("subreddit", subreddit))

    params += [
        ("before", int(before.timestamp())),
        ("after", int(after.timestamp())),
        ("size", size),
    ]

    # urlencode() escapes reserved characters such as "&", "#" and "=" and
    # turns spaces into "+", so plain queries produce the same URL as before.
    return "https://api.pushshift.io/reddit/search/comment/?" + urlencode(params)

#  end_date = datetime.datetime(2022, 3, 5, 0, 0)

def _fetch_json(url: str, retry_wait: float = 3, timeout: float = 60) -> dict:
    """GET ``url`` and return its decoded JSON body, retrying once on failure.

    Raises:
        requests.RequestException / ValueError: if the second attempt also fails.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    # RequestException covers connection errors, timeouts and the HTTP error
    # raised by raise_for_status(); ValueError covers a body that is not valid
    # JSON. Unlike a bare `except:`, Ctrl-C still interrupts the scrape.
    except (requests.RequestException, ValueError):
        print("Error! Wait {} sec...".format(retry_wait))
        time.sleep(retry_wait)

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()


def get_comments(query: str,
                 subreddit: str,
                 start_date: datetime.datetime,
                 day_count=14,
                 hours=6,
                 verbose=True):
    """Collect comments matching ``query`` window by window from ``start_date``.

    The period of ``day_count`` days is cut into consecutive windows of
    ``hours`` hours (fractions allowed, e.g. ``1/12`` for five minutes), and
    one request is made per window.

    Args:
        query: Search phrase passed to ``get_url``.
        subreddit: Subreddit passed to ``get_url``; a falsy value means no filter.
        start_date: Start of the first window.
        day_count: Length of the whole period in days.
        hours: Length of a single window in hours.
        verbose: When true, print the URL and result count of every window.

    Returns:
        A list of tuples ``(id, parent_id, body, score, created_utc,
        subreddit, query)``, one per returned comment.

    Raises:
        requests.RequestException / ValueError: if a window cannot be fetched
            even after one retry.
    """
    # NOTE(review): only one request is made per window, so results beyond the
    # first page of a busy window are not fetched — confirm `size` is enough.
    window_count = int(day_count * 24 / hours)
    comments = list()

    for i in range(window_count):
        window_start = start_date + datetime.timedelta(hours=i * hours)
        window_end = window_start + datetime.timedelta(hours=hours)
        # get_url takes the later bound (`before`) first, then the earlier one.
        url = get_url(query, subreddit, window_end, window_start)

        response = _fetch_json(url)

        if verbose:
            print("Url:", url)
            print("Current:", window_start, "After:", window_end, "i:", i, "len:", len(response['data']))

        for comment in response['data']:
            comments.append((comment['id'],
                             comment['parent_id'],
                             comment['body'],
                             comment['score'],
                             comment['created_utc'],
                             comment['subreddit'],
                             query))
    return comments


def get_comments_from_all(queries: List[str],
                          subreddits: List[str],
                          start_date: datetime.datetime,
                          day_count=14,
                          hours=6,
                          verbose=True):
    """Run ``get_comments`` for every (subreddit, query) pair and merge the results.

    Subreddits form the outer loop and queries the inner one, so the returned
    list is grouped by subreddit first. The remaining arguments are forwarded
    to ``get_comments`` unchanged.

    Returns:
        One flat list holding the comment tuples of all pairs.
    """
    window_options = dict(day_count=day_count, hours=hours, verbose=verbose)
    collected = []

    for sub in subreddits:
        for term in queries:
            collected.extend(get_comments(term, sub, start_date, **window_options))

    return collected


def create_df(comments: List[tuple]):
    """Turn a list of 7-field comment tuples into a DataFrame with named columns."""
    return pd.DataFrame(
        comments,
        columns=['id', 'parent_id', 'text', 'score', 'created_at', 'subreddit', 'query'],
    )

In [13]:
# Example for one query/subreddit

query = "gun laws"
# None searches without a subreddit filter; use e.g. "Conservative" to restrict it.
subreddit = None
start_date = datetime.datetime(2022, 5, 22, 0, 0)
# 1/15 of a day is 96 minutes, i.e. 19 whole 5-minute windows: a quick smoke test.
day_count = 1 / 15

comments = get_comments(query, subreddit, start_date, day_count=day_count, hours=1/12)
df = create_df(comments)
df

Url: https://api.pushshift.io/reddit/search/comment/?q=gun+laws&before=1653170700&after=1653170400&size=500
Current: 2022-05-22 00:00:00 After: 2022-05-22 00:05:00 i: 0 len: 2
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+laws&before=1653171000&after=1653170700&size=500
Current: 2022-05-22 00:05:00 After: 2022-05-22 00:10:00 i: 1 len: 5
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+laws&before=1653171300&after=1653171000&size=500
Current: 2022-05-22 00:10:00 After: 2022-05-22 00:15:00 i: 2 len: 4
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+laws&before=1653171600&after=1653171300&size=500
Current: 2022-05-22 00:15:00 After: 2022-05-22 00:20:00 i: 3 len: 3
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+laws&before=1653171900&after=1653171600&size=500
Current: 2022-05-22 00:20:00 After: 2022-05-22 00:25:00 i: 4 len: 1
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+laws&before=1653172200&after=1653171900&size=500
Current: 202

Unnamed: 0,id,parent_id,text,score,created_at,subreddit,query
0,i9hwl9u,t1_i9hdkch,About 50% of completed suicides are people who...,1,1653170544,bestoflegaladvice,gun laws
1,i9hwpbf,t1_i9hgms0,"The main reason to me, as a gun owner, is that...",1,1653170603,centrist,gun laws
2,i9hwzos,t1_i9h87y8,1. Don’t forget 20 other people where killed ...,1,1653170750,elonmusk,gun laws
3,i9hx0g6,t1_i9h0tot,I'm aware and I agree we need stricter gun con...,1,1653170761,ShitMomGroupsSay,gun laws
4,i9hx78r,t1_i9hq3m8,Most gun owners don't have a problem with back...,1,1653170859,ToiletPaperUSA,gun laws
5,i9hxbw7,t3_uux0w4,Stricter gun laws so everyone can have the sam...,1,1653170926,AskReddit,gun laws
6,i9hxfyn,t1_i9hv3y4,"Mate, Bernie is an outlier in the Democratic P...",1,1653170984,news,gun laws
7,i9hxizt,t1_i9hvw0b,I appreciated your list of links. I was confus...,1,1653171029,mildlyinteresting,gun laws
8,i9hxmmz,t1_i9hwcv0,&gt;You’re having an argument that I’m not int...,1,1653171082,meme,gun laws
9,i9hxmsn,t3_uuavag,Ehhh...I that's been proven untrue. \n\nThere ...,1,1653171084,libertarianmeme,gun laws


In [14]:
# Part 1 (Extract all comments from given subreddits/queries)

# Candidate subreddits; commented-out entries are excluded.
subreddits = [
    #'worldnews',
    #'worldpolitics',
    #'news',
    #'politics',
    #'uspolitics',
    'PoliticalDiscussion',
    'Democrats',
    'Republican',
    'Conservative',
    'Liberal',
    'progun',
    'guncontrol',
    'gunpolitics'
]
# Overrides the list above: get_url leaves out the subreddit filter for a
# falsy value, so [None] runs every query once with no subreddit restriction.
subreddits = [None]

# Only the uncommented queries are searched in this run.
queries = [
    'gun control',
    #'gun reform',
    #'ban gun',
    #'gun laws',
    #'gun restrict',
    #'gun problem',
    #'pro gun',
    #'anti gun',
    #'gun ownership',
    #'gun politics'
]

start_date = datetime.datetime(2022, 5, 17, 0, 0)
day_count = 21

# hours=1/12 splits the 21 days into 5-minute request windows.
comments = get_comments_from_all(queries, subreddits, start_date, day_count=day_count, hours=1/12)
df = create_df(comments)
# index=False matches the other scrape cells and keeps the positional row
# index out of the file (it would come back as an "Unnamed: 0" column).
df.to_csv('data/reddit3.csv', index=False)

Url: https://api.pushshift.io/reddit/search/comment/?q=gun+control&before=1652738700&after=1652738400&size=500
Current: 2022-05-17 00:00:00 After: 2022-05-17 00:05:00 i: 0 len: 5
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+control&before=1652739000&after=1652738700&size=500
Current: 2022-05-17 00:05:00 After: 2022-05-17 00:10:00 i: 1 len: 11
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+control&before=1652739300&after=1652739000&size=500
Current: 2022-05-17 00:10:00 After: 2022-05-17 00:15:00 i: 2 len: 11
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+control&before=1652739600&after=1652739300&size=500
Current: 2022-05-17 00:15:00 After: 2022-05-17 00:20:00 i: 3 len: 10
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+control&before=1652739900&after=1652739600&size=500
Current: 2022-05-17 00:20:00 After: 2022-05-17 00:25:00 i: 4 len: 11
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+control&before=1652740200&after=1652739900

In [14]:
# Uvalde (Extract all comments from given subreddits/queries)

# Candidate subreddits; commented-out entries are excluded.
subreddits = [
    #'worldnews',
    #'worldpolitics',
    #'news',
    #'politics',
    #'uspolitics',
    'PoliticalDiscussion',
    'Democrats',
    'Republican',
    'Conservative',
    'Liberal',
    'progun',
    'guncontrol',
    'gunpolitics'
]
# Overrides the list above: get_url leaves out the subreddit filter for a
# falsy value, so [None] runs every query once with no subreddit restriction.
subreddits = [None]

# Only the uncommented query ('gun regulation') is searched in this run.
queries = [
    #'gun control',
    #'gun reform',
    #'ban gun',
    #'gun laws',
    #'gun restrict',
    #'gun problem',
    #'gun rights',
    #'gun policy',
    #'gun legislation',
    'gun regulation',
    #'pro gun',
    #'anti gun',
    #'gun ownership',
    #'gun politics',

]

# Scrape 21 days starting at midnight on 2022-05-17.
start_date = datetime.datetime(2022, 5, 17, 0, 0)
day_count = 21

# hours=1/3 splits the period into 20-minute request windows.
comments = get_comments_from_all(queries, subreddits, start_date, day_count=day_count, hours=1/3)
df = create_df(comments)
# index=False keeps the DataFrame's row index out of the CSV.
df.to_csv('data/reddit8.csv', index=False)

Url: https://api.pushshift.io/reddit/search/comment/?q=gun+regulation&before=1652739600&after=1652738400&size=500
Current: 2022-05-17 00:00:00 After: 2022-05-17 00:20:00 i: 0 len: 1
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+regulation&before=1652740800&after=1652739600&size=500
Current: 2022-05-17 00:20:00 After: 2022-05-17 00:40:00 i: 1 len: 2
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+regulation&before=1652742000&after=1652740800&size=500
Current: 2022-05-17 00:40:00 After: 2022-05-17 01:00:00 i: 2 len: 5
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+regulation&before=1652743200&after=1652742000&size=500
Current: 2022-05-17 01:00:00 After: 2022-05-17 01:20:00 i: 3 len: 0
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+regulation&before=1652744400&after=1652743200&size=500
Current: 2022-05-17 01:20:00 After: 2022-05-17 01:40:00 i: 4 len: 1
Url: https://api.pushshift.io/reddit/search/comment/?q=gun+regulation&before=1652745600&af

In [None]:
# El Paso (Extract all comments from given subreddits/queries)

# Candidate subreddits; commented-out entries are excluded.
subreddits = [
    #'worldnews',
    #'worldpolitics',
    #'news',
    #'politics',
    #'uspolitics',
    'PoliticalDiscussion',
    'Democrats',
    'Republican',
    'Conservative',
    'Liberal',
    'progun',
    'guncontrol',
    'gunpolitics'
]
# Overrides the list above: get_url leaves out the subreddit filter for a
# falsy value, so [None] runs every query once with no subreddit restriction.
subreddits = [None]

# Only the uncommented query ('gun regulation') is searched in this run.
queries = [
    #'gun control',
    #'gun reform',
    #'ban gun',
    #'gun laws',
    #'gun restrict',
    #'gun problem',
    #'gun rights',
    #'gun policy',
    #'gun legislation',
    'gun regulation',
    #'pro gun',
    #'anti gun',
    #'gun ownership',
    #'gun politics',

]

# Scrape 21 days starting at midnight on 2019-07-27.
start_date = datetime.datetime(2019, 7, 27, 0, 0)
day_count = 21

# hours=1/3 splits the period into 20-minute request windows.
comments = get_comments_from_all(queries, subreddits, start_date, day_count=day_count, hours=1/3)
df = create_df(comments)
# index=False keeps the DataFrame's row index out of the CSV.
df.to_csv('data/elpaso.csv', index=False)

In [56]:
# NOTE(review): `comments2` is not defined in any cell visible here, so on a
# fresh kernel this raises NameError unless it is created elsewhere — confirm
# its origin. The cell is also not idempotent: each re-run appends
# `comments2` to `comments` again.
comments += comments2

In [57]:
# Rebuild the DataFrame from the current `comments` list and save it.
df = create_df(comments)
# index=False matches the other scrape cells and keeps the positional row
# index out of the file (it would come back as an "Unnamed: 0" column).
df.to_csv('data/reddit2.csv', index=False)

In [67]:
# Display the current `df`; pandas abbreviates long frames in the rendered output.
df

Unnamed: 0,id,parent_id,text,score,created_at,subreddit,query
0,i8vioj7,t1_i8v7gbs,"I’m a gun owner too, but it is t cognitive dis...",1,1652738877,politics,gun control
1,i8vj58f,t1_i8vileg,The places with the most control seem to have ...,1,1652739088,politics,gun control
2,i8vkju8,t1_i8vk5e0,Yep the government reserves the right to regul...,1,1652739712,politics,gun control
3,i8vl2kb,t1_i8vjj96,Statistics from 1997 would also reflect dramat...,1,1652739947,politics,gun control
4,i8vmz2y,t1_i8uy3e9,"I don’t know if anyone’s told you, but there’s...",1,1652740822,politics,gun control
...,...,...,...,...,...,...,...
18777,ibepi8o,t1_ibensh9,Yeah I'm completely okay with massive reform o...,1,1654547337,news,gun reform
18778,ibepob1,t1_ibeom2d,That's uhhh that's a lot of strawmen you just ...,1,1654547410,news,gun reform
18779,ibesi5y,t1_ibdis46,"""Well maybe it happened so we can get some God...",1,1654548643,news,gun reform
18780,ibesifx,t1_iber7cl,"If you aren't afraid of guns, we aren't going ...",1,1654548646,news,gun reform
