In [1]:
# python pushshift.py

import requests
query="seo" #Define Your Query
url = f"https://api.pushshift.io/reddit/search/comment/?q={query}"
request = requests.get(url)
json_response = request.json()
json_response

# Step #1: Create a Function to Call Pushshift AP
def get_pushshift_data(data_type, **kwargs):
    """
    Gets data from the pushshift api.
 
    data_type can be 'comment' or 'submission'
    The rest of the args are interpreted as payload.
 
    Read more: https://github.com/pushshift/api
    """
 
    base_url = f"https://api.pushshift.io/reddit/search/{data_type}/"
    payload = kwargs
    request = requests.get(base_url, params=payload)
    return request.json()
	
# Step #2: Define Your Parameters
data_type="comment"     # give me comments, use "submission" to publish something
query="python"          # Add your query
duration="30d"          # Select the timeframe. Epoch value or Integer + "s,m,h,d" (i.e. "second", "minute", "hour", "day")
size=1000               # maximum 1000 comments
sort_type="score"       # Sort by score (Accepted: "score", "num_comments", "created_utc")
sort="desc"             # sort descending
aggs="subreddit"        #"author", "link_id", "created_utc", "subreddit"

# Step #3: Make the Reddit API Call
get_pushshift_data(data_type=data_type,     
                   q=query,                 
                   after=duration,          
                   size=size,               
                   sort_type=sort_type,
                   sort=sort)
				   
# Step #4: Find in Which Subreddit is Talking More About Your Keyword   
data = get_pushshift_data(data_type=data_type,
                          q=query,
                          after=duration,
                          size=size,
                          aggs=aggs)
						  
#data = data.get("aggs").get(aggs)
				   
# Step #5: Add the Data to a Data Frame
import pandas as pd
df = pd.DataFrame.from_records(data)[0:10]
print(df)

                                                data
0  {'all_awardings': [], 'archived': False, 'asso...
1  {'all_awardings': [], 'archived': False, 'asso...
2  {'all_awardings': [], 'archived': False, 'asso...
3  {'all_awardings': [], 'archived': False, 'asso...
4  {'all_awardings': [], 'archived': False, 'asso...
5  {'all_awardings': [], 'archived': False, 'asso...
6  {'all_awardings': [], 'archived': False, 'asso...
7  {'all_awardings': [], 'archived': False, 'asso...
8  {'all_awardings': [], 'archived': False, 'asso...
9  {'all_awardings': [], 'archived': False, 'asso...


In [9]:
# Step #6: Plot the Data Using Plotly
import plotly.express as px

# x subredits # original
# y #comments # original

xx = df['subreddit']
yy = df['score']
    
# Value of 'x' is not the name of a column in 'data_frame'. 
#Expected one of ['author', 'subreddit', 'score', 'body', 'permalink'] but received: key

px.bar(df,              # our dataframe
       x=xx,#"key",         # x will be the 'key' column of the dataframe
       y=yy,#doc_count",   # y will be the 'doc_count' column of the dataframe
       title=f'Subreddits with most activity - comments with "{query}" in the last "{duration}"',
       labels={"doc_count": "# comments","key": "Subreddits"}, # the axis names
       color_discrete_sequence=["#1f77b4"], # the colors used
       height=500,
       width=800)

In [3]:
# Step #7: Find the Most Up-Voted Comments	   
# Call the API
data = get_pushshift_data(data_type=data_type,
                          q=query,
                          after="7d",
                          size=10,
                          sort_type=sort_type,
                          sort=sort).get("data")
 
# Select the columns you care about
df = pd.DataFrame.from_records(data)[["author", "subreddit", "score", "body", "permalink"]]
 
# Keep the first 400 characters
df['body'] = df['body'].str[0:400] + "..."
 
# Append the string to all the permalink entries so that we have a link to the comment
df['permalink'] = "https://reddit.com" + df['permalink'].astype(str)
 
 
# Create a function to make the link to be clickable and style the last column
def make_clickable(val):
    """ Makes a pandas column clickable by wrapping it in some html.
    """
    return '<a href="{}">Link</a>'.format(val,val)
 
 
df.style.format({'permalink': make_clickable})


Unnamed: 0,author,subreddit,score,body,permalink
0,iamrifki,neoliberal,2,"Not everyone wants to code in C. I personally like it, but Python with Numpy is much better for AI stuff. I think there are also AIs that are interpreted in compiled languages....",Link
1,1Kiru,teenagersbutpog,2,it has a shit ton of python stuff iidk but it seems like a bunch of useless code...,Link
2,LeakyFaucett32,reptiles,2,Ball python!...,Link
3,HexagonOfVirtue,neoliberal,2,"The actual number crunching isn’t done via python, instead, numpy and other libraries interface with other libraries like the MKL and OpenBLAS which are faster and not python....",Link
4,chrisgarzon19,dataengineering,1,"1) Creating your own database is awesome! Make sure you’re doing it with best practices though - fact tables and dim tables with the appropriate primary and foreign keys 2) I like leetcode, but don’t overstudy one part of the DE interview. SQL is king, but you can only study so much SQL to pass 1 out of the 5 rounds. No need to do 200 python leetcode questions either 3) it’s hard to give you a...",Link
5,ExpensiveBeat14,ti84hacks,1,"I use the TI-84 Plus CE-T Python Edition, so this doesn't work for me?...",Link
6,razzbow1,ProgrammerHumor,1,I forgot to renew my python library card...,Link
7,monkeypreen,Python,1,Georgia tech edx has an intro to computer science course taught via Python that's relatively low cost....,Link
8,dfphd,datascience,1,"Here's the thing: if your goal as a company is to eventually have a full-fledged data science team, you are better off getting data scientists into your org early and let them start laying the groundwork for that team than you are letting a bunch of analysts start doing the work and bringing data scientists in later. And that's mostly because data analysts are not going to be looking to lay any gr...",Link
9,watchmaker82,antiwork,1,In addition to mercyfuck: So what by Metallica (or the anti nowhere league) Tie my pecker to my leg by mojo nixon Never be rude to an Arab by Monty Python...,Link
