In [61]:
import datetime as dt
import pyarrow.feather as feather
import json
import pandas as pd
import pprint
import praw

from psaw import PushshiftAPI

# Display options
pd.set_option('display.max_columns', 50)
pd.set_option('max_colwidth', 400)

In [2]:
# Import client id and secret to use with PRAW
with open("keys.json", "r") as credentials:
    data = json.load(credentials)
    c_id = data["client_id"]
    c_secret = data["client_secret"]
    u = data["username"]
    p = data["password"]

In [21]:
# PRAW instance
reddit = praw.Reddit(
    client_id = c_id,
    client_secret = c_secret,
    user_agent = "my user agent",
    username = u,
    password = p,
)

print(reddit.read_only)

False


In [34]:
# PSAW instance
api = PushshiftAPI(reddit)

In [53]:
# Set start and end dates for data retrieval
start_epoch = int(dt.datetime(2021, 1, 1).timestamp())
end_epoch = int(dt.datetime(2021, 5, 1).timestamp())

In [58]:
# Function to retrieve and store a number of posts from a given subreddit
def get_posts(subreddit, start_epoch, num_posts = 100):
    try:
        
        # Extract submissions via psaw and praw
        submissions = api.search_submissions(subreddit = subreddit, 
                                             limit = num_posts, 
                                             after = start_epoch,
                                             before = end_epoch)
        
        # Convert data to a dataframe
        df = pd.DataFrame( [vars(submission) for submission in submissions] )
        
        # Extract the required columns
        cleaned_df = df[['subreddit', 'title', 'selftext', 'created_utc', 'author', 'link_flair_text', 'num_comments', 'score', 'upvote_ratio']]# , url
        
        # Convert dates to datetime
        cleaned_df['created_utc'] = pd.to_datetime(cleaned_df['created_utc'], unit='s')
        # Convert author from Redditor to string object
        cleaned_df.loc[:,'author'] = cleaned_df.loc[:,'author'].astype(str)
        
        # Save as a feather - heh!
       # path = 'data/' + subreddit
        #feather.write_feather(cleaned_df, path)
        
        return(cleaned_df)
    except Exception as e:
        print('Error:', e)
# TODO: Update function to handle an unlimited number of posts

In [60]:
cmv_df = get_posts('changemyview', start_epoch, end_epoch).sort_values(by = 'score', ascending = False)
cmv_df.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_df['created_utc'] = pd.to_datetime(cleaned_df['created_utc'], unit='s')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


Unnamed: 0,subreddit,title,selftext,created_utc,author,link_flair_text,num_comments,score,upvote_ratio
422,changemyview,CMV: Most Americans who oppose a national healthcare system would quickly change their tune once they benefited from it.,"I used to think I was against a national healthcare system until after I got out of the army. Granted the VA isn't always great necessarily, but it feels *fantastic* to walk out of the hospital after an appointment without ever seeing a cash register when it would have cost me potentially thousands of dollars otherwise. It's something that I don't think just veterans should be able to experience. \n\nBoth [Canada](https://www.healthcare-now.org/blog/new-poll-shows-canadians-overwhelmingly-su...",2021-04-27 15:30:56,CrashRiot,Delta(s) from OP,7048,44448,0.8
9146,changemyview,CMV: being a conservative is the least Christ-like political view,"From what I know, Christ was essentially a radical leftist. He was all about helping and loving the poor, hungry, disabled, outcast. He would feed 10 people just in case one was going hungry. He flipped a table when banks were trying to take advantage of people. He was anti-capitalist and pro social responsibility to support, love and respect all members of society. He was, based on location and era, probably a person of color. He would not stand for discrimination. He would overthrow an ins...",2021-01-12 17:36:28,dmackl,Delta(s) from OP,3795,34480,0.72
8690,changemyview,CMV: Democrats and Republicans live in completely different realities and it is destroying our country.,"I would guess that a typical Democrat gets their news from CNN, MSNBC, Washington Post, NYT, etc. And the average conservative gets his news from Fox News, Talk Radio, OAN, Breitbart, The Daily Caller, YouTubers like Stephen Crowder and Ben Shapiro, etc. If you go more into the fringes of the right they probably get their news from QAnon and other conspiracy Facebook groups.\n\nThis disparity in where both sides of the political aisle gets their news from is what I believe is causing the mas...",2021-01-16 14:37:57,rollingboulder89,Delta(s) from OP,2135,28663,0.76
3118,changemyview,"CMV: ""Folks"" is a reasonably inclusive, gender neutral term, and spelling it as ""folx"" is purely virtue signaling","I just want to start by saying this might be the only instance of something that I would actually, unironically call ""virtue signaling"" -- a term I usually disdain and find dismissive of social progress. But in this case, that's exactly what I think it is. \n\n""Folks"" is an inclusive word. It means ""people."" It is inherently gender neutral. It is perhaps one of the few English words to address a group of people that is totally inclusive and innocuous. In a time when we are critically evaluat...",2021-03-30 17:36:28,tit_wrangler,Delta(s) from OP,2996,28489,0.79
9371,changemyview,CMV: Americans in general don't know what socialism or communism are.,[removed],2021-01-11 00:29:38,Souk12,Removed - Submission Rule B,2311,22718,0.83


In [64]:
cmv_df.head(5)

Unnamed: 0,subreddit,title,selftext,created_utc,author,link_flair_text,num_comments,score,upvote_ratio
422,changemyview,CMV: Most Americans who oppose a national healthcare system would quickly change their tune once they benefited from it.,"I used to think I was against a national healthcare system until after I got out of the army. Granted the VA isn't always great necessarily, but it feels *fantastic* to walk out of the hospital after an appointment without ever seeing a cash register when it would have cost me potentially thousands of dollars otherwise. It's something that I don't think just veterans should be able to experien...",2021-04-27 15:30:56,CrashRiot,Delta(s) from OP,7048,44448,0.8
9146,changemyview,CMV: being a conservative is the least Christ-like political view,"From what I know, Christ was essentially a radical leftist. He was all about helping and loving the poor, hungry, disabled, outcast. He would feed 10 people just in case one was going hungry. He flipped a table when banks were trying to take advantage of people. He was anti-capitalist and pro social responsibility to support, love and respect all members of society. He was, based on location a...",2021-01-12 17:36:28,dmackl,Delta(s) from OP,3795,34480,0.72
8690,changemyview,CMV: Democrats and Republicans live in completely different realities and it is destroying our country.,"I would guess that a typical Democrat gets their news from CNN, MSNBC, Washington Post, NYT, etc. And the average conservative gets his news from Fox News, Talk Radio, OAN, Breitbart, The Daily Caller, YouTubers like Stephen Crowder and Ben Shapiro, etc. If you go more into the fringes of the right they probably get their news from QAnon and other conspiracy Facebook groups.\n\nThis disparity ...",2021-01-16 14:37:57,rollingboulder89,Delta(s) from OP,2135,28663,0.76
3118,changemyview,"CMV: ""Folks"" is a reasonably inclusive, gender neutral term, and spelling it as ""folx"" is purely virtue signaling","I just want to start by saying this might be the only instance of something that I would actually, unironically call ""virtue signaling"" -- a term I usually disdain and find dismissive of social progress. But in this case, that's exactly what I think it is. \n\n""Folks"" is an inclusive word. It means ""people."" It is inherently gender neutral. It is perhaps one of the few English words to address...",2021-03-30 17:36:28,tit_wrangler,Delta(s) from OP,2996,28489,0.79
9371,changemyview,CMV: Americans in general don't know what socialism or communism are.,[removed],2021-01-11 00:29:38,Souk12,Removed - Submission Rule B,2311,22718,0.83


In [15]:
cmv_df = pd.DataFrame([vars(submission) for submission in cmv])
cmv_df.head(5)

Unnamed: 0,comment_limit,comment_sort,_reddit,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,subreddit_name_prefixed,hidden,pwls,link_flair_css_class,downs,thumbnail_height,top_awarded_type,hide_score,name,quarantine,link_flair_text_color,upvote_ratio,...,is_robot_indexable,report_reasons,author,discussion_type,num_comments,send_replies,whitelist_status,contest_mode,mod_reports,author_patreon_flair,author_flair_text_color,permalink,parent_whitelist_status,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video,_fetched,_comments_by_id,post_hint,preview,link_flair_template_id
0,2048,confidence,<praw.reddit.Reddit object at 0x7fac3409b700>,,changemyview,"1. By ""snakes"" I mean (i) have their own agenda; (ii) pursue their own self-interest which is often in conflict to your own at your expense; (iii) will generally act in any selfish or dishonest manner in pursuit of their own interests.\n2. I have used the words ""generally"" and ""often"" - I am not saying this is always the case, and every single person is a snake or will act in a snake like fashion; \n3. I am not saying snakes are evil people, and there are perhaps many worse things than being...",t2_142v0w,False,,0,False,CMV: People are generally snakes in work and often in dating environments,[],r/changemyview,False,6,,0,,,True,t3_n46blf,False,dark,1.0,...,True,,mr_devereux,,0,True,all_ads,False,[],False,,/r/changemyview/comments/n46blf/cmv_people_are_generally_snakes_in_work_and_often/,all_ads,False,https://www.reddit.com/r/changemyview/comments/n46blf/cmv_people_are_generally_snakes_in_work_and_often/,1295507,1620073000.0,0,,False,False,{},,,
1,2048,confidence,<praw.reddit.Reddit object at 0x7fac3409b700>,,changemyview,"Background: From my understanding, wearing ethnic hairstyles like cornrows is appropriation because of the history of Black people getting discriminated for wearing them. Similarly, ethnic foods, like Kimchi, were considered to be unpalatable/smelly and contributed to the discrimination of racial groups. \n\n\nView Point: If wearing ethnic hairstyles like cornrows is cultural appropriation, eating ethnic food like Kimchi is cultural appropriation. I am not here to debate whether or not cul...",t2_bqce262n,False,,0,False,"CMV: If wearing ethnic hairstyles like cornrows is cultural appropriation, eating ethnic food like Kimchi is cultural appropriation.",[],r/changemyview,False,6,,0,,,True,t3_n45wv2,False,dark,0.86,...,True,,didijqwd12,,14,True,all_ads,False,[],False,,/r/changemyview/comments/n45wv2/cmv_if_wearing_ethnic_hairstyles_like_cornrows_is/,all_ads,False,https://www.reddit.com/r/changemyview/comments/n45wv2/cmv_if_wearing_ethnic_hairstyles_like_cornrows_is/,1295507,1620072000.0,0,,False,False,{},,,
2,2048,confidence,<praw.reddit.Reddit object at 0x7fac3409b700>,,changemyview,[removed],t2_7ajao4di,False,,0,False,CMV: cultural appropriation?,[],r/changemyview,False,6,,0,,,True,t3_n45wb4,False,dark,1.0,...,False,,Ill-Kindheartedness5,,0,True,all_ads,False,[],False,,/r/changemyview/comments/n45wb4/cmv_cultural_appropriation/,all_ads,False,https://www.reddit.com/r/changemyview/comments/n45wb4/cmv_cultural_appropriation/,1295507,1620072000.0,0,,False,False,{},,,
3,2048,confidence,<praw.reddit.Reddit object at 0x7fac3409b700>,,changemyview,[removed],t2_ape8u,False,,0,False,CMV: Warriors of Virtue is the original Thor Ragnarok - (Film),[],r/changemyview,False,6,,0,,,True,t3_n45muy,False,dark,1.0,...,False,,Bowl-of-oranges,,0,True,all_ads,False,[],False,,/r/changemyview/comments/n45muy/cmv_warriors_of_virtue_is_the_original_thor/,all_ads,False,https://www.reddit.com/r/changemyview/comments/n45muy/cmv_warriors_of_virtue_is_the_original_thor/,1295507,1620072000.0,0,,False,False,{},,,
4,2048,confidence,<praw.reddit.Reddit object at 0x7fac3409b700>,,changemyview,[removed],t2_b37ym4wg,False,,0,False,"CMV: The Second American Civil War is not only imminent, it's absolutely the only way to save America from herself.",[],r/changemyview,False,6,,0,,,True,t3_n45i49,False,dark,1.0,...,False,,n340g,,0,True,all_ads,False,[],False,,/r/changemyview/comments/n45i49/cmv_the_second_american_civil_war_is_not_only/,all_ads,False,https://www.reddit.com/r/changemyview/comments/n45i49/cmv_the_second_american_civil_war_is_not_only/,1295507,1620071000.0,0,,False,False,{},,,


In [None]:
# Quick look at an example layout
type(reddit.subreddit("learnpython").top("year", limit = 2))

In [None]:
for submission in reddit.subreddit("learnpython").top("year", limit = 1):
    pprint.pprint(vars(submission))

In [None]:
# As a dataframe
df = pd.DataFrame([ vars(post) for post in reddit.subreddit("learnpython").top("year", limit=10)])
df.head(3)

In [None]:
# Function to retrieve and store a number of posts from a given subreddit
def get_posts(subreddit, num_posts = 100):
    try:
        # Extract posts and save to dataframe
        df = pd.DataFrame([ vars(post) for post in reddit.subreddit(subreddit).top("year", limit = num_posts) ])
        # Extract the required columns
        cleaned_df = df[['title', 'selftext', 'created_utc', 'author', 'link_flair_text', 'num_comments', 'score', 'upvote_ratio', 'url']]
        # Convert author from Redditor to string object
        cleaned_df.loc[:,'author'] = cleaned_df.loc[:,'author'].astype(str)
        # Save as a feather - heh!
        path = 'data/' + subreddit
        feather.write_feather(cleaned_df, path)
    except Exception as e:
        print('Error:', e)
        
# TODO:
# CMV: Also whether the post includes a delta (Flairs should do this)

In [None]:
get_posts("changemyview", 1000)

In [None]:
subreddits = ['FemaleDatingStrategy', 'MGTOW2']

In [None]:
for subreddit in subreddits:
    get_posts(subreddit, 1000)

In [None]:
df = pd.DataFrame([ vars(post) for post in reddit.subreddit("AgainstHateSubreddits").top("year", limit=10)])
df.head(3)

In [None]:
# Experimenting with the Pushshift API
# https://github.com/dmarx/psaw/issues/78
api = PushshiftAPI()
gen = api.search_submissions(**{"subreddit": "AgainstHateSubreddits", "selftext:not": "[removed]|[deleted]",
                               "filter": "['title', 'selftext', 'created_utc', 'author', 'link_flair_text', 'num_comments', 'score', 'upvote_ratio', 'url']"
                               })

In [None]:
df = pd.DataFrame([obj.d_ for obj in gen])
df.shape

In [None]:
df.head(2)

In [None]:
# Group by flair, aggregate and order by count
df.groupby(['link_flair_text'])['title'].agg(['count']).sort_values(ascending = False)

In [None]:
# Create a new column based on a macro
