# This Notebook provides a detailed walkthrough of the pushshift_python analytics wrapper

In [1]:
import pushshift_python as push_py

### An example file query using datetime

In [2]:
# File-based query over the "antivax" subreddit. The window bounds are written
# as YYYY-MM-DD date strings, which is why time_format is "datetime".
antivax_query = push_py.pushshift_file_query(
    query_type="subreddit", query="antivax",
    time_range=dict(before="2019-06-26", after="2019-06-20"),
    time_format="datetime", post_type=None,
)

### Making the file query

Note: this will take substantial time to sort through.

In [None]:
# Execute the query configured earlier (this cell has no execution count in the
# saved notebook, i.e. it was not run when the notebook was saved).
antivax_query.make_query()
# Trailing expression: the notebook displays the query's `time_list` attribute.
antivax_query.time_list

### PushShift Web Query Examples

#### PushShift Web Query using unix epoch time

In [4]:
# Web query over the "conspiracy" subreddit. The window bounds are unix epoch
# timestamps passed as strings; no time_format/post_type is given here.
conspiracy_query = push_py.pushshift_web_query(
    query_type="subreddit", query="conspiracy",
    time_range=dict(before="1609631999", after="1609562861"),
)

#### PushShift Web Query using datetime

In [5]:
# Web query over the "antivax" subreddit with the window given as YYYY-MM-DD
# date strings (time_format="datetime").
# NOTE(review): this rebinds `antivax_query`, which an earlier cell bound to a
# file query; every later cell therefore sees this web query instead.
antivax_query = push_py.pushshift_web_query(
    query_type="subreddit", query="antivax",
    time_range=dict(before="2021-05-06", after="2021-05-05"),
    time_format="datetime", post_type=None,
)

#### Making the query request

Note: collecting the API response will take some time; how long depends on the specified time_range.

In [None]:
# Run both web queries. Later cells read the results from attributes on the
# query objects (`.df`, `.comments`).
conspiracy_query.make_query()
antivax_query.make_query()

#### Our queries return dataframes

In [7]:
# Preview the first three rows of the conspiracy query's result DataFrame.
conspiracy_query.df.head(3)

Unnamed: 0,post_type,subreddit,id,parent_id,link_id,url,permalink,created_utc,datetime,score,...,body,author,author_premium,controversiality,is_self,is_video,post_hint,stickied,total_awards_received,upvote_ratio
0,submission,conspiracy,kor2je,,,https://www.reddit.com/r/conspiracy/comments/k...,/r/conspiracy/comments/kor2je/back_in_february...,1609562967,01/01/2021,1,...,It seems to go against how the british press a...,CockerSpaniard,0.0,,1.0,0.0,self,0.0,0.0,1.0
1,submission,conspiracy,kor341,,,https://www.reddit.com/r/conspiracy/comments/k...,/r/conspiracy/comments/kor341/what_is_one_viru...,1609563026,01/01/2021,1,...,"I can't think of one, why is COVID different?",peetss,False,,True,False,,False,0.0,1.0
2,submission,conspiracy,kor4kf,,,https://mondestuff.com/world-news/lin-wood-if-...,/r/conspiracy/comments/kor4kf/lin_wood_if_penc...,1609563186,01/01/2021,1,...,,gamelover_1,False,,False,False,,False,0.0,1.0


In [8]:
# Preview the first three rows of the `comments` attribute; in the saved output
# below, every row has post_type "comment".
antivax_query.comments.head(3)

Unnamed: 0,post_type,subreddit,id,parent_id,link_id,url,permalink,created_utc,datetime,score,...,body,author,author_premium,controversiality,is_self,is_video,post_hint,stickied,total_awards_received,upvote_ratio
0,comment,antivax,gx0qb56,t3_n4znhj,t3_n4znhj,,/r/antivax/comments/n4znhj/this_made_me_laugh/...,1620218740,05/05/2021,1,...,"There waiting, but when they find it by god do...",toffeejoey1,0.0,,,,,0.0,0.0,
1,comment,antivax,gx0qfom,t1_gwzaw4n,t3_n4znhj,,/r/antivax/comments/n4znhj/this_made_me_laugh/...,1620218809,05/05/2021,4,...,"Did she let you get them after because, you kn...",toffeejoey1,False,,,,,False,0.0,
2,comment,antivax,gx12hj7,t1_gx0qfom,t3_n4znhj,,/r/antivax/comments/n4znhj/this_made_me_laugh/...,1620224680,05/05/2021,3,...,She didn’t really believe in doctors so I wasn...,Shibaru-in-a-Subaru,False,,,,,False,0.0,


### We can take our queried DataFrames and create a Community object

In [9]:
# Wrap each query's result DataFrame in a community object, labelled with the
# subreddit name it was queried from.
conspiracy = push_py.community(
    dataframe=conspiracy_query.df,
    name="conspiracy",
)
antivax = push_py.community(
    dataframe=antivax_query.df,
    name="antivax",
)

#### Example finding urls embedded in comments

In [11]:
# Look for URLs in the `body` column, restricted to posts of type "comment".
conspiracy.make_urls(column="body", post_type="comment")
# Trailing expression: display `text_url_df`. In the saved output below it is
# indexed by post id and each shown `body` contains a link.
conspiracy.text_url_df

Unnamed: 0_level_0,post_type,subreddit,parent_id,link_id,url,permalink,created_utc,datetime,score,num_comments,...,body,author,author_premium,controversiality,is_self,is_video,post_hint,stickied,total_awards_received,upvote_ratio
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ghstwbd,comment,conspiracy,t1_ghstpml,t3_konpby,,/r/conspiracy/comments/konpby/heres_how_we_kno...,1609564471,01/02/2021,1,,...,https://www.youtube.com/watch?v=jrWflCJPM4w,JakeElwoodDim5th,False,,,,,False,0.0,
ghsu2ja,comment,conspiracy,t1_gbeds8x,t3_jgd4rv,,/r/conspiracy/comments/jgd4rv/bidens_use_of_da...,1609564587,01/02/2021,1,,...,https://www.severe-weather.eu/global-weather/i...,mrdrewsin,False,,,,,False,0.0,
ghsva0u,comment,conspiracy,t1_ghstr7t,t3_komqwu,,/r/conspiracy/comments/komqwu/bernie_sanders_t...,1609565400,01/02/2021,1,,...,https://www.google.com/amp/s/www.history.com/....,liamo1882,False,,,,,False,0.0,
ghsw6ai,comment,conspiracy,t1_ghsh0yh,t3_komzud,,/r/conspiracy/comments/komzud/national_guard_d...,1609566020,01/02/2021,1,,...,https://www.news.com.au/world/coronavirus/heal...,Yakapo88,False,,,,,False,0.0,
ghsxn65,comment,conspiracy,t1_ghstnoe,t3_kor4kf,,/r/conspiracy/comments/kor4kf/lin_wood_if_penc...,1609567082,01/02/2021,1,,...,https://www.law.cornell.edu/uscode/text/3/15,DanHalen_phd,False,,,,,False,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ghv3kr2,comment,conspiracy,t1_ghuz60w,t3_kp3ypv,,/r/conspiracy/comments/kp3ypv/birds_lay_dead_i...,1609622027,01/02/2021,1,,...,https://www.google.com/amp/s/www.forbes.com/si...,jamesko1989,False,,,,,False,0.0,
ghv7vel,comment,conspiracy,t1_ghv6txr,t3_kp68yn,,/r/conspiracy/comments/kp68yn/heres_where_trum...,1609624263,01/02/2021,1,,...,https://hackinglawpractice.com/blog/20-year-la...,invisiblew1ndow,False,,,,,False,0.0,
ghv91o1,comment,conspiracy,t1_ghv7v20,t3_kp5lxl,,/r/conspiracy/comments/kp5lxl/what_happens_whe...,1609624865,01/02/2021,1,,...,https://www.google.com/amp/s/constitutioncente...,wileydickgoo,False,,,,,False,0.0,
ghvar5a,comment,conspiracy,t1_ghv5bm4,t3_kp19z7,,/r/conspiracy/comments/kp19z7/wheres_my_money_...,1609625757,01/02/2021,1,,...,https://youtu.be/sY2Y-L5cvcA\n\nThere definite...,ZachElmurry,False,,,,,False,0.0,


#### Our community will store the urls after making them

In [12]:
# Display `text_urls`; per the saved output below this is a table of URL -> count.
conspiracy.text_urls

Unnamed: 0,count
https://www.mentalfloss.com/article/21833/edward-leedskalnin-and-his-coral-castle,6
https://www.reddit.com/user/Frost_999,3
https://youtu.be/gpuSmIz3oJQ,3
https://i.4cdn.org/pol/1609552105973.png,3
https://ussanews.com/News1/2021/01/02/pence-resignation-shocker-he-is-going/,2
...,...
https://www.independent.co.uk/news/science/cern-human-sacrifice-video-watch-hadron-collider-god-particle-a7196881.html,1
https://www.sbs.com.au/news/why-mixed-race-minorities-struggle-to-find-life-saving-transplant-matches,1
https://abc7.com/lapd-homicide-murder-la/8236529/,1
https://giphy.com/gifs/michael-jackson-6pJNYBYSMFod2,1


#### We can also query our community on authors

In [13]:
# Build the author tables for both communities; the antivax one is used by the
# comparison in a later cell.
conspiracy.make_authors()
antivax.make_authors()
# Trailing expression: display the conspiracy community's `authors` table
# (per the saved output, per-author submission/comment/post totals).
conspiracy.authors

Unnamed: 0_level_0,total_submissions,total_submission_score,total_submission_comments,total_comments,total_comment_score,total_posts,total_post_score
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CockerSpaniard,1.0,1.0,18.0,6.0,6.0,7.0,7.0
peetss,1.0,1.0,16.0,0.0,0.0,1.0,1.0
gamelover_1,5.0,5.0,171.0,5.0,5.0,10.0,10.0
s4nskrit,4.0,4.0,274.0,15.0,15.0,19.0,19.0
spock23,3.0,3.0,32.0,15.0,15.0,18.0,18.0
...,...,...,...,...,...,...,...
bottleboy8,0.0,0.0,0.0,1.0,1.0,1.0,1.0
IndependentBall3,0.0,0.0,0.0,1.0,1.0,1.0,1.0
WeDoMusicOfficial,0.0,0.0,0.0,1.0,1.0,1.0,1.0
ninjatoes36,0.0,0.0,0.0,1.0,1.0,1.0,1.0


#### Given 2 Communities we can make pairwise comparisons

In [14]:
# compare_authors returns two frames, unpacked here as (outer, inner).
# NOTE(review): presumably an outer and an inner join on author between the two
# communities — confirm against the library.
outer, inner = conspiracy.compare_authors(antivax)
# Trailing expression: display the first of the two frames.
outer

Unnamed: 0_level_0,total_submissions_conspiracy,total_comments_conspiracy,total_posts_conspiracy,total_submissions_antivax,total_comments_antivax,total_posts_antivax
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
---Seraphim---,0.0,1.0,1.0,0.0,0.0,0.0
--Gem,0.0,1.0,1.0,0.0,0.0,0.0
--dontmindme--,0.0,2.0,2.0,0.0,0.0,0.0
-Azrael-Blick-,0.0,6.0,6.0,0.0,0.0,0.0
-BIOS,0.0,1.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...
zorbiburst,0.0,1.0,1.0,0.0,0.0,0.0
zorkzamboni,0.0,2.0,2.0,0.0,0.0,0.0
zuzuofthewolves,0.0,4.0,4.0,0.0,0.0,0.0
zx12y,0.0,1.0,1.0,0.0,0.0,0.0


In [15]:
# Display the second frame returned by compare_authors; in the saved output it
# holds a single row, the "[deleted]" author.
inner

Unnamed: 0_level_0,total_submissions_conspiracy,total_comments_conspiracy,total_posts_conspiracy,total_submissions_antivax,total_comments_antivax,total_posts_antivax
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
[deleted],3.0,783.0,786.0,0.0,5.0,5.0


In [16]:
# Column-wise ratio of totals: each column's sum over `inner` divided by the
# same column's sum over `outer`.
inner.sum() / outer.sum()

total_submissions_conspiracy    0.006865
total_comments_conspiracy       0.059148
total_posts_conspiracy          0.057477
total_submissions_antivax       0.000000
total_comments_antivax          0.100000
total_posts_antivax             0.094340
dtype: float64

### We can run queries to search for subreddits by condition

In [2]:
# Filter bounds for the split: subscriber counts and creation time (unix epoch).
# NOTE(review): the 9999999999 upper bounds presumably act as "no upper limit" — confirm.
# `size` is read again by a later cell, so its name must stay as is.
size = dict(min_subscribers=1000, max_subscribers=9999999999)
unix_time = dict(min_unix_timestamp=1111111111, max_unix_timestamp=9999999999)

subreddit_info = push_py.subreddits()
subreddit_info.split_multi(unix_times=unix_time, sizes=size, nsfw=False)
# Trailing expression: display the `multi` attribute after the split_multi call.
subreddit_info.multi

Unnamed: 0,subreddit,num_subscribers,creation_utc,nsfw_bool,creation_datetime
0,Announcements,133988568,1.245256e+09,False,2009-06-17 12:28:21
1,funny,38751131,1.201243e+09,False,2008-01-25 01:35:56
2,Ask Reddit...,34701980,1.201233e+09,False,2008-01-24 22:52:15
3,r/gaming,31552365,1.190055e+09,False,2007-09-17 14:43:25
4,A subreddit for cute and cuddly pictures,30094250,1.201234e+09,False,2008-01-24 23:07:02
...,...,...,...,...,...
104474,HomeworkHelpForYou,1000,1.618359e+09,False,2021-04-13 20:14:07
104475,Wait Wait Don't Tell Me: The NPR News Quiz,1000,1.345828e+09,False,2012-08-24 12:59:25
104477,unofficial The Fair One Podcast (Grange TV) su...,1000,1.570906e+09,False,2019-10-12 14:47:06
104478,Free Rapidgator Premium Accounts | Daily Updates,1000,1.403590e+09,False,2014-06-24 01:59:53


In [4]:
# Same kind of split, but bounded by creation date strings (YYYY-MM-DD) rather
# than unix timestamps. `size` and `subreddit_info` come from an earlier cell.
date_time = dict(min_datetime="2020-02-02", max_datetime="2021-01-01")
subreddit_info.split_multi(date_times=date_time, sizes=size, nsfw=False)
# Trailing expression: display the `multi` attribute after the split_multi call.
subreddit_info.multi

Unnamed: 0,subreddit,num_subscribers,creation_utc,nsfw_bool,creation_datetime
672,Wallstreetbetsnew,828902,1.584312e+09,False,2020-03-15 18:34:59
1089,Crypto traders with diamond hands 💎🙌,513034,1.582466e+09,False,2020-02-23 09:00:27
1090,byebyejob,512979,1.591489e+09,False,2020-06-06 20:20:38
1175,Awarded… posthumously.,480681,1.600648e+09,False,2020-09-20 20:24:02
1225,SHIBArmy,462406,1.596479e+09,False,2020-08-03 14:27:30
...,...,...,...,...,...
104427,BearWithMe,1001,1.599705e+09,False,2020-09-09 22:29:41
104439,ClassicSkate,1001,1.603180e+09,False,2020-10-20 03:50:56
104445,BrittaXAnnie,1000,1.586115e+09,False,2020-04-05 15:34:22
104447,Flu_Talk/News,1000,1.582263e+09,False,2020-02-21 00:37:28


In [3]:
# NOTE(review): this re-creates `subreddit_info`, which an earlier cell already
# built — possibly to start from a fresh object; confirm whether it is needed.
subreddit_info = push_py.subreddits()
# Trailing expression: display `master`. In the saved output below it is a much
# larger table than `multi` and includes rows with negative subscriber counts.
subreddit_info.master

Unnamed: 0,subreddit,num_subscribers,creation_utc,nsfw_bool,creation_datetime
0,Announcements,133988568,1.245256e+09,False,2009-06-17 12:28:21
1,funny,38751131,1.201243e+09,False,2008-01-25 01:35:56
2,Ask Reddit...,34701980,1.201233e+09,False,2008-01-24 22:52:15
3,r/gaming,31552365,1.190055e+09,False,2007-09-17 14:43:25
4,A subreddit for cute and cuddly pictures,30094250,1.201234e+09,False,2008-01-24 23:07:02
...,...,...,...,...,...
2898508,ask XKCD: submit and vote on questions to ask ...,-14,1.208201e+09,False,2008-04-14 15:23:34
2898509,ask Jeph: submit and vote on questions to ask ...,-15,1.207266e+09,False,2008-04-03 19:45:34
2898510,All of the bad Bob Ross Paintings,-17,1.534884e+09,False,2018-08-21 16:41:01
2898511,Children's reddit,-22,1.205483e+09,False,2008-03-14 04:27:39
