In [53]:
import html

import pandas as pd
import requests
# One-off request: search Pushshift for Reddit comments matching "mumsnet".
url = "https://api.pushshift.io/reddit/search/comment/?q=mumsnet"
# A timeout keeps the cell from hanging forever if the API is unresponsive.
request = requests.get(url, timeout=30)
# Fail loudly on HTTP errors instead of trying to decode an error page as JSON.
request.raise_for_status()
json_response = request.json()

In [54]:
# Display the full decoded JSON response from the request above.
json_response

{'data': [{'all_awardings': [],
   'associated_award': None,
   'author': 'emmabelle101',
   'author_flair_background_color': None,
   'author_flair_css_class': None,
   'author_flair_richtext': [],
   'author_flair_template_id': None,
   'author_flair_text': None,
   'author_flair_text_color': None,
   'author_flair_type': 'text',
   'author_fullname': 't2_sqza8',
   'author_patreon_flair': False,
   'author_premium': False,
   'awarders': [],
   'body': 'Her “but paedophile rings are communities too” has to be the biggest false equivalency I’ve ever seen on Mumsnet.',
   'collapsed_because_crowd_control': None,
   'created_utc': 1599128147,
   'gildings': {},
   'id': 'g3ts8y0',
   'is_submitter': False,
   'link_id': 't3_ilprdw',
   'locked': False,
   'no_follow': True,
   'parent_id': 't1_g3tq6y1',
   'permalink': '/r/MNTrolls/comments/ilprdw/my_daughter_likes_unwholesome_youtube_videos_and/g3ts8y0/',
   'retrieved_on': 1599128158,
   'score': 1,
   'send_replies': True,
   'stick

In [55]:
def get_pushshift_data(data_type, **kwargs):
    """
    Gets data from the pushshift api.

    data_type can be 'comment' or 'submission'
    The rest of the args are interpreted as payload, i.e. they are sent
    as query-string parameters of the search request.

    Returns the decoded JSON body of the response.

    Raises requests.exceptions.Timeout if the API does not answer in time
    and requests.exceptions.HTTPError for 4xx/5xx responses.

    Read more: https://github.com/pushshift/api
    """

    base_url = f"https://api.pushshift.io/reddit/search/{data_type}/"
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(base_url, params=kwargs, timeout=30)
    # Surface HTTP errors directly rather than as a confusing JSON decoding
    # failure or a silently returned error payload.
    response.raise_for_status()
    return response.json()

In [56]:
# Highest-scoring comments mentioning 'python' from the last 48 hours.
get_pushshift_data(
    "comment",                  # give me comments
    **{
        "q": "python",          # that mention 'python'
        "after": "48h",         # in the last 48 hours
        "size": 1000,           # maximum 1000 comments
        "sort_type": "score",   # sort them by score
        "sort": "desc",         # sort descending
    }
)

{'data': [{'all_awardings': [],
   'associated_award': None,
   'author': 'lazarus_phenomenon',
   'author_flair_background_color': None,
   'author_flair_css_class': None,
   'author_flair_richtext': [],
   'author_flair_template_id': None,
   'author_flair_text': None,
   'author_flair_text_color': None,
   'author_flair_type': 'text',
   'author_fullname': 't2_e9p86',
   'author_patreon_flair': False,
   'author_premium': False,
   'awarders': [],
   'body': 'I wish, it was really lower level grunt work, lots of repetitive data entry. The role did expand over time, and we had opportunities to learn python and regex and transition to a more technical role. \n\nI was paid less than 20 dollars an hour. Was promised a promotion that never happened; I stopped working from home and moved to an apartment closer to work, offered to give up my WFH status. I was stupid to trust them; they never gave me that raise, which I was depending on to be able to pay rent.',
   'collapsed_because_crowd_

In [57]:
# Search comments for "mumsnet" over the last 48 hours and request the
# "subreddit" aggregation alongside the matching comments.
data = get_pushshift_data(
    "comment",
    q="mumsnet", after="48h", size=1000, aggs="subreddit",
)

In [58]:
# `data` starts out as the full API response (a dict) and is rebound here to
# the list of per-subreddit buckets. The guard makes the cell safe to re-run:
# without it a second run would call `.get` on the list and crash.
if isinstance(data, dict):
    data = data.get("aggs").get("subreddit")
data

[{'doc_count': 10, 'key': 'MNTrolls'},
 {'doc_count': 1, 'key': 'WatchPeopleDieInside'},
 {'doc_count': 1, 'key': 'bristol'}]

In [59]:
# Load the aggregation buckets into a frame, keeping at most the first ten rows.
df = pd.DataFrame.from_records(data).head(10)
df

Unnamed: 0,doc_count,key
0,10,MNTrolls
1,1,WatchPeopleDieInside
2,1,bristol


In [60]:
import plotly.express as px

# Bar chart of comment counts per subreddit for the "mumsnet" query; the
# title names that search term (it previously said 'python').
px.bar(df,              # our dataframe
       x="key",         # x will be the 'key' column of the dataframe
       y="doc_count",   # y will be the 'doc_count' column of the dataframe
       title="Subreddits with most activity - comments with 'mumsnet' in the last 48h",
       labels={"doc_count": "# comments","key": "Subreddits"}, # the axis names
       color_discrete_sequence=["blueviolet"], # the colors used
       height=500,
       width=800)

In [61]:
def make_clickable(val):
    """ Makes a pandas column clickable by wrapping it in some html.

    val is the link target for one cell; it is converted to text and
    HTML-escaped before being placed in the href attribute, so characters
    such as quotes or ampersands cannot break the generated markup.

    Returns the anchor tag as a string with the fixed label "Link".
    """
    return '<a href="{}">Link</a>'.format(html.escape(str(val), quote=True))

In [62]:
# get the data we need using the function
data = get_pushshift_data(data_type="comment", q="mumsnet", after="7d", size=10, sort_type="score", sort="desc").get("data")

# we only care about certain columns
df = pd.DataFrame.from_records(data)[["author", "subreddit", "score", "body", "permalink"]]

# we only keep the first X characters of the body of the comment (sometimes they are too big)
df['body'] = df['body'].str[0:400] + "..."

# we append the string to all the permalink entries so that we have a link to the comment
df['permalink'] = "https://reddit.com" + df['permalink'].astype(str)

# style the last column to be clickable and print
df.style.format({'permalink': make_clickable})

Unnamed: 0,author,subreddit,score,body,permalink
0,NotATransponster,ChoosingBeggars,9,"Honestly, mumsnet is filled with women that are similar to your sister. One of my friends who is a parent uses the site often and shares the entitled stories with me, it's insane. So many women on there complain that family will not watch their children whenever they want. Helping with childcare now and again is fine, but expecting someone to commit is unfair. Unfortunately, there are many pare...",Link
1,Ranaestella,ChoosingBeggars,6,I only know mumsnet because its always a top result googling pregnancy or postpartum issues. I'd say it was about as useful as reading facebook comments....,Link
2,BraveField,MNTrolls,6,It has finally happened! I got a Morrison’s order delivered and the delivery man asked if I was over 18 to accept the booze. I’m ignoring the fact he was sniggering to himself when he said it and I’m off to Mumsnet to create a post about youthful and teeny tiny I am. What a joyous evening...,Link
3,Chelsk_C,ChoosingBeggars,5,Mumsnet is crazy for this sometimes. “MIL wont watch my kids for free 5 days a week. Should i withhold contact?”...,Link
4,LeatherTadpole,MNTrolls,5,"What a pity mumsnet standards have risen with regard to begging threads. Only one person offered money (and that was probably a sock). In the good old days she'd have had offers of cash, a few online shopping orders on the way and maybe even a takeaway for tea and toys for the kids. Come back Sadwidow! You are needed......",Link
5,NutsEverywhere,ChoosingBeggars,4,If women from Mumsnet were president we would all be begging for Trump to come back....,Link
6,Chopsy76,MNTrolls,4,Ah don’t tell mumsnet. Grey. Johnston’s frosted silver to be precise; it’s a nice pale grey /light blue type. I’m also getting a feature wall papered on Tuesday with this: https://www.ilovewallpaper.co.uk/wallpaper-c1/betty-metallic-wallpaper-navy-silver-p6517 I’ll be excommunicated. Especially if they hear about the velvet curtains. They’re not crushed velvet though.....,Link
7,InflamedLiver,ChoosingBeggars,3,there's websites for estranged parents too. It's crazy how many awful or abusive parents are sad that their adult kids no longer want contact. But with sites like mumsnet it's hard to figure out which side is the crazy ones....,Link
8,GuinessGirl,MNTrolls,3,"Urgh that's awful. Mumsnet and their weird class obsession. That ""30k and a masters"" comment is horrible. In real life, nobody cares about class the way they think people do Edit: they also are not as witty as they think they are......",Link
9,NotATransponster,ChoosingBeggars,3,Mumsnet is filled with women complaining that family will not take care of their children for free. The entitlement is shocking....,Link


In [63]:
import plotly.express as px

# Bar chart of comment scores per subreddit for the top "mumsnet" comments of
# the last 7 days. The title and axis labels describe the columns actually
# plotted ('subreddit' and 'score').
px.bar(df,              # our dataframe
       x="subreddit",   # x will be the 'subreddit' column of the dataframe
       y="score",       # y will be the 'score' column of the dataframe
       title="Scores of the top comments with 'mumsnet' in the last 7 days, by subreddit",
       labels={"score": "Score", "subreddit": "Subreddits"}, # the axis names
       color_discrete_sequence=["blueviolet"], # the colors used
       height=500,
       width=800)

In [64]:
!pip install voila


