In [114]:
import requests
import numpy as np
import pandas as pd
import datetime as dt
from pandas.io.json import json_normalize
from psaw import PushshiftAPI
import praw
import pickle

In [115]:
pd.set_option('display.max_columns', 100)  # or 1000
pd.set_option('display.max_rows', 100)  # or 1000
pd.set_option('display.max_colwidth', -1)  # or 199

In [116]:
reddit = praw.Reddit(client_id='sIF2FculBPoFMg',
                     client_secret='ONlvY9ziXYVBhTeMvp1y4yP4Fg4',
                     user_agent='billyisnotthegoat')

api = PushshiftAPI(reddit)

In [129]:

before_date = int(dt.datetime(2019, 11, 1).timestamp())
after_date = int(dt.datetime(2017, 3, 1).timestamp())

gen = api.search_submissions( after=after_date, before=before_date, subreddit='Random_Acts_Of_Pizza')

## Get raw submissions from Random Acts of Pizza sub

In [130]:
objects = list(gen)

kept = []

count = 0


for idx, obj in enumerate(objects):
    title = obj.title.lower()
    text = obj.selftext
    url = obj.url
    flair = obj.link_flair_text
 
        
    if obj.author == None:
        continue
    
    if "request]" not in title or text == "[removed]" or text == "[deleted]":
        continue
        
    if flair == "No Longer Needed":
        continue
        
    if flair == "Fulfilled":
        count += 1
        
    kept.append(obj)


In [131]:
len(objects), count, len(kept)

(41880, 2670, 4751)

In [15]:
len(objects), count, len(kept)

(41880, 2673, 4754)

In [16]:
count / len(kept)

0.562263357172907

In [132]:
with open('submissions.pkl','wb') as f:
    pickle.dump(kept, f)

In [133]:
submissions = None
with open('submissions.pkl','rb') as f:
    submissions = pickle.load(f)

In [134]:
kept == submissions

True

## Construct necessary data for dataset

In [164]:
def construct_features(idx): #index in submissions
        GiversBotId = 'np6d0'
        submission = submissions[idx]
        #print("STARTING...", idx)
        d = {}

        d['request_id'] = submission.id
        d['request_number_of_comments_at_retrieval'] = submission.num_comments
        d['request_text'] = submission.selftext.lower()
        d['request_title'] = submission.title

        d['number_of_downvotes_of_request_at_retrieval'] = submission.downs
        d['number_of_upvotes_of_request_at_retrieval'] = submission.ups
        
        print ("got to here")

        if submission.edited == False:
            d['post_was_edited'] = False
        else:
            d['post_was_edited'] = True



        d['requester_account_age_in_days_at_request'] = divmod(submission.created_utc - submission.author.created_utc, 86400)[0]

        d['request_url'] = submission.url


        try:
            requester_created_utc = submission.author.created_utc
        except: #if created_utc is not an attribute, the author has been suspended
            return
        
          
        print ("got to here2")


        requester_subreddits_at_request = set()

        for author_comment in submission.author.comments.top('all'):
            if author_comment.subreddit.created_utc < submission.created_utc:
                requester_subreddits_at_request.add(author_comment.subreddit.display_name)

        for author_sub in submission.author.submissions.top('all'):
            if author_sub.subreddit.created_utc < submission.created_utc:
                requester_subreddits_at_request.add(author_sub.subreddit.display_name)

        top_level_comments = list(submission.comments)

        d['requester_num_pizza_received_at_request'] = None
        d['requester_num_pizza_given_at_request'] = None
        d['requester_num_pizza_related_posts_at_request'] = None
        d['requester_num_pizza_related_comments_at_request'] = None
        
        print ("got to here3")

        d['giver_username'] = None
        d['giver_user_flair'] = None
        giver_redditor = None

        for tl_comment in top_level_comments:
            tl_text = tl_comment.body.lower()
            child_comments = []
            getSubComments(tl_comment, child_comments)
            for child_comment in child_comments:
                if "GIFT transaction #"  in child_comment.body:
                    info_lines = child_comment.body.splitlines()
                    for info_line in info_lines:
                        if "**A**" in info_line:
                            info_split = info_line.split("|")
                            if info_split[2] != "Anonymous":
                                giver_redditor_name = info_split[2][3:]
                                giver_redditor = reddit.redditor(giver_redditor_name)
                                d['giver_username'] = giver_redditor_name
                                

            try:
                if tl_comment.author.id == GiversBotId:
                    if "* **Received" in tl_comment.body:
                        info_lines = tl_comment.body.splitlines()
                        d['requester_num_pizza_received_at_request'] = int(info_lines[0].split()[2] )
                        d['requester_num_pizza_given_at_request'] = int(info_lines[1].split()[2] )
                        d['requester_num_pizza_related_posts_at_request'] = int(info_lines[2].split()[2])
                        d['requester_num_pizza_related_comments_at_request'] = int(info_lines[2].split()[7])
            except Exception as e:
                continue
                
                
        print ("got to here4")



        giver_subreddits_at_request = set()
        if giver_redditor:
            try:
                print (d['giver_username'], d['request_url'])

                for author_comment in giver_redditor.comments.top('all'):
                    if author_comment.subreddit.created_utc < submission.created_utc:
                        giver_subreddits_at_request.add(author_comment.subreddit.display_name)

                for author_sub in giver_redditor.submissions.top('all'):
                    if author_sub.subreddit.created_utc < submission.created_utc:
                        giver_subreddits_at_request.add(author_sub.subreddit.display_name)
            except:
                d['giver_username'] = None # giver has deleted account
                pass
                    
        print ("got to here4.5")


        d['giver_subreddits_at_request'] = list(giver_subreddits_at_request)

        d['requester_subreddits_at_request'] = list(requester_subreddits_at_request)

        d['requester_has_verified_email'] = submission.author.has_verified_email

        d['requester_received_pizza'] = 0
        if submission.link_flair_text == "Fulfilled":
            d['requester_received_pizza'] = 1
            
        print ("got to here5")

        d['requester_user_flair'] = submission.author_flair_text
        d['requester_username'] = submission.author.name
        d['unix_timestamp_of_request'] = submission.created_utc

        print(idx,submission.link_flair_text, d['giver_username'], d['request_url'])

        #if d['requester_received_pizza']:
        #    if (d['giver_username'] == None):
                #print ("WHYYY")


        return d

In [165]:
construct_features(896 )

got to here
got to here2
got to here3
got to here4
Smokeydoke30 https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/b4sdwv/request_almost_no_food_and_husband_doesnt_get/
got to here4.5
got to here5
896 Fulfilled None https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/b4sdwv/request_almost_no_food_and_husband_doesnt_get/


{'request_id': 'b4sdwv',
 'request_number_of_comments_at_retrieval': 8,
 'request_text': "i tried posting here before but had some inactivity on my account (was busy with my honeymoon ☺️) so it didn't let me 😔\n\ni got married recently and its amazing but we've been struggling a bit. im unable to work and waiting to get on disability. my husband works hard at a chain restaurant for min wage and gets paid every other week and won't get paid for a week+. i got approved for food stamps a few months ago which has been a life saver 🙏 but we're really low on food now 😕 (i had a can of tuna today) and that doesn't refill until the first .\n\na dominos e gift card would be amazing so me and my husband could order a pizza (mainly me i'm starving and just got over a stomach bug lol) tomorrow  \n\nhopefully it lets me post this time. \n\nthank you sooo much! 🤗\njessica 💕 ",
 'request_title': "[REQUEST] Almost no food and husband doesn't get paid for a week",
 'number_of_downvotes_of_request_at_re

In [120]:
print (reddit.redditor("billyisnotthegoat").id)

4r32j7hi


In [121]:
def test_feat(idx):
    print (idx)
    top_level_comments = list(submissions[idx].comments)
    
    print (submissions[idx].permalink)
    for tl_comment in top_level_comments:
        tl_text = tl_comment.body.lower()
        child_comments = []
        getSubComments(tl_comment, child_comments)
        for child_comment in child_comments:
            if "GIFT transaction #" in child_comment.body:
                info_lines = child_comment.body.splitlines() 
                #print (info_lines) 
                for info_line in info_lines:
                    if "**A**" in info_line:
                        info_split = info_line.split("|")
                        giver_redditor_name = info_split[2][3:]
                        giver_redditor = reddit.redditor(giver_redditor_name)
                        print (giver_redditor_name)

In [122]:
test_feat(745)

745
/r/Random_Acts_Of_Pizza/comments/bj80z5/request_wife_and_i_have_ate_about_10_pounds_of/
regula_et_vita
regula_et_vita


In [137]:
submissions[80]

Submission(id='df9s6b')

In [160]:
len(submissions)

4751

In [159]:
import multiprocessing
pool = multiprocessing.Pool(10)
dict_list = pool.map(construct_features, range(250), time_out=200)
pool.close()

TypeError: map() got an unexpected keyword argument 'time_out'

In [11]:
df = pd.DataFrame(dict_list)


NameError: name 'dict_list' is not defined

In [20]:
len(dict_list)

20

In [64]:
df[["requester_received_pizza", "request_url"]]

Unnamed: 0,requester_received_pizza,request_url
0,0,https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/dpx5fe/request_i_have_4_to_get_me_through_the_next_day/
1,0,https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/dptmx0/request_hit_my_daily_limit_on_my_debit_card/
2,1,https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/dphb2r/request_really_stressful_week/
3,0,https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/dpe2dj/request_first_pay_day_of_new_job_is_friday_havent/
4,1,https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/dpccso/request_could_use_a_nice_pie_if_possible/
5,0,https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/dotuxd/request_cents_to_my_name_payday_is_tomorrow/
6,1,https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/dofjc3/request_vancouver_canada_lowincome_youth_looking/
7,1,https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/dofige/request_37343_broke_no_car_and_no_food_in_my_house/
8,1,https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/doeex3/request_im_struggling_right_now_even_pizza_i_cant/
9,1,https://www.reddit.com/r/Random_Acts_Of_Pizza/comments/doe5we/request_dad_of_3_would_really_like_the_boys_to/


In [None]:
hm = pd.read_csv("raw_psaw.csv", sep = "\t")

In [None]:
hm["selftext"][5]