In [8]:
import pandas as pd
import numpy as np
import sklearn
import seaborn as sns
from matplotlib import pyplot as plt

import codecs
import json
import os

In [9]:
IMPORT = True
EXPORT = True
DISPLAY = True
VERBOSE = False
DATA_DIR = "pizza_request_dataset"

In [10]:
def read_dataset(path):
    with codecs.open(path, 'r', 'utf-8') as myFile:
        content = myFile.read()
        dataset = json.loads(content)
    return dataset

if (IMPORT):
    path = os.path.join(DATA_DIR, 'pizza_request_dataset.json')
    dataset = read_dataset(path)
  
    if (VERBOSE):
        print('The dataset contains %d samples.' %(len(dataset)))
        print('Available attributes: ', sorted(dataset[0].keys()))
        print('First post:')
        print(json.dumps(dataset[0], sort_keys=True, indent=2))

        successes = [r['requester_received_pizza'] for r in dataset]
        success_rate = 100.0 * sum(successes) / float(len(successes))
        print('The average success rate is: %.2f%%' %(success_rate))

In [11]:
dataset

[{'giver_username_if_known': 'N/A',
  'in_test_set': False,
  'number_of_downvotes_of_request_at_retrieval': 2,
  'number_of_upvotes_of_request_at_retrieval': 6,
  'post_was_edited': False,
  'request_id': 't3_w5491',
  'request_number_of_comments_at_retrieval': 7,
  'request_text': "I'm not in College, or a starving artist or anything like that. I've just been a bit unlucky lately. I'm a 36 year old single guy with a job. But rent, and other bills killed me this month. I thought I had enough funds in my account to at least keep me set with Mr. Noodles, I forgot about my monthly banking fee.\n\nI had a small bag of chips Wednesday afternoon, and I get paid Monday, so I'll be fine then.. It's just really painful at this point and food is something I'm constantly thinking about.\n\nI've got a few bucks to get on the bus to work on Saturday, so I can't really use that.\n\nI'm really embarrassed to even be asking like this and I'm not sure how it works, so please be patient with me.\n\nI g

In [12]:
data = pd.DataFrame(dataset)

In [17]:
data = data.set_index("request_id", verify_integrity=True)

In [18]:
data.head()

Unnamed: 0_level_0,giver_username_if_known,in_test_set,number_of_downvotes_of_request_at_retrieval,number_of_upvotes_of_request_at_retrieval,post_was_edited,request_number_of_comments_at_retrieval,request_text,request_text_edit_aware,request_title,requester_account_age_in_days_at_request,...,requester_received_pizza,requester_subreddits_at_request,requester_upvotes_minus_downvotes_at_request,requester_upvotes_minus_downvotes_at_retrieval,requester_upvotes_plus_downvotes_at_request,requester_upvotes_plus_downvotes_at_retrieval,requester_user_flair,requester_username,unix_timestamp_of_request,unix_timestamp_of_request_utc
request_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
t3_w5491,,False,2,6,False,7,"I'm not in College, or a starving artist or an...","I'm not in College, or a starving artist or an...","[Request] Ontario, Canada - On my 3rd of 5 day...",14.416875,...,True,"[AdviceAnimals, WTF, funny, gaming, movies, te...",32,212,48,610,shroom,RitalinYourMemory,1341605000.0,1341601000.0
t3_qysgy,,False,2,6,True,8,Hello! It's been a hard 2 months with money an...,Hello! It's been a hard 2 months with money an...,"[REQUEST] Southern Arizona, Tucson Hungry Family",11.95706,...,True,"[aww, pics]",5,21,13,57,shroom,blubirdhvn,1331868000.0,1331865000.0
t3_if0ed,,False,1,4,True,1,I'm sure there are needier people on this subr...,I'm sure there are needier people on this subr...,[Request] Pizza for a broke college student,454.388461,...,True,"[AskReddit, DoesAnybodyElse, IAmA, Marijuana, ...",1359,2110,2423,3456,shroom,taterpot,1309622000.0,1309619000.0
t3_jr3w1,,False,2,13,False,3,I've been unemployed going on three months now...,I've been unemployed going on three months now...,[Request] Unemployed and Sick of Rice (Suffolk...,141.715625,...,True,"[AdviceAnimals, Art, AskReddit, BookCollecting...",1205,4889,2403,8245,shroom,or_me_bender,1314060000.0,1314056000.0
t3_1d18tc,,False,1,4,1.36685e+09,5,I ran out of money on my meal card a while bac...,I ran out of money on my meal card a while bac...,"[Request] Ohio USA, broke student musician in ...",161.699155,...,True,"[TrueAtheism, atheism, funny]",81,86,225,232,shroom,m_chamberlin,1366840000.0,1366836000.0


In [21]:
test_set = data[data.in_test_set]

In [22]:
train_set = data[~data.in_test_set]

In [23]:
test_set

Unnamed: 0_level_0,giver_username_if_known,in_test_set,number_of_downvotes_of_request_at_retrieval,number_of_upvotes_of_request_at_retrieval,post_was_edited,request_number_of_comments_at_retrieval,request_text,request_text_edit_aware,request_title,requester_account_age_in_days_at_request,...,requester_received_pizza,requester_subreddits_at_request,requester_upvotes_minus_downvotes_at_request,requester_upvotes_minus_downvotes_at_retrieval,requester_upvotes_plus_downvotes_at_request,requester_upvotes_plus_downvotes_at_retrieval,requester_user_flair,requester_username,unix_timestamp_of_request,unix_timestamp_of_request_utc
request_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
t3_n02dy,,True,0,2,False,0,"'Sright. In exchange for sending me pizza, I w...","'Sright. In exchange for sending me pizza, I w...","[REQUEST] [US, CO] I will draw you on me for p...",151.679236,...,True,"[AdviceAnimals, Android, Art, AskReddit, BitMa...",4417,13173,16467,36577,shroom,Kyosama66,1.323030e+09,1.323030e+09
t3_jgtfw,,True,3,6,True,0,"She lives in Round Lake Beach, Illinois, which...","She lives in Round Lake Beach, Illinois, which...",[Request] My waitress friend's broke and has n...,510.230104,...,True,"[ECE, FixedGearBicycle, Scholar, bicycling, ch...",134,4028,222,6052,PIF,r4v5,1.313166e+09,1.313162e+09
t3_ojy6r,,True,0,2,False,6,After spending two and a half months unemploye...,After spending two and a half months unemploye...,"[REQUEST] [IN, USA] Only one thing to make the...",33.840602,...,True,"[AdviceAnimals, AskCulinary, Frisson, IAmA, Ki...",1095,8038,1947,13792,shroom,DocFGeek,1.326755e+09,1.326755e+09
t3_lxx3z,,True,1,15,True,1,"I was laid off a few weeks ago, but ndb as I f...","I was laid off a few weeks ago, but ndb as I f...",[Request] Unemployed for 2 weeks before starti...,103.898449,...,True,"[AskReddit, FoodPorn, StLouis, fffffffuuuuuuuu...",67,597,239,1157,shroom,Entangledphoton,1.320260e+09,1.320257e+09
t3_11yzo2,,True,3,5,False,0,I received a pizza from this sub-reddit about ...,I received a pizza from this sub-reddit about ...,"[Request] Iowa, USA I could use some pizza for...",606.682269,...,True,"[AskReddit, CFB, IAmA, MensRights, Minecraft, ...",2039,2049,6679,6699,shroom,Sorenmatica,1.351035e+09,1.351031e+09
t3_i8xhn,,True,3,9,False,13,Last few days alone in my student house before...,Last few days alone in my student house before...,*Request UK* On the offchance a pizza hero is ...,0.000000,...,True,[],0,25,0,57,shroom,theredditor1142,1.309018e+09,1.309014e+09
t3_16n6pk,,True,7,17,False,7,"I like to make 8 bit drawings as a hobby, and ...","I like to make 8 bit drawings as a hobby, and ...",[Request] Will make 8-bit drawing for pizza!,279.230613,...,True,"[AskReddit, Awww, IAmA, RandomActsOfPizza, Ran...",377,827,565,1231,shroom,I_Cry_Everytime,1.358287e+09,1.358287e+09
t3_i96f9,hogfathom,True,3,11,True,2,"Sorry for the new account, but I never had a r...","Sorry for the new account, but I never had a r...",[REQUEST] Pizza for a United States Redditor,0.000000,...,True,[],0,5,0,15,,iheartreddit420,1.309045e+09,1.309042e+09
t3_x6xfz,donthatemasticate,True,3,6,1.34332e+09,6,"Having a long day at work, and this week has b...","Having a long day at work, and this week has b...",[REQUEST] Illinois USA would love to hang with...,20.599838,...,True,"[AskReddit, RoleplayingForReddit, TheLastAirbe...",398,1208,568,2220,shroom,cinematicandstuff92,1.343319e+09,1.343315e+09
t3_nfom9,,True,1,3,False,5,I've got $0.90 to my name. And her next few pa...,I've got $0.90 to my name. And her next few pa...,[REQUEST] Girlfriend and I are without money. ...,171.478993,...,True,"[AskReddit, Columbus, Games, Random_Acts_Of_Pi...",269,285,483,553,shroom,trutalk,1.324074e+09,1.324074e+09
