In [1]:
import pandas as pd 
import numpy as np 
import random

import matplotlib.pyplot as plt # data visualization
import seaborn as sns
%matplotlib inline

# Notebook-wide display and pandas configuration.
# `display` is imported for explicit rendering of objects from inside a cell
# (it is not called in the cells visible in this part of the notebook).
from IPython.display import display
# Show every column when rendering wide DataFrames (None removes the column cap).
pd.set_option('display.max_columns', None)
# Disable pandas' chained-assignment check (SettingWithCopyWarning) globally.
# NOTE(review): this hides warnings about writes to DataFrame slices for the
# whole notebook; prefer fixing the offending assignments over suppressing them.
pd.options.mode.chained_assignment = None 

In [2]:
# Folder (relative to the notebook's working directory) that holds the Yelp
# dataset files; the user JSON path below is built from it.
DATA_FOLDER = 'yelp_dataset'

### Loading user data

In [3]:
# Read the user JSON-lines file in fixed-size chunks and stitch the chunks
# back together into a single DataFrame.
user_json_path = '{}/yelp_academic_dataset_user.json'.format(DATA_FOLDER)

size = 1000000  # number of lines parsed per chunk
user_chunks = pd.read_json(
    user_json_path,
    lines=True,
    dtype={'user_id': str, 'name': str, 'review_count': int},
    chunksize=size,
)
# With `chunksize` set, read_json returns an iterator of DataFrames rather
# than a DataFrame, so materialise every chunk before concatenating.
chunk_list = [chunk for chunk in user_chunks]
user_df = pd.concat(chunk_list)

print(user_df.shape)
user_df.head()

(1968703, 22)


Unnamed: 0,user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,average_stars,compliment_hot,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos
0,ntlvfPzc8eglqvk92iDIAw,Rafael,553,2007-07-06 03:27:11,628,225,227,,"oeMvJh94PiGQnx_6GlndPQ, wm1z1PaJKvHgSDRKfwhfDg...",14,3.57,3,2,1,0,1,11,15,22,22,10,0
1,FOBRPlBHa3WPHFB5qYDlVg,Michelle,564,2008-04-28 01:29:25,790,316,400,200820092010201120122013,"ly7EnE8leJmyqyePVYFlug, pRlR63iDytsnnniPb3AOug...",27,3.84,36,4,5,2,1,33,37,63,63,21,5
2,zZUnPeh2hEp0WydbAZEOOg,Martin,60,2008-08-28 23:40:05,151,125,103,2010,"Uwlk0txjQBPw_JhHsQnyeg, Ybxr1tSCkv3lYA0I1qmnPQ...",5,3.44,9,6,0,1,0,3,7,17,17,4,1
3,QaELAmRcDc5TfJEylaaP8g,John,206,2008-09-20 00:08:14,233,160,84,2009,"iog3Nyg1i4jeumiTVG_BSA, M92xWY2Vr9w0xoH8bPplfQ...",6,3.08,2,1,0,0,0,7,14,7,7,2,0
4,xvu8G900tezTzbbfqmTKvA,Anne,485,2008-08-09 00:30:27,1265,400,512,200920102011201220142015201620172018,"3W3ZMSthojCUirKEqAwGNw, eTIbuu23j9tOgmIa9POyLQ...",78,4.37,8,9,2,1,1,22,28,31,31,19,31


In [4]:
# Keep only the columns needed to attach a reviewer's name and friend list
# to their reviews.
friend_columns = ['user_id', 'name', 'friends']
user_df_friends = user_df[friend_columns]
user_df_friends.head()

Unnamed: 0,user_id,name,friends
0,ntlvfPzc8eglqvk92iDIAw,Rafael,"oeMvJh94PiGQnx_6GlndPQ, wm1z1PaJKvHgSDRKfwhfDg..."
1,FOBRPlBHa3WPHFB5qYDlVg,Michelle,"ly7EnE8leJmyqyePVYFlug, pRlR63iDytsnnniPb3AOug..."
2,zZUnPeh2hEp0WydbAZEOOg,Martin,"Uwlk0txjQBPw_JhHsQnyeg, Ybxr1tSCkv3lYA0I1qmnPQ..."
3,QaELAmRcDc5TfJEylaaP8g,John,"iog3Nyg1i4jeumiTVG_BSA, M92xWY2Vr9w0xoH8bPplfQ..."
4,xvu8G900tezTzbbfqmTKvA,Anne,"3W3ZMSthojCUirKEqAwGNw, eTIbuu23j9tOgmIa9POyLQ..."


### Merging business, review, and user data

In [5]:
# Load the pre-built table of reviews joined to their businesses.
# The path is derived from DATA_FOLDER (like the user JSON path) so that
# pointing the notebook at another dataset folder only needs one change.
business_reviews_path = '{}/yelp_reviews_home_categories.csv'.format(DATA_FOLDER)
business_reviews = pd.read_csv(business_reviews_path)
print(business_reviews.shape)
#business_reviews.head()

(334327, 22)


In [6]:
# Attach each reviewer's name and friend list to their review rows.
# The inner join keeps only reviews whose user_id is present in the user table.
# Both sides have a `name` column, so pandas suffixes them: `name_x` comes from
# business_reviews and `name_y` from user_df_friends.
user_business_reviews = business_reviews.merge(
    user_df_friends,
    how='inner',
    on='user_id',
)
print(user_business_reviews.shape)
user_business_reviews.head()

(334326, 24)


Unnamed: 0,business_id,name_x,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours,review_id,user_id,review_stars,useful,funny,cool,text,date,name_y,friends
0,51M2Kk903DFYI6gnB5I6SQ,USE MY GUY SERVICES LLC,4827 E Downing Cir,Mesa,AZ,85205,33.428065,-111.726649,4.5,26.0,1.0,"{'BusinessAcceptsCreditCards': 'True', 'ByAppo...","Home Services, Plumbing, Electricians, Handyma...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-16:0', '...",EY8t3ndAZo2vWY7eeOnVLw,nKBtfZ93gPYybGEz2QOvTQ,5.0,0.0,1.0,0.0,UMGS does an amazing job serving the community...,2016-01-30 03:26:19,Brittney,"fnlLgrXzfFZhMqA5G8MeJQ, WzKaL2lws_-wSnwYU_II5A..."
1,51M2Kk903DFYI6gnB5I6SQ,USE MY GUY SERVICES LLC,4827 E Downing Cir,Mesa,AZ,85205,33.428065,-111.726649,4.5,26.0,1.0,"{'BusinessAcceptsCreditCards': 'True', 'ByAppo...","Home Services, Plumbing, Electricians, Handyma...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-16:0', '...",uoujAVvyx-GLyQnByuon0w,UjVtviHTm2mgZnXCfl33CQ,5.0,0.0,0.0,0.0,I called Connie needing some stuff done on a S...,2016-03-02 20:15:09,Angie,"cssHBZ55fJ8hHXOYfW1o5w, EBrLq4Bi2sFKkOUC80P1xg..."
2,51M2Kk903DFYI6gnB5I6SQ,USE MY GUY SERVICES LLC,4827 E Downing Cir,Mesa,AZ,85205,33.428065,-111.726649,4.5,26.0,1.0,"{'BusinessAcceptsCreditCards': 'True', 'ByAppo...","Home Services, Plumbing, Electricians, Handyma...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-16:0', '...",pECK3p9w7m-_xEp--lGxHg,L498DJb5YDAtoqgv9thWCg,5.0,0.0,0.0,0.0,We are selling our home and needed our back fa...,2017-06-07 18:24:25,Tiffany,"cTi5rF54rDsffJhGUjqC5Q, kuk9VQFkn42GKCcmaLBkaQ..."
3,51M2Kk903DFYI6gnB5I6SQ,USE MY GUY SERVICES LLC,4827 E Downing Cir,Mesa,AZ,85205,33.428065,-111.726649,4.5,26.0,1.0,"{'BusinessAcceptsCreditCards': 'True', 'ByAppo...","Home Services, Plumbing, Electricians, Handyma...","{'Monday': '0:0-0:0', 'Tuesday': '9:0-16:0', '...",981LKlJg8emqRshseIdMrA,L498DJb5YDAtoqgv9thWCg,5.0,2.0,2.0,1.0,We are selling our home and needed our back fa...,2018-04-04 21:39:04,Tiffany,"cTi5rF54rDsffJhGUjqC5Q, kuk9VQFkn42GKCcmaLBkaQ..."
4,b8QAxQUBy14H6AJt7MUMgw,Anytime Auto Glass,"2659 W Guadalupe Rd, Ste D-202",Mesa,AZ,85202,33.363646,-111.892016,4.5,90.0,1.0,"{'ByAppointmentOnly': 'True', 'WiFi': ""u'free'...","Automotive, Home Services, Auto Glass Services...","{'Monday': '6:0-18:0', 'Tuesday': '6:0-18:0', ...",xKVw-kj0ia4XLrK6war2Ow,L498DJb5YDAtoqgv9thWCg,5.0,1.0,0.0,0.0,"Thanks to Rey and Marty, they replaced a winds...",2018-02-08 21:46:59,Tiffany,"cTi5rF54rDsffJhGUjqC5Q, kuk9VQFkn42GKCcmaLBkaQ..."


In [7]:
#csv_name = 'yelp_dataset/yelp_network_data.csv'
#user_business_reviews.to_csv(csv_name, index=False)

### User information
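Contains the first names of users who have reviewed home services, together with the city, state, and postal code of one business each user reviewed (the first matching row in the merged table). Note that these location fields come from the business record, not from the user's profile.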

In [9]:
# Build a one-row-per-reviewer table.
# `name_y` is the reviewer's name (the user side of the merge); city, state and
# postal_code come from the business side of the merge, i.e. they describe the
# reviewed business, not the reviewer's own address.
# rename() is chained so it returns a new frame instead of mutating a column
# slice of user_business_reviews in place (which only ran without a
# SettingWithCopyWarning because that check is disabled in this notebook).
user_info = (
    user_business_reviews[['name_y', 'user_id', 'city', 'state', 'postal_code']]
    .rename(columns={'name_y': 'name'})
)
# drop_duplicates keeps the first row seen for each user_id, so a reviewer with
# several reviews is represented by the location of their first listed review.
user_info_clean = user_info.drop_duplicates('user_id')
print(user_info_clean.shape)
user_info_clean.head()

(223741, 5)


Unnamed: 0,name,user_id,city,state,postal_code
0,Brittney,nKBtfZ93gPYybGEz2QOvTQ,Mesa,AZ,85205
1,Angie,UjVtviHTm2mgZnXCfl33CQ,Mesa,AZ,85205
2,Tiffany,L498DJb5YDAtoqgv9thWCg,Mesa,AZ,85205
12,Lorraine,KQ5XRSIeuzh8JuFyo7GXQA,Mesa,AZ,85205
13,Laura,U_FfJTKoLsutJf-r98H9EA,Mesa,AZ,85205


In [10]:
# Write the de-duplicated reviewer table alongside the other dataset files.
# The folder comes from DATA_FOLDER rather than a hard-coded copy of it;
# index=False omits the DataFrame index from the CSV.
user_info_clean.to_csv('{}/client_data.csv'.format(DATA_FOLDER), index=False)

### Business information
Contains the names and addresses of businesses in home services, along with ratings and category information

In [11]:
# Build a one-row-per-business table with name, address, rating and categories.
# `name_x` is the business name (the business_reviews side of the merge).
# rename() is chained so it returns a new frame instead of mutating a column
# slice of user_business_reviews in place (which only ran without a
# SettingWithCopyWarning because that check is disabled in this notebook).
business_columns = ['name_x', 'business_id', 'address', 'city', 'state', 'postal_code',
                    'stars', 'review_count', 'categories']
business_info = (
    user_business_reviews[business_columns]
    .rename(columns={'name_x': 'name'})
)
# Each business appears once per review in the merged table; keep the first row
# for each business_id.
business_info_clean = business_info.drop_duplicates('business_id')
print(business_info_clean.shape)
business_info_clean.head()

(20653, 9)


Unnamed: 0,name,business_id,address,city,state,postal_code,stars,review_count,categories
0,USE MY GUY SERVICES LLC,51M2Kk903DFYI6gnB5I6SQ,4827 E Downing Cir,Mesa,AZ,85205,4.5,26.0,"Home Services, Plumbing, Electricians, Handyma..."
4,Anytime Auto Glass,b8QAxQUBy14H6AJt7MUMgw,"2659 W Guadalupe Rd, Ste D-202",Mesa,AZ,85202,4.5,90.0,"Automotive, Home Services, Auto Glass Services..."
5,Tria Plumbing,d6v2QeGfuHnuNxugkc1PRQ,,Mesa,AZ,85203,5.0,4.0,"Home Services, Plumbing"
6,Beck's Floor Installations,GB09m66C1QKu0dI1HW2hsA,,Mesa,AZ,85205,5.0,24.0,"Carpet Installation, Flooring, Carpeting, Home..."
7,Guild Mortgage,4cChb-ukCS91erbmsKmTtQ,"1400 N Gilbert Rd, Ste B",Gilbert,AZ,85234,3.5,3.0,"Mortgage Brokers, Financial Services, Real Est..."


In [12]:
# Write the de-duplicated business table alongside the other dataset files.
# The folder comes from DATA_FOLDER rather than a hard-coded copy of it;
# index=False omits the DataFrame index from the CSV.
business_info_clean.to_csv('{}/contractor_data.csv'.format(DATA_FOLDER), index=False)