In [6]:
import numpy as np
import pandas as pd
import json

#load the two line-delimited JSON inputs (lines=True: one JSON record per line)
#p_data holds the photo records
p_data = pd.read_json(path_or_buf='photos.json', lines=True)
#b_data holds the business records
b_data = pd.read_json(path_or_buf='business.json', lines=True)

#flag each business whose categories mention 'Restaurant', then keep only those
def _is_restaurant(categories):
    """Return 1 if the text form of `categories` contains 'Restaurant', else 0."""
    # str() is applied first so the substring test works on any value type
    return int('Restaurant' in str(categories))

b_data['restaurant'] = b_data['categories'].map(_is_restaurant)

#restaurant-only rows, renumbered from 0 (the old index is discarded)
r_data = b_data.loc[b_data['restaurant'].eq(1)].reset_index(drop=True)


#create flags for photo types: one 0/1 column named 'p_<label>' per distinct label
#dropna: a missing label cannot be concatenated into a column name
l = p_data['label'].dropna().unique()
for photo_label in l:
    # vectorised comparison instead of a per-row lambda; int64 matches the
    # dtype the original 1/0 mapping produced
    p_data['p_' + photo_label] = (p_data['label'] == photo_label).astype('int64')

#drop unnecessary columns
#use the `columns=` keyword: passing axis positionally (drop('x', 1)) was
#deprecated in pandas 1.3 and raises TypeError from pandas 2.0 on
p_data = p_data.drop(columns=['photo_id', 'label'])

#normalise captions once: a missing caption is treated as empty text, so it is
#not counted as "caption exists" and cannot raise TypeError in the '!' test
captions = p_data['caption'].fillna('').astype(str)

#create flags for photo caption exists (1 when the caption is non-empty)
p_data['p_caption'] = captions.ne('').astype('int64')

#create flag for '!' exists in photo caption
#regex=False: '!' is matched as a literal substring
p_data['p_caption_!'] = captions.str.contains('!', regex=False).astype('int64')

#flatten data: one row per business_id, each flag column summed over that
#business's photo rows (so every value is a count of photos)
photo_flag_columns = ['p_food', 'p_outside', 'p_inside', 'p_drink',
                      'p_menu', 'p_caption', 'p_caption_!']

#the columns must be selected with a list: selecting with a bare tuple
#(groupby(...)['a', 'b']) was deprecated in pandas 1.0 and raises from pandas 2.0 on
p_data_n = p_data.groupby(['business_id'])[photo_flag_columns].sum()

#turn business_id back from the group index into an ordinary column
p_data_n = p_data_n.reset_index()

#attach the per-business photo counts to the restaurant rows
#left join on business_id: every row of r_data is kept, and rows with no
#match in p_data_n get NaN in the photo columns
r_data_n = r_data.merge(right=p_data_n, on='business_id', how='left')

#replace missing data with zeroes in the photo-count columns
photo_count_columns = ['p_food', 'p_outside', 'p_inside', 'p_drink',
                       'p_menu', 'p_caption', 'p_caption_!']

#assign the filled columns back instead of calling
#r_data_n[col].fillna(0, inplace=True): that chained in-place form acts on an
#intermediate Series, warns in recent pandas and leaves the frame unchanged
#when Copy-on-Write is enabled
r_data_n[photo_count_columns] = r_data_n[photo_count_columns].fillna(0)

#preview the first rows rather than displaying the whole frame
r_data_n.head(10)

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,...,stars,state,restaurant,p_food,p_outside,p_inside,p_drink,p_menu,p_caption,p_caption_!
0,2824 Milton Rd,"{'GoodForMeal': {'dessert': False, 'latenight'...",mLwM-h2YhXl2NCgdS84_Bw,"[Food, Soul Food, Convenience Stores, Restaura...",Charlotte,"{'Monday': '10:00-22:00', 'Tuesday': '10:00-22...",0,35.236870,-80.741976,South Florida Style Chicken & Ribs,...,4.5,NC,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4719 N 20Th St,"{'RestaurantsTableService': False, 'GoodForMea...",duHFBe87uNSXImQmvBh87Q,"[Sandwiches, Restaurants]",Phoenix,{},0,33.505928,-112.038847,Blimpie,...,4.5,AZ,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,9616 E Independence Blvd,"{'Alcohol': 'full_bar', 'HasTV': True, 'NoiseL...",SDMRxmcKPNt1AHPBKqO64Q,"[Burgers, Bars, Restaurants, Sports Bars, Nigh...",Matthews,"{'Monday': '11:00-0:00', 'Tuesday': '11:00-0:0...",1,35.135196,-80.714683,Applebee's,...,2.0,NC,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,190 E Dallas Rd,"{'RestaurantsAttire': 'casual', 'Alcohol': 'no...",iFEiMJoEqyB9O8OUNSdLzA,"[Chinese, Restaurants]",Stanley,"{'Monday': '11:30-22:00', 'Tuesday': '11:30-22...",1,35.355085,-81.087268,China Garden,...,3.0,NC,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,4759 Liberty Ave,"{'RestaurantsTableService': True, 'GoodForMeal...",HmI9nhgOkrXlUr6KZGZZew,"[Sandwiches, Restaurants, Italian, Diners, Bre...",Pittsburgh,"{'Sunday': '8:00-12:00', 'Tuesday': '8:00-12:0...",1,40.461350,-79.948113,Rocky's,...,3.0,PA,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,7070 Saint Barbara Boulevard,"{'HasTV': True, 'RestaurantsGoodForGroups': Tr...",qnpvw-uQyRn9nlClWFK9aA,"[Chicken Wings, Restaurants]",Mississauga,{},1,43.639236,-79.716199,Wild Wing,...,2.5,ON,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,4502 East Towne Blvd,"{'RestaurantsTableService': True, 'GoodForMeal...",TXiEgINSZ75d3EtvLvkc4Q,"[Restaurants, Seafood, American (Traditional)]",Madison,"{'Monday': '11:00-22:00', 'Tuesday': '11:00-22...",1,43.128034,-89.307157,Red Lobster,...,3.0,WI,1,6.0,0.0,0.0,0.0,0.0,6.0,2.0
7,1794 Liverpool Road,"{'RestaurantsTableService': True, 'GoodForMeal...",KW4y7uDGjVfU3ClkEjIGhg,"[Burgers, Restaurants]",Pickering,{},1,43.834351,-79.090135,The Works,...,3.0,ON,1,2.0,0.0,0.0,0.0,0.0,2.0,0.0
8,30 High Tech Rd,"{'Alcohol': 'full_bar', 'HasTV': True, 'NoiseL...",reWc1g65PNZnKz_Ub9QKOQ,"[Comfort Food, Canadian (New), Restaurants, Ba...",Richmond Hill,"{'Monday': '11:00-23:00', 'Tuesday': '11:00-23...",1,43.841993,-79.429343,Milestones Restaurants,...,2.5,ON,1,7.0,0.0,0.0,4.0,0.0,1.0,0.0
9,"280 W Beaver Creek Road, Unit 30","{'GoodForMeal': {'dessert': False, 'latenight'...",L1XHTn7S-6har9UGAPjcWQ,"[Taiwanese, Food, Coffee & Tea, Restaurants]",Richmond Hill,{},1,43.843475,-79.387686,Papa Chang's Tea Bistro,...,4.0,ON,1,0.0,1.0,1.0,0.0,1.0,0.0,0.0


In [7]:
#preview the first rows of the per-business photo counts rather than the whole frame
p_data_n.head(10)

Unnamed: 0,business_id,p_food,p_outside,p_inside,p_drink,p_menu,p_caption,p_caption_!
0,--6MefnULPED_I942VcFNA,1,0,0,0,0,1,0
1,--9e1ONYQuAa-CB_Rrw7Tw,109,3,14,1,0,79,15
2,--DaPTJW3-tB1vP-PfdTEg,0,1,0,0,0,0,0
3,--FBCX-N37CMYDfs790Bnw,2,2,0,0,0,2,0
4,--KCl2FvVQpvjzmZSPyviA,3,0,0,0,0,0,0
5,--Ni3oJ4VOqfOEu7Sj2Vzg,7,0,0,0,0,0,0
6,--S62v0QgkqQaVUhFnNHrw,8,0,1,0,0,8,1
7,--SrzpvFLwP_YFwB_Cetow,8,0,2,0,0,8,0
8,--cjBEbXMI2obtaRHNSFrA,2,1,4,3,0,0,0
9,--qJNlGWyvPJfBrqwp9c0w,1,1,0,0,0,2,0
