# Dataset Making
* Yelp.com의 실험 데이터를 생성하는 코드
* 식당(business) 정보를 Categories 컬럼을 기준으로 Restaurant, Cafe, Bar&Pub 3가지의 하위 도메인으로 분할함
* 분할된 각 도메인에 리뷰와 사용자 정보를 병합하여 최종 데이터셋을 생성함

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
%matplotlib inline

In [2]:
# Business processing: load the Yelp business records
# (lines=True: the file is parsed as one JSON object per line).
business_json_path = 'yelp_academic_dataset_business.json'
df_business = pd.read_json(path_or_buf=business_json_path, lines=True)

# Report the table dimensions, then preview the first rows.
print(df_business.shape)
df_business.head(n=5)

(150346, 14)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ","1616 Chapala St, Ste 2",Santa Barbara,CA,93101,34.426679,-119.711197,5.0,7,0,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,87 Grasso Plaza Shopping Center,Affton,MO,63123,38.551126,-90.335695,3.0,15,1,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...","{'Monday': '0:0-0:0', 'Tuesday': '8:0-18:30', ..."
2,tUFrWirKiKi_TAnsVWINQQ,Target,5255 E Broadway Blvd,Tucson,AZ,85711,32.223236,-110.880452,3.5,22,0,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...","{'Monday': '8:0-22:0', 'Tuesday': '8:0-22:0', ..."
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ..."
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2..."


In [7]:
# Persist the raw business table as CSV; the row index is not written.
business_csv_path = 'yelp_business.csv'
df_business.to_csv(business_csv_path, index=False)

In [7]:
# Category filtering --> Restaurant / Pub / Bar / Cafe
# The 'Food' keyword is deliberately left out --> it also matched grocery stores.
# TODO: look into how related prior studies define these categories.

# Case-insensitive substring match; rows with a missing category count as "no match".
is_restaurant = df_business['categories'].str.contains(
    'Restaurant|Food Trucks', case=False, na=False)
df_restaurant = df_business[is_restaurant]

print(df_restaurant.shape)
df_restaurant.head(n=5)

(52551, 14)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ..."
5,CF33F8-E6oudUQ46HnavjQ,Sonic Drive-In,615 S Main St,Ashland City,TN,37015,36.269593,-87.058943,2.0,6,1,"{'BusinessParking': 'None', 'BusinessAcceptsCr...","Burgers, Fast Food, Sandwiches, Food, Ice Crea...","{'Monday': '0:0-0:0', 'Tuesday': '6:0-22:0', '..."
8,k0hlBqXX-Bt0vf1op7Jr1w,Tsevi's Pub And Grill,8025 Mackenzie Rd,Affton,MO,63123,38.565165,-90.321087,3.0,19,0,"{'Caters': 'True', 'Alcohol': 'u'full_bar'', '...","Pubs, Restaurants, Italian, Bars, American (Tr...",
9,bBDDEgkFA1Otx9Lfe7BZUQ,Sonic Drive-In,2312 Dickerson Pike,Nashville,TN,37207,36.208102,-86.76817,1.5,10,1,"{'RestaurantsAttire': ''casual'', 'Restaurants...","Ice Cream & Frozen Yogurt, Fast Food, Burgers,...","{'Monday': '0:0-0:0', 'Tuesday': '6:0-21:0', '..."
11,eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,,Tampa Bay,FL,33602,27.955269,-82.45632,4.0,10,1,"{'Alcohol': ''none'', 'OutdoorSeating': 'None'...","Vietnamese, Food, Restaurants, Food Trucks","{'Monday': '11:0-14:0', 'Tuesday': '11:0-14:0'..."


In [8]:
# Cafe domain: any business whose category list mentions a cafe/dessert keyword
# (case-insensitive substring match; missing categories count as "no match").
is_cafe = df_business['categories'].str.contains(
    'Cafe|Coffee|Ice Cream|Yogurt|Bakery|Bakeries|Juice|Smoothies|Desserts|Donuts',
    case=False, na=False)
df_cafe = df_business[is_cafe]

print(df_cafe.shape)
df_cafe.head(n=5)

(15125, 14)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,935 Race St,Philadelphia,PA,19107,39.955505,-75.155564,4.0,80,1,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...","{'Monday': '7:0-20:0', 'Tuesday': '7:0-20:0', ..."
5,CF33F8-E6oudUQ46HnavjQ,Sonic Drive-In,615 S Main St,Ashland City,TN,37015,36.269593,-87.058943,2.0,6,1,"{'BusinessParking': 'None', 'BusinessAcceptsCr...","Burgers, Fast Food, Sandwiches, Food, Ice Crea...","{'Monday': '0:0-0:0', 'Tuesday': '6:0-22:0', '..."
9,bBDDEgkFA1Otx9Lfe7BZUQ,Sonic Drive-In,2312 Dickerson Pike,Nashville,TN,37207,36.208102,-86.76817,1.5,10,1,"{'RestaurantsAttire': ''casual'', 'Restaurants...","Ice Cream & Frozen Yogurt, Fast Food, Burgers,...","{'Monday': '0:0-0:0', 'Tuesday': '6:0-21:0', '..."
14,0bPLkL0QhhPO5kt1_EXmNQ,Zio's Italian Market,2575 E Bay Dr,Largo,FL,33771,27.916116,-82.760461,4.5,100,0,"{'OutdoorSeating': 'False', 'RestaurantsGoodFo...","Food, Delis, Italian, Bakeries, Restaurants","{'Monday': '10:0-18:0', 'Tuesday': '10:0-20:0'..."
20,WKMJwqnfZKsAae75RMP6jA,Roast Coffeehouse and Wine Bar,10359 104 Street NW,Edmonton,AB,T5J 1B9,53.546045,-113.499169,4.0,40,0,"{'OutdoorSeating': 'False', 'Caters': 'True', ...","Coffee & Tea, Food, Cafes, Bars, Wine Bars, Re...","{'Monday': '8:0-18:0', 'Tuesday': '8:0-18:0', ..."


In [9]:
# Bar & pub domain: any business whose category list mentions an alcohol-venue
# keyword (case-insensitive substring match; missing categories count as "no match").
is_bar = df_business['categories'].str.contains(
    'Bar|Pub|Beer|Wine|Spirits|Breweries|Brewery',
    case=False, na=False)
df_bar = df_business[is_bar]

print(df_bar.shape)
df_bar.head(n=5)

(21178, 14)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,attributes,categories,hours
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,1,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2..."
8,k0hlBqXX-Bt0vf1op7Jr1w,Tsevi's Pub And Grill,8025 Mackenzie Rd,Affton,MO,63123,38.565165,-90.321087,3.0,19,0,"{'Caters': 'True', 'Alcohol': 'u'full_bar'', '...","Pubs, Restaurants, Italian, Bars, American (Tr...",
15,MUTTqe8uqyMdBl186RmNeA,Tuna Bar,205 Race St,Philadelphia,PA,19106,39.953949,-75.143226,4.0,245,1,"{'RestaurantsReservations': 'True', 'Restauran...","Sushi Bars, Restaurants, Japanese","{'Tuesday': '13:30-22:0', 'Wednesday': '13:30-..."
20,WKMJwqnfZKsAae75RMP6jA,Roast Coffeehouse and Wine Bar,10359 104 Street NW,Edmonton,AB,T5J 1B9,53.546045,-113.499169,4.0,40,0,"{'OutdoorSeating': 'False', 'Caters': 'True', ...","Coffee & Tea, Food, Cafes, Bars, Wine Bars, Re...","{'Monday': '8:0-18:0', 'Tuesday': '8:0-18:0', ..."
28,QdN72BWoyFypdGJhhI5r7g,Bar One,767 S 9th St,Philadelphia,PA,19147,39.939825,-75.157447,4.0,65,0,"{'Smoking': 'u'no'', 'NoiseLevel': 'u'average'...","Cocktail Bars, Bars, Italian, Nightlife, Resta...","{'Monday': '16:0-0:0', 'Tuesday': '16:0-0:0', ..."


In [11]:
# Remove, from each dataset, the rows whose categories overlap with another dataset.
# NOTE(review): these are case-insensitive substring matches, so 'Bar' also hits
# entries such as 'Sushi Bars'. A row like "Sushi Bars, Restaurants, Japanese" is
# dropped from df_restaurant here and from df_bar by its 'Restaurant' exclusion,
# i.e. it ends up in no domain. Confirm this is intended.

restaurant_exclude = 'Cafe|Coffee|Pub|Bar|Wine|Spirits|Brewery|Breweries|Ice Cream|Yogurt|Bakery|Bakeries|Juice|Smoothies|desserts|Donuts'
cafe_exclude = 'Beer|Wine|Spirits|Breweries|Brewery|Pub|Restaurant'
# 'Public' and 'Barber' are excluded because the earlier 'Pub' / 'Bar'
# substring filters also match them.
bar_exclude = 'Restaurant|Cafe|Coffee|Public|Barber|juice'

restaurant_overlap = df_restaurant['categories'].str.contains(
    restaurant_exclude, case=False, na=False)
df_restaurant = df_restaurant[~restaurant_overlap]

cafe_overlap = df_cafe['categories'].str.contains(
    cafe_exclude, case=False, na=False)
df_cafe = df_cafe[~cafe_overlap]

bar_overlap = df_bar['categories'].str.contains(
    bar_exclude, case=False, na=False)
df_bar = df_bar[~bar_overlap]

for frame in (df_restaurant, df_cafe, df_bar):
    print(frame.shape)

(31878, 14)
(5551, 14)
(4327, 14)


In [12]:
# Find businesses whose business_id still appears in more than one dataset
# (they are removed in the next step).
rest_set, cafe_set, bar_set = (
    set(domain_df['business_id'].unique())
    for domain_df in (df_restaurant, df_cafe, df_bar)
)

print(rest_set & bar_set)
print(cafe_set & bar_set)

{'ZBaetx-wjXvQVzfB_OnASQ'}
{'8DsUgNIC4q7XBFmR9I-V1w', 'zlJrmMLKmM3AYTePO-qpPA', 'rXrDXe4CDifD44b1Z2_5BQ', 'riTOShRUi4DzcJ-CLEegCw', 'Q36X5xXZkkM8IEpw3mU7sQ', '7jTaLGVD3jy9VF7lxPEgww'}


In [16]:
# Resolve the remaining overlaps by hand: each business_id listed below is
# dropped from one dataset so that it no longer appears in two of them.
bar_ids_to_drop = [
    'ZBaetx-wjXvQVzfB_OnASQ',
    'Q36X5xXZkkM8IEpw3mU7sQ',
    '7jTaLGVD3jy9VF7lxPEgww',
    'rXrDXe4CDifD44b1Z2_5BQ',
]
cafe_ids_to_drop = [
    'riTOShRUi4DzcJ-CLEegCw',
    'zlJrmMLKmM3AYTePO-qpPA',
    '8DsUgNIC4q7XBFmR9I-V1w',
]

df_bar = df_bar[~df_bar['business_id'].isin(bar_ids_to_drop)]
df_cafe = df_cafe[~df_cafe['business_id'].isin(cafe_ids_to_drop)]

for frame in (df_restaurant, df_cafe, df_bar):
    print(frame.shape)

(31878, 14)
(5548, 14)
(4323, 14)


In [27]:
# Review processing: open the review file as a chunked reader so that it is
# consumed `size` rows at a time instead of being loaded in one go.
review_json_path = 'yelp_academic_dataset_review.json'
size = 1_000_000  # rows per chunk
review_dtypes = {
    'review_id': str,
    'user_id': str,
    'business_id': str,
    'stars': int,
    'date': str,
    'text': str,
    'useful': int,
    'funny': int,
    'cool': int,
}
review = pd.read_json(
    review_json_path,
    lines=True,
    dtype=review_dtypes,
    chunksize=size,
)

In [None]:
# The review file arrives in several chunks; handle them one at a time.
cafe_chunks = []
for review_chunk in review:
    # Inner join against the cafe businesses: only reviews written for a
    # business present in df_cafe survive.
    cafe_reviews = pd.merge(left=df_cafe, right=review_chunk,
                            how='inner', on='business_id')
    # Progress feedback for the chunk just processed.
    print(f"{cafe_reviews.shape[0]} out of {size:,} related reviews")
    cafe_chunks.append(cafe_reviews)

# Stitch the per-chunk results back into a single frame with a fresh index.
df_cafe_total = pd.concat(cafe_chunks, ignore_index=True)

In [23]:
# Dimensions and first rows of the cafe business/review table.
print(df_cafe_total.shape)
df_cafe_total.head(n=5)

(215005, 22)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars_x,review_count,...,categories,hours,review_id,user_id,stars_y,useful,funny,cool,text,date
0,JX4tUpd09YFchLBuI43lGw,Naked Cyber Cafe & Espresso Bar,10303 108 Street NW,Edmonton,AB,T5J 1L7,53.544682,-113.506589,4.0,12,...,"Arts & Entertainment, Music Venues, Internet S...","{'Monday': '11:0-1:0', 'Tuesday': '11:0-1:0', ...",muAn7Bsv3qRg2rIM4fgp3Q,xTlqpuuqtO0FWs12gd1v6A,5,1,0,0,Came here to print off some documents and was ...,2014-10-15 01:49:26
1,JX4tUpd09YFchLBuI43lGw,Naked Cyber Cafe & Espresso Bar,10303 108 Street NW,Edmonton,AB,T5J 1L7,53.544682,-113.506589,4.0,12,...,"Arts & Entertainment, Music Venues, Internet S...","{'Monday': '11:0-1:0', 'Tuesday': '11:0-1:0', ...",0R35U5D5EWo5n81zq4-0lA,f9lq4KAus-xCsmJmjXeKVw,3,6,1,1,Naked Cyber & Expresso Bar is a 24 hours cyber...,2010-01-26 21:14:29
2,JX4tUpd09YFchLBuI43lGw,Naked Cyber Cafe & Espresso Bar,10303 108 Street NW,Edmonton,AB,T5J 1L7,53.544682,-113.506589,4.0,12,...,"Arts & Entertainment, Music Venues, Internet S...","{'Monday': '11:0-1:0', 'Tuesday': '11:0-1:0', ...",Yi2G07pOhSczwkDzpOidJA,r3QexFIhBXBT99canAgVEg,4,6,0,1,I visited Naked for the first time this weeken...,2013-02-05 17:01:52
3,JX4tUpd09YFchLBuI43lGw,Naked Cyber Cafe & Espresso Bar,10303 108 Street NW,Edmonton,AB,T5J 1L7,53.544682,-113.506589,4.0,12,...,"Arts & Entertainment, Music Venues, Internet S...","{'Monday': '11:0-1:0', 'Tuesday': '11:0-1:0', ...",Y5KVXAs5mOkG37Z8iGls2w,xjafuSH_qDHXEkU0LyAxvg,4,1,0,0,Neat cafe! Delicious sandwiches and sweets wit...,2017-05-03 16:35:44
4,JX4tUpd09YFchLBuI43lGw,Naked Cyber Cafe & Espresso Bar,10303 108 Street NW,Edmonton,AB,T5J 1L7,53.544682,-113.506589,4.0,12,...,"Arts & Entertainment, Music Venues, Internet S...","{'Monday': '11:0-1:0', 'Tuesday': '11:0-1:0', ...",NARHDTpx4x0YQ_8-r9W1Vg,758g6NGLp9deCbvowz62Ww,3,4,5,0,"Naked is a cyber-cafe, one of the first ones t...",2008-10-22 02:56:43


In [25]:
# A chunked reader can only be iterated once, and `review` has already been
# consumed by the cafe pass. Re-open the file so this cell works when the
# notebook is run top to bottom (otherwise no chunks are produced and
# pd.concat fails on an empty list).
review = pd.read_json(review_json_path, lines=True,
                      dtype={'review_id':str,'user_id':str,
                             'business_id':str,'stars':int,
                             'date':str,'text':str,'useful':int,
                             'funny':int,'cool':int},
                      chunksize=size)

chunk_list = []
for chunk_review in review:
    # Inner merge with the bar businesses so only their reviews remain
    chunk_merged = pd.merge(df_bar, chunk_review, on='business_id', how='inner')
    # Show feedback on progress
    print(f"{chunk_merged.shape[0]} out of {size:,} related reviews")
    chunk_list.append(chunk_merged)
# Concatenate the per-chunk results back into one dataframe
df_bar_total = pd.concat(chunk_list, ignore_index=True, join='outer', axis=0)

28006 out of 1,000,000 related reviews
21637 out of 1,000,000 related reviews
26872 out of 1,000,000 related reviews
25618 out of 1,000,000 related reviews
25285 out of 1,000,000 related reviews
26431 out of 1,000,000 related reviews
24089 out of 1,000,000 related reviews


In [26]:
# Dimensions and first rows of the bar business/review table.
print(df_bar_total.shape)
df_bar_total.head(n=5)

(177938, 22)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars_x,review_count,...,categories,hours,review_id,user_id,stars_y,useful,funny,cool,text,date
0,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,...,"Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2...",aymw6DJh-j3Kkb4DhLSHmQ,lrtGPAmDqCFnbfAKiB4NmA,4,0,0,0,The craft brewery scene has finally hit the sl...,2019-11-02 01:18:50
1,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,...,"Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2...",ApjOijn6shg7IRSus0awMg,Qp43wr0CkKw4W79MFu_MGw,5,0,0,0,Glad to have a local brewery so close to my ho...,2019-08-16 13:26:24
2,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,...,"Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2...",LsHkuF1GP9H-6arEgwzUWg,DCvqkfO3exqOaTf0-fvyLQ,5,0,0,0,First visit to this new and very local brewery...,2019-11-24 03:49:39
3,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,...,"Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2...",4WUbaBVdES19JKWF7Gvh7g,Y7BFSuNRNzvFbyZcZSXQJw,5,0,0,0,What an amazing brewery and an absolute asset ...,2020-02-27 06:21:36
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,101 Walnut St,Green Lane,PA,18054,40.338183,-75.471659,4.5,13,...,"Brewpubs, Breweries, Food","{'Wednesday': '14:0-22:0', 'Thursday': '16:0-2...",pcy8VIa3REGXpFPy2KzRkg,rNU2GcAFNSc4XsZQkEbmWw,5,0,0,0,"Great atmosphere, cool people and excellent be...",2019-08-03 02:24:02


In [28]:
# A chunked reader can only be iterated once, and `review` has already been
# consumed by an earlier pass. Re-open the file so this cell works when the
# notebook is run top to bottom (otherwise no chunks are produced and
# pd.concat fails on an empty list).
review = pd.read_json(review_json_path, lines=True,
                      dtype={'review_id':str,'user_id':str,
                             'business_id':str,'stars':int,
                             'date':str,'text':str,'useful':int,
                             'funny':int,'cool':int},
                      chunksize=size)

chunk_list = []
for chunk_review in review:
    # Inner merge with the restaurant businesses so only their reviews remain
    chunk_merged = pd.merge(df_restaurant, chunk_review, on='business_id', how='inner')
    # Show feedback on progress
    print(f"{chunk_merged.shape[0]} out of {size:,} related reviews")
    chunk_list.append(chunk_merged)
# Concatenate the per-chunk results back into one dataframe
df_restaurant_total = pd.concat(chunk_list, ignore_index=True, join='outer', axis=0)

321815 out of 1,000,000 related reviews
323522 out of 1,000,000 related reviews
326914 out of 1,000,000 related reviews
324271 out of 1,000,000 related reviews
339532 out of 1,000,000 related reviews
336562 out of 1,000,000 related reviews
307674 out of 1,000,000 related reviews


In [29]:
# Dimensions and first rows of the restaurant business/review table.
print(df_restaurant_total.shape)
df_restaurant_total.head(n=5)

(2280290, 22)


Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars_x,review_count,...,categories,hours,review_id,user_id,stars_y,useful,funny,cool,text,date
0,eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,,Tampa Bay,FL,33602,27.955269,-82.45632,4.0,10,...,"Vietnamese, Food, Restaurants, Food Trucks","{'Monday': '11:0-14:0', 'Tuesday': '11:0-14:0'...",oUv3avPmLiTwIgYlmQQXBQ,nnu9h6du4E6oqMasPgKR3Q,5,1,0,0,I eat pho about 4 times a week and from a spec...,2019-04-04 16:03:00
1,eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,,Tampa Bay,FL,33602,27.955269,-82.45632,4.0,10,...,"Vietnamese, Food, Restaurants, Food Trucks","{'Monday': '11:0-14:0', 'Tuesday': '11:0-14:0'...",BQ4GJP2G8_NdQ9KHboueTg,JlEdjZvhAbFCU-ObZQb1lw,5,2,0,1,I've been in Wesley Chapel area for about 2 ye...,2018-10-23 00:36:29
2,eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,,Tampa Bay,FL,33602,27.955269,-82.45632,4.0,10,...,"Vietnamese, Food, Restaurants, Food Trucks","{'Monday': '11:0-14:0', 'Tuesday': '11:0-14:0'...",Oe-THb7XKNZppI6AbrnXUQ,P1ccUaz-s3kumq3RHnOLvg,3,1,1,0,Good tasted good - but I'm a bit confused as t...,2019-10-18 18:01:29
3,eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,,Tampa Bay,FL,33602,27.955269,-82.45632,4.0,10,...,"Vietnamese, Food, Restaurants, Food Trucks","{'Monday': '11:0-14:0', 'Tuesday': '11:0-14:0'...",BY7b9AXLyRGhLcvjrgHdjg,AaoRrywzPbEUI6R3ozS19g,4,2,0,1,"Ordered the chicken wings. They were hot, fre...",2019-03-29 19:09:43
4,eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,,Tampa Bay,FL,33602,27.955269,-82.45632,4.0,10,...,"Vietnamese, Food, Restaurants, Food Trucks","{'Monday': '11:0-14:0', 'Tuesday': '11:0-14:0'...",1ez0Y6HJrAwsdPL6MWsreQ,y934W72J7CN4owsSSdpU3A,1,2,2,1,How can an order of pho take an 1 hour and 15 ...,2018-12-05 17:38:51


In [30]:
# Build the user dataset.
# The file is parsed line by line, one JSON object per line. The context
# manager closes the handle even if a line fails to parse, and the explicit
# UTF-8 encoding keeps the read independent of the platform's default codec.
with open('yelp_academic_dataset_user.json', encoding='utf-8') as data_file:
    data = [json.loads(line) for line in data_file]

user_df = pd.DataFrame(data)

print(user_df.shape)
user_df.head()

(1987897, 22)


Unnamed: 0,user_id,name,review_count,yelping_since,useful,funny,cool,elite,friends,fans,...,compliment_more,compliment_profile,compliment_cute,compliment_list,compliment_note,compliment_plain,compliment_cool,compliment_funny,compliment_writer,compliment_photos
0,qVc8ODYU5SZjKXVBgXdI7w,Walker,585,2007-01-25 16:47:26,7217,1259,5994,2007,"NSCy54eWehBJyZdG2iE84w, pe42u7DcCH2QmI81NX-8qA...",267,...,65,55,56,18,232,844,467,467,239,180
1,j14WgRoU_-2ZE1aw1dXrJg,Daniel,4333,2009-01-25 04:35:42,43091,13066,27281,"2009,2010,2011,2012,2013,2014,2015,2016,2017,2...","ueRPE0CX75ePGMqOFVj6IQ, 52oH4DrRvzzl8wh5UXyU0A...",3138,...,264,184,157,251,1847,7054,3131,3131,1521,1946
2,2WnXYQFK0hXEoTxPtV2zvg,Steph,665,2008-07-25 10:41:00,2086,1010,1003,20092010201120122013,"LuO3Bn4f3rlhyHIaNfTlnA, j9B4XdHUhDfTKVecyWQgyA...",52,...,13,10,17,3,66,96,119,119,35,18
3,SZDeASXq7o05mMNLshsdIA,Gwen,224,2005-11-29 04:38:33,512,330,299,200920102011,"enx1vVPnfdNUdPho6PH_wg, 4wOcvMLtU6a9Lslggq74Vg...",28,...,4,1,6,2,12,16,26,26,10,9
4,hA5lMy-EnncsH4JoR-hFGQ,Karen,79,2007-01-05 19:40:59,29,15,7,,"PBK4q9KEEBHhFvSXCUirIw, 3FWPpM7KU1gXeOM_ZbYMbA...",1,...,1,0,0,0,1,1,0,0,0,0


In [31]:
# Build the final datasets by attaching the user table to every review row.
# These are inner joins on user_id, so a review row without a matching user
# is dropped. Note that df_cafe and df_bar are rebound here: from this point
# on they hold the merged review/user tables, not the business-level subsets.
df_rest = pd.merge(left=df_restaurant_total, right=user_df, on='user_id', how='inner')
df_cafe = pd.merge(left=df_cafe_total, right=user_df, on='user_id', how='inner')
df_bar = pd.merge(left=df_bar_total, right=user_df, on='user_id', how='inner')

for merged in (df_rest, df_cafe, df_bar):
    print(merged.shape)

(2280284, 43)
(215003, 43)
(177937, 43)


In [32]:
# Write each final dataset to its own CSV file (row index omitted).
for frame, csv_name in (
    (df_rest, 'yelp_restaurant.csv'),
    (df_cafe, 'yelp_cafe.csv'),
    (df_bar, 'yelp_bar.csv'),
):
    frame.to_csv(csv_name, index=False)