# Data Preprocessing
* 앞서 yelp_make_dset.ipynb파일에서 생성한 3가지 도메인 데이터셋을 전처리하는 코드
* 전처리 내용 : 상호작용 5회 미만 사용자/식당 제거, 파생변수 생성

In [None]:
import re, ast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df_rest = pd.read_csv('yelp_restaurant.csv') 
df_cafe = pd.read_csv('yelp_cafe.csv') 
df_bar = pd.read_csv('yelp_bar.csv') 
print(df_rest.shape)
print(df_cafe.shape)
print(df_bar.shape)

  exec(code_obj, self.user_global_ns, self.user_ns)


(2280284, 43)
(215003, 43)
(177937, 43)


In [76]:
# 도메인별 유저 수 / 식당 수 체크

print(df_rest['user_id'].nunique())
print(df_cafe['user_id'].nunique())
print(df_bar['user_id'].nunique())

print(df_rest['business_id'].nunique())
print(df_cafe['business_id'].nunique())
print(df_bar['business_id'].nunique())

919879
134989
112152
31878
5548
4323


In [77]:
# 도메인별 중복 user 수 확인
# restaurant | cafe -> 47014명
# restaurant | bar  -> 54301명
# cafe | bar        -> 12001명
# 3개 도메인에 모두 중복 -> 11502명

set_rest_user = set(df_rest['user_id'].unique())
set_cafe_user = set(df_cafe['user_id'].unique())
set_bar_user = set(df_bar['user_id'].unique())

rest_cafe_overlap = set_rest_user.intersection(set_cafe_user) 
rest_bar_overlap = set_rest_user.intersection(set_bar_user)
cafe_bar_overlap = set_cafe_user.intersection(set_bar_user)
rest_cafe_bar_overlap = rest_cafe_overlap.intersection(set_bar_user)

print(f'레스토랑 및 카페 중복 유저 수 : {len(rest_cafe_overlap)}명')
print(f'레스토랑 및 바 중복 유저 수 : {len(rest_bar_overlap)}명')
print(f'카페 및 바 중복 유저 수 : {len(cafe_bar_overlap)}명')
print(f'3개 도메인 전체 중복 유저 수 : {len(rest_cafe_bar_overlap)}명')

레스토랑 및 카페 중복 유저 수 : 87695명
레스토랑 및 바 중복 유저 수 : 66561명
카페 및 바 중복 유저 수 : 23808명
3개 도메인 전체 중복 유저 수 : 22313명


In [78]:
# Interaction Filtering 
# 최소 5회 Interaction이 없는 User / Item만을 사용

user_counts = df_rest['user_id'].value_counts()
rest_counts = df_rest['business_id'].value_counts()
filtered_users = user_counts[user_counts >= 5].index
filtered_rests = rest_counts[rest_counts >= 5].index

filtered_df_rest = df_rest[(df_rest['user_id'].isin(filtered_users)) &
                           df_rest['business_id'].isin(filtered_rests)]

print(filtered_df_rest.shape)
print(filtered_df_rest['user_id'].nunique())
print(filtered_df_rest['business_id'].nunique())

(1070842, 43)
87045
31716


In [79]:
user_counts = df_cafe['user_id'].value_counts()
rest_counts = df_cafe['business_id'].value_counts()
filtered_users = user_counts[user_counts >= 5].index
filtered_rests = rest_counts[rest_counts >= 5].index

filtered_df_cafe = df_cafe[(df_cafe['user_id'].isin(filtered_users)) &
                           df_cafe['business_id'].isin(filtered_rests)]

print(filtered_df_cafe.shape)
print(filtered_df_cafe['user_id'].nunique())
print(filtered_df_cafe['business_id'].nunique())

(51103, 43)
5448
5124


In [80]:
user_counts = df_bar['user_id'].value_counts()
rest_counts = df_bar['business_id'].value_counts()
filtered_users = user_counts[user_counts >= 5].index
filtered_rests = rest_counts[rest_counts >= 5].index

filtered_df_bar = df_bar[(df_bar['user_id'].isin(filtered_users)) &
                           df_bar['business_id'].isin(filtered_rests)]

print(filtered_df_bar.shape)
print(filtered_df_bar['user_id'].nunique())
print(filtered_df_bar['business_id'].nunique())

(42360, 43)
4364
3795


In [81]:
# 도메인별 중복 user 수 확인
# restaurant | cafe -> 47014명
# restaurant | bar  -> 54301명
# cafe | bar        -> 12001명
# 3개 도메인에 모두 중복 -> 11502명

set_rest_user = set(filtered_df_rest['user_id'].unique())
set_cafe_user = set(filtered_df_cafe['user_id'].unique())
set_bar_user = set(filtered_df_bar['user_id'].unique())

rest_cafe_overlap = set_rest_user.intersection(set_cafe_user) 
rest_bar_overlap = set_rest_user.intersection(set_bar_user)
cafe_bar_overlap = set_cafe_user.intersection(set_bar_user)
rest_cafe_bar_overlap = rest_cafe_overlap.intersection(set_bar_user)

print(f'레스토랑 및 카페 중복 유저 수 : {len(rest_cafe_overlap)}명')
print(f'레스토랑 및 바 중복 유저 수 : {len(rest_bar_overlap)}명')
print(f'카페 및 바 중복 유저 수 : {len(cafe_bar_overlap)}명')
print(f'3개 도메인 전체 중복 유저 수 : {len(rest_cafe_bar_overlap)}명')

레스토랑 및 카페 중복 유저 수 : 5223명
레스토랑 및 바 중복 유저 수 : 3945명
카페 및 바 중복 유저 수 : 1661명
3개 도메인 전체 중복 유저 수 : 1659명


In [82]:
pd.options.display.max_columns=999

filtered_df_rest.columns

Index(['business_id', 'name_x', 'address', 'city', 'state', 'postal_code',
       'latitude', 'longitude', 'stars_x', 'review_count_x', 'is_open',
       'attributes', 'categories', 'hours', 'review_id', 'user_id', 'stars_y',
       'useful_x', 'funny_x', 'cool_x', 'text', 'date', 'name_y',
       'review_count_y', 'yelping_since', 'useful_y', 'funny_y', 'cool_y',
       'elite', 'friends', 'fans', 'average_stars', 'compliment_hot',
       'compliment_more', 'compliment_profile', 'compliment_cute',
       'compliment_list', 'compliment_note', 'compliment_plain',
       'compliment_cool', 'compliment_funny', 'compliment_writer',
       'compliment_photos'],
      dtype='object')

In [83]:
# 불필요 컬럼 제거
drop_columns = [
       'compliment_hot', 'compliment_more', 'compliment_profile', 
       'compliment_cute','compliment_list', 'compliment_note', 
       'compliment_plain','compliment_cool', 'compliment_funny', 
       'compliment_writer','compliment_photos', 'is_open', 'date', 
       'useful_x', 'funny_x', 'cool_x','review_id', 'review_count_x', 
       'latitude', 'longitude', 'postal_code']

filtered_df_cafe = filtered_df_cafe.drop(columns=drop_columns)
filtered_df_rest = filtered_df_rest.drop(columns=drop_columns)
filtered_df_bar = filtered_df_bar.drop(columns=drop_columns)

In [84]:
# Num of fans, Num of Friends, Useful+funny+cool, Location

def num_friends(x):
    
    if x == 'None':
        x = 0
    
    else :
        x = len(x.split())
    
    return x

filtered_df_rest['num_of_friends'] = filtered_df_rest['friends'].apply(lambda x: num_friends(x))
filtered_df_cafe['num_of_friends'] = filtered_df_cafe['friends'].apply(lambda x: num_friends(x))
filtered_df_bar['num_of_friends'] = filtered_df_bar['friends'].apply(lambda x: num_friends(x))

filtered_df_rest['user_votes'] = filtered_df_rest['useful_y'] + filtered_df_rest['funny_y'] + filtered_df_rest['cool_y']
filtered_df_cafe['user_votes'] = filtered_df_cafe['useful_y'] + filtered_df_cafe['funny_y'] + filtered_df_cafe['cool_y']
filtered_df_bar['user_votes'] = filtered_df_bar['useful_y'] + filtered_df_bar['funny_y'] + filtered_df_bar['cool_y']

In [85]:
filtered_df_cafe['attributes'].value_counts()[:10]

{'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}", 'BikeParking': 'True', 'RestaurantsPriceRange2': '2', 'WiFi': "u'no'", 'DriveThru': 'False', 'BusinessAcceptsCreditCards': 'True', 'Caters': 'False', 'NoiseLevel': "u'average'", 'OutdoorSeating': 'True', 'GoodForKids': 'True', 'BusinessAcceptsBitcoin': 'False', 'RestaurantsTakeOut': 'True', 'DogsAllowed': 'False', 'RestaurantsDelivery': 'True'}                                                                                                                                                                                                                    452
{'BikeParking': 'True', 'BusinessAcceptsCreditCards': 'True', 'GoodForKids': 'True', 'RestaurantsTakeOut': 'True', 'WiFi': "u'free'", 'RestaurantsGoodForGroups': 'True', 'Caters': 'False', 'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}", 'ByAppointmentOnly': 'False', 'Resta

In [86]:
filtered_df_rest['attributes'].value_counts()[:10]

{'RestaurantsTakeOut': 'True', 'Alcohol': "'full_bar'", 'RestaurantsPriceRange2': '2', 'RestaurantsReservations': 'False', 'NoiseLevel': "u'loud'", 'BusinessAcceptsCreditCards': 'True', 'RestaurantsGoodForGroups': 'True', 'BYOBCorkage': "'yes_free'", 'RestaurantsAttire': "'casual'", 'Corkage': 'False', 'BikeParking': 'False', 'GoodForKids': 'True', 'HasTV': 'True', 'DogsAllowed': 'False', 'Ambience': "{'touristy': False, 'hipster': False, 'romantic': False, 'divey': False, 'intimate': False, 'trendy': False, 'upscale': False, 'classy': True, 'casual': True}", 'RestaurantsTableService': 'True', 'HappyHour': 'True', 'GoodForMeal': "{'dessert': False, 'latenight': None, 'lunch': True, 'dinner': True, 'brunch': None, 'breakfast': False}", 'Caters': 'True', 'OutdoorSeating': 'None', 'RestaurantsDelivery': 'None', 'CoatCheck': 'False', 'BusinessParking': "{u'valet': False, u'garage': False, u'street': None, u'lot': False, u'validated': False}", 'WiFi': "u'no'"}                               

In [87]:
filtered_df_bar['attributes'].value_counts()[:10]

{'RestaurantsAttire': "'casual'", 'NoiseLevel': "u'loud'", 'BusinessParking': "{'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}", 'GoodForKids': 'False', 'BusinessAcceptsCreditCards': 'True', 'BikeParking': 'True', 'RestaurantsGoodForGroups': 'True', 'Alcohol': "'full_bar'", 'OutdoorSeating': 'True', 'ByAppointmentOnly': 'False', 'RestaurantsReservations': 'False', 'HasTV': 'True', 'BestNights': "{'monday': False, 'tuesday': False, 'friday': True, 'wednesday': False, 'thursday': True, 'sunday': False, 'saturday': True}", 'DogsAllowed': 'False', 'WiFi': "'no'", 'CoatCheck': 'False', 'Ambience': "{'touristy': False, 'hipster': False, 'romantic': False, 'divey': False, 'intimate': False, 'trendy': False, 'upscale': False, 'classy': True, 'casual': True}", 'GoodForDancing': 'False', 'HappyHour': 'True', 'Music': "{'dj': False, 'background_music': False, 'no_music': False, 'jukebox': None, 'live': None, 'video': False, 'karaoke': False}", 'Smoking': "u'yes

In [88]:
filtered_df_bar_2 = filtered_df_bar[(filtered_df_bar['attributes'].str.contains('Music')) 
                                    & (filtered_df_bar['attributes'].str.contains('BestNights')) 
                                    &(filtered_df_bar['attributes'].str.contains('Alcohol'))]

print(f'변경 전 bar 데이터셋 크기 : {filtered_df_bar.shape}')
print(f'변경 후 bar 데이터셋 크기 : {filtered_df_bar_2.shape}')
print(filtered_df_bar_2['user_id'].nunique())
print(filtered_df_bar_2['business_id'].nunique())

변경 전 bar 데이터셋 크기 : (42360, 24)
변경 후 bar 데이터셋 크기 : (17998, 24)
3978
873


In [89]:
filtered_df_cafe_2 = filtered_df_cafe[(filtered_df_cafe['attributes'].str.contains('DriveThru')) |
                                      (filtered_df_cafe['attributes'].str.contains('OutdoorSeating'))]

print(f'변경 전 cafe 데이터셋 크기 : {filtered_df_cafe.shape}')
print(f'변경 후 cafe 데이터셋 크기 : {filtered_df_cafe_2.shape}')
print(filtered_df_cafe_2['user_id'].nunique())
print(filtered_df_cafe_2['business_id'].nunique())


변경 전 cafe 데이터셋 크기 : (51103, 24)
변경 후 cafe 데이터셋 크기 : (28117, 24)
5342
2175


In [90]:
# 도메인별 중복 user 수 확인
# restaurant | cafe -> 47014명
# restaurant | bar  -> 54301명
# cafe | bar        -> 12001명
# 3개 도메인에 모두 중복 -> 11502명

set_rest_user = set(filtered_df_rest['user_id'].unique())
set_cafe_user = set(filtered_df_cafe_2['user_id'].unique())
set_bar_user = set(filtered_df_bar_2['user_id'].unique())

rest_cafe_overlap = set_rest_user.intersection(set_cafe_user) 
rest_bar_overlap = set_rest_user.intersection(set_bar_user)
cafe_bar_overlap = set_cafe_user.intersection(set_bar_user)
rest_cafe_bar_overlap = rest_cafe_overlap.intersection(set_bar_user)

print(f'레스토랑 및 카페 중복 유저 수 : {len(rest_cafe_overlap)}명')
print(f'레스토랑 및 바 중복 유저 수 : {len(rest_bar_overlap)}명')
print(f'카페 및 바 중복 유저 수 : {len(cafe_bar_overlap)}명')
print(f'3개 도메인 전체 중복 유저 수 : {len(rest_cafe_bar_overlap)}명')

레스토랑 및 카페 중복 유저 수 : 5121명
레스토랑 및 바 중복 유저 수 : 3634명
카페 및 바 중복 유저 수 : 1509명
3개 도메인 전체 중복 유저 수 : 1508명


In [91]:
print(filtered_df_bar_2['categories'].nunique())
filtered_df_bar_2['categories'].value_counts()[:10]

587


Bars, Nightlife                                                                               839
Nightlife, Bars                                                                               593
Bars, Lounges, Nightlife                                                                      565
Nightlife, Local Flavor, Dive Bars, Bars                                                      300
Nightlife, Pubs, Bars                                                                         288
Nightlife, Bars, Dive Bars                                                                    285
Cocktail Bars, Nightlife, Lounges, Bars                                                       195
Bars, Nightlife, Dive Bars                                                                    195
Dive Bars, Bars, Nightlife                                                                    174
Jazz & Blues, Bars, Arts & Entertainment, Beer, Wine & Spirits, Wine Bars, Food, Nightlife    165
Name: categories, dt

In [92]:
# categories 전처리
# 같은 카테고리인데 , 기준으로 순서가 바뀐 경우 통일
# ex. Bars, Nightlife 및 Nightlife, Bars
# --> Bars, Nightlife로 통일

def category_extract(x):
    x = x.split(', ')
    x = sorted(x)
    x = str(x)[1:-1]
    return x
    
filtered_df_bar_2['categories'] = filtered_df_bar_2['categories'].apply(lambda x: category_extract(x))
filtered_df_bar_2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


Unnamed: 0,business_id,name_x,address,city,state,stars_x,attributes,categories,hours,user_id,stars_y,text,name_y,review_count_y,yelping_since,useful_y,funny_y,cool_y,elite,friends,fans,average_stars,num_of_friends,user_votes
11,97LIZKQGOt3Wz32Zj3tXrA,The Vault,1160 1st Avenue,King Of Prussia,PA,2.5,"{'BusinessAcceptsCreditCards': 'True', 'HappyH...","'Bars', 'Comedy Clubs', 'Lounges', 'Nightlife'","{'Friday': '20:0-1:0', 'Saturday': '20:45-1:0'}",lrtGPAmDqCFnbfAKiB4NmA,3,I have been here a few times for the monthly c...,Charles,360,2012-01-08 01:16:46,200,44,102,2015201620172018201920202021,"MRrN6DH3QGCFcDv5RENYVg, XhlDuX91F1ofznx1QkaSNQ...",8,3.5,53,346
32,Ak34oJ8zxN8Av7dxS9T8IQ,The Centurion Lounge,"Terminal A West, near Gate A14",Philadelphia,PA,3.5,"{'RestaurantsPriceRange2': '3', 'RestaurantsGo...","'Airport Lounges', 'Bars', 'Nightlife'","{'Monday': '5:30-17:0', 'Tuesday': '5:30-17:0'...",pUNvLJwdJIOemOgU98mp1w,4,This is the first AMEX lounge I've visited. It...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040
36,ElW-iNQbGvb5hGWPSOUIcg,The Victor's Pub,1 Market St,Camden,NJ,3.0,"{'RestaurantsPriceRange2': '2', 'BusinessAccep...","'Bars', 'Nightlife', 'Pubs'","{'Monday': '11:0-2:0', 'Tuesday': '11:0-2:0', ...",pUNvLJwdJIOemOgU98mp1w,4,Victors Pub is located in Camden by the Waterf...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040
38,3qEjUW12sH6KOBm86ZDmTA,Erin Rose,811 Conti St,New Orleans,LA,4.5,"{'RestaurantsPriceRange2': '1', 'Alcohol': ""u'...","'Bars', 'Dive Bars', 'Irish Pub', 'Local Flavo...","{'Monday': '10:0-5:30', 'Tuesday': '10:0-5:30'...",pUNvLJwdJIOemOgU98mp1w,5,A great local-ish dive bar right off Bourbon S...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040
40,g6C1-WtSYt2eH5n0aUHA5A,Lambertville Station Pub,11 Bridge St,Lambertville,NJ,3.5,"{'NoiseLevel': ""u'average'"", 'BusinessAcceptsC...","'Bars', 'Nightlife', 'Pubs', 'Wine Bars'","{'Monday': '11:30-22:0', 'Tuesday': '11:30-22:...",pUNvLJwdJIOemOgU98mp1w,4,The bf and I spend the night in New Hope for a...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040


In [93]:
print(filtered_df_bar_2['categories'].nunique())
filtered_df_bar_2['categories'].value_counts()[:10]

383


'Bars', 'Nightlife'                                                         1432
'Bars', 'Dive Bars', 'Nightlife'                                            1022
'Bars', 'Lounges', 'Nightlife'                                               898
'Bars', 'Nightlife', 'Pubs'                                                  806
'Bars', 'Cocktail Bars', 'Lounges', 'Nightlife'                              727
'Arts & Entertainment', 'Bars', 'Music Venues', 'Nightlife'                  717
'Bars', 'Nightlife', 'Sports Bars'                                           394
'Arts & Entertainment', 'Bars', 'Dive Bars', 'Music Venues', 'Nightlife'     367
'Bars', 'Dive Bars', 'Local Flavor', 'Nightlife'                             360
'Arcades', 'Arts & Entertainment', 'Bars', 'Nightlife'                       243
Name: categories, dtype: int64

In [94]:
print(filtered_df_cafe_2['categories'].nunique())
print(filtered_df_cafe_2['categories'].value_counts()[:10])

743
Food, Coffee & Tea                                         4392
Coffee & Tea, Food                                         4101
Food, Ice Cream & Frozen Yogurt                             864
Ice Cream & Frozen Yogurt, Coffee & Tea, Food, Desserts     452
Coffee & Tea, Food, Coffee Roasteries                       380
Food, Donuts, Coffee & Tea                                  375
Ice Cream & Frozen Yogurt, Food                             278
Donuts, Coffee & Tea, Food                                  260
Ice Cream & Frozen Yogurt, Food, Desserts                   252
Juice Bars & Smoothies, Bubble Tea, Food, Coffee & Tea      238
Name: categories, dtype: int64


In [95]:
filtered_df_cafe_2['categories'] = filtered_df_cafe_2['categories'].apply(lambda x: category_extract(x))
filtered_df_cafe_2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,business_id,name_x,address,city,state,stars_x,attributes,categories,hours,user_id,stars_y,text,name_y,review_count_y,yelping_since,useful_y,funny_y,cool_y,elite,friends,fans,average_stars,num_of_friends,user_votes
0,JX4tUpd09YFchLBuI43lGw,Naked Cyber Cafe & Espresso Bar,10303 108 Street NW,Edmonton,AB,4.0,"{'OutdoorSeating': 'False', 'BusinessParking':...","'Arts & Entertainment', 'Coffee & Tea', 'Food'...","{'Monday': '11:0-1:0', 'Tuesday': '11:0-1:0', ...",xTlqpuuqtO0FWs12gd1v6A,5,Came here to print off some documents and was ...,Candice,150,2014-08-28 22:33:19,330,75,140,2014201520162017,"JVfx5JXYzdNZrY8P-PH55A, G3h8pIclwUbuu3itJqF7ug...",7,3.67,79,545
1,Mq9gCgsWS6-_1HMUH7iiXw,Starbucks,10387 112 St NW,Edmonton,AB,4.0,"{'WiFi': ""u'free'"", 'OutdoorSeating': 'True', ...","'Coffee & Tea', 'Food'",,xTlqpuuqtO0FWs12gd1v6A,1,I recently popped in here before my nail appoi...,Candice,150,2014-08-28 22:33:19,330,75,140,2014201520162017,"JVfx5JXYzdNZrY8P-PH55A, G3h8pIclwUbuu3itJqF7ug...",7,3.67,79,545
2,Mq9gCgsWS6-_1HMUH7iiXw,Starbucks,10387 112 St NW,Edmonton,AB,4.0,"{'WiFi': ""u'free'"", 'OutdoorSeating': 'True', ...","'Coffee & Tea', 'Food'",,xTlqpuuqtO0FWs12gd1v6A,4,This is my favourite SB location! There's not ...,Candice,150,2014-08-28 22:33:19,330,75,140,2014201520162017,"JVfx5JXYzdNZrY8P-PH55A, G3h8pIclwUbuu3itJqF7ug...",7,3.67,79,545
3,jC9Fn4tGGYbJg332IJ4MHA,Starbucks,14957 Stony Plain Road NW,Edmonton,AB,3.5,"{'RestaurantsPriceRange2': '1', 'WiFi': ""'free...","'Coffee & Tea', 'Food'",,xTlqpuuqtO0FWs12gd1v6A,4,I frequent this location because it is so clos...,Candice,150,2014-08-28 22:33:19,330,75,140,2014201520162017,"JVfx5JXYzdNZrY8P-PH55A, G3h8pIclwUbuu3itJqF7ug...",7,3.67,79,545
4,utJPIg1UrXG_PpE1QLOomQ,Tea Fusion,"10205 101 St, Ste 129",Edmonton,AB,3.5,"{'RestaurantsPriceRange2': '1', 'OutdoorSeatin...","'Coffee & Tea', 'Food'",,xTlqpuuqtO0FWs12gd1v6A,3,"A cute little shop in City Centre, TeaFusion i...",Candice,150,2014-08-28 22:33:19,330,75,140,2014201520162017,"JVfx5JXYzdNZrY8P-PH55A, G3h8pIclwUbuu3itJqF7ug...",7,3.67,79,545


In [96]:
print(filtered_df_cafe_2['categories'].nunique())
print(filtered_df_cafe_2['categories'].value_counts()[:10])

437
'Coffee & Tea', 'Food'                                             8493
'Coffee & Tea', 'Donuts', 'Food'                                   1461
'Coffee & Tea', 'Coffee Roasteries', 'Food'                        1153
'Food', 'Ice Cream & Frozen Yogurt'                                1142
'Coffee & Tea', 'Desserts', 'Food', 'Ice Cream & Frozen Yogurt'     819
'Desserts', 'Food', 'Ice Cream & Frozen Yogurt'                     803
'Bakeries', 'Coffee & Tea', 'Food'                                  629
'Bubble Tea', 'Coffee & Tea', 'Food', 'Juice Bars & Smoothies'      518
'Coffee & Tea', 'Food', 'Ice Cream & Frozen Yogurt'                 355
'Bakeries', 'Coffee & Tea', 'Donuts', 'Food'                        339
Name: categories, dtype: int64


In [97]:
# 도메인별 Context 추출
# Attributes 컬럼에서 원하는 Context만 추출한 다음, 변수로 만들어야 할 듯
# one-hot encoding 또는 LabelEncoding 사용

# 1. Bar & Pub
# Alcohol, BestNights, Music
# Alcohol - LabelEncoding
# BestNights, Music - One-Hot Encoding??

def attribute_extract(x, attribute=None):
    if attribute not in str(x):
        return 'False'
    else:
        x = re.sub("[^a-zA-Z0-9':{},_]", '', x)
        x = ast.literal_eval(x)
        x = x[attribute]
    return x

filtered_df_bar_2['BestNights'] = filtered_df_bar_2['attributes'].apply(lambda x: attribute_extract(x, 'BestNights'))
filtered_df_bar_2['Alcohol'] = filtered_df_bar_2['attributes'].apply(lambda x: attribute_extract(x, 'Alcohol'))
filtered_df_bar_2['Music'] = filtered_df_bar_2['attributes'].apply(lambda x: attribute_extract(x, 'Music'))
filtered_df_bar_2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,business_id,name_x,address,city,state,stars_x,attributes,categories,hours,user_id,stars_y,text,name_y,review_count_y,yelping_since,useful_y,funny_y,cool_y,elite,friends,fans,average_stars,num_of_friends,user_votes,BestNights,Alcohol,Music
11,97LIZKQGOt3Wz32Zj3tXrA,The Vault,1160 1st Avenue,King Of Prussia,PA,2.5,"{'BusinessAcceptsCreditCards': 'True', 'HappyH...","'Bars', 'Comedy Clubs', 'Lounges', 'Nightlife'","{'Friday': '20:0-1:0', 'Saturday': '20:45-1:0'}",lrtGPAmDqCFnbfAKiB4NmA,3,I have been here a few times for the monthly c...,Charles,360,2012-01-08 01:16:46,200,44,102,2015201620172018201920202021,"MRrN6DH3QGCFcDv5RENYVg, XhlDuX91F1ofznx1QkaSNQ...",8,3.5,53,346,"{'monday': False, 'tuesday': False, 'friday': ...",full_bar,"{'dj': True, 'background_music': False, 'jukeb..."
32,Ak34oJ8zxN8Av7dxS9T8IQ,The Centurion Lounge,"Terminal A West, near Gate A14",Philadelphia,PA,3.5,"{'RestaurantsPriceRange2': '3', 'RestaurantsGo...","'Airport Lounges', 'Bars', 'Nightlife'","{'Monday': '5:30-17:0', 'Tuesday': '5:30-17:0'...",pUNvLJwdJIOemOgU98mp1w,4,This is the first AMEX lounge I've visited. It...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'monday': False, 'tuesday': False, 'friday': ...",full_bar,"{'dj': False, 'background_music': False, 'no_m..."
36,ElW-iNQbGvb5hGWPSOUIcg,The Victor's Pub,1 Market St,Camden,NJ,3.0,"{'RestaurantsPriceRange2': '2', 'BusinessAccep...","'Bars', 'Nightlife', 'Pubs'","{'Monday': '11:0-2:0', 'Tuesday': '11:0-2:0', ...",pUNvLJwdJIOemOgU98mp1w,4,Victors Pub is located in Camden by the Waterf...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'monday': False, 'tuesday': False, 'friday': ...",full_bar,"{'dj': False, 'background_music': False, 'no_m..."
38,3qEjUW12sH6KOBm86ZDmTA,Erin Rose,811 Conti St,New Orleans,LA,4.5,"{'RestaurantsPriceRange2': '1', 'Alcohol': ""u'...","'Bars', 'Dive Bars', 'Irish Pub', 'Local Flavo...","{'Monday': '10:0-5:30', 'Tuesday': '10:0-5:30'...",pUNvLJwdJIOemOgU98mp1w,5,A great local-ish dive bar right off Bourbon S...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'monday': False, 'tuesday': False, 'friday': ...",full_bar,"{'dj': False, 'background_music': False, 'no_m..."
40,g6C1-WtSYt2eH5n0aUHA5A,Lambertville Station Pub,11 Bridge St,Lambertville,NJ,3.5,"{'NoiseLevel': ""u'average'"", 'BusinessAcceptsC...","'Bars', 'Nightlife', 'Pubs', 'Wine Bars'","{'Monday': '11:30-22:0', 'Tuesday': '11:30-22:...",pUNvLJwdJIOemOgU98mp1w,4,The bf and I spend the night in New Hope for a...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'monday': False, 'tuesday': False, 'friday': ...",full_bar,"{'dj': False, 'background_music': False, 'no_m..."


In [102]:
# BestNights, Music for LabelEncoding
# Key:value 순서를 통일시켜 Unique화

drop_index = filtered_df_bar_2[(filtered_df_bar_2['BestNights'] == 'None') | (filtered_df_bar_2['Music'] == '{}')].index
filtered_df_bar_2 = filtered_df_bar_2.drop(drop_index)

def bn_sort(x):
    x = dict(sorted(x.items()))
    return x

filtered_df_bar_2['BestNights'] =  filtered_df_bar_2['BestNights'].apply(lambda x: bn_sort(x))

music_list = list(filtered_df_bar_2['Music'])
music_keys = set().union(*music_list)
music_list = [{key : music.get(key, False) for key in music_keys} for music in music_list]
music_list = np.asarray(music_list).reshape(17735, -1)
filtered_df_bar_2['Music'] = music_list
filtered_df_bar_2.head()

Unnamed: 0,business_id,name_x,address,city,state,stars_x,attributes,categories,hours,user_id,stars_y,text,name_y,review_count_y,yelping_since,useful_y,funny_y,cool_y,elite,friends,fans,average_stars,num_of_friends,user_votes,BestNights,Alcohol,Music
11,97LIZKQGOt3Wz32Zj3tXrA,The Vault,1160 1st Avenue,King Of Prussia,PA,2.5,"{'BusinessAcceptsCreditCards': 'True', 'HappyH...","'Bars', 'Comedy Clubs', 'Lounges', 'Nightlife'","{'Friday': '20:0-1:0', 'Saturday': '20:45-1:0'}",lrtGPAmDqCFnbfAKiB4NmA,3,I have been here a few times for the monthly c...,Charles,360,2012-01-08 01:16:46,200,44,102,2015201620172018201920202021,"MRrN6DH3QGCFcDv5RENYVg, XhlDuX91F1ofznx1QkaSNQ...",8,3.5,53,346,"{'friday': True, 'monday': False, 'saturday': ...",full_bar,"{'background_music': False, 'jukebox': False, ..."
32,Ak34oJ8zxN8Av7dxS9T8IQ,The Centurion Lounge,"Terminal A West, near Gate A14",Philadelphia,PA,3.5,"{'RestaurantsPriceRange2': '3', 'RestaurantsGo...","'Airport Lounges', 'Bars', 'Nightlife'","{'Monday': '5:30-17:0', 'Tuesday': '5:30-17:0'...",pUNvLJwdJIOemOgU98mp1w,4,This is the first AMEX lounge I've visited. It...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'friday': False, 'monday': False, 'saturday':...",full_bar,"{'background_music': False, 'jukebox': False, ..."
36,ElW-iNQbGvb5hGWPSOUIcg,The Victor's Pub,1 Market St,Camden,NJ,3.0,"{'RestaurantsPriceRange2': '2', 'BusinessAccep...","'Bars', 'Nightlife', 'Pubs'","{'Monday': '11:0-2:0', 'Tuesday': '11:0-2:0', ...",pUNvLJwdJIOemOgU98mp1w,4,Victors Pub is located in Camden by the Waterf...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'friday': True, 'monday': False, 'saturday': ...",full_bar,"{'background_music': False, 'jukebox': False, ..."
38,3qEjUW12sH6KOBm86ZDmTA,Erin Rose,811 Conti St,New Orleans,LA,4.5,"{'RestaurantsPriceRange2': '1', 'Alcohol': ""u'...","'Bars', 'Dive Bars', 'Irish Pub', 'Local Flavo...","{'Monday': '10:0-5:30', 'Tuesday': '10:0-5:30'...",pUNvLJwdJIOemOgU98mp1w,5,A great local-ish dive bar right off Bourbon S...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'friday': True, 'monday': False, 'saturday': ...",full_bar,"{'background_music': False, 'jukebox': True, '..."
40,g6C1-WtSYt2eH5n0aUHA5A,Lambertville Station Pub,11 Bridge St,Lambertville,NJ,3.5,"{'NoiseLevel': ""u'average'"", 'BusinessAcceptsC...","'Bars', 'Nightlife', 'Pubs', 'Wine Bars'","{'Monday': '11:30-22:0', 'Tuesday': '11:30-22:...",pUNvLJwdJIOemOgU98mp1w,4,The bf and I spend the night in New Hope for a...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'friday': True, 'monday': False, 'saturday': ...",full_bar,"{'background_music': False, 'jukebox': False, ..."


In [116]:
# BestNights, Music : One-Hot Encoding 형태로 생성
# Mon~Sun 여부를 0/1 관계로 표현하는 DataFrame 생성 --> filtered_df~에 concat


bn_total = []
for bn in filtered_df_bar_2['BestNights']:
    each_bn = []
    bn = list(bn.values())
    for elem in bn:
        if elem == True:
            each_bn.append(1)
        else:
            each_bn.append(0)
    
    bn_total.append(each_bn)
            

music_list = list(filtered_df_bar_2['Music'])
music_keys = set().union(*music_list)
music_list = [{key : music.get(key, False) for key in music_keys} for music in music_list]

music_total = []
for music in music_list:
    each_music = []
    music = list(music.values())
    for yn in music:
        if yn == True:
            each_music.append(1)
        else:
            each_music.append(0)
    music_total.append(each_music)   


bn_total = pd.DataFrame(np.asarray(bn_total), columns=['BestNights_Fri', 'BestNights_Mon','BestNights_Sat',
                                                       'BestNights_Sun', 'BestNights_Thu', 'BestNights_Tue', 'BestNights_Wed']) 
music_total = pd.DataFrame(np.asarray(music_total), columns=['background_music', 'jukebox', 'karaoke', 
                                                             'dj', 'no_music', 'video', 'live'])

In [121]:
# 3개의 데이터셋 합치기
filtered_df_bar_2.reset_index(drop=True, inplace=True)
filtered_df_bar_2['index'] = filtered_df_bar_2.index
bn_total['index'] = bn_total.index
music_total['index'] = music_total.index

filtered_df_bar_2 = pd.merge(filtered_df_bar_2, bn_total, how='inner', on='index')
filtered_df_bar_2 = pd.merge(filtered_df_bar_2, music_total, how='inner', on='index')
print(filtered_df_bar_2.shape)
filtered_df_bar_2.head()

(17735, 42)


Unnamed: 0,business_id,name_x,address,city,state,stars_x,attributes,categories,hours,user_id,stars_y,text,name_y,review_count_y,yelping_since,useful_y,funny_y,cool_y,elite,friends,fans,average_stars,num_of_friends,user_votes,BestNights,Alcohol,Music,index,BestNights_Fri,BestNights_Mon,BestNights_Sat,BestNights_Sun,BestNights_Thu,BestNights_Tue,BestNights_Wed,background_music,jukebox,karaoke,dj,no_music,video,live
0,97LIZKQGOt3Wz32Zj3tXrA,The Vault,1160 1st Avenue,King Of Prussia,PA,2.5,"{'BusinessAcceptsCreditCards': 'True', 'HappyH...","'Bars', 'Comedy Clubs', 'Lounges', 'Nightlife'","{'Friday': '20:0-1:0', 'Saturday': '20:45-1:0'}",lrtGPAmDqCFnbfAKiB4NmA,3,I have been here a few times for the monthly c...,Charles,360,2012-01-08 01:16:46,200,44,102,2015201620172018201920202021,"MRrN6DH3QGCFcDv5RENYVg, XhlDuX91F1ofznx1QkaSNQ...",8,3.5,53,346,"{'friday': True, 'monday': False, 'saturday': ...",full_bar,"{'background_music': False, 'jukebox': False, ...",0,1,0,1,0,0,0,0,0,0,0,1,0,0,1
1,Ak34oJ8zxN8Av7dxS9T8IQ,The Centurion Lounge,"Terminal A West, near Gate A14",Philadelphia,PA,3.5,"{'RestaurantsPriceRange2': '3', 'RestaurantsGo...","'Airport Lounges', 'Bars', 'Nightlife'","{'Monday': '5:30-17:0', 'Tuesday': '5:30-17:0'...",pUNvLJwdJIOemOgU98mp1w,4,This is the first AMEX lounge I've visited. It...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'friday': False, 'monday': False, 'saturday':...",full_bar,"{'background_music': False, 'jukebox': False, ...",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,ElW-iNQbGvb5hGWPSOUIcg,The Victor's Pub,1 Market St,Camden,NJ,3.0,"{'RestaurantsPriceRange2': '2', 'BusinessAccep...","'Bars', 'Nightlife', 'Pubs'","{'Monday': '11:0-2:0', 'Tuesday': '11:0-2:0', ...",pUNvLJwdJIOemOgU98mp1w,4,Victors Pub is located in Camden by the Waterf...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'friday': True, 'monday': False, 'saturday': ...",full_bar,"{'background_music': False, 'jukebox': False, ...",2,1,0,1,0,1,0,0,0,0,0,0,0,0,0
3,3qEjUW12sH6KOBm86ZDmTA,Erin Rose,811 Conti St,New Orleans,LA,4.5,"{'RestaurantsPriceRange2': '1', 'Alcohol': ""u'...","'Bars', 'Dive Bars', 'Irish Pub', 'Local Flavo...","{'Monday': '10:0-5:30', 'Tuesday': '10:0-5:30'...",pUNvLJwdJIOemOgU98mp1w,5,A great local-ish dive bar right off Bourbon S...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'friday': True, 'monday': False, 'saturday': ...",full_bar,"{'background_music': False, 'jukebox': True, '...",3,1,0,1,0,1,0,0,0,1,0,0,0,0,0
4,g6C1-WtSYt2eH5n0aUHA5A,Lambertville Station Pub,11 Bridge St,Lambertville,NJ,3.5,"{'NoiseLevel': ""u'average'"", 'BusinessAcceptsC...","'Bars', 'Nightlife', 'Pubs', 'Wine Bars'","{'Monday': '11:30-22:0', 'Tuesday': '11:30-22:...",pUNvLJwdJIOemOgU98mp1w,4,The bf and I spend the night in New Hope for a...,Amy,488,2011-07-24 03:16:25,1191,235,614,"2012,2013,2014,2015,2016,2017,2018,2019,20,20,...","1Px8_etTU-O0NTBLemwDJA, 404akRRXmNzHixRDcFT7rA...",45,4.14,335,2040,"{'friday': True, 'monday': False, 'saturday': ...",full_bar,"{'background_music': False, 'jukebox': False, ...",4,1,0,0,0,0,0,1,0,0,0,0,0,0,0


In [134]:
# 2. Cafe
# OutdoorSeating, DriveThru
# None --> False 변환 후 Label Encoding

filtered_df_cafe_2['OutdoorSeating'] = filtered_df_cafe_2['attributes'].apply(lambda x: attribute_extract(x, 'OutdoorSeating'))
filtered_df_cafe_2['DriveThru'] = filtered_df_cafe_2['attributes'].apply(lambda x: attribute_extract(x, 'DriveThru'))
filtered_df_cafe_2['OutdoorSeating'] = filtered_df_cafe_2['OutdoorSeating'].replace('None', 'False')
filtered_df_cafe_2['DriveThru'] = filtered_df_cafe_2['DriveThru'].replace('None', 'False')

filtered_df_cafe_2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/p

Unnamed: 0,business_id,name_x,address,city,state,stars_x,attributes,categories,hours,user_id,stars_y,text,name_y,review_count_y,yelping_since,useful_y,funny_y,cool_y,elite,friends,fans,average_stars,num_of_friends,user_votes,OutdoorSeating,DriveThru
0,JX4tUpd09YFchLBuI43lGw,Naked Cyber Cafe & Espresso Bar,10303 108 Street NW,Edmonton,AB,4.0,"{'OutdoorSeating': 'False', 'BusinessParking':...","'Arts & Entertainment', 'Coffee & Tea', 'Food'...","{'Monday': '11:0-1:0', 'Tuesday': '11:0-1:0', ...",xTlqpuuqtO0FWs12gd1v6A,5,Came here to print off some documents and was ...,Candice,150,2014-08-28 22:33:19,330,75,140,2014201520162017,"JVfx5JXYzdNZrY8P-PH55A, G3h8pIclwUbuu3itJqF7ug...",7,3.67,79,545,False,False
1,Mq9gCgsWS6-_1HMUH7iiXw,Starbucks,10387 112 St NW,Edmonton,AB,4.0,"{'WiFi': ""u'free'"", 'OutdoorSeating': 'True', ...","'Coffee & Tea', 'Food'",,xTlqpuuqtO0FWs12gd1v6A,1,I recently popped in here before my nail appoi...,Candice,150,2014-08-28 22:33:19,330,75,140,2014201520162017,"JVfx5JXYzdNZrY8P-PH55A, G3h8pIclwUbuu3itJqF7ug...",7,3.67,79,545,True,True
2,Mq9gCgsWS6-_1HMUH7iiXw,Starbucks,10387 112 St NW,Edmonton,AB,4.0,"{'WiFi': ""u'free'"", 'OutdoorSeating': 'True', ...","'Coffee & Tea', 'Food'",,xTlqpuuqtO0FWs12gd1v6A,4,This is my favourite SB location! There's not ...,Candice,150,2014-08-28 22:33:19,330,75,140,2014201520162017,"JVfx5JXYzdNZrY8P-PH55A, G3h8pIclwUbuu3itJqF7ug...",7,3.67,79,545,True,True
3,jC9Fn4tGGYbJg332IJ4MHA,Starbucks,14957 Stony Plain Road NW,Edmonton,AB,3.5,"{'RestaurantsPriceRange2': '1', 'WiFi': ""'free...","'Coffee & Tea', 'Food'",,xTlqpuuqtO0FWs12gd1v6A,4,I frequent this location because it is so clos...,Candice,150,2014-08-28 22:33:19,330,75,140,2014201520162017,"JVfx5JXYzdNZrY8P-PH55A, G3h8pIclwUbuu3itJqF7ug...",7,3.67,79,545,False,True
4,utJPIg1UrXG_PpE1QLOomQ,Tea Fusion,"10205 101 St, Ste 129",Edmonton,AB,3.5,"{'RestaurantsPriceRange2': '1', 'OutdoorSeatin...","'Coffee & Tea', 'Food'",,xTlqpuuqtO0FWs12gd1v6A,3,"A cute little shop in City Centre, TeaFusion i...",Candice,150,2014-08-28 22:33:19,330,75,140,2014201520162017,"JVfx5JXYzdNZrY8P-PH55A, G3h8pIclwUbuu3itJqF7ug...",7,3.67,79,545,False,False


In [135]:
filtered_df_bar_2.to_csv('yelp_bar_prepro.csv', index=False)
filtered_df_cafe_2.to_csv('yelp_cafe_prepro.csv', index=False)
filtered_df_rest.to_csv('yelp_rest_prepro.csv', index=False)
'''
bn_total.to_csv('bn_total.csv', index=False)
music_total.to_csv('music_total.csv', index=False)
'''

"\nbn_total.to_csv('bn_total.csv', index=False)\nmusic_total.to_csv('music_total.csv', index=False)\n"