# Calculate standard deviation and mean of business and user csv files

In [1]:
#import all the necessary libraries

import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

from collections import defaultdict
pd.set_option('max_columns', 100)

In [2]:
import nltk
import random
from nltk.classify.scikitlearn import SklearnClassifier
import pickle
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from nltk.classify import ClassifierI
from statistics import mode
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.feature_extraction.text import CountVectorizer

In [40]:
#import all the necessary csv files
parent_dir = 'D:/DCU Documents/Semester 2/Data Analytics and Data Mining/Project/Kaggle Dataset/'

path_dic = {
            'B':parent_dir+'business_s.csv',
            'R':parent_dir+'Review2021.csv',
            'U':parent_dir+'U.csv',
            'D':parent_dir+'documents.csv'
           }

user_data = pd.read_csv(path_dic['U'])
business_data = pd.read_csv(path_dic['B'])

In [41]:
#check the mean, standard deviation of review counts in business_data and user_data
print(business_data['review_count'].describe())
print(user_data['review_count'].describe())

count    160544.000000
mean         51.973341
std         130.045446
min           5.000000
25%           8.000000
50%          17.000000
75%          44.000000
max        9185.000000
Name: review_count, dtype: float64
count    2.189457e+06
mean     2.169772e+01
std      7.601255e+01
min      0.000000e+00
25%      2.000000e+00
50%      5.000000e+00
75%      1.500000e+01
max      1.568600e+04
Name: review_count, dtype: float64


In [42]:
#remove all th value less than min_review_per_valid_buz and min_review_per_valid_user
min_review_per_valid_buz = 5
min_review_per_valid_user = 2

business_data = business_data[business_data['review_count'] >= min_review_per_valid_buz]
user_data = user_data[user_data['review_count'] >= min_review_per_valid_user]

print(business_data.shape)
print(user_data.shape)

(160544, 15)
(1836671, 23)


In [43]:
#check the neames of the columns
print("buz cols: ")
print(list(business_data))
print()
print("user cols: ")
print(list(user_data))

buz cols: 
['Unnamed: 0', 'business_id', 'name', 'address', 'city', 'state', 'postal_code', 'latitude', 'longitude', 'stars', 'review_count', 'is_open', 'attributes', 'categories', 'hours']

user cols: 
['Unnamed: 0', 'user_id', 'name', 'review_count', 'yelping_since', 'useful', 'funny', 'cool', 'elite', 'friends', 'fans', 'average_stars', 'compliment_hot', 'compliment_more', 'compliment_profile', 'compliment_cute', 'compliment_list', 'compliment_note', 'compliment_plain', 'compliment_cool', 'compliment_funny', 'compliment_writer', 'compliment_photos']


In [44]:
# renaming the columns
business_data = business_data.rename(columns = {'name':'buz_name', 'review_count':'buz_review_count', 'stars':'buz_stars'})


user_data = user_data.rename(columns = {'cool':'user_cool', 'funny':'user_funny','useful':'user_useful', 'name':'user_name', \
                                  'review_count':'user_review_count'})

In [46]:
#read review dataset
review_data = pd.read_csv(path_dic['R'])

In [47]:
#Find polarity, subjectivity and word list using Textblob

from textblob import TextBlob
review_data['polarity'] = [TextBlob(s).sentiment.polarity for s in review_data['text']]
review_data['subjectivity'] = [TextBlob(s).sentiment.subjectivity for s in review_data['text']]
review_data.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,review_id,user_id,business_id,stars,useful,funny,cool,text,date,polarity,subjectivity
0,138,83008,83008,viFyq_J7aNNpTdpJZz0AZw,VVKciz1V7sK8J6Hg_Qs01A,5QzSq5x1pef5Jr5QWJ8sdw,1.0,0,0,0,They can't manage to get online orders right a...,2020-11-04 13:39:39,0.392857,0.580357
1,144,83014,83014,_gcEQjBhP4hfiRcaFjVA_w,BsO52pt3Ym2TZueHJBZwNw,ufZddj_Wo51i7WKGKORl_g,2.0,0,0,0,Sorry but it was not good we order chicken Mar...,2020-11-11 21:36:56,-0.138333,0.58
2,145,83016,83016,TFgThOGW5jzrFqmcfBtxrA,JUfaoxSnrO2GKJVmncueEA,9hfAQDYCqFP_BXAF5E4PYA,4.0,0,0,0,"Quick service, but crowded due to the close pr...",2020-11-13 00:07:27,0.104167,0.4375
3,146,83017,83017,zplhvIBrl_6pyw--i2Q14w,JmVarS9Zfh63C2inAfG7DA,s8GuNWKlwunSdDnz84iyPw,3.0,0,0,0,This was used to be my family's go to place w...,2020-11-13 01:05:59,0.22059,0.495337
4,147,83018,83018,RNAytjfxE8cGpzqm1x-0mQ,ntcOw69H3dZkQKGbiBVqzg,a94iMfnbvoantcFSFesMYA,5.0,0,0,0,"Our waiter, Alcino, was extremely helpful with...",2020-11-15 00:37:36,0.39803,0.595455


In [21]:
#check out the size and column names of review_data

print(len(review_data))
print(review_data.columns)

134545
Index(['Unnamed: 0.1', 'Unnamed: 0.1.1', 'review_id', 'user_id', 'business_id',
       'stars', 'useful', 'funny', 'cool', 'text', 'date', 'polarity',
       'subjectivity'],
      dtype='object')


In [48]:
#delete all the unecessary files

del review_data['Unnamed: 0']
del business_data['Unnamed: 0']
del user_data['Unnamed: 0']
del review_data['Unnamed: 0.1']
del review_data['Unnamed: 0.1.1']

In [49]:
#rename column names

review_data = review_data.rename(columns = {'text':'text_stem', 'funny':'review_funny','cool':'review_cool', 'useful':'review_useful', \
                                  'stars':'review_stars'})

In [50]:
#print the column nmaes
print(review_data.columns)

Index(['review_id', 'user_id', 'business_id', 'review_stars', 'review_useful',
       'review_funny', 'review_cool', 'text_stem', 'date', 'polarity',
       'subjectivity'],
      dtype='object')


In [51]:
#Save the modified review_data in a CSV file
review_data.head()
review_data.to_csv('D:/DCU Documents/Semester 2/Data Analytics and Data Mining/Project/Kaggle Dataset/' + 'Review_polarity_subjectivity' + '.csv')

In [58]:
#Filter out the use_data data that are present in review_data
print(len(user_data_temp))
user_data_temp = user_data[user_data['user_id'].isin(review_data['user_id'])]

73384


In [60]:
#Save the new user_data in a csv file
user_data_temp.to_csv('D:/DCU Documents/Semester 2/Data Analytics and Data Mining/Project/Kaggle Dataset/' + 'User2020' + '.csv')

In [61]:
#Merge the user_data and review_data and find the aggreagate count, mean and standard deviation
merged_data = user_data_temp[['user_id']].merge(review_data, on = 'user_id', how = 'left')
grouped = merged_data.groupby('user_id').agg(['count', 'mean', 'std'])
res = pd.DataFrame(grouped.to_records())
print(list(res))
res = res[["user_id", "('review_cool', 'count')", "('review_cool', 'mean')", "('review_funny', 'mean')",\
           "('review_useful', 'mean')", "('review_stars', 'mean')", "('review_stars', 'std')",\
           "('polarity', 'mean')", "('polarity', 'std')", "('subjectivity', 'mean')", "('subjectivity', 'std')"]]

res = res.rename(columns = {"('review_cool', 'count')":"user_review_count", "('review_cool', 'mean')":"user_cool_mean", \
                      "('review_funny', 'mean')":"user_funny_mean","('review_useful', 'mean')":"user_useful_mean", \
                      "('review_stars', 'mean')":"user_star_mean", "('review_stars', 'std')":"user_star_std", \
                      "('polarity', 'mean')":"user_polarity_mean", "('polarity', 'std')":"user_polarity_std", \
                      "('subjectivity', 'mean')":"user_subjectivity_mean", "('subjectivity', 'std')":"user_subjectivity_std"})
res = res.fillna(0)
res.head()

['user_id', "('review_stars', 'count')", "('review_stars', 'mean')", "('review_stars', 'std')", "('review_useful', 'count')", "('review_useful', 'mean')", "('review_useful', 'std')", "('review_funny', 'count')", "('review_funny', 'mean')", "('review_funny', 'std')", "('review_cool', 'count')", "('review_cool', 'mean')", "('review_cool', 'std')", "('polarity', 'count')", "('polarity', 'mean')", "('polarity', 'std')", "('subjectivity', 'count')", "('subjectivity', 'mean')", "('subjectivity', 'std')"]


Unnamed: 0,user_id,user_review_count,user_cool_mean,user_funny_mean,user_useful_mean,user_star_mean,user_star_std,user_polarity_mean,user_polarity_std,user_subjectivity_mean,user_subjectivity_std
0,--3HptO9LVPn1yTS973M_Q,1,0.0,0.0,0.0,1.0,0.0,-0.483333,0.0,0.566667,0.0
1,--3r3ei2_Q2X1-y_3IISaQ,2,0.0,0.0,0.0,5.0,0.0,0.36956,0.261073,0.620486,0.027335
2,--4lTPjO0M3N3u49zEyLjQ,1,0.0,0.0,0.0,5.0,0.0,0.105126,0.0,0.576257,0.0
3,--BmCCdnD9UcnSy6JfnNZw,1,0.0,0.0,0.0,5.0,0.0,0.585938,0.0,0.6625,0.0
4,--C_8RU0qyqtZLpobfkkHA,2,0.0,0.0,0.0,5.0,0.0,0.18125,0.344715,0.5,0.088388


In [69]:
#Explore the new merged dataset
res.sort_values(by=['user_review_count'], inplace=True, ascending=False)
print(max(res['user_review_count']))
res.head()

103


Unnamed: 0,user_id,user_review_count,user_cool_mean,user_funny_mean,user_useful_mean,user_star_mean,user_star_std,user_polarity_mean,user_polarity_std,user_subjectivity_mean,user_subjectivity_std
70368,xLoLsQ6k5GOrPTl0Y7jwxg,103,12.126214,2.961165,11.669903,4.029126,0.912402,0.198185,0.104109,0.503074,0.073061
45164,bQCHF5rn5lMI9c5kEwCaNA,79,7.658228,3.556962,8.582278,3.797468,0.992016,0.156869,0.089329,0.49298,0.083258
16667,DVDLRYt37TdPXtluovM33A,75,1.32,0.24,3.173333,3.666667,1.2006,0.185276,0.123852,0.506203,0.071868
40649,YTu0c-1I9ECeksIgoHXFPA,74,29.135135,11.081081,27.027027,4.72973,0.476784,0.261343,0.144237,0.540316,0.113285
52418,hmZuukfOdV3Klufeo7UuHw,71,0.042254,0.028169,0.042254,3.985915,0.819285,0.232297,0.137902,0.5267,0.112581


In [72]:
#rename the column names
user_data = user_data_temp.rename(columns = {'user_review_count':'user_review_count_original'})
print(list(user_data))

['user_id', 'user_name', 'user_review_count_original', 'yelping_since', 'user_useful', 'user_funny', 'user_cool', 'elite', 'friends', 'fans', 'average_stars', 'compliment_hot', 'compliment_more', 'compliment_profile', 'compliment_cute', 'compliment_list', 'compliment_note', 'compliment_plain', 'compliment_cool', 'compliment_funny', 'compliment_writer', 'compliment_photos']


In [73]:
#Merge the merged dataset and user_dataset
user_data = user_data.drop(['compliment_cool', 'compliment_cute', 'compliment_funny', 'compliment_hot', \
                      'compliment_list', 'compliment_more', 'compliment_note', 'compliment_photos', \
                      'compliment_plain', 'compliment_profile', 'compliment_writer', 'user_funny', \
                      'user_review_count_original', 'user_useful', 'user_cool'], axis = 1)
user_data = user_data.merge(res, on = 'user_id', how = 'left')
user_data.head(10)

Unnamed: 0,user_id,user_name,yelping_since,elite,friends,fans,average_stars,user_review_count,user_cool_mean,user_funny_mean,user_useful_mean,user_star_mean,user_star_std,user_polarity_mean,user_polarity_std,user_subjectivity_mean,user_subjectivity_std
0,HM3JCyTu9PZOAvnbJV8Xsw,Kristen,2011-06-07 16:17:51,2016,"l-OwsyvDCOyUPYSiJvmQkQ, jk90O0GXI-G5P-UnygbOMg...",10,3.94,3,0.0,0.0,0.333333,5.0,0.0,0.315361,0.07431,0.580611,0.163659
1,suTaYHp6SQ3FUSyY8cycug,Alessandra,2011-02-22 19:15:10,,"QwggNODSpW8caPuxqZdAxQ, xv_kBu1Ns74fHJTSjBViEg...",5,3.88,2,0.0,0.0,0.0,4.0,1.414214,0.231782,0.137544,0.506073,0.030135
2,BgZwJBhVWKq1Urs4rKBdiA,Tres,2008-01-31 01:39:24,200820092010,"78eclU-tCzcSvgZRi6Enzw, DLG7Am1wBoxLSX0V0EuxJA...",83,3.51,5,1.0,0.8,1.6,2.6,2.19089,0.070782,0.146323,0.553281,0.057399
3,0KjqHIouprEuBr_8A9BJ_Q,Jonathan,2008-08-20 20:53:30,"2009,2010,2011,2012,2013,2014,2015,2016,2017,2...","emrOMoSI0mShelUtHYP6Bg, q-m5ihnO-qHtal5tDGYC5A...",149,3.99,4,2.5,2.0,2.75,4.75,0.5,0.302648,0.063561,0.551059,0.11087
4,p9TNT6pNKRuvBSzNf27JkQ,Jessica,2009-03-02 05:52:26,201220132014201520162017201820192020,"Qx5bz4luino7htScnjwsMA, CNaaizAUc2c4WIb1M3BBPw...",16,4.09,3,0.0,0.0,1.333333,4.333333,0.57735,0.316502,0.14346,0.558103,0.068562
5,x9DRvDktxccwvZemaOMgZA,Lanie,2008-05-19 00:58:40,"2009,2010,2011,2012,2013,2016,2017,2018,2019,2...","lNDRQXkwHD-EspEp3Qu9FQ, p8kHm5pzSevEG7sJdGkIYA...",324,3.92,1,9.0,7.0,10.0,5.0,0.0,0.068621,0.0,0.396146,0.0
6,nuG5UpCvswNdDXJP_Efl8Q,Cathy,2008-02-01 20:20:17,"2011,2012,2013,2014,2015,2016,2017,2018,2019,2...","PskECams_7euDCemKChQ0Q, SpcXt3YxSJhJcYLMsL0swA...",35,3.91,8,3.375,1.5,3.625,4.625,0.517549,0.180137,0.094472,0.514523,0.063036
7,TylOr9YYTV3znqIvH7kdmQ,Ted,2007-08-21 06:08:17,"2008,2009,2010,2011,2012,2013,2014,2015,2016,2...","ZNWErzMr2NL2cT3F566ZWA, d9qn7CSNzC_yuyvXIyoMsQ...",110,3.45,1,2.0,1.0,2.0,3.0,0.0,0.0651,0.0,0.528994,0.0
8,YttDgOC9AlM4HcAlDsbB2A,Phil,2009-09-02 15:30:08,"2011,2012,2013,2014,2015,2016,2017,2018,2019,2...","QQ82r5VjULzbWc7aIHajnQ, D4Eznn83MRWUK39n5WI0JA...",792,3.58,2,13.5,7.5,14.5,4.0,0.0,0.175387,0.015428,0.599145,0.081025
9,-lzhDCGh9gOVnDSRJmluFg,Elissa,2008-05-07 00:34:28,2017201820192020,"pdNfTnMSUD7_pwos9ty8sA, VukcjKh0gbROToqjvN40Mw...",7,4.2,1,0.0,0.0,1.0,5.0,0.0,0.26875,0.0,0.546875,0.0


In [74]:
#Save the merged dataset
user_data.to_csv('D:/DCU Documents/Semester 2/Data Analytics and Data Mining/Project/Kaggle Dataset/' + 'User_SD_Mean' + '.csv')

In [77]:
#Filter the business dataset based on values in review_data
business_data_temp = business_data[business_data['business_id'].isin(review_data['business_id'])]
print(len(business_data_temp))

46367


In [78]:
#Merge the business_data and review_data and find the aggreagate count, mean and standard deviation
merged_data_business_review = business_data_temp[['business_id']].merge(review_data, on = 'business_id', how = 'left')
grouped = merged_data_business_review.groupby('business_id').agg(['count', 'mean', 'std'])
res = pd.DataFrame(grouped.to_records())
print(list(res))
res = res[["business_id", "('review_cool', 'count')", "('review_cool', 'mean')", "('review_funny', 'mean')",\
           "('review_useful', 'mean')", "('review_stars', 'mean')", "('review_stars', 'std')",\
           "('polarity', 'mean')", "('polarity', 'std')", "('subjectivity', 'mean')", "('subjectivity', 'std')"]]

res = res.rename(columns = {"('review_cool', 'count')":"buz_review_count", "('review_cool', 'mean')":"buz_cool_mean", \
                      "('review_funny', 'mean')":"buz_funny_mean","('review_useful', 'mean')":"buz_useful_mean", \
                      "('review_stars', 'mean')":"buz_star_mean", "('review_stars', 'std')":"buz_star_std", \
                      "('polarity', 'mean')":"buz_polarity_mean", "('polarity', 'std')":"buz_polarity_std", \
                      "('subjectivity', 'mean')":"buz_subjectivity_mean", "('subjectivity', 'std')":"buz_subjectivity_std"})
res = res.fillna(0)
res.head()

['business_id', "('review_stars', 'count')", "('review_stars', 'mean')", "('review_stars', 'std')", "('review_useful', 'count')", "('review_useful', 'mean')", "('review_useful', 'std')", "('review_funny', 'count')", "('review_funny', 'mean')", "('review_funny', 'std')", "('review_cool', 'count')", "('review_cool', 'mean')", "('review_cool', 'std')", "('polarity', 'count')", "('polarity', 'mean')", "('polarity', 'std')", "('subjectivity', 'count')", "('subjectivity', 'mean')", "('subjectivity', 'std')"]


Unnamed: 0,business_id,buz_review_count,buz_cool_mean,buz_funny_mean,buz_useful_mean,buz_star_mean,buz_star_std,buz_polarity_mean,buz_polarity_std,buz_subjectivity_mean,buz_subjectivity_std
0,--0DF12EMHYI8XIgoFha6A,1,0.0,0.0,0.0,5.0,0.0,0.208333,0.0,0.461667,0.0
1,--2aF9NhXnNVpDV0KS3xBQ,1,2.0,1.0,2.0,5.0,0.0,0.245568,0.0,0.539811,0.0
2,--JKSSgnfoOjVDFGv692BA,1,0.0,0.0,0.0,5.0,0.0,0.085,0.0,0.52,0.0
3,--Q3mAcX9t63f7Xcbn7LVA,28,1.964286,1.035714,2.714286,3.785714,1.571909,0.307043,0.218635,0.597008,0.115211
4,--TEGvhgrXwHnRjiFbK08Q,1,0.0,0.0,0.0,5.0,0.0,0.318783,0.0,0.519709,0.0


In [79]:
#Explore the merged dataset
res.sort_values(by=['buz_review_count'], inplace=True, ascending=False)

print(max(res['buz_review_count']))
res.head()

184


Unnamed: 0,business_id,buz_review_count,buz_cool_mean,buz_funny_mean,buz_useful_mean,buz_star_mean,buz_star_std,buz_polarity_mean,buz_polarity_std,buz_subjectivity_mean,buz_subjectivity_std
14164,IOLwhRIF2wyhbTJl_J2khw,184,0.168478,0.065217,0.38587,4.092391,1.365919,0.302718,0.227101,0.581059,0.122029
12569,GJxFtnTqTiokFedNrW9iDQ,164,0.207317,0.103659,0.390244,4.213415,1.256957,0.260284,0.238299,0.562105,0.126196
25787,YZs1gNSh_sN8JmN_nrpxeA,154,0.467532,0.175325,0.590909,4.636364,0.830923,0.332537,0.210135,0.5616,0.122681
20795,Rba9Ol4jnTiov6_iAuoF5g,135,0.303704,0.051852,0.459259,4.681481,0.825452,0.389648,0.220885,0.613991,0.107852
10416,DOu8Wl1uZ28uQIP96DSx7g,111,0.378378,0.09009,0.531532,4.765766,0.555222,0.245547,0.189775,0.644141,0.116619


In [80]:
print(business_data_temp.shape)
print(list(business_data_temp))

(160544, 14)
['business_id', 'buz_name', 'address', 'city', 'state', 'postal_code', 'latitude', 'longitude', 'buz_stars', 'buz_review_count', 'is_open', 'attributes', 'categories', 'hours']


In [81]:
#Merge the merged dataset and business_data
business_data = business_data_temp.merge(res, on = 'business_id', how = 'left')
business_data.head(10)

Unnamed: 0,business_id,buz_name,address,city,state,postal_code,latitude,longitude,buz_stars,buz_review_count_x,is_open,attributes,categories,hours,buz_review_count_y,buz_cool_mean,buz_funny_mean,buz_useful_mean,buz_star_mean,buz_star_std,buz_polarity_mean,buz_polarity_std,buz_subjectivity_mean,buz_subjectivity_std
0,6iYb2HFDywm3zjuRg0shjw,Oskar Blues Taproom,921 Pearl St,Boulder,CO,80302,40.017544,-105.283348,4.0,86,1,"{'RestaurantsTableService': 'True', 'WiFi': ""u...","Gastropubs, Food, Beer Gardens, Restaurants, B...","{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",5,0.2,0.2,0.4,4.2,1.788854,0.466493,0.230207,0.670037,0.054056
1,D4JtQNTI4X3KcbzacDJsMw,Bob Likes Thai Food,3755 Main St,Vancouver,BC,V5V,49.251342,-123.101333,3.5,169,1,"{'GoodForKids': 'True', 'Alcohol': ""u'none'"", ...","Restaurants, Thai","{'Monday': '17:0-21:0', 'Tuesday': '17:0-21:0'...",5,0.0,0.0,0.2,2.0,1.414214,0.038906,0.262315,0.485662,0.101071
2,jFYIsSb7r1QeESVUnXPHBw,Boxwood Biscuit,740 S High St,Columbus,OH,43206,39.947007,-82.997471,4.5,11,1,,"Breakfast & Brunch, Restaurants","{'Saturday': '8:0-14:0', 'Sunday': '8:0-14:0'}",2,0.0,0.0,1.0,4.0,1.414214,0.049713,0.047547,0.442355,0.165965
3,hCABMnKtwo4Y9alQDxh2kw,Star Kreations Salon and Spa,"124 Newbury St, Unit C",Peabody,MA,01960,42.534248,-70.990948,4.0,8,1,"{'RestaurantsPriceRange2': '2', 'BusinessParki...","Wigs, Hair Extensions, Hair Salons, Blow Dry/O...","{'Monday': '10:0-15:0', 'Tuesday': '9:0-18:0',...",1,0.0,0.0,0.0,1.0,0.0,-0.2,0.0,0.5,0.0
4,ufCxltuh56FF4-ZFZ6cVhg,Sister Honey's,247 E Michigan St,Orlando,FL,32806,28.513265,-81.374707,4.5,135,1,"{'BusinessParking': ""{'garage': False, 'street...","Restaurants, American (New), Bakeries, Dessert...","{'Tuesday': '11:0-18:0', 'Wednesday': '11:0-18...",2,0.0,0.0,0.0,5.0,0.0,0.726562,0.386699,0.801389,0.280879
5,j68RDnvOaYEwGcY9cTEHpA,Pittock Mansion,3229 NW Pittock Dr,Portland,OR,97210,45.525259,-122.716227,4.5,406,1,"{'BusinessAcceptsCreditCards': 'True', 'GoodFo...","Tours, Museums, Architectural Tours, Venues & ...","{'Monday': '0:0-0:0', 'Thursday': '10:0-16:0',...",5,6.6,3.4,6.4,4.6,0.894427,0.230236,0.050522,0.48883,0.099651
6,nTIhpR7MhsALPwg_Hh14EA,DoubleTree by Hilton Hotel Austin,6505 N Interstate 35,Austin,TX,78752,30.326377,-97.704543,3.0,139,1,"{'WiFi': ""u'free'"", 'RestaurantsPriceRange2': ...","Hotels, Hotels & Travel, Event Planning & Serv...","{'Monday': '0:0-0:0', 'Tuesday': '0:0-0:0', 'W...",1,0.0,0.0,0.0,1.0,0.0,-0.095918,0.0,0.478231,0.0
7,arEXRZYu8220bFBJ3VAh3A,Blake's On The Park,227 10th St NE,Atlanta,GA,30309,33.781475,-84.379955,3.5,170,1,"{'Alcohol': ""'full_bar'"", 'BusinessParking': ""...","Nightlife, Bars, Gay Bars, Sports Bars","{'Monday': '15:0-3:0', 'Tuesday': '15:0-3:0', ...",1,1.0,0.0,2.0,4.0,0.0,0.184505,0.0,0.616667,0.0
8,s8eTWEtW5WcnL2kUxrOVmw,Austin Regional Clinic: ARC Quarry Lake,"4515 Seton Center Pkwy, Ste 220",Austin,TX,78759,30.401657,-97.742943,3.5,48,1,"{'ByAppointmentOnly': 'True', 'AcceptsInsuranc...","Diagnostic Services, Doctors, Internal Medicin...","{'Monday': '8:0-17:0', 'Tuesday': '8:0-17:0', ...",3,0.0,0.0,0.0,1.0,0.0,0.091561,0.364678,0.432619,0.116707
9,NRPemqVb4qpWFF0Avq_6OQ,Eurasia Sushi Bar & Seafood,"7101 W Hwy 71, Ste C-13",Austin,TX,78735,30.234533,-97.877262,4.5,395,1,"{'Ambience': ""{'touristy': False, 'hipster': F...","Bars, Nightlife, Cocktail Bars, Seafood, Resta...","{'Monday': '0:0-0:0', 'Tuesday': '11:0-22:0', ...",17,0.352941,0.058824,0.470588,4.529412,0.624264,0.429815,0.196787,0.62875,0.127855


In [82]:
#Save the business_data
business_data.to_csv('D:/DCU Documents/Semester 2/Data Analytics and Data Mining/Project/Kaggle Dataset/' + 'Business_SD_Mean' + '.csv')