In [61]:
import pandas as pd
import ast
from datetime import datetime
import numpy as np
from fuzzywuzzy import fuzz
import re, string
from collections import Counter
from sklearn import preprocessing

In [71]:
df_raw = pd.read_csv("./6.10/raw_binary_6340_7000.csv")

In [72]:
df_raw = df_raw.replace({"None":np.nan})

In [73]:
def missing_values(df, percentage):

    columns = df.columns
    percent_missing = df.isnull().sum() * 100 / len(df)
    missing_value_df = pd.DataFrame({'column_name': columns,
                                 'percent_missing': percent_missing})

    missing_drop = list(missing_value_df[missing_value_df.percent_missing>percentage].column_name)
#     return missing_drop
    df = df.drop(missing_drop, axis=1)
    return df

In [74]:
df_clean = missing_values(df_raw, 30)

In [75]:
len(df_clean.columns)

65

In [76]:
#####################
####Drop features####
#####################
df_clean = df_clean.drop(['name'], axis=1, errors='ignore')
df_clean = df_clean.drop(['address'], axis=1, errors='ignore')
df_clean = df_clean.drop(['business_id'], axis=1, errors='ignore')
df_clean = df_clean.drop(['city'], axis=1, errors='ignore')
df_clean = df_clean.drop(['postal_code'], axis=1, errors='ignore')
df_clean = df_clean.drop(['covid_cumul_case_state'], axis=1, errors='ignore')
df_clean = df_clean.drop(['covid_cumul_death_state'], axis=1, errors='ignore')
df_clean = df_clean.drop(['state'], axis=1, errors='ignore')
df_clean = df_clean.fillna('None')


case = df_clean['Cases'] #returns a numpy array
df_clean['Cases'] = (case-case.mean())/case.std()

death = df_clean['Deaths'] #returns a numpy array
df_clean['Deaths'] = (death-death.mean())/death.std()

########### state #########################
# df_clean = pd.get_dummies(data=df_clean, columns=['state'])

########### RestaurantsAttire #########################
if 'RestaurantsAttire' in df_clean.columns:
    df_clean['RestaurantsAttire'] = df_clean['RestaurantsAttire'].replace({"u'casual'":"'casual'", "u'dressy'":"'dressy'", "u'formal'":"'formal'"})
    df_clean['RestaurantsAttire'] = df_clean['RestaurantsAttire'].replace({"'casual'":0, "'dressy'":1, "'formal'":2 , "None":1})

########### Noise-level #########################
if 'NoiseLevel' in df_clean.columns:
    df_clean['NoiseLevel'] = df_clean['NoiseLevel'].replace({"u'average'":"'average'", "u'loud'":"'loud'", "u'quiet'":"'quiet'", "u'very_loud'": "'very_loud'"})
    df_clean['NoiseLevel'] = df_clean['NoiseLevel'].replace({"'average'":0, "'loud'":1, "'quiet'":-1, "'very_loud'":2, 'None':0})

#### RestaurantsPriceRange ##
if 'RestaurantsPriceRange' in df_clean.columns:
    df_clean['RestaurantsPriceRange'] = df_clean['RestaurantsPriceRange'].replace({'None':2})

#### beer ##
if 'Alcohol' in df_clean.columns:
    df_clean['Alcohol'] = df_clean['Alcohol'].replace({"u'beer_and_wine'":"'beer_and_wine'", "u'full_bar'":"'full_bar'", "u'none'":"'none'"})
    df_clean['Alcohol'] = df_clean['Alcohol'].replace({"'none'":0, "'full_bar'":2, "'beer_and_wine'":1, 'None':1})

########### Wifi #########################
if 'WiFi' in df_clean.columns:
    df_clean['WiFi'] = df_clean['WiFi'].replace({"u'free'":"'free'",  "u'no'":"'no'", "u'paid'":"'paid'"})
    df_clean['WiFi'] = df_clean['WiFi'].replace({"'no'":0, "'free'":1, "'paid'":2, 'None':1})


df_clean = df_clean.replace({'None':0, 'FALSE':-1, 'TRUE':1})
df_clean = df_clean.replace({False:0, True:1})

########### latitude #########################
if 'latitude' in df_clean.columns:
    lat = df_clean['latitude']
    df_clean['latitude'] = (lat-lat.mean())/lat.std()

########### longitude #########################
if 'latitude' in df_clean.columns:
    lon = df_clean['longitude'] 
    df_clean['longitude'] = (lon-lon.mean())/lon.std()

########### review_count #########################
if 'review_count' in df_clean.columns:
    rev = df_clean['review_count'] #returns a numpy array
    df_clean['review_count'] =(rev-rev.mean())/rev.std()

########### Total_hours #########################
if 'hours' in df_clean.columns:
    total_hours = []
    for index, row in df_clean.iterrows():
            current = row['hours']
            #print(current)
            if current == 0:
                total_hours.append(0)
                continue
            summ = 0
            for key in (ast.literal_eval(current)).keys():
                cur_duration = (ast.literal_eval(current))[key]
                index = cur_duration.find('-')
                before = cur_duration[:index]
                after = cur_duration[index+1:]
                before = datetime.strptime(before, "%H:%M")
                after = datetime.strptime(after, "%H:%M")
                #print(cur_duration)
                #print(abs(divmod((after-before).total_seconds(), 3600)[0]))
                summ = summ + abs(divmod((after-before).total_seconds(), 3600)[0])
            total_hours.append(summ)
    df_clean['total_hours_week'] = total_hours
    df_clean = df_clean.drop(['hours'], axis=1)
    total_hours = df_clean['total_hours_week'] #returns a numpy array
    df_clean['total_hours_week'] =(total_hours-total_hours.mean())/total_hours.std() 

    
df_clean['chain'] = df_clean['chain'].astype('int64') 


if 'stars' in df_clean.columns:
    stars = df_clean['stars'] #returns a numpy array
    df_clean['stars'] =(stars-stars.mean())/stars.std()

if 'checkIn_2019' in df_clean.columns:
    df_clean['checkIn_2019'] = df_clean['checkIn_2019'].astype('float64') 
    checkIn_2019 = df_clean['checkIn_2019'] #returns a numpy array
    df_clean['checkIn_2019'] = (checkIn_2019-checkIn_2019.mean())/checkIn_2019.std()

if 'total_checkIn' in df_clean.columns:
    df_clean['total_checkIn'] = df_clean['total_checkIn'].astype('float64') 
    total_checkIn = df_clean['total_checkIn'] #returns a numpy array
    df_clean['total_checkIn'] = (total_checkIn-total_checkIn.mean())/total_checkIn.std()

if 'dayOfStart' in df_clean.columns:
    df_clean['dayOfStart'] = df_clean['dayOfStart'].astype('float64') 
    dayOfStart = df_clean['dayOfStart'] #returns a numpy array
    df_clean['dayOfStart'] = (dayOfStart-dayOfStart.mean())/dayOfStart.std()

if 'land_price' in df_clean.columns:
    land_price = df_clean['land_price'] #returns a numpy array
    df_clean['land_price'] = (land_price-land_price.mean())/land_price.std()

if 'RestaurantsPriceRange' in df_clean.columns:    
    df_clean['RestaurantsPriceRange'] = df_clean['RestaurantsPriceRange'].astype('int64')

df_clean = df_clean.replace({'CLOSED_PERMANENTLY':0,  'CLOSED_TEMPORARILY':0, 'OPERATIONAL':1})
df1 = df_clean.pop('status_g')
df_clean['status_g']=df1
df_clean = df_clean.rename(columns={"status_g": "labels"})

df_clean.to_csv('./6.11/clean_binary_6340_7000.csv', index=False) 

In [2]:
import pandas as pd
import ast
from datetime import datetime
import numpy as np
from fuzzywuzzy import fuzz
import re, string
from collections import Counter
from sklearn import preprocessing


df_raw = pd.read_csv("./6.10/raw_binary_6340_7000.csv")
df_raw = df_raw.drop(['name'], axis=1)
df_raw = df_raw.drop(['address'], axis=1)
df_raw = df_raw.drop(['business_id'], axis=1)
df_raw = df_raw.drop(['city'], axis=1)
df_raw = df_raw.drop(['postal_code'], axis=1)
df_raw = df_raw.drop(['covid_cumul_case_state'], axis=1)
df_raw = df_raw.drop(['covid_cumul_death_state'], axis=1)

df_raw = df_raw.fillna(0)
df_raw = df_raw.replace({'None':0, 'FALSE':-1, 'TRUE':1})
df_raw = df_raw.replace({False:0, True:1})

case = df_raw['Cases'] #returns a numpy array
df_raw['Cases'] = (case-case.mean())/case.std()

death = df_raw['Deaths'] #returns a numpy array
df_raw['Deaths'] = (death-death.mean())/death.std()

########### state #########################
df_raw = pd.get_dummies(data=df_raw, columns=['state'])


########### latitude #########################
lat = df_raw['latitude']
df_raw['latitude'] = (lat-lat.mean())/lat.std()

########### longitude #########################
lon = df_raw['longitude'] 
df_raw['longitude'] = (lon-lon.mean())/lon.std()

########### review_count #########################
rev = df_raw['review_count'] #returns a numpy array
df_raw['review_count'] =(rev-rev.mean())/rev.std()

########### Total_hours #########################
total_hours = []
for index, row in df_raw.iterrows():
        current = row['hours']
        #print(current)
        if current == 0:
            total_hours.append(0)
            continue
        summ = 0
        for key in (ast.literal_eval(current)).keys():
            cur_duration = (ast.literal_eval(current))[key]
            index = cur_duration.find('-')
            before = cur_duration[:index]
            after = cur_duration[index+1:]
            before = datetime.strptime(before, "%H:%M")
            after = datetime.strptime(after, "%H:%M")
            #print(cur_duration)
            #print(abs(divmod((after-before).total_seconds(), 3600)[0]))
            summ = summ + abs(divmod((after-before).total_seconds(), 3600)[0])
        total_hours.append(summ)
df_raw['total_hours_week'] = total_hours
df_raw = df_raw.drop(['hours'], axis=1)
total_hours = df_raw['total_hours_week'] #returns a numpy array
df_raw['total_hours_week'] =(total_hours-total_hours.mean())/total_hours.std()

########### stars #########################
stars = df_raw['stars'] #returns a numpy array
df_raw['stars'] =(stars-stars.mean())/stars.std()

########### RestaurantsAttire #########################
rest_Attire = df_raw['RestaurantsAttire'] #returns a numpy array
df_raw = df_raw.replace({"u'casual'":"'casual'", "u'dressy'":"'dressy'", "u'formal'":"'formal'"})
df_raw = df_raw.replace({"'casual'":'casual', "'dressy'":'dressy', "'formal'":'formal'})
df_raw = pd.get_dummies(data=df_raw, columns=['RestaurantsAttire'])

########### Noise-level #########################
df_raw = df_raw.replace({"u'average'":"'average'", "u'loud'":"'loud'", "u'quiet'":"'quiet'", "u'very_loud'": "'very_loud'"})
df_raw = df_raw.replace({"'average'":0, "'loud'":1, "'quiet'":-1, "'very_loud'":2})

########### beer #########################
df_raw = df_raw.replace({"u'beer_and_wine'":"'beer_and_wine'", "u'full_bar'":"'full_bar'", "u'none'":"'none'"})
df_raw = df_raw.replace({"'none'":0, "'full_bar'":1, "'beer_and_wine'":-1})

########### Wifi #########################
df_raw = df_raw.replace({"u'free'":"'free'",  "u'no'":"'no'", "u'paid'":"'paid'"})
df_raw = df_raw.replace({"'no'":0, "'free'":-1, "'paid'":1})

df_raw['checkIn_2019'] = df_raw['checkIn_2019'].astype('float64') 
checkIn_2019 = df_raw['checkIn_2019'] #returns a numpy array
df_raw['checkIn_2019'] = (checkIn_2019-checkIn_2019.mean())/checkIn_2019.std()

df_raw['total_checkIn'] = df_raw['total_checkIn'].astype('float64') 
total_checkIn = df_raw['total_checkIn'] #returns a numpy array
df_raw['total_checkIn'] = (total_checkIn-total_checkIn.mean())/total_checkIn.std()

df_raw['chain'] = df_raw['chain'].astype('int64') 

df_raw['dayOfStart'] = df_raw['dayOfStart'].astype('float64') 
dayOfStart = df_raw['dayOfStart'] #returns a numpy array
df_raw['dayOfStart'] = (dayOfStart-dayOfStart.mean())/dayOfStart.std()

land_price = df_raw['land_price'] #returns a numpy array
df_raw['land_price'] = (land_price-land_price.mean())/land_price.std()

df_raw['RestaurantsPriceRange'] = df_raw['RestaurantsPriceRange'].astype('int64')

df_raw = df_raw.replace({'CLOSED_PERMANENTLY':0,  'CLOSED_TEMPORARILY':1, 'OPERATIONAL':2})

df1 = df_raw.pop('status_g')
df_raw['status_g']=df1
df_raw = df_raw.rename(columns={"status_g": "labels"})

df_raw.to_csv('clean_multi_6340_7000.csv', index=False) 



In [3]:
list(df_raw.columns)

['Cases',
 'Deaths',
 'latitude',
 'longitude',
 'stars',
 'review_count',
 'RestaurantsTakeOut',
 'BusinessAcceptsCreditCards',
 'NoiseLevel',
 'GoodForKids',
 'RestaurantsReservations',
 'RestaurantsGoodForGroups',
 'RestaurantsPriceRange',
 'HasTV',
 'Alcohol',
 'BikeParking',
 'RestaurantsDelivery',
 'ByAppointmentOnly',
 'OutdoorSeating',
 'Caters',
 'WheelchairAccessible',
 'WiFi',
 'RestaurantsTableService',
 'DogsAllowed',
 'HappyHour',
 'parking_garage',
 'parking_street',
 'parking_validated',
 'parking_lot',
 'parking_valet',
 'Ambience_touristy',
 'Ambience_hipster',
 'Ambience_romantic',
 'Ambience_intimate',
 'Ambience_trendy',
 'Ambience_upscale',
 'Ambience_classy',
 'Ambience_casual',
 'Ambience_divey',
 'GoodForMeal_dessert',
 'GoodForMeal_latenight',
 'GoodForMeal_lunch',
 'GoodForMeal_dinner',
 'GoodForMeal_brunch',
 'GoodForMeal_breakfast',
 'categories_Specialty Food',
 'categories_Food',
 'categories_Restaurants',
 'categories_Desserts',
 'categories_Ice Cream & 

In [246]:
df_raw[~df_raw.applymap(np.isreal).all(1)]

In [245]:
for col in df_raw.columns:
    for item in df_raw[col]:
        if type(item) == int or type(item) == float :
            continue
        print(col)
        print(item)
        print(type(item))
        print('------------------------')

RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
-----------------

1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
4
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPri

------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class

3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPri

2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPri

<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
4
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPrice

<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
4
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPrice

RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
-----------------

1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPri

2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
4
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPri

RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
4
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
-----------------

------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class

<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPrice

1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPri

<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPrice

1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPri

RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
-----------------

2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPri

2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPri

RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
-----------------

2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPri

<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPrice

2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPri

<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPrice

RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
-----------------

2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
4
<class 'str'>
------------------------
RestaurantsPri

------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class

<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
3
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPrice

RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
2
<class 'str'>
------------------------
RestaurantsPriceRange
1
<class 'str'>
-----------------

In [249]:
df_raw['labels']

0        0
1        0
2        0
3        0
4        0
        ..
13335    1
13336    1
13337    1
13338    1
13339    1
Name: labels, Length: 13340, dtype: int64

In [143]:
df_raw.loc[(df_raw['Unnamed: 98'] == 'CLOSED_PERMANENTLY')]['status_g'] = 'CLOSED_PERMANENTLY'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [145]:
df_raw = df_raw.drop(['Unnamed: 98'], axis=1)

In [160]:
set(df_raw['status_g'])

{'CLOSED_PERMANENTLY', 'CLOSED_TEMPORARILY', 'OPERATIONAL'}

In [189]:
df_raw = df_raw.replace({'CLOSED_PERMANENTLY':0,  'CLOSED_TEMPORARILY':1, 'OPERATIONAL':2})

In [199]:
df_raw[~df_raw.applymap(np.isreal).all(1)]

Unnamed: 0,Cases,Deaths,latitude,longitude,stars,review_count,RestaurantsTakeOut,BusinessAcceptsCreditCards,NoiseLevel,GoodForKids,...,state_PA,state_QC,state_SC,state_VT,state_WI,total_hours_week,RestaurantsAttire_0,RestaurantsAttire_casual,RestaurantsAttire_dressy,RestaurantsAttire_formal


In [200]:
df1 = df_raw.pop('status_g')
df_raw['status_g']=df1

In [202]:
df_raw['status_g']

0        1
1        0
2        0
3        1
4        0
        ..
10335    2
10336    2
10337    2
10338    2
10339    2
Name: status_g, Length: 10340, dtype: int64

In [205]:
df_raw = df_raw.rename(columns={"status_g": "labels"})

In [206]:
df_raw.to_csv('raw_with_sample.csv', index=False) 

In [140]:
df_raw1 = pd.read_csv("./raw_withdownsample.csv")

In [157]:
df_raw.loc[(df_raw['status_g'] == 0), 'status_g'] = 'CLOSED_PERMANENTLY'

In [159]:
df_raw.loc[(df_raw['status_g'] == 0)]

Unnamed: 0,Cases,Deaths,latitude,longitude,stars,review_count,RestaurantsTakeOut,BusinessAcceptsCreditCards,NoiseLevel,GoodForKids,...,state_PA,state_QC,state_SC,state_VT,state_WI,total_hours_week,RestaurantsAttire_0,RestaurantsAttire_casual,RestaurantsAttire_dressy,RestaurantsAttire_formal


In [254]:
df_raw1 = pd.read_csv("./6.10/raw_binary_6340_7000.csv")

In [256]:
set(df_raw1.columns)

{'Alcohol',
 'Ambience_casual',
 'Ambience_classy',
 'Ambience_divey',
 'Ambience_hipster',
 'Ambience_intimate',
 'Ambience_romantic',
 'Ambience_touristy',
 'Ambience_trendy',
 'Ambience_upscale',
 'BikeParking',
 'BusinessAcceptsCreditCards',
 'ByAppointmentOnly',
 'Cases',
 'Caters',
 'Deaths',
 'DogsAllowed',
 'GoodForKids',
 'GoodForMeal_breakfast',
 'GoodForMeal_brunch',
 'GoodForMeal_dessert',
 'GoodForMeal_dinner',
 'GoodForMeal_latenight',
 'GoodForMeal_lunch',
 'HappyHour',
 'HasTV',
 'NoiseLevel',
 'OutdoorSeating',
 'RestaurantsAttire',
 'RestaurantsDelivery',
 'RestaurantsGoodForGroups',
 'RestaurantsPriceRange',
 'RestaurantsReservations',
 'RestaurantsTableService',
 'RestaurantsTakeOut',
 'WheelchairAccessible',
 'WiFi',
 'address',
 'business_id',
 'categories_American (New)',
 'categories_American (Traditional)',
 'categories_Asian Fusion',
 'categories_Bakeries',
 'categories_Bars',
 'categories_Beer',
 'categories_Breakfast & Brunch',
 'categories_Burgers',
 'categ