## Label & One-Hot Encoding

In [38]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

### Getting the data we want for label encoding
##### (Note: this does not cover all rows; it is just an example until we get the other rating data.)

In [49]:
# Load the restaurant dataset; the relative path is resolved against the
# notebook's working directory.
restaurants_csv = 'final_restaurants.csv'
rest = pd.read_csv(restaurants_csv)

In [50]:
# Lift pandas' cap on displayed columns so wide frames render without elision.
pd.options.display.max_columns = None

In [51]:
# Keep only the name, amenity type, coordinates and chain flag.
kept_columns = ['name', 'amenity', 'lat', 'lon', 'is chain']
rest = rest[kept_columns]

In [52]:
# Swap the space in the chain-flag column name for an underscore.
rest = rest.rename(columns={'is chain': 'is_chain'})

In [53]:
# Add zero-filled placeholder columns for the rating data that has not been
# collected yet (see the note at the top of this section).
for placeholder in ('price_level', 'rating', 'user_ratings_total'):
    rest[placeholder] = 0

In [54]:
# Show the trimmed frame with the zero-filled placeholder columns appended.
rest

Unnamed: 0,name,amenity,lat,lon,is_chain,price_level,rating,user_ratings_total
0,starbucks,cafe,49.260812,-123.125736,True,0,0,0
1,waves,cafe,49.263582,-123.128836,True,0,0,0
2,mcdonald's,fast_food,49.263448,-123.138067,True,0,0,0
3,denny's,restaurant,49.263225,-123.133390,True,0,0,0
4,the keg,restaurant,49.271156,-123.135808,True,0,0,0
...,...,...,...,...,...,...,...,...
5096,browns socialhouse,restaurant,49.033067,-123.069255,False,0,0,0
5097,wood n frog coffee company,cafe,49.024524,-123.066485,False,0,0,0
5098,ridge garden,fast_food,49.250815,-123.168022,False,0,0,0
5099,kanaka creek coffee,cafe,49.187647,-122.552271,False,0,0,0


### Label Encoding

In [56]:
#amenities = np.unique(rest['amenity'])

In [67]:
# Learn the set of amenity strings, then store each row's integer code next to
# the original value. `ale` is kept because later cells read `ale.classes_`.
ale = LabelEncoder().fit(rest['amenity'])
amenity_labels = ale.transform(rest['amenity'])
rest['amenity_label'] = amenity_labels
# The code -> amenity-name mapping can be recovered with:
#   dict(enumerate(ale.classes_))

In [68]:
# Same treatment for the boolean chain flag: fit on the column, then write the
# integer codes back. `cle` is kept because later cells read `cle.classes_`.
cle = LabelEncoder().fit(rest['is_chain'])
chain_labels = cle.transform(rest['is_chain'])
rest['chain_label'] = chain_labels

In [70]:
# Reorder so each categorical column sits directly before its encoded label.
column_order = [
    'name', 'lat', 'lon',
    'price_level', 'rating', 'user_ratings_total',
    'amenity', 'amenity_label',
    'is_chain', 'chain_label',
]
rest = rest[column_order]

In [71]:
# Show the frame with the label-encoded columns beside their source columns.
rest

Unnamed: 0,name,lat,lon,price_level,rating,user_ratings_total,amenity,amenity_label,is_chain,chain_label
0,starbucks,49.260812,-123.125736,0,0,0,cafe,2,True,1
1,waves,49.263582,-123.128836,0,0,0,cafe,2,True,1
2,mcdonald's,49.263448,-123.138067,0,0,0,fast_food,3,True,1
3,denny's,49.263225,-123.133390,0,0,0,restaurant,7,True,1
4,the keg,49.271156,-123.135808,0,0,0,restaurant,7,True,1
...,...,...,...,...,...,...,...,...,...,...
5096,browns socialhouse,49.033067,-123.069255,0,0,0,restaurant,7,False,0
5097,wood n frog coffee company,49.024524,-123.066485,0,0,0,cafe,2,False,0
5098,ridge garden,49.250815,-123.168022,0,0,0,fast_food,3,False,0
5099,kanaka creek coffee,49.187647,-122.552271,0,0,0,cafe,2,False,0


### One-Hot Encoding

In [111]:
from sklearn.preprocessing import OneHotEncoder

In [118]:
# One-hot encode the integer amenity codes: one indicator column per amenity.
a_ohe = OneHotEncoder()
a_feature_arr = a_ohe.fit_transform(rest[['amenity_label']]).toarray()

# Take the column names from the encoder's own category order and translate the
# codes back to amenity strings, so the headers cannot drift from the columns
# they describe.
a_feature_labels = list(ale.inverse_transform(a_ohe.categories_[0]))

# Reuse rest's index: pd.concat(axis=1) aligns on index labels, so a fresh
# default index here would misalign rows if rest were ever filtered or sorted.
a_features = pd.DataFrame(a_feature_arr,
                          index=rest.index,
                          columns=a_feature_labels)

In [119]:
# One-hot encode the integer chain codes: one indicator column per flag value.
c_ohe = OneHotEncoder()
c_feature_arr = c_ohe.fit_transform(rest[['chain_label']]).toarray()

# Build the headers from the encoder's own category order (mapped back to the
# original flag values) so each 'chain_<value>' name matches its column.
c_feature_labels = ['chain_' + str(cls_label)
                    for cls_label in cle.inverse_transform(c_ohe.categories_[0])]

# Reuse rest's index: pd.concat(axis=1) aligns on index labels, so a fresh
# default index here would misalign rows if rest were ever filtered or sorted.
c_features = pd.DataFrame(c_feature_arr,
                          index=rest.index,
                          columns=c_feature_labels)

In [120]:
# Show the amenity indicator columns produced by the one-hot encoder.
a_features

Unnamed: 0,bar,bistro,cafe,fast_food,ice_cream,juice_bar,pub,restaurant
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...
5096,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
5097,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
5098,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
5099,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [121]:
# Show the chain indicator columns produced by the one-hot encoder.
c_features

Unnamed: 0,chain_False,chain_True
0,0.0,1.0
1,0.0,1.0
2,0.0,1.0
3,0.0,1.0
4,0.0,1.0
...,...,...
5096,1.0,0.0
5097,1.0,0.0
5098,1.0,0.0
5099,1.0,0.0


In [129]:
# Place the original columns and both indicator blocks side by side.
# Column-wise concat matches rows by index label.
frames_to_join = [rest, a_features, c_features]
encoded_df = pd.concat(frames_to_join, axis='columns')

In [130]:
# Presentation order: each categorical column, then its label, then its
# indicator columns. Built as a single list literal with unpacking instead of
# sum(list_of_lists, []), which re-copies the accumulator for every piece.
columns = [
    'name', 'lat', 'lon', 'price_level', 'rating', 'user_ratings_total',
    'amenity', 'amenity_label',
    *a_feature_labels,
    'is_chain', 'chain_label',
    *c_feature_labels,
]

In [131]:
# Display the encoded frame in the order given by `columns`. The selection is
# not assigned, so encoded_df itself keeps the column order from the concat.
encoded_df[columns]

Unnamed: 0,name,lat,lon,price_level,rating,user_ratings_total,amenity,amenity_label,bar,bistro,cafe,fast_food,ice_cream,juice_bar,pub,restaurant,is_chain,chain_label,chain_False,chain_True
0,starbucks,49.260812,-123.125736,0,0,0,cafe,2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,True,1,0.0,1.0
1,waves,49.263582,-123.128836,0,0,0,cafe,2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,True,1,0.0,1.0
2,mcdonald's,49.263448,-123.138067,0,0,0,fast_food,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,True,1,0.0,1.0
3,denny's,49.263225,-123.133390,0,0,0,restaurant,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,True,1,0.0,1.0
4,the keg,49.271156,-123.135808,0,0,0,restaurant,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,True,1,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5096,browns socialhouse,49.033067,-123.069255,0,0,0,restaurant,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,False,0,1.0,0.0
5097,wood n frog coffee company,49.024524,-123.066485,0,0,0,cafe,2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,False,0,1.0,0.0
5098,ridge garden,49.250815,-123.168022,0,0,0,fast_food,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,False,0,1.0,0.0
5099,kanaka creek coffee,49.187647,-122.552271,0,0,0,cafe,2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,False,0,1.0,0.0
