In [92]:
# Import modules
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
from pathlib import Path
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

In [93]:
file_path = Path("Data/cleaned_data.csv")
df= pd.read_csv(file_path)

In [94]:
# Create new columns and check if 'delivery', 'pickup', or 'restaurant_reservation' is in 'transactions'
df['delivery'] = df['transactions'].str.contains('delivery', case=False, na=False)
df['pickup'] = df['transactions'].str.contains('pickup', case=False, na=False)
df['restaurant_reservation'] = df['transactions'].str.contains('restaurant_reservation', case=False, na=False)

# Fill NaN values with False in the new columns
df['delivery'] = df['delivery'].fillna(False)
df['pickup'] = df['pickup'].fillna(False)
df['restaurant_reservation'] = df['restaurant_reservation'].fillna(False)

In [95]:
df.head()

Unnamed: 0,id,name,image_url,is_closed,url,review_count,rating,transactions,price,group_city,cuisines,latitude,longitude,state,delivery,pickup,restaurant_reservation
0,a0IET3_yCFcO36OqGSsisg,Eataly NYC Flatiron,https://s3-media4.fl.yelpcdn.com/bphoto/1UDlnu...,False,https://www.yelp.com/biz/eataly-nyc-flatiron-n...,6101,4.0,"delivery, pickup",2,New York City,Italian,40.742101,-73.989922,NY,True,True,False
1,zj8Lq1T8KIC5zwFief15jg,Prince Street Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/PfI8oV...,False,https://www.yelp.com/biz/prince-street-pizza-n...,5028,4.5,"delivery, pickup",1,New York City,Italian,40.723088,-73.99453,NY,True,True,False
2,16ZnHpuaaBt92XWeJHCC5A,Olio e Più,https://s3-media4.fl.yelpcdn.com/bphoto/CUpPgz...,False,https://www.yelp.com/biz/olio-e-pi%C3%B9-new-y...,4828,4.5,"pickup, delivery",2,New York City,Italian,40.733798,-73.999774,NY,True,True,False
3,vyoA8dxwScuMV_AsTcjQcg,L & B Spumoni Gardens,https://s3-media1.fl.yelpcdn.com/bphoto/hN5xKw...,False,https://www.yelp.com/biz/l-and-b-spumoni-garde...,4648,4.0,"delivery, pickup",2,New York City,Italian,40.594715,-73.981316,NY,True,True,False
4,22nKUyCIbpnzR6R3_g1ptQ,Carmine's Italian Restaurant - Times Square,https://s3-media1.fl.yelpcdn.com/bphoto/0UszeE...,False,https://www.yelp.com/biz/carmines-italian-rest...,4640,4.0,"delivery, pickup",2,New York City,Italian,40.757498,-73.986653,NY,True,True,False


In [96]:
y = df['rating']
x = df.drop(columns=['rating', 'image_url', 'url','name', 'id'])
#X = df.drop(columns='rating')
x.head()

Unnamed: 0,is_closed,review_count,transactions,price,group_city,cuisines,latitude,longitude,state,delivery,pickup,restaurant_reservation
0,False,6101,"delivery, pickup",2,New York City,Italian,40.742101,-73.989922,NY,True,True,False
1,False,5028,"delivery, pickup",1,New York City,Italian,40.723088,-73.99453,NY,True,True,False
2,False,4828,"pickup, delivery",2,New York City,Italian,40.733798,-73.999774,NY,True,True,False
3,False,4648,"delivery, pickup",2,New York City,Italian,40.594715,-73.981316,NY,True,True,False
4,False,4640,"delivery, pickup",2,New York City,Italian,40.757498,-73.986653,NY,True,True,False


In [97]:
from sklearn import preprocessing
from sklearn import utils

lab = preprocessing.LabelEncoder()
y_transformed = lab.fit_transform(y)

In [98]:
X= x.fillna(0)

In [109]:
X.head()

Unnamed: 0,is_closed,review_count,price,latitude,longitude,transactions_0,transactions_delivery,"transactions_delivery, pickup","transactions_delivery, pickup, restaurant_reservation","transactions_delivery, restaurant_reservation",...,state_PE,state_SC,state_TN,state_TX,state_VA,state_WA,state_WI,delivery_delivery,pickup_pickup,restaurant_reservation_restaurant_reservation
0,False,6101,2,40.742101,-73.989922,False,False,True,False,False,...,False,False,False,False,False,False,False,True,True,True
1,False,5028,1,40.723088,-73.99453,False,False,True,False,False,...,False,False,False,False,False,False,False,True,True,True
2,False,4828,2,40.733798,-73.999774,False,False,False,False,False,...,False,False,False,False,False,False,False,True,True,True
3,False,4648,2,40.594715,-73.981316,False,False,True,False,False,...,False,False,False,False,False,False,False,True,True,True
4,False,4640,2,40.757498,-73.986653,False,False,True,False,False,...,False,False,False,False,False,False,False,True,True,True


In [118]:
X =pd.get_dummies(X).astype(int)

In [119]:
X.head()

Unnamed: 0,is_closed,review_count,price,latitude,longitude,transactions_0,transactions_delivery,"transactions_delivery, pickup","transactions_delivery, pickup, restaurant_reservation","transactions_delivery, restaurant_reservation",...,state_PE,state_SC,state_TN,state_TX,state_VA,state_WA,state_WI,delivery_delivery,pickup_pickup,restaurant_reservation_restaurant_reservation
0,0,6101,2,40,-73,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,1
1,0,5028,1,40,-73,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,1
2,0,4828,2,40,-73,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,1
3,0,4648,2,40,-73,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,1
4,0,4640,2,40,-73,0,0,1,0,0,...,0,0,0,0,0,0,0,1,1,1


In [120]:
X.dtypes

is_closed                                        int32
review_count                                     int32
price                                            int32
latitude                                         int32
longitude                                        int32
                                                 ...  
state_WA                                         int32
state_WI                                         int32
delivery_delivery                                int32
pickup_pickup                                    int32
restaurant_reservation_restaurant_reservation    int32
Length: 106, dtype: object

In [121]:
X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, random_state=42)

In [122]:
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)

X_test_scaled = X_scaler.transform(X_test)

In [129]:
## K Nearest Neighbor method
from sklearn.neighbors import KNeighborsClassifier


knn = KNeighborsClassifier(n_neighbors=25)

knn.fit(X_train_scaled, y_train)

In [130]:
y_pred = knn.predict(X_test_scaled)

In [131]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        11
           1       0.08      0.07      0.07       102
           2       0.18      0.26      0.21       222
           3       0.17      0.15      0.16       292
           4       0.20      0.08      0.12       517
           5       0.32      0.35      0.33      1191
           6       0.40      0.58      0.48      1538
           7       0.25      0.10      0.14       800
           8       0.42      0.24      0.30       114

    accuracy                           0.33      4787
   macro avg       0.22      0.20      0.20      4787
weighted avg       0.30      0.33      0.30      4787



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
