In [11]:
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier

In [12]:
file_path = Path("Data/cleaned_data2.csv")
df= pd.read_csv(file_path)

In [13]:
df['delivery'] = df['transactions'].str.contains('delivery', case=False, na=False)
df['pickup'] = df['transactions'].str.contains('pickup', case=False, na=False)
df['restaurant_reservation'] = df['transactions'].str.contains('restaurant_reservation', case=False, na=False)

# Fill NaN values with False in the new columns
df['delivery'] = df['delivery'].fillna(False)
df['pickup'] = df['pickup'].fillna(False)
df['restaurant_reservation'] = df['restaurant_reservation'].fillna(False)

In [18]:
new_york = []

for x in df['group_city']:
    if x == "New York City":
        new_york.append(x)
    else:
        new_york.append(0)



df['New York'] = np.array(new_york)

df= df.fillna(0)

df.head()


Unnamed: 0,id,name,image_url,is_closed,url,review_count,rating,transactions,price,group_city,cuisines,latitude,longitude,state,delivery,pickup,restaurant_reservation,New York
0,a0IET3_yCFcO36OqGSsisg,Eataly NYC Flatiron,https://s3-media4.fl.yelpcdn.com/bphoto/1UDlnu...,False,https://www.yelp.com/biz/eataly-nyc-flatiron-n...,6102,4.0,"delivery, pickup",2,New York City,Italian,40.742101,-73.989922,NY,True,True,False,New York City
1,zj8Lq1T8KIC5zwFief15jg,Prince Street Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/PfI8oV...,False,https://www.yelp.com/biz/prince-street-pizza-n...,5031,4.5,"delivery, pickup",1,New York City,Italian,40.723088,-73.99453,NY,True,True,False,New York City
2,16ZnHpuaaBt92XWeJHCC5A,Olio e Più,https://s3-media4.fl.yelpcdn.com/bphoto/CUpPgz...,False,https://www.yelp.com/biz/olio-e-pi%C3%B9-new-y...,4858,4.5,"delivery, pickup",2,New York City,Italian,40.733798,-73.999774,NY,True,True,False,New York City
3,vyoA8dxwScuMV_AsTcjQcg,L & B Spumoni Gardens,https://s3-media1.fl.yelpcdn.com/bphoto/hN5xKw...,False,https://www.yelp.com/biz/l-and-b-spumoni-garde...,4647,4.0,"delivery, pickup",2,New York City,Italian,40.594715,-73.981316,NY,True,True,False,New York City
4,22nKUyCIbpnzR6R3_g1ptQ,Carmine's Italian Restaurant - Times Square,https://s3-media1.fl.yelpcdn.com/bphoto/0UszeE...,False,https://www.yelp.com/biz/carmines-italian-rest...,4644,4.0,"delivery, pickup",2,New York City,Italian,40.757498,-73.986653,NY,True,True,False,New York City


In [19]:
def is_success(rating):
    return rating > 3.7

df['success'] = df['rating'].apply(is_success)

df.head()

Unnamed: 0,id,name,image_url,is_closed,url,review_count,rating,transactions,price,group_city,cuisines,latitude,longitude,state,delivery,pickup,restaurant_reservation,New York,success
0,a0IET3_yCFcO36OqGSsisg,Eataly NYC Flatiron,https://s3-media4.fl.yelpcdn.com/bphoto/1UDlnu...,False,https://www.yelp.com/biz/eataly-nyc-flatiron-n...,6102,4.0,"delivery, pickup",2,New York City,Italian,40.742101,-73.989922,NY,True,True,False,New York City,True
1,zj8Lq1T8KIC5zwFief15jg,Prince Street Pizza,https://s3-media4.fl.yelpcdn.com/bphoto/PfI8oV...,False,https://www.yelp.com/biz/prince-street-pizza-n...,5031,4.5,"delivery, pickup",1,New York City,Italian,40.723088,-73.99453,NY,True,True,False,New York City,True
2,16ZnHpuaaBt92XWeJHCC5A,Olio e Più,https://s3-media4.fl.yelpcdn.com/bphoto/CUpPgz...,False,https://www.yelp.com/biz/olio-e-pi%C3%B9-new-y...,4858,4.5,"delivery, pickup",2,New York City,Italian,40.733798,-73.999774,NY,True,True,False,New York City,True
3,vyoA8dxwScuMV_AsTcjQcg,L & B Spumoni Gardens,https://s3-media1.fl.yelpcdn.com/bphoto/hN5xKw...,False,https://www.yelp.com/biz/l-and-b-spumoni-garde...,4647,4.0,"delivery, pickup",2,New York City,Italian,40.594715,-73.981316,NY,True,True,False,New York City,True
4,22nKUyCIbpnzR6R3_g1ptQ,Carmine's Italian Restaurant - Times Square,https://s3-media1.fl.yelpcdn.com/bphoto/0UszeE...,False,https://www.yelp.com/biz/carmines-italian-rest...,4644,4.0,"delivery, pickup",2,New York City,Italian,40.757498,-73.986653,NY,True,True,False,New York City,True


In [27]:
y = df['success']
X = df.drop(columns=['rating', 'success', 'image_url', 'url', 'id', 'name', 'group_city'])

X.head()

Unnamed: 0,is_closed,review_count,transactions,price,cuisines,latitude,longitude,state,delivery,pickup,restaurant_reservation,New York
0,False,6102,"delivery, pickup",2,Italian,40.742101,-73.989922,NY,True,True,False,New York City
1,False,5031,"delivery, pickup",1,Italian,40.723088,-73.99453,NY,True,True,False,New York City
2,False,4858,"delivery, pickup",2,Italian,40.733798,-73.999774,NY,True,True,False,New York City
3,False,4647,"delivery, pickup",2,Italian,40.594715,-73.981316,NY,True,True,False,New York City
4,False,4644,"delivery, pickup",2,Italian,40.757498,-73.986653,NY,True,True,False,New York City


In [28]:
from sklearn import preprocessing
from sklearn import utils

lab = preprocessing.LabelEncoder()
y_transformed = lab.fit_transform(y)

In [29]:
y_transformed[:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 0], dtype=int64)

In [30]:
X =pd.get_dummies(X)

In [31]:
X_train, X_test, y_train, y_test = train_test_split(X, y_transformed, random_state=42)

In [32]:
scaler = StandardScaler()

X_scaler = scaler.fit(X_train)

X_train_scaled = X_scaler.transform(X_train)

X_test_scaled = X_scaler.transform(X_test)

In [33]:
model = RandomForestClassifier(n_estimators= 100, random_state=42)

In [34]:
model= model.fit(X_train_scaled, y_train)

In [35]:
predictions = model.predict(X_test_scaled)

In [36]:
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.64      0.63      0.63      2892
           1       0.64      0.65      0.65      2995

    accuracy                           0.64      5887
   macro avg       0.64      0.64      0.64      5887
weighted avg       0.64      0.64      0.64      5887

