Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter presumably also hides scikit-learn
# diagnostics (e.g. convergence / undefined-metric warnings) - confirm that
# is intended before trusting the metrics below.
warnings.filterwarnings(action='ignore')

Load Dataset

In [2]:
# Location of the raw CSV. The file name really does contain a space before
# ".csv"; presumably this is a Colab upload path - adjust when running elsewhere.
DATASET_PATH = r'/content/Dataset .csv'

# Read the full dataset and preview the first five rows.
df = pd.read_csv(DATASET_PATH)
df.head(5)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


Dropping Unnecessary Columns

In [3]:
# Columns removed before modelling; only 'Restaurant Name', 'Cuisines' and
# 'Average Cost for two' remain afterwards (see the preview in the next cell).
columns_to_drop = [
    'Restaurant ID', 'Country Code', 'City', 'Address', 'Locality',
    'Locality Verbose', 'Longitude', 'Latitude', 'Currency',
    'Has Table booking', 'Has Online delivery', 'Is delivering now',
    'Switch to order menu', 'Price range', 'Aggregate rating',
    'Rating color', 'Rating text', 'Votes',
]

# Mutates df in place; re-running this cell without reloading df raises
# KeyError because the columns are already gone.
df.drop(columns=columns_to_drop, inplace=True)

In [4]:
# Preview the first five rows of the reduced frame.
df.head(n=5)

Unnamed: 0,Restaurant Name,Cuisines,Average Cost for two
0,Le Petit Souffle,"French, Japanese, Desserts",1100
1,Izakaya Kikufuji,Japanese,1200
2,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4000
3,Ooma,"Japanese, Sushi",1500
4,Sambo Kojin,"Japanese, Korean",1500


In [5]:
# Count missing entries per column before removing anything.
null_counts = df.isna().sum()
print("Missing values in each column:")
print(null_counts)

# Drop every row that has at least one missing value (in place), then
# report the resulting (rows, columns) shape.
df.dropna(axis=0, how='any', inplace=True)
print("Shape after removing missing values:", df.shape)

Missing values in each column:
Restaurant Name         0
Cuisines                9
Average Cost for two    0
dtype: int64
Shape after removing missing values: (9542, 3)


Encoding Categorical Variables

In [6]:
# Encode each text column with its OWN encoder. Re-fitting a single
# LabelEncoder on a second column discards the first column's class mapping,
# so the restaurant-name codes could no longer be mapped back to names.
name_encoder = LabelEncoder()
df['Restaurant Name'] = name_encoder.fit_transform(df['Restaurant Name'])

# `label_encoder` keeps its original final state: fitted on 'Cuisines' (the
# target), so label_encoder.inverse_transform(...) decodes predicted classes.
label_encoder = LabelEncoder()
df['Cuisines'] = label_encoder.fit_transform(df['Cuisines'])

# Preview the integer-coded frame.
df.head()

Unnamed: 0,Restaurant Name,Cuisines,Average Cost for two
0,3742,920,1100
1,3167,1111,1200
2,2892,1671,4000
3,4700,1126,1500
4,5515,1122,1500


Splitting the Dataset


In [7]:
# Feature matrix (encoded restaurant name + cost for two) and target
# (encoded cuisine label).
features = df[['Restaurant Name', 'Average Cost for two']]
Y = df['Cuisines']

# Split BEFORE scaling so the held-out rows take no part in preprocessing.
# Same test_size and random_state as before, so the same rows land in each split.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    features, Y, test_size=0.2, random_state=10
)

# Fit the scaler on the training rows only; fitting on all rows lets
# test-set statistics leak into the model's inputs.
scaler = StandardScaler()
scaler.fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Keep `X` available as the scaled full feature array (as before), now
# expressed with the training-set mean/std.
X = scaler.transform(features)

Decision Tree Classifier

In [8]:
# Train a Gini-criterion decision tree with a fixed seed on the training split.
model_DT = DecisionTreeClassifier(criterion="gini", random_state=10)
model_DT.fit(X_train, Y_train)

# Predict cuisine labels for the held-out rows.
Y_pred_DT = model_DT.predict(X_test)

# Compute the three evaluation artefacts first, then print them in order.
cm_DT = confusion_matrix(Y_test, Y_pred_DT)
report_DT = classification_report(Y_test, Y_pred_DT)
acc_DT = accuracy_score(Y_test, Y_pred_DT)

print("Confusion Matrix (Decision Tree):\n", cm_DT)
print("Classification Report (Decision Tree):\n", report_DT)
print("Accuracy (Decision Tree):", acc_DT)

Confusion Matrix (Decision Tree):
 [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Classification Report (Decision Tree):
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.00      0.00      0.00         1
           6       0.25      0.14      0.18         7
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         1
          11       0.00      0.00      0.00         1
          12       0.00      0.00      0.00         1
          13       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         1
          16       1.00      1.00      1.00         2
          18       0.00      0.00      0.00         1
          20       0.00      0.00      0.00         1
          21       0.00      0.00      0.00     

Logistic Regression

In [9]:
# Multinomial logistic regression on the training split.
# The explicit `multi_class="multinomial"` argument is deprecated in recent
# scikit-learn releases; with the default solver a multiclass target is
# already fitted with the multinomial loss, so omitting it keeps the model
# the same while staying compatible with newer versions.
# NOTE(review): max_iter is left at its default; with this many classes the
# solver may stop before converging - confirm and raise max_iter if so.
classifier = LogisticRegression()
classifier.fit(X_train, Y_train)

# Predict cuisine labels for the held-out rows.
Y_pred_LR = classifier.predict(X_test)

print("Confusion Matrix (Logistic Regression):\n", confusion_matrix(Y_test, Y_pred_LR))
print("Classification Report (Logistic Regression):\n", classification_report(Y_test, Y_pred_LR))
print("Accuracy (Logistic Regression):", accuracy_score(Y_test, Y_pred_LR))

Confusion Matrix (Logistic Regression):
 [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
Classification Report (Logistic Regression):
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.00      0.00      0.00         1
           6       0.00      0.00      0.00         7
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         1
          11       0.00      0.00      0.00         1
          12       0.00      0.00      0.00         1
          13       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         1
          16       0.00      0.00      0.00         2
          18       0.00      0.00      0.00         1
          20       0.00      0.00      0.00         1
          21       0.00      0.00   

Observations:
Decision Tree Classifier:

Accuracy: 24.36%
Confusion Matrix and Classification Report are displayed above.
Handles complex data well but tends to overfit on training data.
Logistic Regression:

Accuracy: 10.58%
Confusion Matrix and Classification Report are displayed above.
Simpler model that works best for linearly separable data.
Conclusion:
Decision Tree Classifier performed significantly better compared to Logistic Regression, achieving a higher accuracy.
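However, the overall accuracy is low. The target is the full `Cuisines` string, which label-encodes to well over a thousand distinct classes (codes as high as 1671 appear above), many of them with only one or two test samples, and the only features are the encoded restaurant name and the average cost for two. Additional preprocessing (e.g., reducing each restaurant to a primary cuisine), feature engineering, or the use of more advanced models (e.g., Random Forest, Gradient Boosting) is necessary for better performance.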