# Import Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Import Dataset

In [2]:
df=pd.read_csv('Dataset .csv')
df.head(5)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


# Data Preprocessing

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9551 entries, 0 to 9550
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         9551 non-null   int64  
 1   Restaurant Name       9551 non-null   object 
 2   Country Code          9551 non-null   int64  
 3   City                  9551 non-null   object 
 4   Address               9551 non-null   object 
 5   Locality              9551 non-null   object 
 6   Locality Verbose      9551 non-null   object 
 7   Longitude             9551 non-null   float64
 8   Latitude              9551 non-null   float64
 9   Cuisines              9542 non-null   object 
 10  Average Cost for two  9551 non-null   int64  
 11  Currency              9551 non-null   object 
 12  Has Table booking     9551 non-null   object 
 13  Has Online delivery   9551 non-null   object 
 14  Is delivering now     9551 non-null   object 
 15  Switch to order menu 

In [4]:
df.drop(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address', 'Locality', 'Currency', 'Rating text','Rating color','Longitude' ,'Latitude','Locality Verbose'], axis=1, inplace=True)

In [5]:
df.isnull().sum()

Cuisines                9
Average Cost for two    0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Votes                   0
dtype: int64

In [6]:
df['Cuisines'].fillna('Unknown', inplace=True)

In [7]:
df.isnull().sum()

Cuisines                0
Average Cost for two    0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Votes                   0
dtype: int64

In [8]:
from sklearn.preprocessing import LabelEncoder

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Label encoding for Cuisines
df['Cuisines Encoded'] = label_encoder.fit_transform(df['Cuisines'])

# Display the encoded DataFrame
print(df[['Cuisines', 'Cuisines Encoded']])


                              Cuisines  Cuisines Encoded
0           French, Japanese, Desserts               920
1                             Japanese              1111
2     Seafood, Asian, Filipino, Indian              1671
3                      Japanese, Sushi              1126
4                     Japanese, Korean              1122
...                                ...               ...
9546                           Turkish              1813
9547   World Cuisine, Patisserie, Cafe              1825
9548            Italian, World Cuisine              1110
9549                   Restaurant Cafe              1657
9550                              Cafe               331

[9551 rows x 2 columns]


In [9]:
df.isnull().sum()

Cuisines                0
Average Cost for two    0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Votes                   0
Cuisines Encoded        0
dtype: int64

In [10]:
df.replace({'Has Table booking': {'Yes': 1,'No': 0}}, inplace=True)
df.replace({'Has Online delivery':{'Yes': 1,'No': 0}}, inplace=True)
df.replace({'Is delivering now': {'Yes': 1,'No': 0}}, inplace=True)
df.replace({'Switch to order menu':{'Yes': 1,'No': 0}}, inplace=True)

# Splitting Data

In [11]:
# Splitting the dataset into features and target variable
X = df.drop('Cuisines', axis=1)
y = df['Cuisines']

In [12]:
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Model Selection and Training

In [13]:
# Initialize SVM classifier
svm_classifier = SVC(kernel='linear', random_state=42)

In [14]:
# Define hyperparameters for tuning
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [15]:
# Train the classifier
svm_classifier.fit(X_train, y_train)

# Model Evaluation

In [16]:
# Predict on the test set
y_pred = svm_classifier.predict(X_test)

In [17]:
# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7002791346824843


# Generate classification report

In [18]:
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Classification Report:
                                                                                               precision    recall  f1-score   support

                                                                                      Afghani       1.00      1.00      1.00         1
                                                                    Afghani, Mughlai, Chinese       0.00      0.00      0.00         0
                                                    Afghani, North Indian, Pakistani, Arabian       0.00      0.00      0.00         1
                                                                                     American       0.80      0.89      0.84         9
                                                           American, Asian, European, Seafood       0.00      0.00      0.00         1
                                         American, Asian, North Indian, Mexican, South Indian       0.00      0.00      0.00         1
                              

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
