In [1]:
# Step 1: Import required libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


In [2]:
# Step 2: Load the dataset
df = pd.read_csv('/content/Dataset .csv')
 # adjust path if different
df.columns = df.columns.str.strip()  # clean column names
df.fillna('', inplace=True)


In [3]:
# Step 3: Combine selected features into one
df['combined'] = (
    df['Restaurant Name'].astype(str) + ' ' +
    df['Cuisines'].astype(str) + ' ' +
    df['Average Cost for two'].astype(str) + ' ' +
    df['Price range'].astype(str) + ' ' +
    df['Aggregate rating'].astype(str) + ' ' +
    df['Votes'].astype(str)
)


In [4]:
# Step 4: TF-IDF Vectorizer for combined features
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(df['combined'])


In [5]:
# Step 5: Label encode the target (Cuisines)
le = LabelEncoder()
y = le.fit_transform(df['Cuisines'])


In [6]:
# Step 6: Split dataset into train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [7]:
# Step 7: Train the model
model = RandomForestClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)


In [8]:
# Step 8: Evaluate the model
# To avoid target_names mismatch error:
labels_present = np.unique(np.concatenate([y_test, y_pred]))
print(classification_report(
    y_test, y_pred,
    labels=labels_present,
    target_names=le.inverse_transform(labels_present)
))


                                                                                           precision    recall  f1-score   support

                                                                                                0.50      0.25      0.33         4
                                                                                  Afghani       1.00      1.00      1.00         1
                                                Afghani, North Indian, Pakistani, Arabian       0.00      0.00      0.00         1
                                                                                 American       0.38      1.00      0.55         9
                                                       American, Asian, European, Seafood       0.00      0.00      0.00         1
                                     American, Asian, North Indian, Mexican, South Indian       0.00      0.00      0.00         1
                                                                            Americ

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
