<a href="https://colab.research.google.com/github/Vivek-ML001/Zomato_Food_Rating/blob/main/Zomato.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

In [None]:
# Load the Zomato export into `df`, decoding the file as latin-1.
# NOTE(review): the absolute '/content/...' path looks Colab-specific, while
# prepare_and_train_model() later reads 'zomato.csv' relative to the working
# directory — confirm both resolve to the same file.
df = pd.read_csv('/content/zomato.csv', encoding='latin1')
# Preview the first rows as the cell output.
df.head()

In [None]:
# (rows, columns) of the raw frame.
df.shape

In [None]:
# Column names, dtypes and non-null counts of the raw frame.
df.info()

In [None]:
# Missing values per column, before any cleaning.
df.isnull().sum()

In [None]:
# Number of rows that exactly repeat an earlier row.
df.duplicated().sum()

In [None]:
# Summary statistics of the raw frame.
df.describe()

In [None]:
# Handle missing values by dropping rows where 'Cuisines' is not present.
# The drop is done in place, so `df` itself loses those rows; the later
# string split on 'Cuisines' relies on no missing values remaining.
df.dropna(subset=['Cuisines'], inplace=True)

In [None]:
# Preview the frame after dropping rows without a cuisine.
df.head()

In [None]:
# Re-check missing values per column now that rows without a cuisine are gone.
df.isnull().sum()

In [None]:
# Reduce each comma-separated cuisine list to its leading entry
df['Cuisines'] = df['Cuisines'].map(lambda names: names.split(',', 1)[0])

In [None]:
# Encode the two 'Yes'/'No' flag columns as 1/0 so the model can consume them
yes_no_codes = {'Yes': 1, 'No': 0}
for flag_column in ('Has Table booking', 'Has Online delivery'):
    df[flag_column] = df[flag_column].replace(yes_no_codes)

In [None]:
# Preview the frame with the Yes/No columns converted to 1/0.
df.head()

In [None]:
# Use one-hot encoding to turn 'Cuisines' into 'Cuisine_<name>' indicator columns.
cuisines_dummies = pd.get_dummies(df['Cuisines'], prefix='Cuisine')
# Remove indicator columns left by an earlier run of this cell before joining.
# Without this, re-running the cell appends a second copy of every indicator,
# and selecting the feature columns later would pick each one more than once.
df = pd.concat(
    [df.drop(columns=cuisines_dummies.columns, errors='ignore'), cuisines_dummies],
    axis=1,
)

In [None]:
# Preview the frame with the one-hot cuisine columns appended.
df.head()

# Distribution of Restaurant Ratings

In [None]:
# Histogram (with KDE overlay) of the aggregate rating across restaurants
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['Aggregate rating'], kde=True, bins=20, ax=ax)
ax.set_title('Distribution of Aggregate Restaurant Ratings')
ax.set_xlabel('Aggregate Rating')
ax.set_ylabel('Number of Restaurants')
plt.show()

In [None]:
# Box plot of the rating, split by the 0/1 online-delivery flag
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x='Has Online delivery', y='Aggregate rating', ax=ax)
ax.set_title('Aggregate Rating vs. Online Delivery')
ax.set_xlabel('Has Online Delivery (0: No, 1: Yes)')
ax.set_ylabel('Aggregate Rating')
# Replace the raw 0/1 tick labels with readable names
ax.set_xticks([0, 1])
ax.set_xticklabels(['No Online Delivery', 'Online Delivery'])
plt.show()

In [None]:
# Scatter of the rating against the average cost for two
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='Average Cost for two', y='Aggregate rating', ax=ax)
ax.set_title('Aggregate Rating vs. Average Cost for two')
ax.set_xlabel('Average Cost for two')
ax.set_ylabel('Aggregate Rating')
plt.show()

In [None]:
# Count restaurants per primary cuisine
cuisine_counts = df['Cuisines'].value_counts()

# Show the first ten entries of that count
print("Top 10 Most Popular Cuisines:")
display(cuisine_counts.iloc[:10])

In [None]:
# Bar chart of the ten most common primary cuisines
fig, ax = plt.subplots(figsize=(12, 7))
cuisine_counts.head(10).plot(kind='bar', ax=ax)
ax.set_title('Top 10 Most Popular Cuisines')
ax.set_xlabel('Cuisine')
ax.set_ylabel('Number of Restaurants')
# Tilt the cuisine names so they do not overlap
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
    tick_label.set_ha('right')
fig.tight_layout()
plt.show()

In [None]:
# Count restaurants per 'Rating text' label and show the counts.
rating_category_counts = df['Rating text'].value_counts()
print("Number of Restaurants in Each Rating Category:")
display(rating_category_counts)

In [None]:
# Bar chart of how many restaurants fall in each rating category
fig, ax = plt.subplots(figsize=(10, 6))
rating_category_counts.plot(kind='bar', ax=ax)
ax.set_title('Number of Restaurants by Rating Category')
ax.set_xlabel('Rating Category')
ax.set_ylabel('Number of Restaurants')
# Tilt the category names so they do not overlap
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
    tick_label.set_ha('right')
fig.tight_layout()
plt.show()

In [None]:
# Isolate the restaurants whose rating label is 'Not rated'
is_not_rated = df['Rating text'] == 'Not rated'
not_rated_restaurants = df[is_not_rated]
display(not_rated_restaurants.head())
print(f"\nNumber of restaurants in 'Not rated' category: {len(not_rated_restaurants)}")

In [None]:
# Preview the last rows of the prepared frame.
df.tail()

# Defining Features (Inputs) and Target (Output)

In [None]:
# Model output: the restaurant's aggregate rating
target = 'Aggregate rating'

# Model inputs: five numeric/flag columns followed by every one-hot cuisine column
base_features = [
    'Average Cost for two',
    'Votes',
    'Price range',
    'Has Table booking',
    'Has Online delivery',
]
features = base_features + list(cuisines_dummies.columns)

In [None]:
# Feature matrix and target vector, taken from the prepared frame
X, y = df[features], df[target]

In [None]:
# Split the data into a training set (80%) and a testing set (20%).
# random_state=42 pins the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Train and Evaluate the Model

In [None]:
# Initialize the Random Forest Regressor model: 100 trees, a fixed seed for
# repeatable results, and n_jobs=-1 to train on all available CPU cores.
model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)

In [None]:
# Train the model on the training data (the held-out test rows are not used here).
model.fit(X_train, y_train)

In [None]:
# Use the trained model to make predictions on the test data.
# `y_pred` is reused by the metric and actual-vs-predicted cells further down.
y_pred = model.predict(X_test)

# Calculate the model's performance metrics

In [None]:
# Score the test-set predictions. Nothing is displayed here; the same two
# metrics are recomputed and printed by the metrics cell near the end.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [None]:
# Get the importance of each feature in the model's predictions and keep the
# ten highest-ranked ones.
importances = model.feature_importances_
feature_importance = (
    pd.DataFrame({'feature': features, 'importance': importances})
    .sort_values('importance', ascending=False)
    .head(10)
)
# Make the ranked table the cell output; without this line the cell computes
# the ranking but shows nothing.
feature_importance

In [None]:
# Create a hypothetical restaurant to get a "suggestion" (predicted rating).
# The keys are the non-cuisine entries of `features`; the cuisine indicator
# columns are added in the next cell.
new_restaurant = {
    'Average Cost for two': 300,
    'Votes': 100,
    'Price range': 2,
    'Has Table booking': 1,  # 1 = Yes, matching the Yes/No encoding applied to df
    'Has Online delivery': 0  # 0 = No
}

In [None]:
# Start with every one-hot cuisine indicator switched off
new_restaurant.update(dict.fromkeys(cuisines_dummies.columns, 0))

# Switch on the cuisine of the hypothetical restaurant, if the data contains it
north_indian_flag = 'Cuisine_North Indian'
if north_indian_flag in new_restaurant:
    new_restaurant[north_indian_flag] = 1

# Build a one-row frame whose columns follow the training feature order
new_restaurant_df = pd.DataFrame([new_restaurant])[features]

# Predict the rating for the new restaurant

In [None]:
# Predict the rating for the hypothetical restaurant. The next cell repeats
# this exact call and prints the result, so this cell is redundant.
predicted_rating = model.predict(new_restaurant_df)

In [None]:
# Score the hypothetical restaurant with the trained model
predicted_rating = model.predict(new_restaurant_df)

# Report the single prediction under a short banner
print(
    "\n--- Suggestion Example ---",
    f"Predicted rating for the new hypothetical restaurant: {predicted_rating[0]:.2f}",
    sep="\n",
)


# Model is ready

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import ipywidgets as widgets
from ipywidgets import interact, fixed

# --- All Preprocessing and Training Code ---
# This part is the same as before, ensuring the model is ready.

def prepare_and_train_model(csv_path='zomato.csv'):
    """Load the Zomato data, encode the features and fit the rating model.

    Parameters
    ----------
    csv_path : str, optional
        Location of the Zomato CSV file. Defaults to ``'zomato.csv'``, the
        path this function read before the parameter existed.

    Returns
    -------
    tuple
        ``(model, features, cuisines)``: the ``RandomForestRegressor`` fitted
        on the 80% training split, the ordered list of column names it was
        trained on, and the list of distinct primary cuisines in the data.
    """
    # latin-1 maps every possible byte to a character, so this read cannot
    # raise UnicodeDecodeError. The former fallback branch was therefore
    # unreachable, and it named a codec ('iso-88-59-1') that does not exist.
    df = pd.read_csv(csv_path, encoding='latin-1')

    # Keep only rows with a cuisine, then reduce each list to its first entry.
    df.dropna(subset=['Cuisines'], inplace=True)
    df['Cuisines'] = df['Cuisines'].apply(lambda x: x.split(',')[0])

    # Encode the Yes/No flags as 1/0. `map` avoids the downcasting
    # FutureWarning that `replace` emits on object columns in recent pandas;
    # any value other than 'Yes'/'No' becomes NaN instead of passing through.
    yes_no_codes = {'Yes': 1, 'No': 0}
    for flag_column in ('Has Table booking', 'Has Online delivery'):
        df[flag_column] = df[flag_column].map(yes_no_codes)

    # One indicator column per primary cuisine, named 'Cuisine_<name>'.
    cuisines_dummies = pd.get_dummies(df['Cuisines'], prefix='Cuisine')
    df = pd.concat([df, cuisines_dummies], axis=1)

    features = ['Average Cost for two', 'Votes', 'Price range', 'Has Table booking', 'Has Online delivery'] + list(cuisines_dummies.columns)
    target = 'Aggregate rating'

    X = df[features]
    y = df[target]

    # Same split parameters as the exploratory cells; only the training part
    # is needed here, so the test part is discarded.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
    model.fit(X_train, y_train)

    return model, features, list(df['Cuisines'].unique())

# Train the model and get necessary data for the UI.
# Note: this rebinds the notebook-level `model` created by the earlier
# training cell to the model returned by prepare_and_train_model().
model, features_list, unique_cuisines = prepare_and_train_model()


# --- Function to Make a Prediction based on UI Input ---

def get_suggestion(model, features, cuisine, cost, votes, price, booking, delivery):
    """Print the rating the model predicts for one described restaurant.

    ``features`` is the ordered list of column names the model expects.
    ``booking`` and ``delivery`` are treated as 1 when equal to ``'Yes'`` and
    0 otherwise. The indicator ``Cuisine_<cuisine>`` is set to 1 when it is
    one of ``features``; every other ``Cuisine_`` column is 0. Returns None.
    """
    chosen_flag = f"Cuisine_{cuisine}"

    # Numeric and yes/no inputs taken straight from the widget values
    row = {
        'Average Cost for two': cost,
        'Votes': votes,
        'Price range': price,
        'Has Table booking': int(booking == 'Yes'),
        'Has Online delivery': int(delivery == 'Yes'),
    }

    # One indicator per cuisine column: 1 for the chosen cuisine, 0 elsewhere
    row.update(
        {name: int(name == chosen_flag) for name in features if name.startswith('Cuisine_')}
    )

    # Single-row frame, columns arranged in the order the model was trained on
    model_input = pd.DataFrame([row])[features]

    rating = model.predict(model_input)[0]
    print(f"Predicted Rating: {rating:.2f} ⭐")


# --- Create and Display the Interactive Widgets ---

print("✨ Restaurant Suggestion Tool ✨")
print("Adjust the values below to see the predicted rating for your ideal restaurant.")

# One control per user-adjustable argument of get_suggestion, in display order
suggestion_controls = {
    'cuisine': widgets.Dropdown(options=sorted(unique_cuisines), description='Cuisine:'),
    'cost': widgets.IntSlider(min=0, max=8000, step=50, value=500, description='Avg Cost (for 2):'),
    'votes': widgets.IntSlider(min=0, max=10000, step=25, value=100, description='Votes:'),
    'price': widgets.IntSlider(min=1, max=4, step=1, value=2, description='Price Range:'),
    'booking': widgets.Dropdown(options=['Yes', 'No'], description='Table Booking?:'),
    'delivery': widgets.Dropdown(options=['Yes', 'No'], description='Online Delivery?:'),
}

# The model and its feature list are passed through unchanged (no widget);
# the trailing semicolon hides the return value of interact.
interact(
    get_suggestion,
    model=fixed(model),
    features=fixed(features_list),
    **suggestion_controls,
);

In [None]:
import joblib

# 2. Save the Trained Model to a File
model_path = 'model.pkl'
print(f"Saving model to '{model_path}'...")
joblib.dump(model, model_path)
print("Model saved successfully!")

# Calculate the model's performance metrics

In [None]:
# Score the held-out predictions and report both metrics to two decimals
mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)

print(
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"R-squared (R2): {r2:.2f}",
    sep="\n",
)

In [None]:
# Actual vs. predicted ratings on the test set
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(y_test, y_pred, alpha=0.5)
# Dashed diagonal from the lowest to the highest actual rating: points on it are perfect predictions
rating_span = [y_test.min(), y_test.max()]
ax.plot(rating_span, rating_span, 'k--', lw=2)
ax.set_xlabel('Actual Rating')
ax.set_ylabel('Predicted Rating')
ax.set_title('Actual vs. Predicted Restaurant Ratings')
ax.grid(True)
plt.show()