# Task: Predict Restaurant Ratings

# Import Libraries 

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score


# Load the dataset from the specified path


In [5]:
file_path = r'C:\Users\manis\OneDrive\Desktop\Dataset .csv'
df = pd.read_csv(file_path)


# Display the first few rows of the dataset

In [7]:
print(df.head())

   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong City   
3      SM 


# Handle missing values by dropping rows with any missing values

In [13]:

df.dropna(axis=0, how='any', inplace=True)



# Encode categorical variables using one-hot encoding

In [16]:
df = pd.get_dummies(df, drop_first=True)


# Split the data into features and target variable

In [17]:

X = df.drop('Votes', axis=1)
y = df['Votes']

In [18]:

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [19]:
# Choose and train the regression model (Decision Tree Regressor)
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

In [20]:
# Make predictions on the testing set
y_pred = model.predict(X_test)


In [21]:
# Evaluate the model's performance using Mean Squared Error (MSE) and R-squared (R²)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')


Mean Squared Error: 146471.11105290728
R-squared: 0.03227924771989332


In [22]:
# Interpret the model's results by analyzing feature importance
feature_importances = pd.Series(model.feature_importances_, index=X.columns).sort_values(ascending=False)
print("Feature Importances:")
print(feature_importances)

Feature Importances:
Aggregate rating                                                                                                    0.240828
Restaurant ID                                                                                                       0.144865
City_Bangalore                                                                                                      0.109024
Longitude                                                                                                           0.080506
Locality_Connaught Place                                                                                            0.039990
                                                                                                                      ...   
Address_1st Floor, ATR Towers, Harbour Park Road,  Panduranga Puram, Near Harbour Park, Kirlampudi Layout, Vizag    0.000000
Address_1st Floor, Above ICICI Bank, Next to Avenue Centre, Panampilly Nagar, Kochi                     