# In [None]:
# Task 1: Predict restaurant ratings
# Analyze the most influential features affecting restaurant ratings.

#Importing Libraries 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the dataset and discard every row that has a missing value in any column.
datafile = pd.read_csv('/content/Dataset .csv').dropna()


# Data preparation
# Predictors: two text columns (one-hot encoded below) plus cost and vote counts.
features = datafile[['Locality', 'Cuisines', 'Average Cost for two', 'Votes']]
target = datafile['Aggregate rating']

# One-hot encode the non-numeric columns; drop_first omits the first level of
# each encoded column.
features = pd.get_dummies(features, drop_first=True)

# Train-test split
# Split BEFORE scaling: the scaler must learn its mean/std from the training
# rows only, otherwise statistics of the held-out rows leak into preprocessing.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

scaler = StandardScaler()
numeric_cols = ['Average Cost for two', 'Votes']
# Same guard as before: scale only when both columns survived the encoding step.
if all(col in features.columns for col in numeric_cols):
    # Explicit copies so the column assignments below never write into
    # `features` (which is left unscaled; later code only reads its columns).
    X_train = X_train.copy()
    X_test = X_test.copy()
    X_train[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
    # Reuse the training statistics for the held-out rows.
    X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])

# Model training
# Fit a 100-tree random forest with a fixed seed on the training split,
# then predict ratings for the held-out split.
model_rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train, y_train
)
pred = model_rf.predict(X_test)

# Feature importance
# Pair every model input column with the importance score of the fitted forest.
cols = features.columns
importance = model_rf.feature_importances_
imp_df = pd.DataFrame({'Parameter': cols, 'Score': importance})
# Highest score first, so head(10) yields the ten top-ranked columns.
imp_df = imp_df.sort_values(by='Score', ascending=False)


print("Most influential features affecting restaurant ratings:")
print(imp_df.head(10), "\n")

# Evaluation on the held-out split.
r2_val = r2_score(y_test, pred)
mse_val = mean_squared_error(y_test, pred)
rmse_val = pow(mse_val, 0.5)  # RMSE is the square root of the MSE
print("RMSE:", round(rmse_val, 4))
print("R² Score:", round(r2_val, 2), "\n")

# Visualization
# Horizontal bar chart of the first ten rows of the importance table.
top_features = imp_df.head(10)
plt.figure(figsize=(10, 6))
plt.barh(top_features['Parameter'], top_features['Score'], color='tomato')
plt.title("Most influential features affecting restaurant ratings")
plt.xlabel('Importance Value')
plt.ylabel('Feature')
plt.gca().invert_yaxis()  # flip the axis so the first row is drawn at the top
plt.tight_layout()
plt.show()
