In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Load the raw survey table; the path is relative to the notebook's working directory.
data = pd.read_csv('restaurant_customer_satisfaction.csv')

# Preview only the first rows instead of rendering the whole frame as cell output.
data.head()

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
data.info()

# Summary statistics; left as the last expression so the notebook renders it
# as a formatted table.
data.describe()

In [None]:
# Correlate numeric columns only. With pandas >= 2.0 a bare data.corr() raises
# when the frame still contains non-numeric columns (they are only encoded
# later via get_dummies), so restrict explicitly. Requires pandas >= 1.5.
numeric_corr = data.corr(numeric_only=True)

plt.figure(figsize=(12, 8))
sns.heatmap(numeric_corr, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

In [None]:
# Target: the satisfaction label the classifier is trained to predict.
y = data['HighSatisfaction']

# Features: everything except the target and the CustomerID column, with
# non-numeric columns expanded into dummy indicators (first level dropped).
X = pd.get_dummies(
    data.drop(columns=['CustomerID', 'HighSatisfaction']),
    drop_first=True,
)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit a 100-tree random forest on the training portion (fit returns the estimator).
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predicted labels for the held-out rows.
y_pred = clf.predict(X_test)

In [None]:
# Print the per-class metric report first, then the confusion matrix, both
# computed from the held-out labels and the model's predictions.
for metric in (classification_report, confusion_matrix):
    print(metric(y_test, y_pred))

In [None]:
# Pair each fitted importance score with its feature column name and order
# them from most to least important so the chart reads top-down.
feature_importance = (
    pd.Series(clf.feature_importances_, index=X.columns)
    .sort_values(ascending=False)
)

# Horizontal bars: importance on the x-axis, one row per feature.
plt.figure(figsize=(12, 8))
sns.barplot(x=feature_importance, y=feature_importance.index)
plt.title('Feature Importance')
plt.show()

In [None]:
# Mean of each rating column within each HighSatisfaction group.
ratings = data[['ServiceRating', 'FoodRating', 'AmbianceRating', 'HighSatisfaction']]
avg_ratings = ratings.groupby('HighSatisfaction').mean().reset_index()

# Reshape to long form (one row per group/rating pair) so the three ratings can
# be drawn side by side. Overlaying three separate barplots on the same axes
# stacks the bars on top of each other, hiding values and leaving the y-axis
# labelled with only the last series drawn.
avg_ratings_long = avg_ratings.melt(
    id_vars='HighSatisfaction',
    value_vars=['ServiceRating', 'FoodRating', 'AmbianceRating'],
    var_name='RatingType',
    value_name='AverageRating',
)

plt.figure(figsize=(12, 8))
sns.barplot(data=avg_ratings_long, x='HighSatisfaction', y='AverageRating', hue='RatingType')
plt.title('Average Ratings by Satisfaction')
plt.ylabel('Average Rating')
plt.legend(title='Rating Type')
plt.show()