In [None]:
pip install seaborn

In [None]:
#Import relevant libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Data Understanding

Data Exploration

In [None]:
# Read the customer-satisfaction CSV; the path is relative to the working directory
csv_path = 'restaurant_customer_satisfaction.csv'
dataset = pd.read_csv(csv_path)

In [None]:
# Show the loaded dataset as the cell output
dataset

In [None]:
# Preview the first five rows of the dataset
dataset.head(n=5)

In [None]:
# Preview the last three rows of the dataset
dataset.tail(n=3)

In [None]:
# Select the rows labelled 0 through 5; .loc slices by label and includes both end points
dataset.loc[0:5]

In [None]:
# View every third row between labels 0 and 20
dataset.loc[0:20:3]

In [None]:
# View the dimensions of the dataset as (rows, columns)
dataset.shape

In [None]:
# View the column names of the dataset
dataset.columns

In [None]:
# Keep an independent copy of the full dataset under the name `df`.
# `dataset` is already a DataFrame, and pd.DataFrame(dataset) does not copy the
# underlying data by default, so an explicit copy keeps the two frames decoupled.
df = dataset.copy()
df

In [None]:
# Print detailed information about each feature (column) of the frame
df.info()

In [None]:
# View a single column: all rows of Income
df.loc[:, 'Income']

In [None]:
# View specific rows (labels 0 to 1) of the Income column
df['Income'].loc[0:1]

In [None]:
# View only the Age, Gender and MealType columns
selected_columns = ["Age", "Gender", "MealType"]
df[selected_columns]

In [None]:
# Mean of the FoodRating column, kept in `meann` so a later cell can round it
food_ratings = df['FoodRating']
meann = food_ratings.mean()
meann

In [None]:
# Round the mean to two decimal places
round(meann, ndigits=2)

In [None]:
# View the data type of each column
dataset.dtypes

In [None]:
# Count the missing values in each feature (column) of the dataset
dataset.isna().sum()

In [None]:
# Drop the columns that are not used in the rest of the analysis.
# `dataset` is overwritten with the result, so on a second run of this cell the
# columns are already gone; errors='ignore' makes the re-run a no-op instead of
# raising KeyError. Trade-off: a misspelled column name is skipped silently.
unwanted_columns = [
    'CustomerID',
    'Gender',
    'GroupSize',
    'MealType',
    'OnlineReservation',
    'DeliveryOrder',
    'WaitTime',
    'AverageSpend',
]
dataset = dataset.drop(columns=unwanted_columns, errors='ignore')

In [None]:
# View the remaining column names after dropping the unwanted columns
dataset.columns

In [None]:
# Get the statistical description of the data (by default only numerical columns are described)
dataset.describe()

# Data Preprocessing


In [None]:
# Work on a deep copy so the preprocessing steps leave `dataset` untouched
original_dataset = dataset.copy(deep=True)

In [None]:
# Group the columns by kind: multi-valued categorical columns vs. numeric-coded columns
non_binary_categorical_columns = [
    'VisitFrequency',
    'PreferredCuisine',
    'TimeOfVisit',
    'DiningOccasion',
]
numarical_columns = [
    'Age',
    'Income',
    'LoyaltyProgramMember',
    'ServiceRating',
    'FoodRating',
    'AmbianceRating',
    'HighSatisfaction',
]

In [None]:
# One-hot encode each multi-valued categorical column; the column name is reused as the prefix
VisitFrequency_dummie, PreferredCuisine_dummie, TimeOfVisit_dummie, DiningOccasion_dummie = (
    pd.get_dummies(original_dataset[column_name], prefix=column_name)
    for column_name in ('VisitFrequency', 'PreferredCuisine', 'TimeOfVisit', 'DiningOccasion')
)

In [None]:
# Check what kind of object the encoding step produced
type(TimeOfVisit_dummie)

In [None]:
# List the dummy columns generated for TimeOfVisit
TimeOfVisit_dummie.columns

In [None]:
# Preview the first five rows of the TimeOfVisit dummy columns
TimeOfVisit_dummie.head(n=5)

In [None]:
# Remove the multi-valued categorical columns from the copy and store the result in a new variable
new_data = original_dataset.drop(columns=non_binary_categorical_columns)

In [None]:
# Place the remaining columns and the four groups of dummy columns side by side
frames_to_join = [
    new_data,
    VisitFrequency_dummie,
    PreferredCuisine_dummie,
    TimeOfVisit_dummie,
    DiningOccasion_dummie,
]
OH_data = pd.concat(frames_to_join, axis='columns')
print(OH_data)

# Convert every column label to a string
OH_data.columns = OH_data.columns.astype(str)

OH_data.head()

In [None]:
# Copy the encoded frame and count the missing values in each column
encoded_data = OH_data.copy()
encoded_data.isna().sum()

# Data Visualization

In [None]:
# Draw one histogram (with a KDE curve) per numerical feature, each on its own figure
numeric_columns = [
    'Income',
    'HighSatisfaction',
    'LoyaltyProgramMember',
    'ServiceRating',
    'FoodRating',
    'AmbianceRating',
]
for column in numeric_columns:
    fig, ax = plt.subplots(figsize=(8, 4))
    # Missing values are removed before plotting
    sns.histplot(df[column].dropna(), kde=True, ax=ax)
    ax.set_title(f'Distribution of {column}')
    plt.show()

In [None]:

# Distribution plot of the Income column
income_values = encoded_data["Income"]
sns.displot(income_values)


In [None]:
# Distribution plot of the HighSatisfaction column
satisfaction_values = encoded_data["HighSatisfaction"]
sns.displot(satisfaction_values)

In [None]:
# Distribution plot of the LoyaltyProgramMember column
loyalty_values = encoded_data["LoyaltyProgramMember"]
sns.displot(loyalty_values)

In [None]:
# Box plot of the service ratings (title corrected from the misspelt "Explot")
plt.boxplot(encoded_data['ServiceRating'])
plt.title('Service Rating Boxplot')
plt.ylabel('Service Rating')
plt.show()

In [None]:
# Box plot of the food ratings (title corrected from the misspelt "Explot")
plt.boxplot(encoded_data['FoodRating'])
plt.title('Food Rating Boxplot')
plt.ylabel('Food Rating')
plt.show()

In [None]:
# Box plot of the ambiance ratings (title corrected from the misspelt "Explot";
# the y-label is spaced like the labels of the other rating box plots)
plt.boxplot(encoded_data['AmbianceRating'])
plt.title('Ambiance Rating Boxplot')
plt.ylabel('Ambiance Rating')
plt.show()

In [None]:
# Pie chart of the LoyaltyProgramMember values.
# Slice sizes and slice labels are taken from one value_counts() result, so a
# slice can never be paired with a label computed from a different frame.
loyalty_counts = dataset['LoyaltyProgramMember'].value_counts()
plt.figure(figsize=(6, 6))
plt.pie(loyalty_counts, labels=loyalty_counts.index, autopct='%1.1f%%', startangle=90)
plt.title('Loyalty Program Member Distribution')
plt.show()

In [None]:
# Pie chart of the PreferredCuisine values.
# Slice sizes and slice labels are taken from one value_counts() result, so a
# slice can never be paired with a label computed from a different frame.
cuisine_counts = dataset['PreferredCuisine'].value_counts()
plt.figure(figsize=(6, 6))
plt.pie(cuisine_counts, labels=cuisine_counts.index, autopct='%1.1f%%', startangle=90)
plt.title('Preferred Cuisine Distribution')
plt.show()

In [None]:
# Pie chart of the VisitFrequency values.
# Slice sizes and slice labels are taken from one value_counts() result, so a
# slice can never be paired with a label computed from a different frame.
visit_counts = dataset['VisitFrequency'].value_counts()
plt.figure(figsize=(6, 6))
plt.pie(visit_counts, labels=visit_counts.index, autopct='%1.1f%%', startangle=90)
plt.title('Visit Frequency Distribution')
plt.show()

In [None]:
# Pie chart of the DiningOccasion values.
# Slice sizes and slice labels are taken from one value_counts() result, so a
# slice can never be paired with a label computed from a different frame.
occasion_counts = dataset['DiningOccasion'].value_counts()
plt.figure(figsize=(6, 6))
plt.pie(occasion_counts, labels=occasion_counts.index, autopct='%1.1f%%', startangle=90)
plt.title('Dining Occasion Distribution')
plt.show()

In [None]:
# Pairwise correlations of all encoded features, then the target's column
# (LoyaltyProgramMember) ordered from the highest correlation to the lowest
corr_matrix = encoded_data.corr()
target_correlations = corr_matrix['LoyaltyProgramMember']
corr_with_target = target_correlations.sort_values(ascending=False)
print(corr_with_target)

In [None]:
# Bar chart of each independent variable's correlation with the target variable
fig, ax = plt.subplots(figsize=(10, 6))
# The target's correlation with itself is left out of the chart
feature_correlations = corr_with_target.drop('LoyaltyProgramMember')
feature_correlations.plot(kind='bar', color='green', ax=ax)
ax.set_title('Correlation with Target Variable (Loyalty Program Member)')
ax.set_xlabel('Features')
ax.set_ylabel('Correlation')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Heatmap of the full correlation matrix, with each cell annotated by its value.
# The title's "Loyality" misspelling and trailing space are corrected so it matches
# the wording used on the correlation bar chart.
plt.figure(figsize=(20, 12))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.1)
plt.title('Correlation Heatmap of the Loyalty Program Member (Target Variable)')
plt.show()