# Mall Customers Clustering & Random Forest Regression + Modelling

## Load Dataset

In [None]:
import pandas as pd

# Read the customer table from disk; later cells reference it as `dfm`.
dfm = pd.read_csv(filepath_or_buffer="Mall_Customers.csv")

# Cell output: the first five rows of the frame.
dfm.head(n=5)

## EDA

In [None]:
# Structural overview of the frame.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
dfm.info()

# Descriptive statistics produced by describe() with its default settings.
print(dfm.describe())

# Number of missing values in each column.
print(dfm.isnull().sum())

## Scaling Data

In [None]:
from sklearn.preprocessing import StandardScaler

# Columns used as clustering features.
X = dfm[[
    'Age',
    'Annual Income (k$)',
    'Spending Score (1-100)',
]]

# Learn the scaling parameters from X, then apply them to the same data.
scaler = StandardScaler()
scaler.fit(X)
Xs = scaler.transform(X)

# Cell output: the first five scaled rows.
Xs[:5]

## Elbow Method

In [None]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# One KMeans fit per candidate k (2..10) on the scaled features, keeping the
# fitted model's inertia_ for each.
inertia = [
    KMeans(n_clusters=k, random_state=42).fit(Xs).inertia_
    for k in range(2, 11)
]

# Inertia against k; the "elbow" of this curve suggests a cluster count.
plt.plot(range(2, 11), inertia, 'o-')
plt.title("Elbow Method")
plt.show()

## Silhouette Score

In [None]:
from sklearn.metrics import silhouette_score

# Silhouette score of the KMeans labelling for each candidate k (2..10),
# computed on the same scaled features that were clustered.
sil = []
for k in range(2, 11):
    labels = KMeans(n_clusters=k, random_state=42).fit_predict(Xs)
    score = silhouette_score(Xs, labels)
    sil.append(score)

# Score against k.
plt.plot(range(2, 11), sil, 'o-')
plt.title("Silhouette Score")
plt.show()

## Final Clustering (k=4)

In [None]:
import seaborn as sns

# Fit the final model with four clusters on the scaled features and store each
# row's cluster label back on the original frame.
km = KMeans(n_clusters=4, random_state=42)
km.fit(Xs)
dfm['Cluster'] = km.labels_

# Income vs. spending score in original units, coloured by cluster label.
sns.scatterplot(
    data=dfm,
    x='Annual Income (k$)',
    y='Spending Score (1-100)',
    hue='Cluster',
    palette='tab10',
)
plt.show()

# Cell output: per-cluster mean of the three clustering features.
(
    dfm
    .groupby('Cluster')[['Age', 'Annual Income (k$)', 'Spending Score (1-100)']]
    .mean()
)

## Random Forest Regression

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

# Regress the spending score on age and annual income.
# The inputs are the raw columns of `dfm`; the StandardScaler fitted for the
# clustering section is not applied here.
Xr = dfm[['Age', 'Annual Income (k$)']]
yr = dfm['Spending Score (1-100)']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    Xr, yr, test_size=0.2, random_state=42
)

rf = RandomForestRegressor(n_estimators=300, random_state=42)
rf.fit(X_train, y_train)
pred = rf.predict(X_test)

# RMSE is computed as the square root of the MSE instead of passing
# squared=False: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, where it raises TypeError. This form works on every version.
rmse = mean_squared_error(y_test, pred) ** 0.5
r2 = r2_score(y_test, pred)

# Cell output: (RMSE, R^2) on the held-out rows.
rmse, r2

## Model Persistence (Save Model & Scaler)

In [None]:
import pickle

# Serialise the fitted regressor and the fitted scaler, one pickle file each.
# NOTE(review): `scaler` was fitted on the three clustering columns, while `rf`
# was trained on the raw Age / Annual Income columns - confirm the scaler is
# meant for the clustering features and not for the regressor's inputs.
for path, artifact in (
    ("model_random_forest.pkl", rf),
    ("scaler.pkl", scaler),
):
    with open(path, "wb") as fh:
        pickle.dump(artifact, fh)

print("Model & Scaler saved!")

## Load Model & Scaler

In [None]:
# Imported here as well so this cell also runs in a fresh session.
import pickle

# NOTE: unpickling can execute arbitrary code embedded in the file; only load
# artifacts written by this notebook or obtained from a trusted source.
# The files are opened in `with` blocks so the handles are closed
# deterministically instead of being left open until garbage collection.
with open("model_random_forest.pkl", "rb") as f:
    loaded_model = pickle.load(f)

# NOTE(review): the saved scaler was fitted on three columns (including the
# spending score), whereas the saved model was trained on raw Age / Annual
# Income values - do not pass model inputs through this scaler.
with open("scaler.pkl", "rb") as f:
    loaded_scaler = pickle.load(f)