# Exercise - Customer Segmentation Clustering

## Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly as ply
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.subplots as sp
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

## Import Data

In [None]:
# Load the Mall Customers dataset from a CSV file in the working directory.
df = pd.read_csv("./Mall_Customers.csv") 
# You can download the dataset at https://www.kaggle.com/datasets/vjchoudhary7/customer-segmentation-tutorial-in-python/data?select=Mall_Customers.csv
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Descriptive summary statistics of the data frame.
df.describe()

In [None]:
# Dimensions of the data frame as (rows, columns).
df.shape

## Data Analysis

In [None]:
# Horizontal bar chart with one bar per value of the 'Gender' column.
sns.countplot(data=df, y='Gender')
plt.show()

In [None]:
# Remove the identifier column before plotting pairwise relationships.
# Rebinding with errors='ignore' keeps this cell safe to re-run: an in-place
# drop raises KeyError the second time because the column is already gone.
df = df.drop(columns=['CustomerID'], errors='ignore')

# Pairwise scatter/distribution grid of the remaining columns, split by gender.
sns.pairplot(df, hue='Gender')
plt.show()

## Data Pre-processing

In [None]:
# Build three two-feature views of the data and standardise each of them.
# fit_transform re-fits the scaler on every call, so each view is scaled with
# its own statistics; `scaler` is left fitted on the X3 columns afterwards.
scaler = StandardScaler()
X1 = scaler.fit_transform(df[['Age', 'Spending Score (1-100)']].values)
X2 = scaler.fit_transform(df[['Annual Income (k$)', 'Spending Score (1-100)']].values)
X3 = scaler.fit_transform(df[['Age', 'Annual Income (k$)']].values)

## Model Training

In [None]:
# Fit K-Means on X1 for k = 1..9 and record each model's inertia
# (within-cluster sum of squared distances) for the elbow plot.
# A fixed random_state makes the resulting curve reproducible between runs.
sse = []
k_rng = range(1, 10)
for k in k_rng:
    km1 = KMeans(n_clusters=k, n_init=10, random_state=42)
    km1.fit(X1)
    sse.append(km1.inertia_)

In [None]:
# Elbow Method: plot the recorded inertia against the number of clusters.
plt.xlabel('K')
plt.ylabel('Sum of squared error')
plt.plot(k_rng, sse, linewidth=2, marker='8')
# Render explicitly, like the other plotting cells, so the figure also
# appears when this code runs outside a notebook.
plt.show()

In [None]:
# Fit a 4-cluster K-Means model on X1 and keep the per-row cluster labels.
# random_state pins the centroid initialisation so the labels are the same
# on every run.
km1 = KMeans(n_clusters=4, n_init=10, random_state=42)
y_predicted1 = km1.fit_predict(X1)
y_predicted1

In [None]:
# Scatter the scaled X1 points coloured by their assigned cluster.
plt.scatter(X1[:, 0], X1[:, 1], c=km1.labels_, cmap='rainbow')
# Overlay the fitted cluster centres as black stars.
plt.scatter(km1.cluster_centers_[:, 0], km1.cluster_centers_[:, 1],
            color='black', marker='*', label='centroid')
plt.xlabel('Age (scaled)')
plt.ylabel('Spending Score (scaled)')
plt.legend()
# A single show() is enough; the second call had nothing left to draw.
plt.show()

In [None]:
# Keyword arguments shared by every KMeans built through kmeans_model:
# k-means++ initialisation, 20 initialisations, at most 300 iterations per
# run, and a fixed seed.
kmeans_kwargs = dict(
    init='k-means++',
    n_init=20,
    max_iter=300,
    random_state=42,
)

In [None]:
def kmeans_model(k, x):
    """Fit a K-Means model with ``k`` clusters on ``x``.

    The estimator is configured from the module-level ``kmeans_kwargs``.

    Args:
        k: Number of clusters to form.
        x: Feature matrix to fit on.

    Returns:
        A ``(model, labels, centroids)`` tuple: the fitted estimator, its
        ``labels_`` attribute and its ``cluster_centers_`` attribute.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    fitted = KMeans(n_clusters=k, **kmeans_kwargs).fit(x)
    return fitted, fitted.labels_, fitted.cluster_centers_

In [None]:
def plot_clusters(x, h, model, labels, centroids):
    """Plot two-feature points over the model's predicted cluster regions.

    Args:
        x: 2-D array; column 0 goes on the x-axis, column 1 on the y-axis.
        h: Step size of the mesh grid used to rasterise the regions.
        model: Fitted estimator whose ``predict`` accepts two-feature rows.
        labels: Per-row labels used to colour the points of ``x``.
        centroids: 2-D array of cluster centres; columns 0 and 1 are plotted.
    """
    # Pad the data range by one unit on every side so no point sits on the edge.
    x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
    y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Predict a cluster for every grid node, then restore the grid shape so
    # the predictions can be drawn as an image.
    z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    z = z.reshape(xx.shape)

    sns.set_style('ticks')
    # Open the target figure directly. Calling plt.clf() first would create
    # (and leave behind) an extra empty figure when no figure is open.
    plt.figure(figsize=(12, 6))

    # Background: predicted cluster regions.
    plt.imshow(z, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap='Pastel1', aspect='auto',
               origin='lower')

    # Foreground: the data points coloured by label, then the centroids.
    sns.scatterplot(x=x[:, 0], y=x[:, 1],
                    hue=labels, palette='tab10', s=100)
    plt.scatter(x=centroids[:, 0], y=centroids[:, 1],
                s=100, c='black', alpha=0.8, marker='X')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.show()

In [None]:
# Cluster the X2 view into 5 groups and draw the resulting regions.
X2_model, X2_labels, X2_centroids = kmeans_model(k=5, x=X2)
plot_clusters(
    x=X2,
    h=0.02,
    model=X2_model,
    labels=X2_labels,
    centroids=X2_centroids,
)

In [None]:
# Cluster the X3 view into 3 groups and draw the resulting regions.
X3_model, X3_labels, X3_centroids = kmeans_model(k=3, x=X3)
plot_clusters(
    x=X3,
    h=0.02,
    model=X3_model,
    labels=X3_labels,
    centroids=X3_centroids,
)