# Customer
Training a model that can cluster customers into different groups.

### Importing the required libraries 

In [None]:
import numpy as np 
from sklearn.cluster import DBSCAN, KMeans
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import matplotlib.pyplot as plt
import pandas as pd
from joblib import dump
%matplotlib inline

### Reading the data and trying to understand it

In [None]:
# Load the raw customer records, then print the per-column non-null counts
# followed by the per-column dtypes.
df = pd.read_csv("customer.csv")
for summary in (df.count(), df.dtypes):
    print(summary)

In [None]:
# Preview the first five rows to sanity-check the parsed columns.
df.head(n=5)

## Normalizing the Data

In [None]:
# Prepare the feature matrix used by the clustering cells below.

# Drop incomplete rows FIRST: LabelEncoder.transform raises on any value it
# was not fitted on, so a missing Gender would otherwise fail before the
# incomplete rows were ever removed. The explicit copy keeps the column
# assignment below working on an independent frame.
df = df.dropna().copy()

# Encode Gender as integers; the encoder is fitted on the two known classes.
le_sex = LabelEncoder()
le_sex.fit(['Female', 'Male'])
df['Gender'] = le_sex.transform(df['Gender'])

# Rescale every feature with MinMaxScaler so all columns share a common range
# before distances are computed on them.
feature_columns = ['Gender', 'Age', 'Annual Income (k$)', 'Spending Score (1-100)']
x = np.asanyarray(df[feature_columns])
min_max_scaler = MinMaxScaler()
x = min_max_scaler.fit_transform(x)
df.head()

## Training the Model using DBSCAN

In [None]:
def dbscan(r, m, printLabels = False, printCores = False):
    """Fit DBSCAN on the global feature matrix ``x`` and print a summary.

    The fitted estimator and its labels are stored in the module-level
    names ``db`` and ``dbLabels`` so that later cells can use the result of
    the most recent call.

    Parameters
    ----------
    r : float
        Neighbourhood radius, passed to DBSCAN as ``eps``.
    m : int
        Passed to DBSCAN as ``min_samples``.
    printLabels : bool, optional
        When true, print the label assigned to every sample.
    printCores : bool, optional
        When true, print the boolean mask that marks the core samples.
    """
    global db, dbLabels

    print('radius:', r)
    print('minimum samples:', m)
    print('============')
    db = DBSCAN(eps=r, min_samples=m).fit(x)
    dbLabels = db.labels_
    if printLabels:
        print('labels:', dbLabels)
    uniqueLabels = set(dbLabels)
    print('============')
    print('unique_labels:', uniqueLabels)
    print('============')

    # Build a mask that is True for core samples and False for everything
    # else. This output is controlled by printCores (it was previously gated
    # on printLabels, which left printCores without any effect).
    if printCores:
        core_samples_mask = np.zeros_like(dbLabels, dtype=bool)
        core_samples_mask[db.core_sample_indices_] = True
        print('cores:', core_samples_mask)

## Trying other radius and minimum-sample values

In [None]:
# Sweep the radius over ten evenly spaced values from 0.1 to 1 and the
# minimum sample count over 3..10, printing a summary for every combination.
for raw_radius in np.linspace(0.1, 1, 10):
    radius = round(raw_radius, 2)
    for min_pts in range(3, 11):
        dbscan(radius, min_pts)

# Final fit with the chosen parameters, printing labels and core samples.
dbscan(0.3, 6, True, True)

## Insight on the data

In [None]:
# Attach each row's DBSCAN label, then print the column means per cluster.
df['DBSCAN-Clus'] = dbLabels
dbscan_cluster_means = df.groupby('DBSCAN-Clus').mean()
print(dbscan_cluster_means)
df.head()

## Training the model using K-Means

In [None]:
def kmeans(clusterNum, initAmount):
    """Fit K-Means on the global feature matrix ``x`` and print the labels.

    The fitted estimator and its labels are stored in the module-level
    names ``k_means`` and ``kmeansLabels`` for use by later cells.

    Parameters
    ----------
    clusterNum : int
        Passed to KMeans as ``n_clusters``.
    initAmount : int
        Passed to KMeans as ``n_init``.
    """
    global k_means, kmeansLabels

    for caption, value in (('clusterNum', clusterNum), ('initAmount', initAmount)):
        print(caption, value)
    print('============')

    # 'k-means++' initialisation is used here rather than 'random'.
    settings = {
        'init': 'k-means++',
        'n_clusters': clusterNum,
        'n_init': initAmount,
    }
    k_means = KMeans(**settings)
    k_means.fit(x)
    kmeansLabels = k_means.labels_
    print(kmeansLabels)

## Checking other cluster numbers and init numbers

In [None]:
# Try every combination of 1..4 clusters and 1..14 initialisations, printing
# the labels produced by each fit.
for n_clusters in range(1, 5):
    for n_init in range(1, 15):
        kmeans(clusterNum=n_clusters, initAmount=n_init)

# Final fit with the chosen settings.
kmeans(clusterNum=3, initAmount=15)

## Insight on the data

In [None]:
# Attach each row's K-Means label, then print the column means per cluster.
df['KMeans-Clus'] = kmeansLabels
kmeans_cluster_means = df.groupby('KMeans-Clus').mean()
print(kmeans_cluster_means)
df.head()

## Saving the model for future use

In [None]:
# Persist both fitted estimators with joblib so they can be reloaded later.
dump(value=k_means, filename='customer-kmeans-model.joblib')
dump(value=db, filename='customer-dbscan-model.joblib')