# Mean Shift Clustering

## Step 1: Environment

In [None]:
import numpy as np
import pandas as pd

from sklearn.cluster import  MeanShift, estimate_bandwidth

from sklearn import metrics
import matplotlib.pyplot as plt


from scipy.spatial.distance import cdist

import seaborn as sns
import plotly.express as pxp
import plotly.graph_objs as gph


## Step 2: Data Preparation

In [None]:
# Print the current working directory via a shell escape; useful for
# checking what the relative paths used in this notebook resolve against.
!pwd

In [None]:
# Load the input data into a DataFrame.
# The path is relative to the notebook's working directory rather than an
# absolute path inside one user's home folder, so the notebook also runs
# on other machines.
# NOTE(review): read_csv treats the first row as column names by default;
# if the file has no header row, pass header=None - confirm against the file.
data_path = '../data/data_clustering.txt'
data = pd.read_csv(data_path, delimiter=',')
data

In [None]:
# Dimensions of the loaded DataFrame as (rows, columns)
data.shape

In [None]:
# Preview the first rows of the DataFrame
data.head()

In [None]:
# Use every column of the DataFrame as a feature and convert it to a
# NumPy array for the clustering steps below
X = data.to_numpy()
X

## Step 3: Train and Implement the Model

In [None]:
# Let scikit-learn estimate the mean-shift bandwidth from the data
# instead of choosing it by hand
bandwidth = estimate_bandwidth(
    X,
    quantile=0.2,
    n_samples=200,
)
bandwidth

In [None]:
# Build a mean-shift model with the estimated bandwidth and fit it on the
# feature array X
msmodel = MeanShift(
    bandwidth=bandwidth,
    bin_seeding=True,
)
msmodel.fit(X)

In [None]:
# Distinct cluster labels produced by the fit, and the per-sample labels
labels_unique = np.unique(msmodel.labels_)
labels = msmodel.labels_

In [None]:
# Number of clusters = number of distinct labels
n_clusters_ = labels_unique.size
n_clusters_

In [None]:
# Keep the cluster centres found by the fitted model (reused by the 3D plot
# further down) and display them
cluster_centers = msmodel.cluster_centers_
cluster_centers

In [None]:
# Predict the cluster for all the samples in X (the same array the model
# was fitted on) and display the predicted labels
Y = msmodel.predict(X)
Y

In [None]:
# Number of labels stored on the fitted model
len(msmodel.labels_)

In [None]:
# 2D scatter plot of the first two columns of X.
# c      - marker colour, taken from the cluster label of each sample
# marker - marker shape, here a filled circle
plt.scatter(
    X[:, 0],
    X[:, 1],
    c=labels,
    marker="o",
    picker=True,
)
plt.xlabel('x')
plt.ylabel('y')
plt.title(f'Estimated number of clusters = {n_clusters_}')
plt.show()

In [None]:
# Visualise the clusters on 3D axes.
# The 3D axes are created *before* the title is set: calling plt.title() on
# a figure without axes implicitly creates a 2D axes, which would then sit
# underneath the 3D axes and draw a second, overlapping frame.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_title('Discovered Clusters')

# Use the third feature as the z coordinate when the data has one; with only
# two features every point is drawn on the z = 0 plane.
has_third_feature = X.shape[1] > 2
sample_z = X[:, 2] if has_third_feature else 0
center_z = cluster_centers[:, 2] if has_third_feature else 0

# Samples, coloured by cluster label
# viridis - a color map, https://matplotlib.org/users/colormaps.html
ax.scatter(X[:, 0], X[:, 1], sample_z, marker='o', cmap='viridis', c=labels)

# Cluster centres, drawn as large red crosses
ax.scatter(cluster_centers[:, 0], cluster_centers[:, 1], center_z, marker='x',
           color='red', s=100, linewidth=3, zorder=10)
plt.show()

## Step 4: Store the Model

In [None]:
!pip install joblib

In [None]:
# Serialise the fitted model to a file so it can be loaded again later
from pathlib import Path

import joblib

model_path = '../deploy/msmodel.pkl'
# joblib.dump cannot write into a directory that does not exist, so make
# sure the target directory is there first
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(msmodel, model_path)