# In [None]:
# Crime rate clustering analysis
from sklearn.cluster import KMeans
from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
from sklearn import metrics
import plotly.graph_objects as go
import pandas as pd
from plotly.figure_factory import create_dendrogram
from scipy.cluster.hierarchy import linkage

# Load the cleaned crime table; the first CSV column becomes the row index
# (those index values are later used to label the dendrogram leaves).
crime_DF = pd.read_csv(
    '../cleaned_data/crime_VM_cleaned.csv',
    index_col=0,
)
# Standardize the table with sklearn's `scale` so that no single column
# dominates the distance-based clustering performed below.
crime_scaled = scale(crime_DF)

#
def cluster_match(labels):
    """Return the given cluster labels unchanged.

    Every plot in this script passes its cluster labels through this
    function before using them as marker colors, so it currently acts as an
    identity pass-through.

    NOTE(review): presumably a placeholder hook for remapping cluster ids so
    that colors stay consistent across algorithms -- confirm intent.

    Parameters
    ----------
    labels
        Cluster labels as produced by a fitted estimator's ``labels_``.

    Returns
    -------
    The very same ``labels`` object, not a copy.
    """
    return labels

## PCA 3 components
# Project the scaled data onto its first three principal components so the
# clusters can be drawn in a 3D scatter plot. `random_state` only matters if
# sklearn selects a randomized SVD solver, in which case it keeps the
# projection reproducible.
X_3d = PCA(n_components=3, random_state=0).fit_transform(crime_scaled)

# kmeans clustering
# KMeans starts from random centroids: without a fixed seed the cluster ids
# (and therefore the plot colors and the printed score) can change from run
# to run. `n_init` is pinned because its library default differs between
# scikit-learn versions.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X_3d)

# kmeans 3d plot
fig = go.Figure(data=[go.Scatter3d(x=X_3d[:, 0], y=X_3d[:, 1], z=X_3d[:, 2],
                                   mode='markers',
                                   marker=dict(
                                       size=3,
                                       # set color to an array/list of desired values
                                       color=cluster_match(kmeans.labels_),
                                       opacity=0.8
                                   ))])
fig.update_layout(title='KMeans for 2 clusters')
fig.show()

# Silhouette ranges from -1 to 1; higher means better separated clusters.
print('Kmeans Silhouette Score:')
print(metrics.silhouette_score(X_3d, kmeans.labels_, metric='euclidean'))

# Agglomerative Clustering: ward
from sklearn.cluster import AgglomerativeClustering

# fit() returns the estimator itself, so construction and fitting can be
# chained into one expression.
ward = AgglomerativeClustering(n_clusters=2).fit(X_3d)

# Dendrogram of the same 3D projection, built with scipy's Ward linkage and
# labelled with the row index of the original table.
dendrogram_options = dict(
    orientation='left',
    labels=crime_DF.index.values,
    linkagefun=lambda points: linkage(points, 'ward', metric='euclidean'),
)
fig = create_dendrogram(X_3d, **dendrogram_options)
fig.update_layout(title='Ward Clustering Dendrogram', width=800, height=1500)
fig.show()

## 3d
# Split the three principal-component columns into separate axes.
pc1, pc2, pc3 = X_3d.T
ward_marker = {
    'size': 3,
    'color': cluster_match(ward.labels_),  # one color value per sample
    'opacity': 0.8,
}
ward_trace = go.Scatter3d(x=pc1, y=pc2, z=pc3, mode='markers', marker=ward_marker)
fig = go.Figure(data=[ward_trace])
fig.update_layout(title='Ward Clustering for 2 clusters')
fig.show()

# Print the header and the score on separate lines.
ward_silhouette = metrics.silhouette_score(X_3d, ward.labels_, metric='euclidean')
print('Ward Silhouette Score:', ward_silhouette, sep='\n')

# DBSCAN
from sklearn.cluster import DBSCAN

# Density-based clustering of the 3D projection. Samples that do not belong
# to any dense region are labelled -1 (noise) by DBSCAN.
dbscan = DBSCAN(eps=1, min_samples=5).fit(X_3d)

# The plot keeps the noise points: they are colored by their -1 label.
fig = go.Figure(data=[go.Scatter3d(x=X_3d[:, 0], y=X_3d[:, 1], z=X_3d[:, 2],
                                   mode='markers',
                                   marker=dict(
                                       size=3,
                                       # set color to an array/list of desired values
                                       color=cluster_match(dbscan.labels_),
                                       opacity=0.8
                                   ))])
fig.update_layout(title='DBSCAN')
fig.show()

# Noise (-1) is not a cluster; scoring it as one distorts the silhouette, so
# only the clustered samples are evaluated.
clustered = dbscan.labels_ != -1
n_clusters = len(set(dbscan.labels_[clustered]))

print('DBSCAN Silhouette Score:')
if n_clusters >= 2:
    print(metrics.silhouette_score(X_3d[clustered], dbscan.labels_[clustered],
                                   metric='euclidean'))
else:
    # silhouette_score raises ValueError with fewer than 2 distinct labels;
    # report that instead of crashing the whole analysis.
    print('undefined: DBSCAN found {} cluster(s), at least 2 are required'
          .format(n_clusters))