# Visualization of Ground Truth and All 4 Clustering Algorithms

Following our evaluation of the three implemented clustering algorithms (DBSCAN, Affinity Propagation and BIRCH), we visualize their results on our Spotify dataset, alongside the ground-truth genres and a scikit-learn KMeans run for comparison.

In [2]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE

# Modify sys.path to allow imports of own modules
BASE_DIR = os.path.join(os.path.abspath(""), "..")
sys.path.insert(0, BASE_DIR)

from clustering import birch
from clustering import dbscan
from clustering.affinity_propagation import affinity_propagation
from clustering.affinity_propagation import compute_similarity

from data.datasource import load_X_y

In [3]:
# Seed NumPy's global RNG *before* loading: load_X_y is asked for a 500-row
# sample, and if that sampling draws from the global RNG (not visible here --
# TODO confirm) seeding afterwards would leave the sample non-reproducible.
np.random.seed(42)

# Load dataset
X, y, df = load_X_y(os.path.join(BASE_DIR, "data", "SpotifyFeatures.csv"), sample_size=500)

n_samples, n_dims = X.shape

# 2D Visualization

In [4]:
# Dimensional Reduction with T-SNE to 2D
# random_state is pinned so the embedding is reproducible on its own, instead of
# depending on the global NumPy RNG state (and therefore on cell execution order).
tsne_2d = TSNE(n_components=2, perplexity=60, random_state=42)  # larger perplexity for larger datasets
X_tsne_2d = tsne_2d.fit_transform(X)

# 3D Visualization


In [5]:
# Dimensional Reduction with T-SNE to 3D
# random_state is pinned so the embedding is reproducible on its own, instead of
# depending on the global NumPy RNG state (and therefore on cell execution order).
tsne_3d = TSNE(n_components=3, perplexity=60, random_state=42)  # larger perplexity for larger datasets
X_tsne_3d = tsne_3d.fit_transform(X)

In [9]:
def _label_sort_key(label):
    """Order numeric-looking label strings by value, all other labels alphabetically."""
    try:
        return (0, float(label), "")
    except ValueError:
        return (1, 0.0, label)


def get_3Dclustering(y, X_tsne_3d, path):
    """Plot a 3D embedding coloured by label, export it as HTML and display it.

    Parameters
    ----------
    y : array-like
        One label per embedded point (cluster ids or class names). The values
        are flattened and converted to strings so that Plotly colours them as
        discrete categories.
    X_tsne_3d : array-like
        Embedding coordinates; columns 0, 1 and 2 are used as x, y and z.
    path : str or path-like
        Destination of the HTML export. A missing parent directory is created.

    Returns
    -------
    None
        The figure is written to ``path`` and shown via ``fig.show()``.
    """
    labels = np.asarray(y).reshape(-1).astype(str)

    colors = ['#37003c', '#2c0041', '#210046', '#16004b', '#0b0050', '#000055', '#00045a', '#00095f', '#000e64', '#001369',
              '#00186e', '#001d73', '#002178', '#00267d', '#002b82', '#003087', '#00358c', '#003a91', '#003f96', '#00449b',
              '#0049a0', '#004ea5', '#0053aa', '#0058af', '#005db4']

    # The map must be keyed by the *string* labels that are handed to Plotly.
    # Keying it by the integers 0..24 never matches, so the palette would be
    # ignored. Building it from the labels that actually occur also covers
    # class names, negative ids and more distinct labels than palette entries
    # (the palette is cycled in that case).
    ordered_labels = sorted(set(labels.tolist()), key=_label_sort_key)
    color_map = {
        label: colors[position % len(colors)]
        for position, label in enumerate(ordered_labels)
    }

    fig = px.scatter_3d(
        x=X_tsne_3d[:, 0], y=X_tsne_3d[:, 1], z=X_tsne_3d[:, 2],
        color=labels,
        color_discrete_map=color_map,
        labels={'color': 'Cluster ID'}
    )

    fig.update_traces(marker_size=2)

    # Make sure the target directory exists so the export does not fail on a
    # fresh checkout.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    fig.write_html(path)  # e.g. "./ground_truth_3d.html"
    fig.show()

### Ground Truth Data

In [10]:
# Colour the 3D embedding by the genre column and export it to the docs folder
ground_truth_html = os.path.join(BASE_DIR, "docs", "ground_truth_3d.html")
get_3Dclustering(df.genre, X_tsne_3d, path=ground_truth_html)

### DBSCAN Clustering Algorithm

In [12]:
# DBSCAN config: run the project's DBSCAN on X and unpack the returned pair
# into cluster-centre indices and labels
eps = 0.3244
dbscan_clustering = dbscan.dbscan(
    X,
    min_points=20,
    epsilon=eps,
)
(clustercenter_indices, dbscan_labels) = dbscan_clustering

In [13]:
# DBSCAN: rotatable 3D plot of the embedding, coloured by the DBSCAN labels
dbscan_html = os.path.join(BASE_DIR, "docs", "dbscan_3d.html")
get_3Dclustering(dbscan_labels, X_tsne_3d, path=dbscan_html)

### BIRCH Clustering Algorithm

In [14]:
# BIRCH config: build the model, fit it on X, then predict a label for each row of X
big_brc = birch.Birch(
    n_cluster=25,
    threshold=0.25,
    branching_factor=50,
    predict=True,
)
big_brc.fit(X)
brc_labels = big_brc.predict(X)

In [15]:
# BIRCH: rotatable 3D plot of the embedding, coloured by the BIRCH labels
birch_html = os.path.join(BASE_DIR, "docs", "birch_3d.html")
get_3Dclustering(brc_labels, X_tsne_3d, path=birch_html)

### KMeans

In [16]:
# scikit-learn KMeans with 25 clusters and a fixed seed so the labels are reproducible.
# (KMeans is imported in the import cell at the top of the notebook.)
random_state = 42
n_clusters = 25
kmeans = KMeans(n_clusters=n_clusters, random_state=random_state, n_init="auto").fit(X)
kmeans_labels = kmeans.labels_

In [17]:
# KMeans: rotatable 3D plot of the embedding, coloured by the KMeans labels
kmeans_html = os.path.join(BASE_DIR, "docs", "kmeans_3d.html")
get_3Dclustering(kmeans_labels, X_tsne_3d, path=kmeans_html)

### Affinity Propagation Algorithm

In [18]:
# Affinity Propagation without explicit preferences; show the resulting labels
(base_center_indices, base_labels) = affinity_propagation(
    X,
    convergence_iter=20,
    damping=0.7,
)
base_labels

array([ 99,  97,  80, ..., 114, 165, 274])

In [19]:
# Affinity Propagation with every preference set to the smallest entry of S.
# S is whatever compute_similarity returns for X (presumably the pairwise
# similarity matrix -- confirm against clustering.affinity_propagation).
# Both affinity_propagation and compute_similarity come from the import cell
# at the top of the notebook.
S = compute_similarity(X)

# One preference per row of X, i.e. per sample that is being clustered
min_preferences = np.full(shape=X.shape[0], fill_value=np.min(S))

min_center_indices2, min_labels2 = affinity_propagation(
    X,
    damping=0.7,
    convergence_iter=20,
    preferences=min_preferences,
)

In [20]:
# Affinity Propagation: rotatable 3D plot, coloured by the min-preference labels
affinity_prop_html = os.path.join(BASE_DIR, "docs", "affinity_prop_3d.html")
get_3Dclustering(min_labels2, X_tsne_3d, path=affinity_prop_html)