In [1]:
#author: Amelie Bauerdick
#Wabnitz Lab

# Import Packages

In [2]:
from pathlib import Path

import hdbscan
import numpy as np
import pandas as pd
import sklearn.cluster as cluster
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

# Load CSV

In [None]:
# Read path/DMAP.csv into `data`, the working frame every later cell operates on.
data = pd.read_csv(filepath_or_buffer="path/DMAP.csv")
# Bare last expression so the notebook renders the loaded frame.
data

In [None]:
# Show the column labels of the loaded frame as a plain Python list.
data.columns.tolist()

# Store Features Temporarily

In [5]:
# Keep an independent one-column copy of 'feature' so it can be
# re-attached to the table after clustering.
feature = data.loc[:, ['feature']].copy()

In [6]:
# Remove 'feature' from the working frame; the estimators below are fitted
# on whatever columns remain in `data`.
data = data.drop(columns=['feature'])

# K-Means

In [9]:
# Partition the rows of `data` into 9 K-Means clusters, one integer label per row.
# random_state is pinned (same seed as the Gaussian mixture cell) because the
# centroid initialisation is random; without it the labels change between runs.
# NOTE(review): the fit uses every column still present in `data`, not only
# 'x'/'y' — confirm this is intended.
kmeans_labels = KMeans(n_clusters=9, random_state=42).fit_predict(data)

In [None]:
# Scatter plot of the K-Means labels over the 'x'/'y' columns, saved under png/.
plt.style.use('seaborn-v0_8-poster')
fig, ax = plt.subplots(figsize=(6, 6))

# Coordinates are passed as 1-D Series rather than one-column DataFrames.
ax.scatter(data['x'], data['y'], c=kmeans_labels, s=1, cmap='Set1')
ax.set_title('K-Means Clustering')
ax.set_xlabel('DensMAP-1')  # spelling aligned with the other axis labels
ax.set_ylabel('DensMAP-2')
ax.set_xlim(-10, 18)
ax.set_ylim(-5, 22)

# savefig does not create missing directories, so make sure png/ exists first.
Path('png').mkdir(parents=True, exist_ok=True)
fig.savefig('png/kmeans', dpi=300, bbox_inches='tight')

# Gaussian Mixture

In [12]:
# Fit a seeded 9-component Gaussian mixture on `data`, then ask the fitted
# model for one component label per row.
gmm = GaussianMixture(n_components=9, random_state=42).fit(data)
gaussian_labels = gmm.predict(data)

In [None]:
# Scatter plot of the Gaussian mixture labels over the 'x'/'y' columns, saved under png/.
plt.style.use('seaborn-v0_8-poster')
fig, ax = plt.subplots(figsize=(6, 6))

# Coordinates are passed as 1-D Series rather than one-column DataFrames.
ax.scatter(data['x'], data['y'], c=gaussian_labels, s=1, cmap='Set1')
ax.set_title('Gaussian Mixture Clustering')
ax.set_xlabel('DensMAP-1')
ax.set_ylabel('DensMAP-2')
ax.set_xlim(-10, 18)
ax.set_ylim(-5, 22)

# savefig does not create missing directories, so make sure png/ exists first.
Path('png').mkdir(parents=True, exist_ok=True)
fig.savefig('png/gaussian', dpi=300, bbox_inches='tight')

# HDBSCAN

In [14]:
# Run HDBSCAN on `data`; `labels_` holds one cluster id per row.
clusterer = hdbscan.HDBSCAN(gen_min_span_tree=True, min_cluster_size=300)
hdbscan_labels = clusterer.fit(data).labels_
# Rows labelled -1 are the ones the plot below marks as outliers.
outliers_mask = (hdbscan_labels == -1)

In [None]:
# Scatter plot of the HDBSCAN result: clustered points coloured by label,
# outliers (label -1) drawn separately in gray. Saved under png/.
plt.style.use('seaborn-v0_8-poster')
fig, ax = plt.subplots(figsize=(6, 6))

# Draw clustered points only; previously the outliers were also drawn here and
# then painted over with a half-transparent gray marker, which blended both colours.
# vmin/vmax span all labels (including -1) so cluster colours stay as before.
inliers_mask = ~outliers_mask
ax.scatter(
    data.loc[inliers_mask, 'x'],
    data.loc[inliers_mask, 'y'],
    c=hdbscan_labels[inliers_mask],
    cmap='Spectral',
    vmin=hdbscan_labels.min(),
    vmax=hdbscan_labels.max(),
    s=5,
)
ax.scatter(
    data.loc[outliers_mask, 'x'],
    data.loc[outliers_mask, 'y'],
    s=4,
    c='gray',
    marker='v',
    label='Outliers',
    alpha=0.5,
)
ax.set_title('HDBSCAN Clustering')
ax.set_xlabel('DensMAP-1')
ax.set_ylabel('DensMAP-2')
ax.set_xlim(-10, 18)
ax.set_ylim(-5, 22)
ax.legend(markerscale=6)

# savefig does not create missing directories, so make sure png/ exists first.
Path('png').mkdir(parents=True, exist_ok=True)
fig.savefig('png/hdbscan', dpi=300, bbox_inches='tight')
plt.show()

# Save Data

In [16]:
# Choose which clustering result is written to the 'cluster' column.
# Exactly one of the three assignments should be active; the Gaussian
# mixture labels are the current choice.
# NOTE(review): this adds a column to `data` in place, so re-running a
# clustering cell after this one would fit on the 'cluster' column as well.
#data['cluster'] = kmeans_labels.tolist()
data['cluster'] = gaussian_labels.tolist()
#data['cluster'] = hdbscan_labels.tolist()

In [None]:
# Re-attach the stored 'feature' column as the last column of the table.
# Any 'feature' column already present is dropped first, so re-running this
# cell no longer appends a duplicate column each time.
frames = [data.drop(columns=['feature'], errors='ignore'), feature]
data = pd.concat(frames, axis=1)
# Bare last expression so the notebook renders the combined frame.
data

In [19]:
# Write the labelled table to CSV, leaving out the row index.
data.to_csv(path_or_buf="path/DMAP_2.csv", index=False)

In [20]:
# Write the same table to an Excel workbook, leaving out the row index.
data.to_excel(excel_writer="path/DMAP_2.xlsx", index=False)