In [None]:
import numpy as np
import glob
import umap
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN

# Cluster the per-file descriptors with DBSCAN and plot the result in a
# 2-D UMAP embedding, colouring every point by its cluster label.

# glob returns paths in arbitrary order; sort so that the row order of the
# descriptor matrix (and therefore the output) is reproducible between runs.
files = sorted(glob.glob("descriptors/week/descriptor*.npy"))
if not files:
    raise FileNotFoundError(
        "No descriptor files match 'descriptors/week/descriptor*.npy'"
    )

# Each file is reduced to a single vector by summing over axis 1.
# NOTE(review): assumes every file holds a 2-D array with the same number of
# rows, so that all reduced vectors have equal length -- confirm.
descriptors = [np.load(file).sum(axis=1) for file in files]

# Stack the vectors into one matrix with one row per file, then replace
# NaN/inf entries with finite numbers so the estimators accept the data.
descriptors = np.nan_to_num(np.stack(descriptors))

# DBSCAN parameters: neighbourhood radius and the minimum number of
# neighbours for a core point (DBSCAN does not take a number of clusters).
eps = 0.5
min_samples = 5

# DBSCAN object with eps and min_samples
dbscan = DBSCAN(eps=eps, min_samples=min_samples)

# Fit DBSCAN on the full descriptor matrix (one sample per file), not on the
# single vector left over from the last loaded file.
dbscan.fit(descriptors)

# Labels assigned to every sample; DBSCAN marks noise points with -1.
labels = dbscan.labels_

# Visualize the clusters using UMAP
reducer = umap.UMAP()

# Fit and transform the same matrix to 2 dimensions
descriptor_umap = reducer.fit_transform(descriptors)

# Plot the clusters
plt.figure(figsize=(8, 6))

# Iterate over the labels that actually occur so that noise (-1) is drawn
# too and no empty legend entry is created for a non-existent cluster.
for label in np.unique(labels):
    mask = labels == label
    name = 'Noise' if label == -1 else f'Cluster {label}'
    plt.scatter(descriptor_umap[mask, 0], descriptor_umap[mask, 1], label=name)

plt.title('Clustering de Descriptores con DBSCAN y UMAP')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.legend()
plt.grid(True)
plt.savefig("descriptors/visual_desc/week/dbscan_week_cluster.png")
plt.close()
