# Clustering Training Models: Ganglion Cells in the Retina

- **Author**: David Felipe
- **Contact**: https://github.com/davidnfu0
- **Last Modification**: January 30, 2024
- **Description**: In this notebook, we train the clustering models (K-Means, DBSCAN, Gaussian Mixture Models, HDBSCAN, and Agglomerative Clustering) using the hyperparameters identified previously.

## Introduction

### Importing Libraries


In [1]:
import sys
import pickle
from sklearn.cluster import KMeans, DBSCAN, HDBSCAN, AgglomerativeClustering
from sklearn.mixture import GaussianMixture

In [2]:
# Make the directory two levels up importable so that the project-local
# `scripts` package (imported in the next cell) can be resolved.
# The membership check keeps the cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
if "../../" not in sys.path:
    sys.path.append("../../")

In [3]:
from scripts import load_yaml_config, hide_warnings

### Paths and configuration

In [4]:
# Read the general project settings from the YAML file that lives in the
# configuration directory (path is relative to this notebook).
configPath = "../../config/"
config = load_yaml_config(f"{configPath}general_config.yml")

### Loading the Data

In [5]:
# Locations of the cached clustering artefacts, as declared in the config.
clustering_cache = config["paths"]["data_cache"]["clustering"]

# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load cache files that were produced by this project.

# Datasets to cluster, keyed by dataset name.
with open("../../" + clustering_cache["DFS_NORM"], "rb") as file:
    dfs_norm = pickle.load(file)

# Hyperparameters, indexed first by algorithm name and then by dataset name.
with open("../../" + clustering_cache["PARAMS"], "rb") as file:
    clustering_params = pickle.load(file)

## Creating models

### K-Means

In [6]:
# Project helper; presumably silences library warnings during fitting
# (see scripts.hide_warnings to confirm).
hide_warnings()

# Per-dataset containers: fitted estimator, cluster labels, cluster centres.
k_means, k_means_pred, k_means_centroids = {}, {}, {}

for df_name, data in dfs_norm.items():
    # Build the estimator from the stored hyperparameters for this dataset.
    kmeans_kwargs = clustering_params["k-means"][df_name]
    kmeans_model = KMeans(**kmeans_kwargs).fit(data)

    k_means[df_name] = kmeans_model
    k_means_pred[df_name] = kmeans_model.labels_
    k_means_centroids[df_name] = kmeans_model.cluster_centers_

### DBSCAN

In [7]:
# Fitted DBSCAN estimators and their labels, keyed by dataset name.
dbscans = {}
dbscans_pred = {}

for df_name, data in dfs_norm.items():
    dbscan_model = DBSCAN(**clustering_params["dbscan"][df_name])
    dbscan_model.fit(data)  # fit() returns the estimator itself

    dbscans[df_name] = dbscan_model
    dbscans_pred[df_name] = dbscan_model.labels_

### Gaussian Mixture Models

In [8]:
# Fitted Gaussian mixture models and their component assignments,
# keyed by dataset name.
gmms, gmms_pred = {}, {}

for df_name, data in dfs_norm.items():
    gmm_kwargs = clustering_params["gmm"][df_name]
    gmm_model = GaussianMixture(**gmm_kwargs).fit(data)

    gmms[df_name] = gmm_model
    # GaussianMixture exposes no labels_ attribute, so the assignments are
    # obtained by predicting on the same data the model was fitted on.
    gmms_pred[df_name] = gmm_model.predict(data)

### HDBSCAN

In [9]:
# Fitted HDBSCAN estimators and their labels, keyed by dataset name.
hdbscans = {}
hdbscans_pred = {}

for df_name, data in dfs_norm.items():
    hdbscan_model = HDBSCAN(**clustering_params["hdbscan"][df_name])
    hdbscan_model.fit(data)  # fit() returns the estimator itself

    hdbscans[df_name] = hdbscan_model
    hdbscans_pred[df_name] = hdbscan_model.labels_

### Agglomerative Clustering

In [10]:
# Fitted agglomerative-clustering estimators and their labels,
# keyed by dataset name.
agglomeratives, agglomeratives_pred = {}, {}

for df_name, data in dfs_norm.items():
    agglomerative_kwargs = clustering_params["agglomerative"][df_name]
    agglomerative_model = AgglomerativeClustering(**agglomerative_kwargs).fit(data)

    agglomeratives[df_name] = agglomerative_model
    agglomeratives_pred[df_name] = agglomerative_model.labels_

### Model dict

In [11]:
# Fitted estimators: algorithm name -> {dataset name -> model}.
clustering_models = {
    "k-means": k_means,
    "dbscan": dbscans,
    "gmm": gmms,
    "hdbscan": hdbscans,
    "agglomerative": agglomeratives,
}

# Cluster assignments: algorithm name -> {dataset name -> labels}.
clustering_pred = {
    "k-means": k_means_pred,
    "dbscan": dbscans_pred,
    "gmm": gmms_pred,
    "hdbscan": hdbscans_pred,
    "agglomerative": agglomeratives_pred,
}

# Centroids are only collected for K-Means in this notebook.
clustering_centroids = {"k-means": k_means_centroids}

## Export the Data

In [12]:
# Locations of the cached clustering artefacts, as declared in the config.
clustering_cache = config["paths"]["data_cache"]["clustering"]

# Persist each result dictionary to its configured cache file, in the order
# models -> predictions -> centroids.
for cache_key, payload in (
    ("MODELS", clustering_models),
    ("PREDS", clustering_pred),
    ("CENTROIDS", clustering_centroids),
):
    with open("../../" + clustering_cache[cache_key], "wb") as output:
        pickle.dump(payload, output)

___