## Hyperparameter tuning of Birch

In [29]:
### Load data
# Read the CSV into a pandas DataFrame. The path is relative, so it resolves
# against the working directory the notebook is run from.
import pandas as pd

# `data` is used as-is by the tuning cell: it is both the input to
# Birch.fit_predict and the sample matrix given to silhouette_score.
# NOTE(review): presumably all columns are already numeric/scaled (the file
# lives under Processed/) — confirm, since no preprocessing happens here.
data = pd.read_csv('../Data/Processed/processed.csv')

In [30]:
import numpy as np
from sklearn.cluster import Birch
from sklearn.metrics import silhouette_score

# Exhaustive grid search over Birch hyperparameters. Every combination is
# fitted on `data` and scored with the mean silhouette coefficient of the
# labels it produces; the best-scoring combination is reported at the end.
threshold_range = np.arange(0.1, 0.9, 0.1)
branching_factor_range = np.arange(10, 100, 10)
n_clusters_range = np.arange(2, 21, 1)

# One score per grid combination, in iteration order. Combinations that
# cannot be scored are stored as NaN so the list stays aligned with the grid.
silhouette_score_averages = []

# Best combination seen so far. The silhouette coefficient lies in [-1, 1],
# so the running maximum starts at -inf: starting at 0 would silently ignore
# every result when all scores are <= 0 and report placeholder values as the
# "best" hyperparameters. The None placeholders make "nothing found" explicit.
max_score = {
    'silhouette_coefficient': float('-inf'),
    'threshold': None,
    'branching_factor': None,
    'n_clusters': None
}

for threshold in threshold_range:
    for branching_factor in branching_factor_range:
        for n_clusters in n_clusters_range:

            prediction = Birch(threshold=threshold, branching_factor=branching_factor, n_clusters=n_clusters).fit_predict(X=data)

            # silhouette_score raises ValueError unless there are at least two
            # distinct labels. Birch may return fewer clusters than requested
            # (presumably when it finds too few subclusters), so score such a
            # degenerate labelling as NaN instead of aborting the whole search.
            if np.unique(prediction).size < 2:
                silhouette_score_average = float('nan')
            else:
                silhouette_score_average = silhouette_score(data, prediction)
            silhouette_score_averages.append(silhouette_score_average)

            # NaN compares False against everything, so unscored combinations
            # can never become the best one.
            if silhouette_score_average > max_score['silhouette_coefficient']:
                max_score['silhouette_coefficient'] = silhouette_score_average
                max_score['threshold'] = threshold
                max_score['branching_factor'] = branching_factor
                max_score['n_clusters'] = n_clusters

            print(f"threshold: {threshold:.1f}, branching_factor: {branching_factor}, n_clusters: {n_clusters:>2}, Silhouette Score Average: {silhouette_score_average:.4f}")

print("----------------#####-----#####---------------------")
print("The best score from tuning is: ")
print(f"threshold: {max_score['threshold']}, branching_factor: {max_score['branching_factor']}, n_clusters: {max_score['n_clusters']}, Silhouette Coefficient: {max_score['silhouette_coefficient']:.4f}")

threshold: 0.7, branching_factor: 70, n_clusters:  2, Silhouette Score Average: 0.2166
threshold: 0.7, branching_factor: 70, n_clusters:  3, Silhouette Score Average: 0.2900
threshold: 0.7, branching_factor: 70, n_clusters:  4, Silhouette Score Average: 0.2141
threshold: 0.7, branching_factor: 70, n_clusters:  5, Silhouette Score Average: 0.2141
threshold: 0.7, branching_factor: 70, n_clusters:  6, Silhouette Score Average: 0.2141
threshold: 0.7, branching_factor: 70, n_clusters:  7, Silhouette Score Average: 0.2141
threshold: 0.7, branching_factor: 70, n_clusters:  8, Silhouette Score Average: 0.2141
threshold: 0.7, branching_factor: 70, n_clusters:  9, Silhouette Score Average: 0.2141
threshold: 0.7, branching_factor: 70, n_clusters: 10, Silhouette Score Average: 0.2141
threshold: 0.7, branching_factor: 70, n_clusters: 11, Silhouette Score Average: 0.2141
threshold: 0.7, branching_factor: 70, n_clusters: 12, Silhouette Score Average: 0.2141
threshold: 0.7, branching_factor: 70, n_clusters: