# 2D Hyperparameter Tuning for Fuzzy C-Means

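Runs a grid search over the fuzzy C-means hyperparameters `m`, `max_iter`, and `n_clusters` on the 2D UMAP coordinates of the `segment` dataset. For each cluster-validity metric, the search results are exported, FCM is refit with the best-scoring parameters, and a Voronoi figure of the resulting clustering is saved.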

In [1]:
# Automatically reload imported modules before each cell runs, so edits to
# external source files take effect without restarting the kernel
%load_ext autoreload
%autoreload 2

import os
import sys

from pathlib import Path

# Locate the 'warbler' directory that sits two levels above the current
# working directory, expressed as a POSIX-style string
path = (Path.cwd().parent.parent / 'warbler').as_posix()

# Work from that directory and make it importable.
# NOTE(review): the path is derived from the current working directory, so
# re-running this cell after the chdir resolves to a different location.
os.chdir(path)
sys.path.append(path)

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pickle
import scienceplots

from constant import SETTINGS
from copy import deepcopy
from datatype.dataset import Dataset
from datatype.scorer import (
    CalinskiHarabaszScore,
    DaviesBouldinIndex,
    FukuyamaSugenoIndex,
    PartitionCoefficient,
    PartitionEntropyCoefficient,
    Scorer,
    SilhouetteScore,
    SumOfSquaredErrors,
    XieBeniIndex
)
from datatype.search import GridSearch
from datatype.settings import Settings
from datatype.voronoi import Builder, VoronoiFCM
from fcmeans import FCM

In [3]:
# Apply the 'science' Matplotlib style to every figure in this notebook
# (presumably registered by the `scienceplots` import above — confirm)
plt.style.use('science')

In [4]:
# Load the 'segment' dataset into a dataframe
dataset = Dataset('segment')
dataframe = dataset.load()

# The 'original_array' column holds hex-encoded pickles; decode each entry
# back into the object it was serialized from.
# NOTE(review): pickle.loads can execute arbitrary code, so only load
# datasets that come from a trusted source.
dataframe['original_array'] = dataframe['original_array'].apply(
    lambda encoded: pickle.loads(bytes.fromhex(encoded))
)

In [5]:
# Pair up the 2D embedding columns: one row per dataframe row, with the
# x and y coordinates as the two columns
x = np.array([dataframe.umap_x_2d, dataframe.umap_y_2d]).T

In [6]:
# Hyperparameter grid explored by the search: every combination of the
# values below is evaluated (10 * 3 * 18 = 540 candidates).
#
# 'm' is built with linspace and rounded instead of np.arange(1.5, 2.5, 0.1):
# a fractional arange step accumulates floating-point error, which can yield
# values such as 1.7000000000000002 and makes the element count fragile.
grid = {
    'm': np.round(np.linspace(1.5, 2.4, 10), 1),
    'max_iter': np.arange(50, 200, 50),
    'n_clusters': np.arange(2, 20)
}

# Scorer classes (not instances) to tune against; the loop below
# instantiates each one and runs a separate grid search for it
metrics = [
    CalinskiHarabaszScore,
    DaviesBouldinIndex,
    FukuyamaSugenoIndex,
    PartitionCoefficient,
    PartitionEntropyCoefficient,
    SilhouetteScore,
    SumOfSquaredErrors,
    XieBeniIndex
]

In [7]:
# For each validity metric: grid-search the FCM hyperparameters, export the
# search results, refit FCM with the best parameters, and save a Voronoi
# figure of the resulting clustering.
for metric in metrics:
    # Bare instance: its str() names the figure, its repr() names the output
    # files, and it is handed to search.best() to pick the winning parameters
    instance = metric()

    name = str(instance)

    # A second instance bound to the embedding is what the search scores with
    # (estimator=None: presumably filled in by the search — confirm)
    scorer = Scorer(
        metric=metric(estimator=None, x=x)
    )

    search = GridSearch(
        grid=grid,
        scorer=scorer
    )

    # Fit on a copy so the shared embedding array is never modified
    data = deepcopy(x)
    search.fit(data)

    filename = repr(instance)
    search.export(filename)

    parameters, score = search.best(instance)

    # Refit a fresh model using the best-scoring parameters
    fcm = FCM(**parameters)
    fcm.fit(data)

    # The labels are assigned positionally, so `dataframe` must still be in
    # the row order that was used to build `x`. For that reason the frame is
    # never re-sorted in place below.
    dataframe['fcm_label_2d'] = fcm.predict(data)

    # Load default settings
    path = SETTINGS.joinpath('voronoi.json')
    settings = Settings.from_file(path)
    settings.name = name

    unique = dataframe.fcm_label_2d.unique()

    # Sort into a separate frame. Overwriting `dataframe` here would leave it
    # duration-sorted for the next metric, and the positional label
    # assignment above would then attach labels to the wrong rows.
    ordered = dataframe.sort_values(
        ascending=[False],
        by=['duration']
    )

    # Embedding, labels and spectrograms are all taken from the same sorted
    # frame so they stay aligned row for row
    embedding = np.column_stack(
        [
            ordered.umap_x_2d,
            ordered.umap_y_2d
        ]
    )

    spectrogram = ordered.original_array.to_numpy()
    label = ordered.fcm_label_2d.to_numpy()

    builder = Builder(
        embedding=embedding,
        label=label,
        # Bitwise-inverted spectrograms (presumably to flip the image
        # polarity for display — confirm)
        spectrogram=~spectrogram,
        settings=settings,
        unique=unique
    )

    voronoi = VoronoiFCM(builder=builder)

    component = voronoi.build()

    figure = component.get('figure')

    voronoi.save(
        figure=figure,
        filename=filename + '.png'
    )

100%|█████████████████████████████████████████████████████████████████████████████| 540/540 [00:56<00:00,  9.61it/s]
