# Subset

Description

In [1]:
# Reload the notebook if an external file is updated
%load_ext autoreload
%autoreload 2

import os
import sys

from pathlib import Path

path = (
    Path
    .cwd()
    .parent
    .parent
    .joinpath('warbler.py')
)

os.chdir(path)
sys.path.append(path)

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from constant import PICKLE, SETTINGS
from datatype.dataset import Dataset
from datatype.settings import Settings
from datatype.validation import (
    jaccard_coefficient, 
    rand_index,
    variation_of_information
)
from datatype.voronoi import Builder, VoronoiFCM
from fcmeans import FCM
from sklearn.metrics import (
    adjusted_rand_score, 
    jaccard_score,
    mutual_info_score,
    silhouette_score,
    rand_score
)
from sklearn.utils import resample
from textwrap import dedent

In [3]:
pd.set_option('display.max_colwidth', None)

In [4]:
dataset = Dataset('segment')
dataframe = dataset.load()

In [5]:
x = np.concatenate(
    (
        [dataframe.umap_x_2d],
        [dataframe.umap_y_2d]
    )
)

x = x.transpose()

In [6]:
fcm = FCM(
    m=1.5,
    max_iter=150,
    n_clusters=19
)

fcm.fit(x)

In [7]:
labels = fcm.predict(x)

In [10]:
# Bootstrap Stability
n_iterations = 10
stability_scores = []

for iteration in range(n_iterations):
    print(iteration)
    
    # Bootstrap resample
    x_resampled, labels_resampled = resample(x, labels)
    print(len(x_resampled))
    print(len(labels_resampled))
    
    # Run KMeans on the resampled data
    fcm.fit(x_resampled)
    labels_new = fcm.predict(x)
    
    # Calculate stability
    stability = np.sum(labels_resampled == labels_new) / len(labels_resampled)
    stability_scores.append(stability)

print(f"Average Stability Score: {np.mean(stability_scores)}")

0
13192
13192
1
13192
13192


KeyboardInterrupt: 