### Imports

In [2]:
%load_ext autoreload
%autoreload 2

import numpy as np

from sklearn.datasets import load_iris

from helper_functions import plot3d_iris_dataset
from clustering import KMeans, FuzzCMeans, MountainClustering, SubstractiveClustering, GaussianMixture

### Auxiliary functions

In [3]:
def load_dataset():
    """Loads the iris dataset.
    Returns:
        np.ndarray: Feature matrix.
        np.ndarray: Target vector.
    """
    iris = load_iris()
    return iris.data, iris.target

### Run and Play

In [4]:
feature_matrix, target = load_dataset()

# Scale Feature Matrix
feature_min = feature_matrix.min(axis=0)
feature_max = feature_matrix.max(axis=0)
feature_matrix = (feature_matrix - feature_min) / (feature_max - feature_min)

plot3d_iris_dataset(feature_matrix, target, 'Iris dataset')

#### Fit algorithms

In [21]:
sigma_squared = 1
mountain_clustering = MountainClustering(number_of_partitions=10,
                                        distance_metric='manhattan',
                                        sigma_squared=sigma_squared,
                                        beta_squared=1.5*sigma_squared,)
mountain_clustering.fit(feature_matrix)

cluster_assignments = mountain_clustering.predict()

plot3d_iris_dataset(feature_matrix, cluster_assignments, 'Mountain Clustering')

In [16]:
r_a = 1.2
substractive_clustering = SubstractiveClustering(r_a=r_a,
                                                r_b=1.5*r_a,
                                                distance_metric='euclidean',)

substractive_clustering.fit(feature_matrix)

cluster_assignments = substractive_clustering.predict()

plot3d_iris_dataset(feature_matrix, cluster_assignments, 'Substractive Clustering')

In [14]:
kmeans = KMeans(number_of_clusters=5,
                distance_metric='euclidean',
                n_iter=1000,
                verbose=False,
                n_jobs=2)

kmeans.fit(feature_matrix)

cluster_assignments = kmeans.predict()

plot3d_iris_dataset(feature_matrix, cluster_assignments, 'K-Means')

KeyboardInterrupt: 

In [101]:
fuzzycmeans = FuzzCMeans(number_of_clusters=5,
                        fuzzines_parameter=2,
                        distance_metric='euclidean',
                        n_iter=2000,
                        verbose=False,)

fuzzycmeans.fit(feature_matrix)

assignment_matrix = fuzzycmeans.predict()
cluster_assignments = assignment_matrix.argmax(axis=1)

plot3d_iris_dataset(feature_matrix, cluster_assignments, 'Fuzzy C-Means')

In [112]:
expectation_maximization = GaussianMixture(number_of_clusters=3, n_iter=1000)
expectation_maximization.fit(feature_matrix)

assignment_matrix = expectation_maximization.predict()
cluster_assignments = assignment_matrix.argmax(axis=1)

plot3d_iris_dataset(feature_matrix, cluster_assignments, 'Expectation Maximization')


invalid value encountered in true_divide



ValueError: array must not contain infs or NaNs

In [37]:
assignment_matrix

array([[5.00000000e-001, 1.18422187e-248, 5.00000000e-001],
       [5.00000000e-001, 8.02328178e-249, 5.00000000e-001],
       [5.00000000e-001, 7.25099374e-249, 5.00000000e-001],
       [5.00000000e-001, 6.88364190e-249, 5.00000000e-001],
       [5.00000000e-001, 1.14922466e-248, 5.00000000e-001],
       [5.00000000e-001, 2.05902345e-248, 5.00000000e-001],
       [5.00000000e-001, 7.73245256e-249, 5.00000000e-001],
       [5.00000000e-001, 1.08297156e-248, 5.00000000e-001],
       [5.00000000e-001, 5.16619684e-249, 5.00000000e-001],
       [5.00000000e-001, 8.63145121e-249, 5.00000000e-001],
       [5.00000000e-001, 1.70583304e-248, 5.00000000e-001],
       [5.00000000e-001, 9.62056682e-249, 5.00000000e-001],
       [5.00000000e-001, 7.34456891e-249, 5.00000000e-001],
       [5.00000000e-001, 4.45323548e-249, 5.00000000e-001],
       [5.00000000e-001, 2.42912577e-248, 5.00000000e-001],
       [5.00000000e-001, 3.08417453e-248, 5.00000000e-001],
       [5.00000000e-001, 1.78656071e-248