In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import sys
import numpy as np
from collections import Counter

# Extend the import path with '../' (resolved against the kernel's working
# directory) before the project-local imports below.
# presumably `utils` and `unsupervised` live one level up — verify against the repo layout
sys.path.append('../')

import utils as ut
from unsupervised.clustering import Hierarchical

# Apply the 'seaborn-v0_8-pastel' style sheet to figures drawn after this point.
plt.style.use('seaborn-v0_8-pastel')

### Goal: replicate, for each clustering algorithm, visualizations similar to those on [scikit-learn's Clustering page](https://scikit-learn.org/stable/modules/clustering.html)

In [None]:
# Shared settings for the synthetic 2-D toy datasets.
n_samples = 1_000
n_features = 2
random_state = 42

# Project helpers from `utils`. `circles` and `moons` are indexed below as
# (points, labels) pairs; `uniform` is used directly as the point array.
uniform = ut.generate_uniform_noise(n_samples=n_samples, n_features=n_features, random_state=random_state)
circles = ut.make_circles_2d(n_samples, noise=0.07, factor=0.6, random_state=0)
moons = ut.make_moons_2d(n_samples, noise=0.07, random_state=0)

# name -> {'data': points, 'labels': ground-truth labels or None when unlabelled}
datasets = {
    'uniform': {
        'data': uniform,
        'labels': None
    },
    'circles': {
        'data': circles[0],
        'labels': circles[1]
    },
    'moons': {
        'data': moons[0],
        'labels': moons[1]
    }
}

# One panel per dataset, coloured by label.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, (name, dataset) in zip(axes, datasets.items()):
    data = dataset['data']
    labels = dataset['labels']
    if labels is None:
        # Unlabelled data gets one constant label per point. Size it from the
        # data itself (not the global n_samples) so the colour array always
        # matches the number of points passed to scatter.
        labels = np.zeros(len(data))
    ax.scatter(data[:, 0], data[:, 1], c=labels, s=10)
    ax.set_title(name)
plt.show()

#### Hierarchical Clustering

In [None]:
# Number of clusters requested from the hierarchical clustering run.
K = 3
# The bound is inclusive (K == 10 passes), so the message says "at most".
assert K <= 10, "K must be at most 10"
# NOTE(review): the fit below is disabled. In the visible notebook `X` is first
# assigned in a later cell, so re-enabling this needs a dataset defined first.
# hierarchical = Hierarchical(linkage='ward')
# y_pred = hierarchical.fit_predict(X, K)

In [None]:
# NOTE(review): this whole cell is disabled. It draws one scatter per entry of
# `hierarchical.clusters`, so it can only be re-enabled together with the
# commented-out `Hierarchical` fit that would create `hierarchical`.
# `color` below is computed but never used by the scatter call.
# pass
# clusters = hierarchical.clusters
# cmap = ListedColormap(plt.cm.viridis(np.linspace(0, 1, K)))
# for i, cluster in enumerate(clusters):
#     dp = hierarchical.X[clusters[cluster]]
#     color = np.ones((dp.shape[0], 1)) * i
#     plt.scatter(dp[:,0], dp[:,1] , label="Cluster {}".format(cluster), color=cmap(i), alpha=0.65, s=20)
# plt.legend()
# plt.title(f"Hierarchical Clustering with {K} clusters")
# plt.show()

In [None]:
# Concentric-circles dataset. The generator is only available through the
# `ut` module alias, so it must be called as `ut.make_circles_2d`; the bare
# name raises NameError on a fresh kernel.
X, y = ut.make_circles_2d(1000, noise=0.07, factor=0.6, random_state=0)
plt.title("Dataset with 2 cluster (uses KMeans)")
# Colour each point by its label.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', alpha=0.65, s=20)
plt.show()

In [None]:
# Two-moons dataset. The generator is only available through the `ut` module
# alias, so it must be called as `ut.make_moons_2d`; the bare name raises
# NameError on a fresh kernel.
X, y = ut.make_moons_2d(1000, noise=0.07, random_state=0)
plt.title("Moons")
# Colour each point by its label.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', alpha=0.65, s=20)
# Render the figure explicitly so the cell does not echo the PathCollection repr.
plt.show()

In [None]:
from sklearn.datasets import make_blobs

In [None]:
# Unstructured baseline: 10,000 two-dimensional points drawn from a seeded
# RandomState, with no labels attached.
rng = np.random.RandomState(42)
no_structure = rng.rand(10_000, 2)
y = None
no_structure