# Clustering - Data Science Koans

Master clustering algorithms!

## How to Use
1. Read each koan's objective
2. Complete the TODOs in the code cell
3. Run the cell to validate your solution
4. Iterate until the koan passes

In [None]:
# Setup
import sys
sys.path.append('../..')
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import silhouette_score
from koans.core.validator import KoanValidator
from koans.core.progress import ProgressTracker

# Create the validator used by the koans in this notebook and the progress
# tracker, both keyed on the notebook id '10_clustering'.
validator = KoanValidator('10_clustering')
tracker = ProgressTracker()
print('Setup complete!')
# NOTE(review): the value is printed with a '%' suffix, so it is presumably a
# percentage -- confirm against ProgressTracker.get_notebook_progress.
print(f"Progress: {tracker.get_notebook_progress('10_clustering')}%")

## KOAN 10.1: KMeans Fit
**Objective**: Cluster data
**Difficulty**: Advanced

In [None]:
def kmeans_fit():
    """Koan 10.1: fit a two-cluster KMeans model on four 2-D points.

    The data and an unfitted model are prepared here; fitting is left to the
    learner. The koan's validate() expects the returned object to expose a
    ``labels_`` attribute. Returns None until the TODO is completed.
    """
    X = np.array([
        [1, 2],
        [2, 3],
        [8, 9],
        [9, 10],
    ])
    model = KMeans(n_clusters=2, n_init=10, random_state=42)
    # TODO: Fit model
    return None

@validator.koan(1, "KMeans Fit", difficulty="Advanced")
def validate():
    """Check koan 10.1: kmeans_fit() must return an object with ``labels_``."""
    model = kmeans_fit()
    # The check must live inside validate(): `model` is local to this
    # function, so a module-level assert would raise NameError instead.
    assert hasattr(model, 'labels_')
validate()

## KOAN 10.2: Get Labels
**Objective**: Cluster assignments
**Difficulty**: Advanced

In [None]:
def get_labels():
    """Koan 10.2: return the cluster assignment of each training point.

    A two-cluster KMeans model is fitted on four 2-D points; returning its
    ``labels_`` is left to the learner. The koan's validate() expects a
    result of length 4. Returns None until the TODO is completed.
    """
    X = np.array([
        [1, 2],
        [2, 3],
        [8, 9],
        [9, 10],
    ])
    # fit() returns the fitted estimator itself, so it can be chained.
    model = KMeans(n_clusters=2, n_init=10, random_state=42).fit(X)
    # TODO: Return labels_
    return None

@validator.koan(2, "Get Labels", difficulty="Advanced")
def validate():
    """Check koan 10.2: get_labels() must return one label per point (4)."""
    labels = get_labels()
    # The check must live inside validate(): `labels` is local to this
    # function, so a module-level assert would raise NameError instead.
    assert len(labels) == 4
validate()

## KOAN 10.3: Centroids
**Objective**: Cluster centers
**Difficulty**: Advanced

In [None]:
def get_centers():
    """Koan 10.3: return the centroids of a fitted two-cluster KMeans.

    The model is fitted on four 2-D points; returning its
    ``cluster_centers_`` is left to the learner. The koan's validate()
    expects an array of shape (2, 2). Returns None until the TODO is
    completed.
    """
    X = np.array([
        [1, 2],
        [2, 3],
        [8, 9],
        [9, 10],
    ])
    # fit() returns the fitted estimator itself, so it can be chained.
    model = KMeans(n_clusters=2, n_init=10, random_state=42).fit(X)
    # TODO: Return cluster_centers_
    return None

@validator.koan(3, "Centroids", difficulty="Advanced")
def validate():
    """Check koan 10.3: get_centers() must return an array of shape (2, 2)."""
    centers = get_centers()
    # The check must live inside validate(): `centers` is local to this
    # function, so a module-level assert would raise NameError instead.
    assert centers.shape == (2, 2)
validate()

## KOAN 10.4: Predict
**Objective**: Assign new point
**Difficulty**: Advanced

In [None]:
def predict_new():
    """Koan 10.4: assign a new point to one of the learned clusters.

    A two-cluster KMeans model is fitted on four 2-D points; predicting the
    cluster of ``[[1.5, 2.5]]`` is left to the learner. The koan's
    validate() expects the first element of the result to be 0 or 1.
    Returns None until the TODO is completed.
    """
    X = np.array([
        [1, 2],
        [2, 3],
        [8, 9],
        [9, 10],
    ])
    # fit() returns the fitted estimator itself, so it can be chained.
    model = KMeans(n_clusters=2, n_init=10, random_state=42).fit(X)
    # TODO: Predict [[1.5, 2.5]]
    return None

@validator.koan(4, "Predict", difficulty="Advanced")
def validate():
    """Check koan 10.4: the first predicted cluster index must be 0 or 1."""
    cluster = predict_new()
    # The check must live inside validate(): `cluster` is local to this
    # function, so a module-level assert would raise NameError instead.
    assert cluster[0] in [0, 1]
validate()

## KOAN 10.5: Inertia
**Objective**: Read the within-cluster sum of squared distances (`inertia_`)
**Difficulty**: Advanced

In [None]:
def get_inertia():
    """Koan 10.5: return the inertia of a fitted two-cluster KMeans.

    The model is fitted on four 2-D points; returning its ``inertia_`` is
    left to the learner. The koan's validate() expects a value greater
    than 0. Returns None until the TODO is completed.
    """
    X = np.array([
        [1, 2],
        [2, 3],
        [8, 9],
        [9, 10],
    ])
    # fit() returns the fitted estimator itself, so it can be chained.
    model = KMeans(n_clusters=2, n_init=10, random_state=42).fit(X)
    # TODO: Return inertia_
    return None

@validator.koan(5, "Inertia", difficulty="Advanced")
def validate():
    """Check koan 10.5: get_inertia() must return a positive value."""
    inertia = get_inertia()
    # The check must live inside validate(): `inertia` is local to this
    # function, so a module-level assert would raise NameError instead.
    assert inertia > 0
validate()

## KOAN 10.6: Silhouette
**Objective**: Cluster quality
**Difficulty**: Advanced

In [None]:
def calc_silhouette():
    """Koan 10.6: score the quality of a two-cluster KMeans partition.

    The model is fitted on four 2-D points and the resulting labels are
    kept; computing ``silhouette_score`` is left to the learner. The koan's
    validate() expects a score within [-1, 1]. Returns None until the TODO
    is completed.
    """
    X = np.array([
        [1, 2],
        [2, 3],
        [8, 9],
        [9, 10],
    ])
    model = KMeans(n_clusters=2, n_init=10, random_state=42)
    # fit_predict() fits the model and hands back the label of each row.
    labels = model.fit_predict(X)
    # TODO: Calculate silhouette_score
    return None

@validator.koan(6, "Silhouette", difficulty="Advanced")
def validate():
    """Check koan 10.6: calc_silhouette() must return a score in [-1, 1]."""
    score = calc_silhouette()
    # The check must live inside validate(): `score` is local to this
    # function, so a module-level assert would raise NameError instead.
    assert -1 <= score <= 1
validate()

## KOAN 10.7: Hierarchical
**Objective**: Agglomerative
**Difficulty**: Advanced

In [None]:
def hierarchical():
    """Koan 10.7: fit a two-cluster agglomerative (hierarchical) model.

    The data and an unfitted AgglomerativeClustering model are prepared
    here; fitting is left to the learner. The koan's validate() expects the
    returned object to expose a ``labels_`` attribute. Returns None until
    the TODO is completed.
    """
    X = np.array([
        [1, 2],
        [2, 3],
        [8, 9],
        [9, 10],
    ])
    model = AgglomerativeClustering(
        n_clusters=2,
    )
    # TODO: Fit model
    return None

@validator.koan(7, "Hierarchical", difficulty="Advanced")
def validate():
    """Check koan 10.7: hierarchical() must return an object with ``labels_``."""
    model = hierarchical()
    # The check must live inside validate(): `model` is local to this
    # function, so a module-level assert would raise NameError instead.
    assert hasattr(model, 'labels_')
validate()

## KOAN 10.8: Elbow Method
**Objective**: Choose the number of clusters K from the inertia curve
**Difficulty**: Advanced

In [None]:
def elbow_concept():
    """Koan 10.8: acknowledge the elbow method for choosing K.

    This is a conceptual koan with no computation. The koan's validate()
    expects the result to compare equal to True. Returns None until the
    TODO is completed.
    """
    # Elbow: plot inertia vs K, choose where curve bends
    # TODO: Return True
    return None

@validator.koan(8, "Elbow Method", difficulty="Advanced")
def validate():
    """Check koan 10.8: elbow_concept() must return a value equal to True."""
    result = elbow_concept()
    # The check must live inside validate(): `result` is local to this
    # function, so a module-level assert would raise NameError instead.
    assert result == True
validate()

## Congratulations!

You completed Clustering!

In [None]:
# Look up the progress recorded for this notebook and print it.
progress = tracker.get_notebook_progress('10_clustering')
print(f'Final Progress: {progress}%')
# The congratulation is printed only when the reported progress equals 100.
if progress == 100:
    print('Excellent! Mastered Clustering!')