# Clustering Example

This notebook demonstrates how to use the refactored satellite analysis toolkit to cluster a processed satellite image.

In [None]:
# Imports
import numpy as np
from satellite_analysis.config import Config
from satellite_analysis.analyzers.clustering import ClusteringFactory
from satellite_analysis.utils import (
    reshape_image_to_table,
    reshape_table_to_image,
    min_max_scale,
    plot_clustered_image,
    plot_elbow_curve,
)
import rasterio
from pathlib import Path

## 1. Load Configuration

In [None]:
# Parse the shared project settings from the YAML file next to the notebooks.
config = Config.from_yaml("../config/config.yaml")

# Echo the settings this walkthrough cares about, one "<label>: <value>" line each.
# Each entry is (printed label, config section, field inside that section).
for label, section, field in (
    ("City", "area", "city"),
    ("Clustering algorithm", "clustering", "algorithm"),
    ("Number of clusters", "clustering", "n_clusters"),
):
    print(f"{label}: {getattr(getattr(config, section), field)}")

## 2. Load Processed Image

We'll use a previously downloaded and processed image.

In [None]:
# Path to your processed sentinel stack (relative to this notebook).
image_path = "../sentinel_im/sent_stack.tiff"

# Read the pixel array and the per-band descriptions while the dataset is open;
# the context manager closes the file afterwards.
with rasterio.open(image_path) as dataset:
    image, band_descriptions = dataset.read(), dataset.descriptions

# Report what was loaded.
print(
    f"Image shape: {image.shape}",
    f"Bands: {band_descriptions}",
    sep="\n",
)

## 3. Preprocess Image

In [None]:
# Move the band axis last: (bands, height, width) -> (height, width, bands).
# NOTE: this rebinds `image`, so re-run the loading cell before re-running this
# one; otherwise the axes get permuted a second time.
image = image.transpose((1, 2, 0))
print(f"Transposed shape: {image.shape}")

# Flatten the image into a table for clustering.
data_table = reshape_image_to_table(image)
print(f"Table shape: {data_table.shape}")

# Min-max scale the table and report the resulting value range.
data_scaled = min_max_scale(data_table)
scaled_lo, scaled_hi = data_scaled.min(), data_scaled.max()
print(f"Scaled range: [{scaled_lo:.3f}, {scaled_hi:.3f}]")

## 4. Run Clustering - KMeans++

In [None]:
# Settings for the KMeans++ run; the seed keeps the result reproducible.
kmeans_pp_params = {
    "n_clusters": 8,
    "max_iterations": 100,
    "random_state": 42,
}

# Ask the factory for the strategy registered under "kmeans++".
clusterer = ClusteringFactory.create("kmeans++", **kmeans_pp_params)

# Fit on the scaled table and get the cluster labels in one call.
labels = clusterer.fit_predict(data_scaled)

print(f"Labels shape: {labels.shape}")
print(f"Inertia: {clusterer.inertia_:.2f}")

## 5. Visualize Results

In [None]:
# Fold the flat label vector back onto the image's first two axes.
spatial_shape = image.shape[:2]
labels_image = reshape_table_to_image(spatial_shape, labels)

# Human-readable names for the clusters. These are placeholders: adjust them
# after inspecting which cluster id corresponds to which land cover.
class_names = [
    "Water", "Grass/Vegetation", "Bare Soil", "Buildings",
    "Roads", "Forest", "Industrial", "Agricultural",
]

# Render the clustered map with a legend built from the class names.
plot_options = {
    "n_clusters": 8,
    "class_names": class_names,
    "figsize": (12, 12),
}
fig = plot_clustered_image(labels_image, **plot_options)
fig.show()

## 6. Compare Different Algorithms

In [None]:
# Run every algorithm on the same scaled data with the same k and seed, and
# collect each run's labels and inertia for comparison.
algorithms = ["kmeans", "kmeans++", "sklearn"]
results = {}

for algo in algorithms:
    print(f"\nRunning {algo}...")
    # Use loop-specific names: rebinding the notebook-level `clusterer` / `labels`
    # here would silently replace the KMeans++ result from section 4, so that
    # re-running the visualization cell would plot the last algorithm instead.
    algo_clusterer = ClusteringFactory.create(
        algo,
        n_clusters=8,
        random_state=42
    )
    algo_labels = algo_clusterer.fit_predict(data_scaled)
    results[algo] = {
        "labels": algo_labels,
        "inertia": algo_clusterer.inertia_
    }
    print(f"Inertia: {algo_clusterer.inertia_:.2f}")

# Compare inertias side by side.
for algo, result in results.items():
    print(f"{algo}: {result['inertia']:.2f}")

## 7. Elbow Method for Optimal K

In [None]:
# Fit one model per candidate k and record its inertia for the elbow plot.
k_values = range(2, 13)
inertias = []

for k in k_values:
    print(f"Testing k={k}...")
    # Use a sweep-specific name so the notebook-level `clusterer` is not left
    # pointing at the throwaway model of the last k tried.
    elbow_clusterer = ClusteringFactory.create(
        "sklearn",  # Fastest
        n_clusters=k,
        random_state=42
    )
    elbow_clusterer.fit(data_scaled)
    inertias.append(elbow_clusterer.inertia_)

# Plot elbow curve (inertia against k).
fig = plot_elbow_curve(inertias, list(k_values))
fig.show()

## 8. List Available Strategies

In [None]:
# Ask the factory which strategy names it knows about.
available = ClusteringFactory.list_strategies()

# Build the header plus one bullet line per strategy, then print it in one go.
report_lines = ["Available clustering strategies:"]
report_lines.extend(f"  - {name}" for name in available)
print("\n".join(report_lines))

In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Clustering Example\\n",
    "\\n",
    "This notebook demonstrates using the refactored satellite analysis toolkit for clustering."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Imports\\n",
    "import numpy as np\\n",
    "from satellite_analysis.config import Config\\n",
    "from satellite_analysis.analyzers.clustering import ClusteringFactory\\n",
    "from satellite_analysis.utils import (\\n",
    "    reshape_image_to_table,\\n",
    "    reshape_table_to_image,\\n",
    "    min_max_scale,\\n",
    "    plot_clustered_image,\\n",
    "    plot_elbow_curve,\\n",
    ")\\n",
    "import rasterio\\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Load Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "config = Config.from_yaml(\"../config/config.yaml\")\\n",
    "print(f\"City: {config.area.city}\")\\n",
    "print(f\"Clustering algorithm: {config.clustering.algorithm}\")\\n",
    "print(f\"Number of clusters: {config.clustering.n_clusters}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Load Processed Image\\n",
    "\\n",
    "We'll use a previously downloaded and processed image."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Load your processed sentinel stack\\n",
    "image_path = \"../sentinel_im/sent_stack.tiff\"\\n",
    "\\n",
    "with rasterio.open(image_path) as src:\\n",
    "    image = src.read()\\n",
    "    band_descriptions = src.descriptions\\n",
    "\\n",
    "print(f\"Image shape: {image.shape}\")\\n",
    "print(f\"Bands: {band_descriptions}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Preprocess Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Transpose: (bands, height, width) -> (height, width, bands)\\n",
    "image = np.transpose(image, (1, 2, 0))\\n",
    "print(f\"Transposed shape: {image.shape}\")\\n",
    "\\n",
    "# Reshape to table format\\n",
    "data_table = reshape_image_to_table(image)\\n",
    "print(f\"Table shape: {data_table.shape}\")\\n",
    "\\n",
    "# Scale data\\n",
    "data_scaled = min_max_scale(data_table)\\n",
    "print(f\"Scaled range: [{data_scaled.min():.3f}, {data_scaled.max():.3f}]\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Run Clustering - KMeans++"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Create clusterer using factory\\n",
    "clusterer = ClusteringFactory.create(\\n",
    "    \"kmeans++\",\\n",
    "    n_clusters=8,\\n",
    "    max_iterations=100,\\n",
    "    random_state=42\\n",
    ")\\n",
    "\\n",
    "# Fit and predict\\n",
    "labels = clusterer.fit_predict(data_scaled)\\n",
    "print(f\"Labels shape: {labels.shape}\")\\n",
    "print(f\"Inertia: {clusterer.inertia_:.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Visualize Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Reshape labels back to image\\n",
    "labels_image = reshape_table_to_image(image.shape[:2], labels)\\n",
    "\\n",
    "# Define class names based on your analysis\\n",
    "class_names = [\\n",
    "    \"Water\",\\n",
    "    \"Grass/Vegetation\",\\n",
    "    \"Bare Soil\",\\n",
    "    \"Buildings\",\\n",
    "    \"Roads\",\\n",
    "    \"Forest\",\\n",
    "    \"Industrial\",\\n",
    "    \"Agricultural\"\\n",
    "]\\n",
    "\\n",
    "# Plot\\n",
    "fig = plot_clustered_image(\\n",
    "    labels_image,\\n",
    "    n_clusters=8,\\n",
    "    class_names=class_names,\\n",
    "    figsize=(12, 12)\\n",
    ")\\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Compare Different Algorithms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Try different algorithms\\n",
    "algorithms = [\"kmeans\", \"kmeans++\", \"sklearn\"]\\n",
    "results = {}\\n",
    "\\n",
    "for algo in algorithms:\\n",
    "    print(f\"\\\\nRunning {algo}...\")\\n",
    "    clusterer = ClusteringFactory.create(\\n",
    "        algo,\\n",
    "        n_clusters=8,\\n",
    "        random_state=42\\n",
    "    )\\n",
    "    labels = clusterer.fit_predict(data_scaled)\\n",
    "    results[algo] = {\\n",
    "        \"labels\": labels,\\n",
    "        \"inertia\": clusterer.inertia_\\n",
    "    }\\n",
    "    print(f\"Inertia: {clusterer.inertia_:.2f}\")\\n",
    "\\n",
    "# Compare inertias\\n",
    "for algo, result in results.items():\\n",
    "    print(f\"{algo}: {result['inertia']:.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Elbow Method for Optimal K"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Test different k values\\n",
    "k_values = range(2, 13)\\n",
    "inertias = []\\n",
    "\\n",
    "for k in k_values:\\n",
    "    print(f\"Testing k={k}...\")\\n",
    "    clusterer = ClusteringFactory.create(\\n",
    "        \"sklearn\",  # Fastest\\n",
    "        n_clusters=k,\\n",
    "        random_state=42\\n",
    "    )\\n",
    "    clusterer.fit(data_scaled)\\n",
    "    inertias.append(clusterer.inertia_)\\n",
    "\\n",
    "# Plot elbow curve\\n",
    "fig = plot_elbow_curve(inertias, list(k_values))\\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. List Available Strategies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "available = ClusteringFactory.list_strategies()\\n",
    "print(\"Available clustering strategies:\")\\n",
    "for strategy in available:\\n",
    "    print(f\"  - {strategy}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
'''

#