import libraries and read data

In [None]:
# we have to install the development version for the time being
#!pip uninstall eodal -y
#!pip install git+https://github.com/lukasValentin/eodal@landsat-dev

#!pip install --upgrade git+https://github.com/EOA-team/eodal
#!pip install --upgrade planetary-computer

# Print EOdal version
#import eodal
#print("Version of EOdal: " + eodal.__version__)

In [None]:
# Import general libraries
from pathlib import Path
from matplotlib import pyplot as plt
import numpy as np

# Import EOdal libraries
from eodal.core.band import Band
from eodal.core.raster import RasterCollection

# Import scikit-learn libraries
import sklearn
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.metrics import pairwise_distances
from sklearn.cluster import MeanShift
from sklearn.preprocessing import StandardScaler

In [None]:
# Acquisition year of the median composite to cluster; it is interpolated into
# the input/output file names and the plot titles further down.
year: int = 2021

In [None]:
# Read the Landsat median composite of `year` from disk.
# The path is a *raw* f-string: in a normal string literal the Windows separators
# ("\M", "\d", "\E", "\l") are parsed as invalid escape sequences, which raises a
# DeprecationWarning (SyntaxWarning from Python 3.12 on). The resulting path is unchanged.
fpath_raster = Path(rf'S:\MSc_23_TimckeFinn\data\EOdal\landsat_median_composite_{year}_smallAOI.tif')

# initialize a RasterCollection from the .tif file
composite = RasterCollection.from_multi_band_raster(
    fpath_raster=fpath_raster,
    band_names_dst=['blue_median', 'green_median', 'red_median', 'nir08_median', 'swir16_median', 'swir22_median'],
    band_aliases=["blue", "green", "red", "nir_1", "swir_1", "swir_2"],
)

# calculate some spectral indices (SI); inplace=True as in the original calls,
# the indices are later read back from the DataFrame under these names
for si_name in ('NDVI', 'NDWI', 'EVI', 'CI_GREEN', 'MSAVI', 'BSI'):
    composite.calc_si(si_name, inplace=True)

# print band summaries
composite.band_summaries()

In [None]:
# Create a DataFrame from the RasterCollection; the following cells select its
# columns by band name (e.g. 'blue_median', 'NDVI') to build the feature matrix.
gdf = composite.to_dataframe()

# Summary statistics of the columns -- shown as the cell output
gdf.describe()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Feature set used for clustering: the six median bands followed by the six spectral indices
selected_columns = [
    'blue_median', 'green_median', 'red_median',
    'nir08_median', 'swir16_median', 'swir22_median',
    'NDVI', 'NDWI', 'EVI', 'CI_GREEN', 'MSAVI', 'BSI',
]

# Pull the feature columns out of the GeoDataFrame as a plain NumPy array
features = gdf[selected_columns].values

# Fit the min-max scaler on the features and rescale them in a single call
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(features)

# Overwrite the original feature columns in the GeoDataFrame with the scaled values
gdf[selected_columns] = scaled_features

# Summary of the scaled columns -- shown as the cell output
gdf[selected_columns].describe()


In [None]:
# Qualitative "Accent" colormap: keep the colormap object and its list of colours
# (sliced later to one colour per cluster), plus an index array 0..7
cmap = plt.get_cmap('Accent')
colors, values = cmap.colors, np.arange(8)

In [None]:
## Cluster the scaled spectral data -- MeanShift
from sklearn.cluster import estimate_bandwidth

# Let scikit-learn estimate a bandwidth from the scaled features and report it
bandwidth_est = estimate_bandwidth(scaled_features)
print(f'estimated bandwidth: {bandwidth_est}')

# The estimate is used unchanged; assign a different value here to override it
bandwidth = bandwidth_est

# Build the model and fit it (fit() returns the estimator itself),
# then pick up the label assigned to each sample
ms = MeanShift(bandwidth=bandwidth).fit(scaled_features)
cluster_labels = ms.labels_

# The column / band name carries the bandwidth that was used
ms_band_name = f'MeanShift_cluster_{bandwidth}'
gdf[ms_band_name] = cluster_labels

# Turn the label column into a Band, reusing the geo_info of the blue band,
# and add it to the RasterCollection composite
MeanShift_cluster = Band.from_vector(
    vector_features=gdf,
    band_name_src=ms_band_name,
    geo_info=composite['blue_median'].geo_info,
    band_name_dst=ms_band_name,
)
composite.add_band(MeanShift_cluster)

## Use K-means for Clustering:

# set the desired number of clusters for KMean, GMM and Spectral clustering
n = 6

# create model
# random_state is fixed: without it the centroid initialisation -- and therefore the
# cluster numbering written to the GeoTIFF/PNG outputs -- changes from run to run.
km = KMeans(n_clusters=n, n_init='auto', random_state=42)

# fit the model to the data
km.fit(scaled_features)

# Get the cluster labels (one per row of scaled_features)
labels = km.labels_

# Optional cluster-quality scores (kept disabled)
#silhouette = silhouette_score(scaled_features, labels)
#calinski_harabasz = calinski_harabasz_score(scaled_features, labels)
#davies_bouldin = davies_bouldin_score(scaled_features, labels)

# Print the scores
#print(f'kmeans, {n}, Silhouette score, {silhouette}')
#print(f'kmeans, {n}, Calinski-Harabasz score, {calinski_harabasz}')
#print(f'kmeans, {n}, Davies-Bouldin score, {davies_bouldin}')

# Add the cluster labels to the GeoDataFrame
kmeans_band_name = f'KMeans_{n}_cluster'
gdf[kmeans_band_name] = labels

# create new band instance from that column, reusing the geo_info of the blue band
KMeans_cluster = Band.from_vector(
    vector_features=gdf,
    band_name_src=kmeans_band_name,
    geo_info=composite['blue_median'].geo_info,
    band_name_dst=kmeans_band_name,
)

# add KMeans_cluster as a new Band into the RasterCollection composite
composite.add_band(KMeans_cluster)

## Use Spectral Clustering for Clustering:
from sklearn.cluster import SpectralClustering

# Settings: type of affinity matrix ('nearest_neighbors' or 'rbf') and the
# random state used for reproducibility
affinity_matrix, random_state = 'nearest_neighbors', 42

# Set up the estimator with the shared number of clusters `n`
spectral_clustering = SpectralClustering(
    n_clusters=n,
    affinity=affinity_matrix,
    random_state=random_state,
)

# Fit on the scaled features and get the predicted label of every sample
cluster_labels = spectral_clustering.fit_predict(scaled_features)
print("Cluster labels:", cluster_labels)

# Store the labels as a column of the GeoDataFrame
spectral_band_name = f'Spectral_{n}_cluster'
gdf[spectral_band_name] = cluster_labels

# Build a Band from that column, reusing the geo_info of the blue band
Spectral_cluster = Band.from_vector(
    vector_features=gdf,
    band_name_src=spectral_band_name,
    geo_info=composite['blue_median'].geo_info,
    band_name_dst=spectral_band_name,
)

# add Spectral_cluster as a new Band into the RasterCollection composite
composite.add_band(Spectral_cluster)

## Use Gaussian Mixture Models for Clustering:

# create model with `n` components
# random_state is fixed: without it the initialisation -- and therefore the component
# numbering written to the GeoTIFF/PNG outputs -- changes from run to run.
gmm = GaussianMixture(n_components=n, random_state=42)

# fit the model to the data and assign each sample to a component.
# The labels get their own name so that `GMM_cluster` only ever refers to the Band below.
gmm_labels = gmm.fit_predict(scaled_features)

# Optional cluster-quality scores (kept disabled); they have to be computed on the
# GMM labels, not on the KMeans `labels`
#silhouette = silhouette_score(scaled_features, gmm_labels)
#calinski_harabasz = calinski_harabasz_score(scaled_features, gmm_labels)
#davies_bouldin = davies_bouldin_score(scaled_features, gmm_labels)

# Print the scores
#print(f'gmm, {n}, Silhouette score, {silhouette}')
#print(f'gmm, {n}, Calinski-Harabasz score, {calinski_harabasz}')
#print(f'gmm, {n}, Davies-Bouldin score, {davies_bouldin}')

# Assign cluster labels to GeoDataFrame
gmm_band_name = f'GMM_{n}_cluster'
gdf[gmm_band_name] = gmm_labels

# create new band instance from that column, reusing the geo_info of the blue band
GMM_cluster = Band.from_vector(
    vector_features=gdf,
    band_name_src=gmm_band_name,
    geo_info=composite['blue_median'].geo_info,
    band_name_dst=gmm_band_name,
)

# add GMM_cluster as a new Band into the RasterCollection composite
composite.add_band(GMM_cluster)

In [None]:
# Inspect the collection after the cluster bands have been added.
# NOTE: a notebook cell only renders the value of its last expression, so the
# return values of the first two calls are not displayed here.
composite.band_summaries()
composite.is_bandstack()
composite.band_names

In [None]:
# save as GeoTiff
# The common path prefix is a *raw* f-string: in a normal string literal the Windows
# separators ("\M", "\d", "\E", "\l") are invalid escape sequences and raise a
# DeprecationWarning (SyntaxWarning from Python 3.12 on). The file names are unchanged.
out_prefix = rf'S:\MSc_23_TimckeFinn\data\EOdal\landsat_median_composite_{year}_smallAOI_2'

# the collection without a band selection
composite.to_rasterio(f'{out_prefix}_clean_si_cluster.tif')

# one file per clustering result, selected by band name
composite.to_rasterio(f'{out_prefix}_MeanShift_cluster_{bandwidth}.tif', band_selection=f'MeanShift_cluster_{bandwidth}', use_band_aliases=False)
composite.to_rasterio(f'{out_prefix}_KMeans_{n}_cluster.tif', band_selection=f'KMeans_{n}_cluster')
composite.to_rasterio(f'{out_prefix}_Spectral_{n}_cluster.tif', band_selection=f'Spectral_{n}_cluster')
composite.to_rasterio(f'{out_prefix}_GMM_{n}_cluster.tif', band_selection=f'GMM_{n}_cluster')


In [None]:
# Plot every clustering result next to the RGB composite and save each figure as PNG.
#
# Changes compared with the four copy-pasted sections this replaces:
#   * the Spectral section referenced `n_clusters`, which is never defined (NameError);
#     the number of clusters is `n`, as for KMeans and GMM
#   * the output folder is a raw string, so the Windows backslashes ("\M", "\d", "\p",
#     "\c", ...) are no longer parsed as invalid escape sequences
#   * one loop over a table of plots instead of four near-identical blocks
plot_dir = r'S:\MSc_23_TimckeFinn\data\python_outputs\cluster'

# one colour and one colourbar tick per cluster for the methods using `n` clusters
cmap = plt.cm.colors.ListedColormap(colors[:n])
ticks = list(range(n))

# (band to plot, colormap, title of the right panel, PNG file name) -- in the original order
cluster_plots = [
    (f'KMeans_{n}_cluster', cmap, f'KMeans_{n}_cluster', f'KMeans_{n}_cluster_{year}.png'),
    # NOTE(review): the MeanShift plot reuses the `n`-based ticks although the number of
    # MeanShift clusters is found by the algorithm -- confirm this is intended.
    (f'MeanShift_cluster_{bandwidth}', 'Accent', f'MeanShift_cluster with bandwith: {bandwidth}', f'MS_cluster_{year}_{bandwidth}.png'),
    (f'GMM_{n}_cluster', cmap, f'GMM_{n}_cluster', f'GMM_{n}_cluster_{year}.png'),
    (f'Spectral_{n}_cluster', cmap, f'Spectral_{n}_cluster', f'Spectral_{n}_cluster_{year}.png'),
]

for band_name, colormap, title, png_name in cluster_plots:
    fig, axs = plt.subplots(1, 2, figsize=(20, 10), sharey=True)

    # left panel: RGB composite for visual reference
    composite.plot_multiple_bands(["red", "green", "blue"], ax=axs[0])
    axs[0].set_title(f'RGB {year}')

    # right panel: cluster labels
    composite.plot_band(band_name, colormap=colormap, discrete_values=False, user_defined_ticks=ticks, ax=axs[1])
    axs[1].set_title(title)

    # save as PNG for quick view, then close the figure to free its memory
    fig.savefig(f'{plot_dir}\\{png_name}')
    plt.close(fig)