Import libraries and read data

In [None]:
# !pip uninstall eodal -y
# !pip install git+https://github.com/lukasValentin/eodal.git@landsat-dev

In [None]:
# Import general libraries
from pathlib import Path
from matplotlib import pyplot as plt
import numpy as np

from shapely.geometry import Point
from sklearn.mixture import GaussianMixture
from sklearn.mixture import GaussianMixture

# Import scikit-learn libraries
import sklearn
from sklearn.cluster import KMeans
from sklearn.cluster import MeanShift
from sklearn.preprocessing import StandardScaler
print("Version of scikit-learn: " + sklearn.__version__)

# Import EOdal libraries
import eodal
from eodal.core.band import Band
from eodal.core.raster import RasterCollection

# Print EOdal version
print("Version of EOdal: " + eodal.__version__)

In [None]:
# Year of the Landsat median composite to process; it is inserted into the
# input raster file name and into the names of all output files below.
year = 2003

In [None]:
# Read the yearly Landsat median composite (GeoTIFF) from disk.
# The Windows path is written as a raw f-string so that backslash sequences
# such as "\M", "\d", "\E" and "\l" are not parsed as (invalid) escape
# sequences; the resulting path is character-for-character the same as before.
fpath_raster = Path(rf'S:\MSc_23_TimckeFinn\data\EOdal\landsat_median_composite_{year}.tif')

# Initialize a RasterCollection from the multi-band .tif file and assign the
# listed aliases to its bands.
composite = RasterCollection.from_multi_band_raster(
    fpath_raster=fpath_raster,
    band_aliases=["blue", "green", "red", "nir_1", "swir_1"],
)
composite.band_summaries()


## Feature extraction

In [None]:
# Compute a set of spectral indices (SI); with inplace=True each result is
# stored in `composite` itself rather than returned.
spectral_indices = ('NDVI', 'NDWI', 'EVI', 'CI_GREEN', 'MSAVI', 'BSI')
for index_name in spectral_indices:
    composite.calc_si(index_name, inplace=True)

# Show the band summaries again, now that the indices have been calculated.
composite.band_summaries()

In [None]:
# Plot two false-colour band combinations side by side.
fig, axs = plt.subplots(1, 2, figsize=(20, 10), sharey=True)

# NOTE: a "Short-Wave Infrared" panel (swir22_median, swir16_median,
# red_median) was drawn on axs[0] at some point; it is currently disabled.
false_colour_panels = (
    ("Color Infrared", ["nir08_median", "red_median", "green_median"]),
    ("Agriculture", ["swir16_median", "nir08_median", "blue_median"]),
)
for panel_ax, (panel_title, panel_bands) in zip(axs, false_colour_panels):
    composite.plot_multiple_bands(panel_bands, ax=panel_ax)
    panel_ax.set_title(panel_title)

# Colormap for the single-band plot of every known band. A band that is not
# listed here still gets a figure with the RGB panel, but its right-hand panel
# stays empty.
band_colormaps = {
    'blue_median': "Blues",
    'green_median': "Greens",
    'red_median': "Reds",
    'nir08_median': "Greys_r",
    'swir16_median': "Greys_r",
    'NDVI': "YlGn",
    'NDWI': "GnBu",
    'EVI': "YlGn",
    'CI_GREEN': "YlGn",
    'MSAVI': "YlGn",
    'BSI': "pink_r",
}

# One figure per band: RGB composite on the left, the band itself on the right.
for band in composite.band_names:
    fig, axs = plt.subplots(1, 2, figsize=(20, 10), sharey=True)

    composite.plot_multiple_bands(["red", "green", "blue"], ax=axs[0])
    axs[0].set_title("RGB")

    band_cmap = band_colormaps.get(band)
    if band_cmap is None:
        continue

    composite.plot_band(band, colormap=band_cmap, ax=axs[1])
    axs[1].set_title(band)


In [None]:
# Convert the RasterCollection into a (Geo)DataFrame.
gdf = composite.to_dataframe()

# Optional, currently disabled: expose the point coordinates as own columns.
# gdf['x_coordinate'] = gdf['geometry'].apply(lambda point: Point(point).x)
# gdf['y_coordinate'] = gdf['geometry'].apply(lambda point: Point(point).y)

# Band columns that are min-max scaled and fed into the clustering.
columns_to_scale = ['blue_median', 'green_median', 'red_median', 'nir08_median', 'swir16_median']

# All feature columns of interest: the bands above plus the spectral indices
# (the geometry column is left out on purpose, it is non-numeric).
selected_columns = columns_to_scale + ['NDVI', 'NDWI', 'EVI', 'CI_GREEN', 'MSAVI', 'BSI']

gdf[selected_columns].head()

In [None]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Pull the columns to be scaled out of the GeoDataFrame as a NumPy array.
features = gdf[columns_to_scale].values

# Create a MinMaxScaler and fit it to the features (fit() returns the scaler),
# then rescale the very same features with it.
scaler = MinMaxScaler().fit(features)
scaled_features = scaler.transform(features)

# Overwrite the original band values in the GeoDataFrame with the scaled ones
# and preview the result.
gdf[columns_to_scale] = scaled_features
gdf[selected_columns].head()

In [None]:
# Print band summaries of the RasterCollection.
# NOTE(review): the min-max scaling above was written to `gdf` only, not to
# `composite`, so these summaries presumably still show the unscaled band
# values - TODO confirm.
composite.band_summaries()

In [None]:
## Use K-means for Clustering:

# Run KMeans once for every number of clusters from 2 to 7. Each run adds a
# label column to `gdf`, a band of the same name to `composite`, and writes a
# quick-look PNG.
for n in range(2, 8, 1):

    # name shared by the GeoDataFrame column, the new band and the plot title
    kmeans_name = f"KMeans_{n}_cluster"

    # create model
    # NOTE: no random_state is passed, so the labels may differ between runs.
    km = KMeans(n_clusters=n)

    # fit the model to the scaled band values
    km.fit(scaled_features)

    # Get the cluster labels
    labels = km.labels_

    # Add the cluster labels to the DataFrame
    gdf[kmeans_name] = labels

    # # Feature relevance assessment
    # centroid_values = km.cluster_centers_  # Centroid values for each feature within each cluster

    # # Calculate the range of values for each feature
    # feature_ranges = np.ptp(scaled_features, axis=0)

    # # Calculate the relative feature relevance
    # feature_relevance = centroid_values * feature_ranges

    # # Print the feature relevance for each cluster
    # for cluster_id, relevance_scores in enumerate(feature_relevance):
    #     print(f"Cluster {cluster_id + 1}:")
    #     for feature_id, score in enumerate(relevance_scores):
    #         print(f"Feature {selected_columns[feature_id]}: {score}")
    #     print()

    # create a new Band instance from the label column in the GeoDataFrame,
    # re-using the geo-information of the 'blue' band
    KMeans_cluster = Band.from_vector(
        vector_features=gdf,
        band_name_src=kmeans_name,
        geo_info=composite['blue'].geo_info,
        band_name_dst=kmeans_name,
    )

    # add the label band to the RasterCollection composite
    composite.add_band(KMeans_cluster)

    # RGB composite on the left, cluster map on the right
    fig, axs = plt.subplots(1, 2, figsize=(20, 10), sharey=True)

    composite.plot_multiple_bands(["red", "green", "blue"], ax=axs[0])
    axs[0].set_title("RGB")

    composite.plot_band(kmeans_name, colormap="Accent", ax=axs[1])
    axs[1].set_title(kmeans_name)

    # save as PNG for quick view
    # (raw f-string: the backslashes of the Windows path stay literal instead
    # of being read as invalid escape sequences; the path itself is unchanged)
    fig.savefig(rf"S:\MSc_23_TimckeFinn\data\python_outputs\cluster\KMeans_{n}_cluster_{year}.png")

    plt.close(fig)  # Close the figure so open figures do not pile up
    

In [None]:
## Use Gaussian Mixture Models for Clustering:

# Run a Gaussian mixture model once for every number of components from 3 to
# 7. Each run adds a label column to `gdf`, a band of the same name to
# `composite`, and writes a quick-look PNG.
for n in range(3, 8, 1):

    # name shared by the GeoDataFrame column, the new band and the plot title
    gmm_name = f"GMM_{n}_cluster"

    # create model
    # NOTE: no random_state is passed, so the labels may differ between runs.
    gmm = GaussianMixture(n_components=n)

    # fit the model to the scaled band values
    gmm.fit(scaled_features)

    # Predict one component label per row of the scaled features
    gmm_labels = gmm.predict(scaled_features)

    # Assign cluster labels to GeoDataFrame
    gdf[gmm_name] = gmm_labels

    # create a new Band instance from the GMM label column, re-using the
    # geo-information of the 'blue' band
    GMM_cluster = Band.from_vector(
        vector_features=gdf,
        band_name_src=gmm_name,
        geo_info=composite['blue'].geo_info,
        band_name_dst=gmm_name,
    )

    # add the label band to the RasterCollection composite
    composite.add_band(GMM_cluster)

    # RGB composite on the left, cluster map on the right
    fig, axs = plt.subplots(1, 2, figsize=(20, 10), sharey=True)

    composite.plot_multiple_bands(["red", "green", "blue"], ax=axs[0])
    axs[0].set_title("RGB")

    composite.plot_band(gmm_name, colormap="Accent", ax=axs[1])
    axs[1].set_title(gmm_name)

    # save as PNG for quick view
    # (raw f-string: the backslashes of the Windows path stay literal instead
    # of being read as invalid escape sequences; the path itself is unchanged)
    fig.savefig(rf"S:\MSc_23_TimckeFinn\data\python_outputs\cluster\GMM_{n}_cluster_{year}.png")

    # Close the figure after saving, as the KMeans loop does, so open figures
    # do not pile up.
    plt.close(fig)

In [None]:
## Use MeanShift for Clustering:

# set the kernel bandwidth for the MeanShift clustering
# (MeanShift is configured via a bandwidth, not via a number of clusters)
# bandwidth = 8 

# create model
#ms = MeanShift(bandwidth=bandwidth)

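# fit the model to the data
# (NOTE: `normalized_features` is not defined in this notebook; the scaled
# feature array is called `scaled_features`)
#ms.fit(normalized_features)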

# Assign cluster labels
#cluster_labels = ms.labels_

# Assign cluster labels to GeoDataFrame
#gdf['MeanShift_cluster'] = cluster_labels

# add column cluster_mean_shift as a new Band into the RasterCollection composite
#MeanShift_cluster = Band.from_vector(vector_features = gdf,
                                 #band_name_src = 'MeanShift_cluster',
                                 #geo_info = composite['blue'].geo_info,
                                 #band_name_dst='MeanShift_cluster')

#composite.add_band(MeanShift_cluster)

#save as PNG for quick view
#fig.savefig(str("S:\MSc_23_TimckeFinn\data\python_outputs\cluster\MS_cluster_" + str(year) + ".png"))


In [None]:
# Save the RasterCollection, including the bands added to it above, as GeoTiff.
# The Windows path is a raw f-string so that its backslashes stay literal
# instead of being read as invalid escape sequences ("\M", "\d", "\E", "\l");
# the resulting path is unchanged.
composite.to_rasterio(rf'S:\MSc_23_TimckeFinn\data\EOdal\landsat_median_composite_cluster_{year}.tif')