# Clasificación no supervisada con serie temporal NDVI de Terra/MODIS

In [27]:
!pip install rasterio

Collecting rasterio
  Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m62.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.4.3


### Cargar librerías

In [1]:
import ee

# Authenticate and initialize GEE
ee.Authenticate()
ee.Initialize(project='ee-my-francodbarr')

In [28]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
import geemap
import geopandas as gpd
from shapely.geometry import Polygon
from shapely.geometry import shape
from shapely.geometry import Point
import rasterio
import random

### Planicie de paleocauces vector importing and conversion

In [None]:
# Import vector file from the GH repo
!wget https://github.com/francobarrionuevoenv21/ClusteringArroceras_Incendios_SanJavier/blob/main/Primary_data/Planicie_con_paleocauces_NW-SW_4326.geojson

--2025-05-31 17:57:58--  https://github.com/francobarrionuevoenv21/ClusteringArroceras_Incendios_SanJavier/blob/main/Primary_data/Planicie_con_paleocauces_NW-SW_4326.geojson
Resolving github.com (github.com)... 140.82.113.4
Connecting to github.com (github.com)|140.82.113.4|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘Planicie_con_paleocauces_NW-SW_4326.geojson’

Planicie_con_paleoc     [ <=>                ] 337.19K  --.-KB/s    in 0.1s    

2025-05-31 17:57:58 (2.65 MB/s) - ‘Planicie_con_paleocauces_NW-SW_4326.geojson’ saved [345285]



In [None]:
# Vector file reading
targetCRS = "EPSG:4326"
vectorPPCC = gpd.read_file('/content/Planicie_con_paleocauces_NW-SW_4326.geojson').to_crs(targetCRS)

In [None]:
# getCoordsSHP function
def getCoordsSHP(gdfshpFile):
  '''
  This function takes a polygon vector file passed as a GeoDataFrame. Then
  it returns all the coordinates as a tuple of tuples.
  '''

  gdfiterf = gdfshpFile.iterfeatures()
  tupleFile = next(gdfiterf)['geometry']['coordinates']

  return tupleFile

# Get vector coordinates as a tuple of tuples
tupleOfTuples01 = getCoordsSHP(vectorPPCC)

# Converting coordinates into a list of lists
listOfLists01 = [list(t[:2]) for t in tupleOfTuples01[0]]

# Coordinates to a EE.Polygon
polygon01 = ee.Geometry.Polygon(listOfLists01)

# Polygon to a EE Feature Collection (For clipping according to the vector of the region)
featureCollection01 = ee.FeatureCollection(polygon01)

## MODIS NDVI data retrieving from GEE catalog
#### https://developers.google.com/earth-engine/datasets/catalog/MODIS_061_MOD13Q1

In [2]:
# MODIS MOD13Q1 NDVI data selection
MODIS = ee.ImageCollection('MODIS/006/MOD13Q1').select('NDVI') # MOD13Q1.061 Terra Vegetation Indices 16-Day Global 250m

# Time range selection
startDate = '2000-02-18'
endDate = '2022-12-31'

# Image collection filtering by time range
MODIS = MODIS.filterDate(startDate, endDate)


Attention required for MODIS/006/MOD13Q1! You are using a deprecated asset.
To make sure your code keeps working, please update it.
Learn more: https://developers.google.com/earth-engine/datasets/catalog/MODIS_006_MOD13Q1



In [22]:
# Check data retrieving
print(type(MODIS))
print(MODIS.size().getInfo())

<class 'ee.imagecollection.ImageCollection'>
526


In [9]:
# Define a function to unmask the NDVI band
def unmaskNDVI(image):
    return image.select('NDVI').unmask(-2000)

# Map the unmask function over the entire collection
MODIS_unmasked = MODIS.map(unmaskNDVI)

In [24]:
# Convert the image collection to a stack of images
MODIS_stack = MODIS_unmasked.toBands()
print(type(MODIS_stack))

<class 'ee.image.Image'>


In [None]:
# Clip the stack of images according to the study area
MODIS_stack = MODIS_stack.clipToCollection(featureCollection01)

## K-means classification of the study area
#### Kmeans in GEE description: https://developers.google.com/earth-engine/guides/clustering

### Initialization/training dataset

In [None]:
'''
NOTE: Running the Kmeans algorithm in GEE requires random training points as input
'''

# Define the region to generate the input sample
region = featureCollection01
# Pixel spatial resolution in meters
scale = 250
# Number of initialization/training points
numPixels = 5000

# Make the training dataset
MODIS_randomtraining = MODIS_stack.sample(region=region, scale=scale, numPixels=numPixels)

### Study area classification

In [None]:
'''
NOTE: The criteria for the number of classification clusters was defined as
five times the prior landcover classification types according to Clausser et al. (2023)
'''
# Define the number of clusters for classification
num_clusters = 50

# Train the Kmeans classifier
kmeans = ee.Clusterer.wekaKMeans(num_clusters).train(MODIS_randomtraining)

In [None]:
# Apply the classifier over the stack of images (Time series classification)
MODIS_kmeans = MODIS_stack.cluster(kmeans) # Pixels are classified as 0 to n_clusters-1
MODIS_kmeans = MODIS_kmeans.add(1) # Sum 1 so the first cluster id different to 0

# Clip the image classified according to vector of the study area
MODIS_kmeans = MODIS_kmeans.clip(featureCollection01)

In [None]:
# Check image output format
print(type(MODIS_kmeans))

### Visualize classification

In [None]:
Map = geemap.Map(center=(-32.06, -60.62), zoom=9)
Map.addLayer(MODIS_kmeans)

Map

### Download the final classification

In [None]:
'''
Tutorial for large images exporting from GEE
https://worldbank.github.io/OpenNightLights/tutorials/mod3_7_import_export_data.html
'''

task = ee.batch.Export.image.toDrive(
        image = MODIS_kmeans.toDouble(),
        scale=scale,
        region = featureCollection01.geometry().bounds(), # Or use custom ee.Geometry.Rectangle([minlon, minlat, maxlon, maxlat])
        description = 'MODIS_MOD13Q1_NDVI_kmeans50-17-04.tif',
        crs = 'EPSG:4326',
        maxPixels = 1e10, # Avoid limits in memory usage
        fileFormat = "GeoTIFF",
        folder ='earthengine'
  )

task.start() # Find the final image in Google Drive folder

## Summary of cluster's classes

#### Data sampling to reconstruct the timeseries of each classification

In [33]:
# Import image with the classification from the GH repo
# If any error with the link, you can find the file here: https://github.com/francobarrionuevoenv21/ClusteringArroceras_Incendios_SanJavier/blob/main/Data_results/01_MODIS_MOD13Q1_NDVI_kmeans50-17-04.tif
!wget https://github.com/francobarrionuevoenv21/ClusteringArroceras_Incendios_SanJavier/raw/refs/heads/main/Data_results/01_MODIS_MOD13Q1_NDVI_kmeans50-17-04.tif

--2025-05-31 20:27:09--  https://github.com/francobarrionuevoenv21/ClusteringArroceras_Incendios_SanJavier/raw/refs/heads/main/Data_results/01_MODIS_MOD13Q1_NDVI_kmeans50-17-04.tif
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/francobarrionuevoenv21/ClusteringArroceras_Incendios_SanJavier/refs/heads/main/Data_results/01_MODIS_MOD13Q1_NDVI_kmeans50-17-04.tif [following]
--2025-05-31 20:27:09--  https://raw.githubusercontent.com/francobarrionuevoenv21/ClusteringArroceras_Incendios_SanJavier/refs/heads/main/Data_results/01_MODIS_MOD13Q1_NDVI_kmeans50-17-04.tif
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200

In [34]:
# Load raster classification
raster_file = "/content/01_MODIS_MOD13Q1_NDVI_kmeans50-17-04.tif"
with rasterio.open(raster_file) as src:
    classification = src.read(1)
    transform = src.transform
    num_clusters = len(np.unique(classification))

In [41]:
cluster_id = 9
np.where(classification == cluster_id)

(array([217, 218, 218, 218, 220, 220, 220, 221, 221, 221, 221, 221, 222,
        222, 222, 222, 222, 222, 222, 223, 223, 223, 223, 223, 223, 223,
        223, 224, 224, 224, 224, 224, 224, 224, 224, 225, 225, 225, 225,
        225, 225, 225, 225, 226, 226, 226, 226, 226, 226, 226, 226, 226,
        226, 226, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227, 227,
        227, 227, 227, 227, 228, 228, 228, 228, 228, 228, 228, 228, 228,
        228, 228, 228, 228, 228, 228, 229, 229, 229, 229, 229, 229, 229,
        229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229, 229,
        229, 229, 229, 229, 229, 229, 229, 230, 230, 230, 230, 230, 230,
        230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230,
        230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 230, 231, 231,
        231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
        231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231, 231,
        231, 232, 232, 232, 232, 232, 232, 232, 232

### Create a dataset with random sampling points of the obtained classification

In [None]:
# Set the number of sampling points for each cluster
num_points_per_cluster = 100

# Initialize lists for storing the sampling points data
points = []
cluster_numbers = []

# Ieration for generating the sampling points randomly
for cluster_id in range(1, num_clusters + 1):
    cluster_indices = np.where(classification == cluster_id)
    sample_indices = random.sample(range(len(cluster_indices[0])), min(num_points_per_cluster, len(cluster_indices[0])))

    for index in sample_indices:
        row, col = cluster_indices[0][index], cluster_indices[1][index] # Gets the index of each pixel
        x, y = rasterio.transform.xy(transform, row, col) # Extracts the coordinates from each pixel
        points.append(Point(x, y))
        cluster_numbers.append(cluster_id)


# Create a GeoDataFrame from the points
data = {'geometry': points, 'cluster': cluster_numbers}
gdf = gpd.GeoDataFrame(data, crs=src.crs)

# Save the GeoDataFrame to a shapefile
output_shapefile = "kmeans_random_points.shp"
gdf.to_file(output_shapefile)

print("Random points saved to:", output_shapefile)

### -

In [None]:
modis_collection = MODIS
#modis_collection = MODIS.filterDate("2022-01-01", "2022-01-31") #prueba con pocas imagenes


clusters = gdf["cluster"].unique()

# Create a function to extract NDVI values for a point
def get_ndvi_for_point(feature):
    point = feature.geometry()
    ndvi_values = modis_collection.getRegion(point, scale=250).getInfo()
    return ndvi_values

# Loop through each cluster and extract NDVI values for each point
ndvi_data = []

for cluster in clusters:
    cluster_gdf = gdf[gdf["cluster"] == cluster]
    for index, row in cluster_gdf.iterrows():
        point = Point(row["geometry"].x, row["geometry"].y)
        feature = ee.Feature(ee.Geometry.Point(point.x, point.y))
        ndvi_values = get_ndvi_for_point(feature)

        # Extract NDVI values for each scene
        for scene_values in ndvi_values[1:]:
            timestamp_milliseconds = int(scene_values[3])
            timestamp_seconds = timestamp_milliseconds / 1000
            dt_object = dt.utcfromtimestamp(timestamp_seconds)
            formatted_date = dt_object.strftime('%Y-%m-%d')

            scene_ndvi_values = scene_values[4:]
            ndvi_data.append([cluster, point.x, point.y, formatted_date, *scene_ndvi_values])

# Create a dataframe from the extracted data
columns = ["cluster", "longitude", "latitude", "date"] + ["NDVI" + str(i) for i in range(1, len(ndvi_data[0]) - 3)]
ndvi_df = pd.DataFrame(ndvi_data, columns=columns)

# NOTA: este paso para el Delta tarda unas 2 horas

In [None]:
#llenar los NoData con -2000 (igual que hicimos en la imagen con unmask, ahora lo hace en la tabla)
ndvi_df = ndvi_df.fillna(-2000)

In [None]:
# Display the dataFrame
ndvi_df

In [None]:
# Pivot the DataFrame to have one column per date
pivot_df = ndvi_df.pivot_table(
    index=["cluster", "longitude", "latitude"],
    columns="date",
    values=["NDVI1"],  # List all the NDVI columns here
    aggfunc="first"  # You can adjust the aggregation function if needed
)

# Reset the index to make columns regular columns
pivot_df.reset_index(inplace=True)

# Display the pivoted DataFrame
pivot_df

In [None]:
#export
pivot_df.to_csv("./kmeans_ndvi_pivot.csv", index=False)

ndvi_df.to_csv("./kmeans_ndvi.csv", index=False)