<a href="https://colab.research.google.com/github/cshgiser/HRSM/blob/main/HRSM_MAPPING_Sec2-2_ML_pipeline_area.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Initial settings (authorize Earth Engine, connect Google Drive, install packages, etc.)

In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): a later cell mounts Drive again at /content/gdrive, and the copy
# step reads from that second mount point — confirm this first mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# @title get GEE authorization
# Earth Engine Python client; every ee.* call in this notebook depends on it.
import ee

# Trigger the authentication flow.
ee.Authenticate()
# Initialize the library.
# NOTE(review): 'ee-scai62' is a hard-coded Cloud project ID — presumably other
# users must substitute a project they have access to; confirm.
ee.Initialize(project='ee-scai62')

In [None]:
# @title Connect to my Google Drive
# Mount Google Drive at /content/gdrive; the copy step in the next cell reads
# its source folder from this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
# Copy the StartFolder directory from the mounted Drive into the current working
# directory; the trained-model files are later loaded from ./StartFolder/TrainedML/.
!cp -r '/content/gdrive/My Drive/NIFA_Download/StartFolder/' .

In [None]:
# @title Install necessary packages
!pip install folium
!pip install geopandas
!pip install netCDF4
!pip install --upgrade xee
!pip install rasterio
!pip install ipywidgets
!pip install permetrics==2.0.0
# !pip install pyDEM
# !pip install richdem
# !pip install pyflwdir
!pip install mapie
!pip install scikit-learn==1.5.2

In [None]:
# @title Import packages
from mapie.regression import SplitConformalRegressor, ConformalizedQuantileRegressor
import lightgbm as lgb

import folium
from folium import Figure
import geopandas as gpd
import json
import geemap
import numpy as np
from shapely.geometry import mapping
import pandas as pd
import netCDF4 as nc
import xarray
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.crs import CRS
import math
import time
import os
from datetime import datetime, timedelta
import joblib
import ipywidgets as widgets
from IPython.display import display
from permetrics import RegressionMetric
import osgeo.gdal as gdal
# import richdem as rd
# import pyflwdir
from osgeo import osr, gdalconst
from scipy.stats import rankdata

## Define study site: set longitude and latitude

In [None]:
# @title Input longitude and latitude

# Global variables to store coordinates.
# Both stay None until the OK button callback (on_button_click) accepts a valid
# rectangle; each then holds a (longitude, latitude) tuple.
upper_left = None
bottom_right = None

def validate_coordinates(lon, lat):
    """Return True when (lon, lat) lies inside the contiguous-USA bounding box.

    The box spans longitudes -125..-66 and latitudes 24..50, bounds included.
    """
    # Longitude is tested first; latitude is only examined when longitude passes.
    if not (-125 <= lon <= -66):
        return False
    return 24 <= lat <= 50

def on_button_click(b):
    """Handle a click on the OK button: parse, validate and store the rectangle.

    Reads the four text boxes, converts them to floats and, when both corners
    pass validate_coordinates and the first corner is strictly north-west of
    the second, stores them in the globals upper_left / bottom_right as
    (lon, lat) tuples. The outcome is reported in output_area. The argument
    `b` (the clicked button) is not used.
    """
    global upper_left, bottom_right

    def _report(*messages):
        # Replace whatever the output area currently shows with the given lines.
        output_area.clear_output()
        with output_area:
            for text in messages:
                print(text)

    text_boxes = (ul_longitude_input, ul_latitude_input,
                  br_longitude_input, br_latitude_input)
    try:
        lon1, lat1, lon2, lat2 = (float(box.value) for box in text_boxes)
    except ValueError:
        _report("Invalid input. Please enter numeric values.")
        return

    corners_valid = validate_coordinates(lon1, lat1) and validate_coordinates(lon2, lat2)
    # Upper-left must be west of and north of bottom-right.
    if not (corners_valid and lon1 < lon2 and lat1 > lat2):
        _report("Invalid coordinates. Ensure they are inside the contiguous USA "
                "and that Upper-left is above and left of Bottom-right.")
        return

    upper_left = (lon1, lat1)
    bottom_right = (lon2, lat2)
    _report("Valid rectangle defined:",
            f"  Upper-left  (Lon {lon1}, Lat {lat1})",
            f"  Bottom-right (Lon {lon2}, Lat {lat2})")

# One free-text box per corner coordinate
ul_longitude_input = widgets.Text(placeholder="UL Longitude")
ul_latitude_input = widgets.Text(placeholder="UL Latitude")
br_longitude_input = widgets.Text(placeholder="BR Longitude")
br_latitude_input = widgets.Text(placeholder="BR Latitude")

# Button that triggers validation, and the area where the result is printed
submit_button = widgets.Button(description="OK")
output_area = widgets.Output()
submit_button.on_click(on_button_click)

# Layout: one labelled row per text box, then the button and the message area
for caption, text_box in (
    ("Upper-left Longitude:", ul_longitude_input),
    ("Upper-left Latitude:", ul_latitude_input),
    ("Bottom-right Longitude:", br_longitude_input),
    ("Bottom-right Latitude:", br_latitude_input),
):
    display(widgets.HBox([widgets.Label(caption), text_box]))
display(submit_button)
display(output_area)

# Example input:
#   Upper-left   (Lon -90.0, Lat 40.0)
#   Bottom-right (Lon -89.0, Lat 39.0)


In [None]:
# @title Define polygon geometry from upper-left and bottom-right points
# The two corners are only filled in by the OK button callback of the input
# form. They are still None when the form was never submitted (for example
# after "Run all"), so fail with an explicit message instead of an opaque
# unpacking error.
if upper_left is None or bottom_right is None:
    raise RuntimeError(
        "Study-area corners are not set. Enter the four coordinates in the form "
        "above and click OK before running this cell."
    )

# Extract coordinates
ul_lon, ul_lat = upper_left
br_lon, br_lat = bottom_right

# Ring vertices listed clockwise, starting and ending at the upper-left corner
polygon_coords = [
    [ul_lon, ul_lat],        # upper-left
    [br_lon, ul_lat],        # upper-right
    [br_lon, br_lat],        # bottom-right
    [ul_lon, br_lat],        # bottom-left
    [ul_lon, ul_lat]         # close the polygon
]

# Create an Earth Engine Polygon
polygon = ee.Geometry.Polygon([polygon_coords])

# Wrap in a FeatureCollection (a single feature with id_num 0)
feature_collection = ee.FeatureCollection([
    ee.Feature(polygon, {"id_num": 0})
])

print("Polygon FeatureCollection defined.")

In [None]:
# Fetch the FeatureCollection from Earth Engine and display it as the cell output.
feature_collection.getInfo()

In [None]:
# @title Visualization - spatial distribution of sites
import folium
from folium import Figure

# Pull the study-area polygon from Earth Engine as GeoJSON
roi_geojson = feature_collection.getInfo()

# Base map with a fixed centre and zoom level
m = folium.Map(location=[42, -95.25], zoom_start=4)

# Satellite imagery as an overlay, with the study-area outline on top
satellite_tiles = folium.TileLayer(
    tiles='https://mt1.google.com/vt/lyrs=s&x={x}&y={y}&z={z}',
    attr='Google Satellite',
    name='Satellite',
    overlay=True
)
satellite_tiles.add_to(m)
folium.GeoJson(roi_geojson).add_to(m)

# Embed the map in a fixed-size figure; the last expression renders it
fig = Figure(width=800, height=600)
fig.add_child(m)

## Read ee.images (constant images)

In [None]:
#@title read images to array

# Landcover: first NLCD image in the 2016-2022 date window, 'landcover' band only
LC = (
    ee.ImageCollection('USGS/NLCD_RELEASES/2019_REL/NLCD')
    .filterDate('2016-01-01', '2022-12-31')
    .select('landcover')
    .first()
)
lc_proj = LC.projection()

# POLARIS soil properties
POLARIS_ROOT = 'projects/sat-io/open-datasets/polaris'
POLARIS_DEPTHS = ('0_5', '5_15', '15_30', '30_60', '60_100')
# Weight of each depth layer in the 0-100 profile value (0.05 + 0.1 + 0.15 + 0.3 + 0.4 = 1)
POLARIS_DEPTH_WEIGHTS = (0.05, 0.1, 0.15, 0.3, 0.4)


def _polaris_layer(prop, depth):
    """Return the '<prop>_<depth>' image of the POLARIS '<prop>_mean' collection.

    The image is picked by its system:index and its band is renamed to that
    same identifier.
    """
    band = f'{prop}_{depth}'
    return (
        ee.ImageCollection(f'{POLARIS_ROOT}/{prop}_mean')
        .filterMetadata('system:index', 'equals', band)
        .first()
        .rename(band)
    )


def _profile_mean(layers, band_name):
    """Return the weighted sum of the five depth layers, renamed to band_name."""
    weighted = [layer.multiply(weight)
                for layer, weight in zip(layers, POLARIS_DEPTH_WEIGHTS)]
    total = weighted[0]
    for term in weighted[1:]:
        total = total.add(term)
    return total.rename(band_name)


# Bulk density
bd_0_5, bd_5_15, bd_15_30, bd_30_60, bd_60_100 = (
    _polaris_layer('bd', depth) for depth in POLARIS_DEPTHS)
bd_0_100 = _profile_mean([bd_0_5, bd_5_15, bd_15_30, bd_30_60, bd_60_100], 'bd_0_100')

# Clay
clay_0_5, clay_5_15, clay_15_30, clay_30_60, clay_60_100 = (
    _polaris_layer('clay', depth) for depth in POLARIS_DEPTHS)
clay_0_100 = _profile_mean([clay_0_5, clay_5_15, clay_15_30, clay_30_60, clay_60_100], 'clay_0_100')

# Ksat: no weighted 0-100 image is built here; the profile value is computed
# later from the downloaded per-layer arrays
ksat_0_5, ksat_5_15, ksat_15_30, ksat_30_60, ksat_60_100 = (
    _polaris_layer('ksat', depth) for depth in POLARIS_DEPTHS)

# Sand
sand_0_5, sand_5_15, sand_15_30, sand_30_60, sand_60_100 = (
    _polaris_layer('sand', depth) for depth in POLARIS_DEPTHS)
sand_0_100 = _profile_mean([sand_0_5, sand_5_15, sand_15_30, sand_30_60, sand_60_100], 'sand_0_100')

# Silt
silt_0_5, silt_5_15, silt_15_30, silt_30_60, silt_60_100 = (
    _polaris_layer('silt', depth) for depth in POLARIS_DEPTHS)
silt_0_100 = _profile_mean([silt_0_5, silt_5_15, silt_15_30, silt_30_60, silt_60_100], 'silt_0_100')

# Multi-band predictor stack, in band order:
#   0 bd_0_5, 1 bd_0_100, 2 ksat_0_5, 3 ksat_0_5 (again), 4 clay_0_5, 5 clay_0_100,
#   6 sand_0_5, 7 sand_0_100, 8 silt_0_5, 9 silt_0_100
# ksat_0_5 is deliberately added twice: the second copy only reserves band
# index 3, which the unit-conversion cell overwrites in polaris_arr with the
# value computed from all five ksat layers.
polaris = bd_0_5
for extra_band in (bd_0_100,
                   ksat_0_5, ksat_0_5,
                   clay_0_5, clay_0_100,
                   sand_0_5, sand_0_100,
                   silt_0_5, silt_0_100):
    polaris = polaris.addBands(extra_band)

# All five ksat layers, shallowest first
polaris_ksat = ksat_0_5
for deeper_layer in (ksat_5_15, ksat_15_30, ksat_30_60, ksat_60_100):
    polaris_ksat = polaris_ksat.addBands(deeper_layer)

In [None]:
region = feature_collection.geometry()


def _to_100m_array(image):
    """Reproject image to EPSG:4326 at scale 100 and download it over `region`.

    Returns the reprojected ee.Image together with the array produced by
    geemap.ee_to_numpy.
    """
    image_100m = image.reproject(crs='EPSG:4326', scale=100)
    return image_100m, geemap.ee_to_numpy(image_100m, region=region, scale=100)


print("land cover:")
LC_100m, LC_arr = _to_100m_array(LC)
print(LC_arr.shape)

print("polaris:")
polaris_100m, polaris_arr = _to_100m_array(polaris)
print(polaris_arr.shape)

polaris_ksat_100m, polaris_ksat_arr = _to_100m_array(polaris_ksat)
print(polaris_ksat_arr.shape)

In [None]:
# @title Convert units of soil properties for ML inputs

# NOTE: the arrays are modified in place, so running this cell a second time
# without re-downloading them would apply the power of ten twice.

# Surface ksat (band 2 of the predictor stack): raise 10 to the stored value
polaris_arr[:, :, 2] = np.power(10, polaris_arr[:, :, 2])  # convert to cm/hr
# Same transformation for each of the five ksat depth layers
for layer_idx in range(5):
    polaris_ksat_arr[:, :, layer_idx] = np.power(10, polaris_ksat_arr[:, :, layer_idx])

# Layer weights, shallowest first
d0_5 = 0.05
d5_15 = 0.10
d15_30 = 0.15
d30_60 = 0.30
d60_100 = 0.40
d_total = d0_5 + d5_15 + d15_30 + d30_60 + d60_100

# Weighted harmonic mean over the five layers, written into band 3
# (the slot reserved by the duplicated ksat band of the image stack)
layer_weights = (d0_5, d5_15, d15_30, d30_60, d60_100)
weight_over_ksat = layer_weights[0] / polaris_ksat_arr[:, :, 0]
for layer_idx in range(1, 5):
    weight_over_ksat = weight_over_ksat + layer_weights[layer_idx] / polaris_ksat_arr[:, :, layer_idx]
polaris_arr[:, :, 3] = d_total / weight_over_ksat

In [None]:
# Sanity check: value range of the two ksat bands and the NaN count of the whole stack
for zone, band_idx in (("surface", 2), ("rootzone", 3)):
    ksat_band = polaris_arr[:, :, band_idx]
    print(f"max ksat at {zone}: ", ksat_band.max())
    print(f"min ksat at {zone}: ", ksat_band.min())
nan_count = np.count_nonzero(np.isnan(polaris_arr))
print("number of nan values in soil property data: ", nan_count)

## Load DEM and read to array

In [None]:
def _load_terrain_asset(asset_id):
    """Load an asset image, reproject it to lc_proj, then to EPSG:4326 at scale 100."""
    return (
        ee.Image(asset_id)
        .reproject(lc_proj)
        .reproject(crs='EPSG:4326', scale=100)
    )


# NOTE(review): the DEM asset ID is spelled 'COUNS' while the others use 'CONUS' —
# confirm it matches the actual asset name.
dem_image = _load_terrain_asset('projects/ee-scai62/assets/COUNS_dem100m')
TWII_image = _load_terrain_asset('projects/ee-scai62/assets/CONUS_dem100m_TWII')
aspect_image = _load_terrain_asset('projects/ee-scai62/assets/CONUS_dem100m_aspect')
slopeDegree_image = _load_terrain_asset('projects/ee-scai62/assets/CONUS_dem100m_slopeDegree')

In [None]:
def _terrain_to_array(image):
    """Download image over `region` at scale 100 as a NumPy array."""
    return geemap.ee_to_numpy(image, region=region, scale=100)


# Elevation: values below -1000 are replaced by NaN
dem_arr = _terrain_to_array(dem_image)
dem_arr[dem_arr < -1000] = np.nan

twi_arr = _terrain_to_array(TWII_image)

# Aspect is encoded as sine and cosine of the angle given in degrees.
# Pixels with aspect == -1 get 0 for both components
# (presumably -1 is the "no aspect / flat" flag — confirm).
aspect_arr = _terrain_to_array(aspect_image)
no_aspect = aspect_arr == -1
aspect_rad = aspect_arr/180*np.pi
sinAsp_arr = np.sin(aspect_rad)
sinAsp_arr[no_aspect] = 0
cosAsp_arr = np.cos(aspect_rad)
cosAsp_arr[no_aspect] = 0

slope_arr = _terrain_to_array(slopeDegree_image)

In [None]:
# # Duplicate the last column of dem_arr to match the width of other arrays
# last_column_dem = dem_arr[:, -1:, :]
# dem_arr = np.concatenate((dem_arr, last_column_dem), axis=1)

# last_column_twi = twi_arr[:, -1:, :]
# twi_arr = np.concatenate((twi_arr, last_column_twi), axis=1)

# last_column_sinAsp = sinAsp_arr[:, -1:, :]
# sinAsp_arr = np.concatenate((sinAsp_arr, last_column_sinAsp), axis=1)

# last_column_cosAsp = cosAsp_arr[:, -1:, :]
# cosAsp_arr = np.concatenate((cosAsp_arr, last_column_cosAsp), axis=1)

# last_column_slope = slope_arr[:, -1:, :]
# slope_arr = np.concatenate((slope_arr, last_column_slope), axis=1)


# # Print the new shapes to verify
# print("New dem_arr shape: ", dem_arr.shape)
# print("New twi_arr shape: ", twi_arr.shape)
# print("New sinAsp_arr shape: ", sinAsp_arr.shape)
# print("New cosAsp_arr shape: ", cosAsp_arr.shape)
# print("New slope_arr shape: ", slope_arr.shape)

# dem_arr = dem_arr[:LC_arr.shape[0], :LC_arr.shape[1], 0]
# twi_arr = twi_arr[:LC_arr.shape[0], :LC_arr.shape[1], 0]
# sinAsp_arr = sinAsp_arr[:LC_arr.shape[0], :LC_arr.shape[1], 0]
# cosAsp_arr = cosAsp_arr[:LC_arr.shape[0], :LC_arr.shape[1], 0]
# slope_arr = slope_arr[:LC_arr.shape[0], :LC_arr.shape[1], 0]

# print("dem_arr shape: ", dem_arr.shape)
# print("twi_arr shape: ", twi_arr.shape)
# print("sinAsp_arr shape: ", sinAsp_arr.shape)
# print("cosAsp_arr shape: ", cosAsp_arr.shape)
# print("slope_arr shape: ", slope_arr.shape)

## Define study area and period

In [None]:
# prompt: get the boundary of feature_collection
region = feature_collection.geometry()

# Study period as 'YYYY-MM-DD' strings
start_date = '2024-01-01'
end_date = '2025-01-01'

# The same two dates as datetime objects
start_datetime, end_datetime = (
    datetime.strptime(day, '%Y-%m-%d') for day in (start_date, end_date)
)

# Length of the period in whole hours
study_period = end_datetime - start_datetime
num_hours = int(study_period.total_seconds()/3600)
print(num_hours)

## Get Earth Engine image collections

In [None]:
# @title Era5-land
# The ERA5-Land window opens 120 + 24 hours before the study period starts
era5_lookback = timedelta(hours=120+24)
start_date_era5 = (datetime.strptime(start_date, '%Y-%m-%d') - era5_lookback).strftime('%Y-%m-%d')


def _era5_land_hourly():
    """Return the hourly ERA5-Land collection limited to `region` and the extended period."""
    return (
        ee.ImageCollection("ECMWF/ERA5_LAND/HOURLY")
        .filterBounds(region)
        .filterDate(start_date_era5, end_date)
    )


def _add_wind_speed(img):
    """Append a 'wind_10m' band: the hypotenuse of the 10 m u and v wind components."""
    u_wind = img.select('u_component_of_wind_10m')
    v_wind = img.select('v_component_of_wind_10m')
    return img.addBands(u_wind.hypot(v_wind).rename('wind_10m'))


era5hour_short = _era5_land_hourly().map(_add_wind_speed).select([
    'temperature_2m',    # 2m air temperature, K
    'dewpoint_temperature_2m',  # 2m dew point temperature, K
    'wind_10m',  # wind speed, m/s
    'surface_pressure',  # atmospheric surface pressure, Pa
])

era5hour_mid = _era5_land_hourly().select([
    'surface_solar_radiation_downwards_hourly',  # solar radiation, J
    'surface_thermal_radiation_downwards_hourly',  # thermal radiation, J
])

era5hour_long = _era5_land_hourly().select([
    'total_evaporation_hourly',  # total evaporation, m
    'total_precipitation_hourly',   # precipitation, m
])



In [None]:
# @title HLSL30

###note: no scale factor for HLSL30 product.

def bitwiseExtract(img, fromBit, toBit):
  """Return the value stored in bits fromBit..toBit (both inclusive) of img.

  The image is shifted right by fromBit and then ANDed with a mask that has
  (toBit - fromBit + 1) low bits set.
  """
  bit_count = ee.Number(1).add(toBit).subtract(fromBit)
  low_bits = ee.Number(1).leftShift(bit_count).subtract(1)
  shifted = img.rightShift(fromBit)
  return shifted.bitwiseAnd(low_bits)

# remove low quality data
def maskHLSL30(image):
  """Mask low-quality HLSL30 pixels using the 'Fmask' band.

  A pixel is kept when Fmask bits 1 (cloud), 3 (cloud shadow), 4 (snow/ice)
  and 5 (water) are all 0 and the two-bit aerosol field in bits 6-7 is at
  most 2. 'system:time_start' is copied onto the result.
  """
  fmask = image.select('Fmask')

  def _bit_is_clear(bit):
    return bitwiseExtract(fmask, bit, bit).eq(0)

  aerosol_ok = bitwiseExtract(fmask, 6, 7).lte(2)
  keep = (_bit_is_clear(1)       # cloud
          .And(_bit_is_clear(3)) # cloud shadow
          .And(_bit_is_clear(4)) # snow / ice
          .And(_bit_is_clear(5)) # water
          .And(aerosol_ok))

  masked = image.updateMask(keep)
  return masked.copyProperties(image, ['system:time_start'])


# Quality-masked HLSL30 images over the study area and period, selected bands only
HLSL30 = (
    ee.ImageCollection("NASA/HLS/HLSL30/v002")
    .filterBounds(region)
    .filterDate(start_date, end_date)
    .map(maskHLSL30)
    .select(['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B9', 'B10', 'B11'])
)

# Acquisition times formatted as 'YYYY-MM-dd HH'; the Python-side list is used
# later to build the set of hours that have an HLSL30 image
HLSL30_timestamps = HLSL30.aggregate_array('system:time_start')
HLSL30_datetime_list = HLSL30_timestamps.map(
    lambda millis: ee.Date(millis).format('YYYY-MM-dd HH')
)
HLSL30_datetime_list_py = HLSL30_datetime_list.getInfo()

print(HLSL30_datetime_list_py[:10])  # preview first 10
hlsl30_unique_hours = set(HLSL30_datetime_list_py)
print("Total unique timestamps:", len(hlsl30_unique_hours))

In [None]:
# @title Sentinel-2
# Sentinel-2 images

# old version
# def maskSentinel2(img):
#   cloudOpaqueBitMask = (1 << 10);
#   cloudCirrusMask = (1 << 11);
#   # Get the pixel QA band.
#   qa = img.select('QA60')
#   # Both flags should be set to zero, indicating clear conditions.
#   mask = qa.bitwiseAnd(cloudOpaqueBitMask).eq(0) \
#                 .And(qa.bitwiseAnd(cloudCirrusMask).eq(0))
#   return img.updateMask(mask).copyProperties(img, ['system:time_start'])                  #.multiply(0.0001).toFloat().copyProperties(img, ['mydate'])  # after applying updateMask(). all properties will be lost


def maskSentinel2(img):
  """Mask pixels whose 'SCL' class is 8, 9, 10 or 11; keep 'system:time_start'.

  NOTE(review): these presumably are the cloud (medium/high probability),
  thin-cirrus and snow/ice classes of the scene classification — confirm
  against the dataset's class table.
  """
  scl = img.select('SCL')
  keep = scl.neq(8)
  for rejected_class in (9, 10, 11):
    keep = keep.And(scl.neq(rejected_class))
  return img.updateMask(keep).copyProperties(img, ['system:time_start'])


def add_date(img):
  """Set property 'mydate' to the image start time formatted as 'YYYY-MM-dd-HH'."""
  start_time = ee.Date(img.get('system:time_start'))
  return img.set('mydate', start_time.format('YYYY-MM-dd-HH'))


# SCL-masked Sentinel-2 surface-reflectance images over the study area and period
Sentinel2 = (
    ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
    .filterDate(start_date, end_date)
    .filterBounds(region)
    .map(maskSentinel2)
    .select(['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12'])
)

# Sentinel2 = Sentinel2.map(add_date)

# Acquisition times formatted as 'YYYY-MM-dd HH'; the Python-side list is used
# later to build the set of hours that have a Sentinel-2 image
Sentinel2_timestamps = Sentinel2.aggregate_array('system:time_start')
Sentinel2_datetime_list = Sentinel2_timestamps.map(
    lambda millis: ee.Date(millis).format('YYYY-MM-dd HH')
)
Sentinel2_datetime_list_py = Sentinel2_datetime_list.getInfo()

print(Sentinel2_datetime_list_py[:10])  # preview first 10
s2_unique_hours = set(Sentinel2_datetime_list_py)
print("Total unique timestamps:", len(s2_unique_hours))

In [None]:
# @title Sentinel-1

def preprocess_vv(image):
    """Keep VV values strictly between -20 and -5, then smooth with a Gaussian kernel.

    Image properties are not copied here; merge_bands re-attaches
    'system:time_start' to the merged result.
    """
    in_range = image.gt(-20).And(image.lt(-5))
    smoothing_kernel = ee.Kernel.gaussian(3)
    return image.updateMask(in_range).convolve(smoothing_kernel)

# Define preprocessing for VH
def preprocess_vh(image):
    """Keep VH values strictly between -30 and -10, then smooth with a Gaussian kernel.

    Image properties are not copied here; merge_bands re-attaches
    'system:time_start' to the merged result.
    """
    in_range = image.gt(-30).And(image.lt(-10))
    smoothing_kernel = ee.Kernel.gaussian(3)
    return image.updateMask(in_range).convolve(smoothing_kernel)


def merge_bands(image):
    """Return preprocessed VV and VH plus the untouched 'angle' band.

    VV and VH each go through their own mask-and-smooth step; the result
    carries the 'system:time_start' property of the input image.
    """
    vv_clean = preprocess_vv(image.select('VV'))
    vh_clean = preprocess_vh(image.select('VH'))
    incidence_angle = image.select('angle').rename('angle')

    stacked = vv_clean.addBands(vh_clean).addBands(incidence_angle)
    return stacked.copyProperties(image, ['system:time_start'])

def to_float(image):
    """Cast every band of image to float, keeping 'system:time_start'."""
    as_float = image.select(image.bandNames()).float()
    return as_float.copyProperties(image, ['system:time_start'])

# Scene filters: IW instrument mode with both VV and VH polarisations available
s1_is_iw_mode = ee.Filter.eq('instrumentMode', 'IW')
s1_has_vv = ee.Filter.listContains('transmitterReceiverPolarisation', 'VV')
s1_has_vh = ee.Filter.listContains('transmitterReceiverPolarisation', 'VH')

Sentinel1 = (
    ee.ImageCollection('COPERNICUS/S1_GRD')
    .filterDate(start_date, end_date)
    .filterBounds(region)
    .filter(s1_is_iw_mode)
    .filter(s1_has_vv)
    .filter(s1_has_vh)
    # .filter(ee.Filter.eq('orbitProperties_pass', 'ASCENDING'))
    .sort('SLC_Processing_start')
    .map(merge_bands)
    .map(to_float)
)

# Acquisition times formatted as 'YYYY-MM-dd HH'; the Python-side list is used
# later to build the set of hours that have a Sentinel-1 image
Sentinel1_timestamps = Sentinel1.aggregate_array('system:time_start')
Sentinel1_datetime_list = Sentinel1_timestamps.map(
    lambda millis: ee.Date(millis).format('YYYY-MM-dd HH')
)
Sentinel1_datetime_list_py = Sentinel1_datetime_list.getInfo()

print(Sentinel1_datetime_list_py[:10])  # preview first 10
s1_unique_hours = set(Sentinel1_datetime_list_py)
print("Total unique timestamps:", len(s1_unique_hours))

## Run ML models

In [None]:
#@title load ML models
MODEL_DIR = './StartFolder/TrainedML'


def _load_artifact(name):
    """Load '<MODEL_DIR>/<name>.joblib'.

    joblib.load unpickles the file, which can execute arbitrary code — only
    use model files from a trusted source.
    """
    return joblib.load(f'{MODEL_DIR}/{name}.joblib')


# Feature scalers: one per model (ML1-ML3) and target ('sly' / 'rly')
# NOTE(review): 'sly' / 'rly' presumably stand for surface layer / root-zone layer — confirm.
ML1_sly_scaler = _load_artifact('ML1_sly_scaler')
ML1_rly_scaler = _load_artifact('ML1_rly_scaler')
ML2_sly_scaler = _load_artifact('ML2_sly_scaler')
ML2_rly_scaler = _load_artifact('ML2_rly_scaler')
ML3_sly_scaler = _load_artifact('ML3_sly_scaler')
ML3_rly_scaler = _load_artifact('ML3_rly_scaler')

# For every model/target pair: the '*_lgb_mean' estimator and the '*_mapie_cqr' estimator
# ML1-sly
ML1_sly_lgb_mean = _load_artifact('ML1_sly_lgb_mean')
ML1_sly_mapie_cqr = _load_artifact('ML1_sly_mapie_cqr')
# ML1-rly
ML1_rly_lgb_mean = _load_artifact('ML1_rly_lgb_mean')
ML1_rly_mapie_cqr = _load_artifact('ML1_rly_mapie_cqr')
# ML2-sly
ML2_sly_lgb_mean = _load_artifact('ML2_sly_lgb_mean')
ML2_sly_mapie_cqr = _load_artifact('ML2_sly_mapie_cqr')
# ML2-rly
ML2_rly_lgb_mean = _load_artifact('ML2_rly_lgb_mean')
ML2_rly_mapie_cqr = _load_artifact('ML2_rly_mapie_cqr')
# ML3-sly
ML3_sly_lgb_mean = _load_artifact('ML3_sly_lgb_mean')
ML3_sly_mapie_cqr = _load_artifact('ML3_sly_mapie_cqr')
# ML3-rly
ML3_rly_lgb_mean = _load_artifact('ML3_rly_lgb_mean')
ML3_rly_mapie_cqr = _load_artifact('ML3_rly_mapie_cqr')

In [None]:
# Show the fitted per-feature means of the three 'sly' scalers.
# (The matching .scale_ values and the 'rly' scalers can be printed the same way.)
for sly_scaler in (ML1_sly_scaler, ML2_sly_scaler, ML3_sly_scaler):
    print("Mean:", sly_scaler.mean_)

In [None]:
def make_weights(n):
    """Build n normalized weights proportional to 1/n, 1/(n-1), ..., 1/1.

    The weights sum to 1 and grow towards the end of the vector, so the last
    element is the largest.

    Returns
    -------
    tuple
        (weights as a NumPy array, the same values as an ee.List)
    """
    denominators = np.arange(n, 0, -1)       # n, n-1, ..., 1
    raw = 1.0 / denominators
    normalized = raw / raw.sum()
    return normalized, ee.List(normalized.tolist())

# Weight vectors for the three look-back windows, each as a NumPy array and as an ee.List
SHORT_TERM_HOURS = 24
MID_TERM_HOURS = 72
LONG_TERM_HOURS = 120

short_term_weights, ee_short_term_weights = make_weights(SHORT_TERM_HOURS)
mid_term_weights, ee_mid_term_weights = make_weights(MID_TERM_HOURS)
long_term_weights, ee_long_term_weights = make_weights(LONG_TERM_HOURS)

def weighted_sum(imgcol, weights):
    """Multiply each image of imgcol by its positional weight and sum the results.

    Images are paired with `weights` (an ee.List) by position via ee.List.zip.
    NOTE(review): nothing here checks that both have the same length — confirm
    how zip treats unequal lengths and that callers pass matching sizes.
    """
    def _apply_weight(pair):
        pair = ee.List(pair)
        return ee.Image(pair.get(0)).multiply(ee.Number(pair.get(1)))

    images = imgcol.toList(imgcol.size())
    weighted_images = images.zip(weights).map(_apply_weight)
    return ee.ImageCollection(weighted_images).sum()


In [None]:
# Grid size is taken from the land-cover array
rows, cols = LC_arr.shape[:2]

# 1D coordinate vectors running from the upper-left to the bottom-right corner.
# NOTE(review): linspace includes both end points, so the spacing is
# extent / (n - 1) rather than extent / n — confirm this matches the pixel
# grid of the downloaded arrays.
lon_1d = np.linspace(ul_lon, br_lon, num=cols)
lat_1d = np.linspace(ul_lat, br_lat, num=rows)
# 2D grids with one longitude / latitude per pixel
lon_arr, lat_arr = np.meshgrid(lon_1d, lat_1d)

# Flattened boolean mask of pixels whose land-cover code is 82 (only cropland)
LCmask = LC_arr.reshape(-1) == 82

# Hours ('YYYY-MM-dd HH') for which each sensor has at least one image
HLSL30_set, S2_set, S1_set = (
    set(hour_labels)
    for hour_labels in (HLSL30_datetime_list_py,
                        Sentinel2_datetime_list_py,
                        Sentinel1_datetime_list_py)
)

In [None]:
import rasterio
from rasterio.transform import from_origin

def arr_2_tif(y_hat_2d, lat_arr, lon_arr, filename, epsg=4326):
    """
    Write a 2D array to a single-band GeoTIFF, georeferenced from coordinate grids.

    The pixel size is the absolute difference between the first two grid
    entries along each axis, and the raster origin passed to ``from_origin``
    is the coordinate stored at index ``[0, 0]`` of the grids.

    Parameters
    ----------
    y_hat_2d : np.ndarray
        2D array of values to write (rows x cols); its dtype becomes the
        raster dtype.
    lat_arr : np.ndarray
        2D latitude grid (same shape as y_hat_2d).
    lon_arr : np.ndarray
        2D longitude grid (same shape as y_hat_2d).
    filename : str
        Output path (e.g., "result.tif").
    epsg : int, optional
        EPSG code of the CRS written to the file (default = 4326, WGS84).
    """
    n_rows, n_cols = y_hat_2d.shape

    # Pixel size from the spacing of the first two rows / columns.
    pixel_height = abs(lat_arr[1, 0] - lat_arr[0, 0])
    pixel_width = abs(lon_arr[0, 1] - lon_arr[0, 0])

    # from_origin takes (west, north, x-size, y-size).
    geotransform = from_origin(lon_arr[0, 0], lat_arr[0, 0], pixel_width, pixel_height)

    profile = {
        "driver": "GTiff",
        "height": n_rows,
        "width": n_cols,
        "count": 1,
        "dtype": y_hat_2d.dtype,
        "crs": f"EPSG:{epsg}",
        "transform": geotransform,
    }
    with rasterio.open(filename, "w", **profile) as dst:
        dst.write(y_hat_2d, 1)

In [None]:
# HLSL30 inference: for every hour that has an HLSL30 acquisition, build the
# 30-column feature matrix, predict with the ML3 surface ("sly") and rootzone
# ("rly") models, and write one GeoTIFF per layer.

# Grid dimensions used to fold the flat per-pixel predictions back into 2D.
rrrows, cccols = LC_arr.shape[:2]

# Step through the study period one hour at a time; work is only done for hours
# present in HLSL30_set.
current_dt = start_datetime
while current_dt <= end_datetime:
  dt_str = current_dt.strftime('%Y-%m-%d %H')  # 'YYYY-MM-DD HH' key looked up in HLSL30_set
  dt_str_4_filename = current_dt.strftime('%Y%m%d%H')
  doy = current_dt.timetuple().tm_yday

  # HLSL30
  if dt_str in HLSL30_set:
    print("------HLSL30------", current_dt)
    # Mean of the images in this one-hour window; masked pixels are filled with
    # -9999 and turned into NaN after the download below.
    img = HLSL30.filterDate(current_dt, current_dt + timedelta(hours=1)).reduce(ee.Reducer.mean()).unmask(-9999)
    # NOTE(review): `img` is a client-side ee object, so this test is presumably
    # always true — confirm it is not meant to detect an empty window.
    if img:
        # Skip hours whose surface GeoTIFF already exists. Only the surface file
        # is checked, not the rootzone one.
        if os.path.exists(f'HLSL30_surface_{ul_lon*10:.0f}{br_lon*10:.0f}{br_lat*10:.0f}{ul_lat*10:.0f}_{dt_str_4_filename}.tif'):
          current_dt += timedelta(hours=1)
          continue

        HLSL30_arr = geemap.ee_to_numpy(img, region=region, scale=100) # , default_value=-999
        HLSL30_arr[HLSL30_arr<-9998] = np.nan  # -9999 fill value -> NaN

        # NOTE(review): Earth Engine documents filterDate's end as exclusive, so the
        # windows below presumably hold 23 / 71 / 119 hourly images while the weight
        # lists were built with 24 / 72 / 120 entries — confirm this pairing matches
        # how the training features were produced before changing anything.

        # ---- Short term (24h) ----
        era5hour_short_last24 = era5hour_short.filterDate(
            current_dt - timedelta(hours=23), current_dt
        )
        weighted_era5hour_short = weighted_sum(era5hour_short_last24, ee_short_term_weights)
        era5hour_short_arr = geemap.ee_to_numpy(weighted_era5hour_short, region=region, scale=100)
        # print("Short term shape:", era5hour_short_arr.shape)

        # ---- Mid term (72h) ----
        era5hour_mid_last72 = era5hour_mid.filterDate(
            current_dt - timedelta(hours=71), current_dt
        )
        weighted_era5hour_mid = weighted_sum(era5hour_mid_last72, ee_mid_term_weights)
        era5hour_mid_arr = geemap.ee_to_numpy(weighted_era5hour_mid, region=region, scale=100)
        # print("Mid term shape:", era5hour_mid_arr.shape)

        # ---- Long term (120h) ----
        era5hour_long_last120 = era5hour_long.filterDate(
            current_dt - timedelta(hours=119), current_dt
        )
        weighted_era5hour_long = weighted_sum(era5hour_long_last120, ee_long_term_weights)
        era5hour_long_arr = geemap.ee_to_numpy(weighted_era5hour_long, region=region, scale=100)
        # print("Long term shape:", era5hour_long_arr.shape)

        # ---- Feature matrix: one row per pixel, 30 columns ----
        # Presumably the column order must match what ML3_*_scaler / ML3_*_lgb_mean
        # were fitted on — verify against the training notebook before reordering.
        X = np.full((len(LC_arr.reshape(-1)), 30), np.nan)
        X = X.astype(np.float32)
        # Columns 0-7: day of year, coordinates and terrain layers.
        X[:, 0] = np.ones(LC_arr.size, dtype=np.float32) * doy
        X[:, 1] = lat_arr.reshape(-1)[:]
        X[:, 2] = lon_arr.reshape(-1)[:]
        X[:, 3] = dem_arr.reshape(-1)[:]
        X[:, 4] = slope_arr.reshape(-1)[:]
        X[:, 5] = sinAsp_arr.reshape(-1)[:]
        X[:, 6] = cosAsp_arr.reshape(-1)[:]
        X[:, 7] = twi_arr.reshape(-1)[:]
        # Columns 8-11: polaris_arr bands 0, 6, 4, 2 for the surface model
        # (swapped for bands 1, 7, 5, 3 before the rootzone prediction).
        X[:, 8] = polaris_arr[:,:,0].reshape(-1)[:]
        X[:, 9] = polaris_arr[:,:,6].reshape(-1)[:]
        X[:, 10] = polaris_arr[:,:,4].reshape(-1)[:]
        X[:, 11] = polaris_arr[:,:,2].reshape(-1)[:]
        # Columns 12-19: weighted ERA5 bands from the short/mid/long windows.
        X[:, 12] = era5hour_short_arr[:,:,1].reshape(-1)[:]
        X[:, 13] = era5hour_short_arr[:,:,0].reshape(-1)[:]
        X[:, 14] = era5hour_mid_arr[:,:,0].reshape(-1)[:]
        X[:, 15] = era5hour_mid_arr[:,:,1].reshape(-1)[:]
        X[:, 16] = era5hour_long_arr[:,:,0].reshape(-1)[:]
        X[:, 17] = era5hour_short_arr[:,:,2].reshape(-1)[:]
        X[:, 18] = era5hour_short_arr[:,:,3].reshape(-1)[:]
        X[:, 19] = era5hour_long_arr[:,:,1].reshape(-1)[:][:]
        # B2-B4 only feed the spectral indices; they are not model columns themselves.
        B2 = HLSL30_arr[:, :, 0].reshape(-1)  # B2
        B3 = HLSL30_arr[:, :, 1].reshape(-1)  # B3
        B4 = HLSL30_arr[:, :, 2].reshape(-1)  # B4
        # Columns 20-24: HLSL30 bands used directly (array band 6 is skipped).
        X[:, 20] = HLSL30_arr[:, :, 3].reshape(-1) # B5
        X[:, 21] = HLSL30_arr[:, :, 4].reshape(-1)  # B6
        X[:, 22] = HLSL30_arr[:, :, 5].reshape(-1)  # B7
        X[:, 23] = HLSL30_arr[:, :, 7].reshape(-1)  # B10
        X[:, 24] = HLSL30_arr[:, :, 8].reshape(-1)  # B11
        # Columns 25-29: indices derived from the bands above. Zero denominators
        # yield inf/NaN, which the validity mask below filters out.
        X[:, 25] = (X[:, 20]- B4) / (X[:, 20] + B4)  # NDVI
        X[:, 26] = (B3 - X[:, 20]) / (B3 + X[:, 20])  # NDWI
        X[:, 27] = -0.3599 * B2 - 0.3533 * B3 - 0.4734 * B4 + 0.6633 *X[:, 20] - 0.0087 * X[:, 21] - 0.2856 * X[:, 22]  # greenness
        X[:, 28] = 0.3510 * B2 + 0.3813 * B3 + 0.3437 * B4 + 0.7196 * X[:, 20] + 0.2396 * X[:, 21] + 0.1949 * X[:, 22]  # brightness
        X[:, 29] = 0.2578 * B2 + 0.2305 * B3 + 0.0883 * B4 + 0.1071 * X[:, 20] - 0.7611 * X[:, 21] - 0.5308 * X[:, 22]  # wetness


        # ---- Surface-layer prediction ----
        startt = time.time()

        # Keep only rows with no NaN/inf in any feature that are also cropland.
        X_mask = ~np.any(np.isnan(X), axis=1) & ~np.any(np.isinf(X), axis=1) & LCmask
        print("surface layer")
        print(f'valid pixels: {np.count_nonzero(X_mask)}/{len(X_mask)}')
        # No file is written when there are no valid pixels.
        if np.count_nonzero(X_mask) != 0:
          # Invalid pixels stay NaN in the output raster.
          ML3_sly_y_hat = np.full((X.shape[0]), np.nan)
          # ML3_sly_y_up_hat = np.full((X.shape[0]), np.nan)
          # ML3_sly_y_low_hat = np.full((X.shape[0]), np.nan)
          ML3_sly_y_hat[X_mask] = ML3_sly_lgb_mean.predict(ML3_sly_scaler.transform(X[X_mask, :]))
          # Disabled: interval bounds from the ML3_sly_mapie_cqr model.
          # y_pred_cqr, y_pis_cqr = ML3_sly_mapie_cqr.predict_interval(ML3_sly_scaler.transform(X[X_mask, :]))
          # ML3_sly_y_up_hat[X_mask] = y_pis_cqr[:, 1, 0]
          # ML3_sly_y_low_hat[X_mask] = y_pis_cqr[:, 0, 0]

          ML3_sly_y_hat_2d = ML3_sly_y_hat.reshape(rrrows, cccols)
          arr_2_tif(ML3_sly_y_hat_2d, lat_arr, lon_arr, f'HLSL30_surface_{ul_lon*10:.0f}{br_lon*10:.0f}{br_lat*10:.0f}{ul_lat*10:.0f}_{dt_str_4_filename}.tif')

        endt = time.time()
        print(f"Elapsed time: {endt - startt:.4f} seconds")


        # ---- Rootzone prediction: same X with columns 8-11 replaced ----
        X[:, 8] = polaris_arr[:,:,1].reshape(-1)[:]
        X[:, 9] = polaris_arr[:,:,7].reshape(-1)[:]
        X[:, 10] = polaris_arr[:,:,5].reshape(-1)[:]
        X[:, 11] = polaris_arr[:,:,3].reshape(-1)[:]


        startt = time.time()

        # The mask is recomputed because the replaced columns may change which rows are valid.
        X_mask = ~np.any(np.isnan(X), axis=1) & ~np.any(np.isinf(X), axis=1) & LCmask
        print("rootzone")
        print(f'valid pixels: {np.count_nonzero(X_mask)}/{len(X_mask)}')
        if np.count_nonzero(X_mask) != 0:
          ML3_rly_y_hat = np.full((X.shape[0]), np.nan)
          # ML3_rly_y_up_hat = np.full((X.shape[0]), np.nan)
          # ML3_rly_y_low_hat = np.full((X.shape[0]), np.nan)
          ML3_rly_y_hat[X_mask] = ML3_rly_lgb_mean.predict(ML3_rly_scaler.transform(X[X_mask, :]))
          # Disabled: interval bounds from the ML3_rly_mapie_cqr model.
          # y_pred_cqr, y_pis_cqr = ML3_rly_mapie_cqr.predict_interval(ML3_rly_scaler.transform(X[X_mask, :]))
          # ML3_rly_y_up_hat[X_mask] = y_pis_cqr[:, 1, 0]
          # ML3_rly_y_low_hat[X_mask] = y_pis_cqr[:, 0, 0]

          ML3_rly_y_hat_2d = ML3_rly_y_hat.reshape(rrrows, cccols)
          arr_2_tif(ML3_rly_y_hat_2d, lat_arr, lon_arr, f'HLSL30_rootzone_{ul_lon*10:.0f}{br_lon*10:.0f}{br_lat*10:.0f}{ul_lat*10:.0f}_{dt_str_4_filename}.tif')

        endt = time.time()
        print(f"Elapsed time: {endt - startt:.4f} seconds")

        # print(ML3_sly_y_hat.shape)
        # print(ML3_sly_y_hat)


  # Advance to the next hour (also reached when the hour has no HLSL30 image).
  current_dt += timedelta(hours=1)


  ##########################################

In [None]:

# Sentinel-2 inference: for every hour that has a Sentinel-2 acquisition, build
# the 32-column feature matrix, predict with the ML2 surface ("sly") and rootzone
# ("rly") models, and write one GeoTIFF per layer.

# Grid dimensions used to fold the flat per-pixel predictions back into 2D.
rrrows, cccols = LC_arr.shape[:2]

# Step through the study period one hour at a time; work is only done for hours
# present in S2_set.
current_dt = start_datetime
while current_dt <= end_datetime:
  dt_str = current_dt.strftime('%Y-%m-%d %H')  # 'YYYY-MM-DD HH' key looked up in S2_set
  dt_str_4_filename = current_dt.strftime('%Y%m%d%H')
  doy = current_dt.timetuple().tm_yday


  # Sentinel2
  if dt_str in S2_set:
    print("------Sentinel2------", current_dt)
    # Mean of the images in this one-hour window; masked pixels are filled with
    # -9999 and turned into NaN after the download below.
    img = Sentinel2.filterDate(current_dt, current_dt + timedelta(hours=1)).reduce(ee.Reducer.mean()).unmask(-9999)
    # NOTE(review): `img` is a client-side ee object, so this test is presumably
    # always true — confirm it is not meant to detect an empty window.
    if img:
      # Skip hours whose surface GeoTIFF already exists. Only the surface file
      # is checked, not the rootzone one.
      if os.path.exists(f'Sentinel2_surface_{ul_lon*10:.0f}{br_lon*10:.0f}{br_lat*10:.0f}{ul_lat*10:.0f}_{dt_str_4_filename}.tif'):
        current_dt += timedelta(hours=1)
        continue

      sentinel2_arr = geemap.ee_to_numpy(img, region=region, scale=100)
      sentinel2_arr[sentinel2_arr<-9998] = np.nan  # -9999 fill value -> NaN

      # NOTE(review): Earth Engine documents filterDate's end as exclusive, so the
      # windows below presumably hold 23 / 71 / 119 hourly images while the weight
      # lists were built with 24 / 72 / 120 entries — confirm this pairing matches
      # how the training features were produced before changing anything.

      # ---- Short term (24h) ----
      era5hour_short_last24 = era5hour_short.filterDate(
          current_dt - timedelta(hours=23), current_dt
      )
      weighted_era5hour_short = weighted_sum(era5hour_short_last24, ee_short_term_weights)
      era5hour_short_arr = geemap.ee_to_numpy(weighted_era5hour_short, region=region, scale=100)

      # ---- Mid term (72h) ----
      era5hour_mid_last72 = era5hour_mid.filterDate(
          current_dt - timedelta(hours=71), current_dt
      )
      weighted_era5hour_mid = weighted_sum(era5hour_mid_last72, ee_mid_term_weights)
      era5hour_mid_arr = geemap.ee_to_numpy(weighted_era5hour_mid, region=region, scale=100)

      # ---- Long term (120h) ----
      era5hour_long_last120 = era5hour_long.filterDate(
          current_dt - timedelta(hours=119), current_dt
      )
      weighted_era5hour_long = weighted_sum(era5hour_long_last120, ee_long_term_weights)
      era5hour_long_arr = geemap.ee_to_numpy(weighted_era5hour_long, region=region, scale=100)


      # ---- Feature matrix: one row per pixel, 32 columns ----
      # Presumably the column order must match what ML2_*_scaler / ML2_*_lgb_mean
      # were fitted on — verify against the training notebook before reordering.
      X = np.full((len(LC_arr.reshape(-1)), 32), np.nan)
      X = X.astype(np.float32)
      # Columns 0-7: day of year, coordinates and terrain layers.
      X[:, 0] = np.ones(LC_arr.size, dtype=np.float32) * doy
      X[:, 1] = lat_arr.reshape(-1)[:]
      X[:, 2] = lon_arr.reshape(-1)[:]
      X[:, 3] = dem_arr.reshape(-1)[:]
      X[:, 4] = slope_arr.reshape(-1)[:]
      X[:, 5] = sinAsp_arr.reshape(-1)[:]
      X[:, 6] = cosAsp_arr.reshape(-1)[:]
      X[:, 7] = twi_arr.reshape(-1)[:]
      # Columns 8-11: polaris_arr bands 0, 6, 4, 2 for the surface model
      # (swapped for bands 1, 7, 5, 3 before the rootzone prediction).
      X[:, 8] = polaris_arr[:,:,0].reshape(-1)[:]
      X[:, 9] = polaris_arr[:,:,6].reshape(-1)[:]
      X[:, 10] = polaris_arr[:,:,4].reshape(-1)[:]
      X[:, 11] = polaris_arr[:,:,2].reshape(-1)[:]
      # Columns 12-19: weighted ERA5 bands from the short/mid/long windows.
      X[:, 12] = era5hour_short_arr[:,:,1].reshape(-1)[:]
      X[:, 13] = era5hour_short_arr[:,:,0].reshape(-1)[:]
      X[:, 14] = era5hour_mid_arr[:,:,0].reshape(-1)[:]
      X[:, 15] = era5hour_mid_arr[:,:,1].reshape(-1)[:]
      X[:, 16] = era5hour_long_arr[:,:,0].reshape(-1)[:]
      X[:, 17] = era5hour_short_arr[:,:,2].reshape(-1)[:]
      X[:, 18] = era5hour_short_arr[:,:,3].reshape(-1)[:]
      X[:, 19] = era5hour_long_arr[:,:,1].reshape(-1)[:][:]
      # Every Sentinel-2 band is multiplied by 0.0001 before use.
      # B2-B4 only feed the spectral indices; they are not model columns themselves.
      B2 = sentinel2_arr[:, :, 0].reshape(-1) * 0.0001  # B2
      B3 = sentinel2_arr[:, :, 1].reshape(-1) * 0.0001  # B3
      B4 = sentinel2_arr[:, :, 2].reshape(-1) * 0.0001  # B4
      # Columns 20-26: Sentinel-2 bands used directly.
      X[:, 20] = sentinel2_arr[:, :, 3].reshape(-1) * 0.0001 # B5
      X[:, 21] = sentinel2_arr[:, :, 4].reshape(-1) * 0.0001  # B6
      X[:, 22] = sentinel2_arr[:, :, 5].reshape(-1) * 0.0001  # B7
      X[:, 23] = sentinel2_arr[:, :, 6].reshape(-1) * 0.0001  # B8
      X[:, 24] = sentinel2_arr[:, :, 7].reshape(-1) * 0.0001  # B8A
      X[:, 25] = sentinel2_arr[:, :, 8].reshape(-1) * 0.0001  # B11
      X[:, 26] = sentinel2_arr[:, :, 9].reshape(-1) * 0.0001  # B12
      # Columns 27-31: indices built from B2-B4 and columns 23 (B8), 25 (B11), 26 (B12).
      # Zero denominators yield inf/NaN, which the validity mask below filters out.
      X[:, 27] = (X[:, 23]- B4) / (X[:, 23] + B4)  # NDVI
      X[:, 28] = (B3 - X[:, 23]) / (B3 + X[:, 23])  # NDWI
      X[:, 29] = -0.3599 * B2 - 0.3533 * B3 - 0.4734 * B4 + 0.6633 *X[:, 23] - 0.0087 * X[:, 25] - 0.2856 * X[:, 26]  # greenness
      X[:, 30] = 0.3510 * B2 + 0.3813 * B3 + 0.3437 * B4 + 0.7196 * X[:, 23] + 0.2396 * X[:, 25] + 0.1949 * X[:, 26]  # brightness
      X[:, 31] = 0.2578 * B2 + 0.2305 * B3 + 0.0883 * B4 + 0.1071 * X[:, 23] - 0.7611 * X[:, 25] - 0.5308 * X[:, 26]  # wetness


      # row_means = np.mean(X, axis=0)
      # print(row_means.shape)
      # print(row_means)

      # ---- Surface-layer prediction ----
      startt = time.time()

      # Keep only rows with no NaN/inf in any feature that are also cropland.
      X_mask = ~np.any(np.isnan(X), axis=1) & ~np.any(np.isinf(X), axis=1) & LCmask
      print("surface layer")
      print(f'valid pixels: {np.count_nonzero(X_mask)}/{len(X_mask)}')
      # No file is written when there are no valid pixels.
      if np.count_nonzero(X_mask) != 0:
        # Invalid pixels stay NaN in the output raster.
        ML2_sly_y_hat = np.full((X.shape[0]), np.nan)
        # ML2_sly_y_up_hat = np.full((X.shape[0]), np.nan)
        # ML2_sly_y_low_hat = np.full((X.shape[0]), np.nan)
        ML2_sly_y_hat[X_mask] = ML2_sly_lgb_mean.predict(ML2_sly_scaler.transform(X[X_mask, :]))
        # Disabled: interval bounds from the ML2_sly_mapie_cqr model.
        # y_pred_cqr, y_pis_cqr = ML2_sly_mapie_cqr.predict_interval(ML2_sly_scaler.transform(X[X_mask, :]))
        # ML2_sly_y_up_hat[X_mask] = y_pis_cqr[:, 1, 0]
        # ML2_sly_y_low_hat[X_mask] = y_pis_cqr[:, 0, 0]

        ML2_sly_y_hat_2d = ML2_sly_y_hat.reshape(rrrows, cccols)
        arr_2_tif(ML2_sly_y_hat_2d, lat_arr, lon_arr, f'Sentinel2_surface_{ul_lon*10:.0f}{br_lon*10:.0f}{br_lat*10:.0f}{ul_lat*10:.0f}_{dt_str_4_filename}.tif')

      endt = time.time()
      print(f"Elapsed time: {endt - startt:.4f} seconds")

      # ---- Rootzone prediction: same X with columns 8-11 replaced ----
      X[:, 8] = polaris_arr[:,:,1].reshape(-1)[:]
      X[:, 9] = polaris_arr[:,:,7].reshape(-1)[:]
      X[:, 10] = polaris_arr[:,:,5].reshape(-1)[:]
      X[:, 11] = polaris_arr[:,:,3].reshape(-1)[:]

      startt = time.time()

      # The mask is recomputed because the replaced columns may change which rows are valid.
      X_mask = ~np.any(np.isnan(X), axis=1) & ~np.any(np.isinf(X), axis=1) & LCmask
      print("rootzone")
      print(f'valid pixels: {np.count_nonzero(X_mask)}/{len(X_mask)}')
      if np.count_nonzero(X_mask) != 0:
        ML2_rly_y_hat = np.full((X.shape[0]), np.nan)
        # ML2_rly_y_up_hat = np.full((X.shape[0]), np.nan)
        # ML2_rly_y_low_hat = np.full((X.shape[0]), np.nan)
        ML2_rly_y_hat[X_mask] = ML2_rly_lgb_mean.predict(ML2_rly_scaler.transform(X[X_mask, :]))
        # Disabled: interval bounds from the ML2_rly_mapie_cqr model.
        # y_pred_cqr, y_pis_cqr = ML2_rly_mapie_cqr.predict_interval(ML2_rly_scaler.transform(X[X_mask, :]))
        # ML2_rly_y_up_hat[X_mask] = y_pis_cqr[:, 1, 0]
        # ML2_rly_y_low_hat[X_mask] = y_pis_cqr[:, 0, 0]

        ML2_rly_y_hat_2d = ML2_rly_y_hat.reshape(rrrows, cccols)
        arr_2_tif(ML2_rly_y_hat_2d, lat_arr, lon_arr, f'Sentinel2_rootzone_{ul_lon*10:.0f}{br_lon*10:.0f}{br_lat*10:.0f}{ul_lat*10:.0f}_{dt_str_4_filename}.tif')

      endt = time.time()
      print(f"Elapsed time: {endt - startt:.4f} seconds")

      # print(ML2_sly_y_hat.shape)
      # print(ML2_sly_y_hat)


  # Advance to the next hour (also reached when the hour has no Sentinel-2 image).
  current_dt += timedelta(hours=1)

In [None]:

# Sentinel-1 inference: for every hour that has a Sentinel-1 acquisition, build
# the 26-column feature matrix, predict with the ML1 surface ("sly") and rootzone
# ("rly") models, and write one GeoTIFF per layer.

# Grid dimensions used to fold the flat per-pixel predictions back into 2D.
rrrows, cccols = LC_arr.shape[:2]
########################################

# Step through the study period one hour at a time; work is only done for hours
# present in S1_set.
current_dt = start_datetime
while current_dt <= end_datetime:
  dt_str = current_dt.strftime('%Y-%m-%d %H')  # 'YYYY-MM-DD HH' key looked up in S1_set
  dt_str_4_filename = current_dt.strftime('%Y%m%d%H')
  doy = current_dt.timetuple().tm_yday

  # Sentinel1
  if dt_str in S1_set:
    print("------Sentinel1------", current_dt)
    # Mean of the images in this one-hour window; masked pixels are filled with
    # -9999 and turned into NaN after the download below.
    img = Sentinel1.filterDate(current_dt, current_dt + timedelta(hours=1)).reduce(ee.Reducer.mean()).unmask(-9999)
    # NOTE(review): `img` is a client-side ee object, so this test is presumably
    # always true — confirm it is not meant to detect an empty window.
    if img:
      # Skip hours whose surface GeoTIFF already exists. Only the surface file
      # is checked, not the rootzone one.
      if os.path.exists(f'Sentinel1_surface_{ul_lon*10:.0f}{br_lon*10:.0f}{br_lat*10:.0f}{ul_lat*10:.0f}_{dt_str_4_filename}.tif'):
        current_dt += timedelta(hours=1)
        continue

      sentinel1_arr = geemap.ee_to_numpy(img, region=region, scale=100)
      sentinel1_arr[sentinel1_arr<-9998] = np.nan  # -9999 fill value -> NaN

      # NOTE(review): Earth Engine documents filterDate's end as exclusive, so the
      # windows below presumably hold 23 / 71 / 119 hourly images while the weight
      # lists were built with 24 / 72 / 120 entries — confirm this pairing matches
      # how the training features were produced before changing anything.

      # ---- Short term (24h) ----
      era5hour_short_last24 = era5hour_short.filterDate(
          current_dt - timedelta(hours=23), current_dt
      )
      weighted_era5hour_short = weighted_sum(era5hour_short_last24, ee_short_term_weights)
      era5hour_short_arr = geemap.ee_to_numpy(weighted_era5hour_short, region=region, scale=100)

      # ---- Mid term (72h) ----
      era5hour_mid_last72 = era5hour_mid.filterDate(
          current_dt - timedelta(hours=71), current_dt
      )
      weighted_era5hour_mid = weighted_sum(era5hour_mid_last72, ee_mid_term_weights)
      era5hour_mid_arr = geemap.ee_to_numpy(weighted_era5hour_mid, region=region, scale=100)

      # ---- Long term (120h) ----
      era5hour_long_last120 = era5hour_long.filterDate(
          current_dt - timedelta(hours=119), current_dt
      )
      weighted_era5hour_long = weighted_sum(era5hour_long_last120, ee_long_term_weights)
      era5hour_long_arr = geemap.ee_to_numpy(weighted_era5hour_long, region=region, scale=100)


      # ---- Feature matrix: one row per pixel, 26 columns ----
      # Presumably the column order must match what ML1_*_scaler / ML1_*_lgb_mean
      # were fitted on — verify against the training notebook before reordering.
      X = np.full((len(LC_arr.reshape(-1)), 26), np.nan)
      X = X.astype(np.float32)
      # Columns 0-7: day of year, coordinates and terrain layers.
      X[:, 0] = np.ones(LC_arr.size, dtype=np.float32) * doy
      X[:, 1] = lat_arr.reshape(-1)[:]
      X[:, 2] = lon_arr.reshape(-1)[:]
      X[:, 3] = dem_arr.reshape(-1)[:]
      X[:, 4] = slope_arr.reshape(-1)[:]
      X[:, 5] = sinAsp_arr.reshape(-1)[:]
      X[:, 6] = cosAsp_arr.reshape(-1)[:]
      X[:, 7] = twi_arr.reshape(-1)[:]
      # Columns 8-11: polaris_arr bands 0, 6, 4, 2 for the surface model
      # (swapped for bands 1, 7, 5, 3 before the rootzone prediction).
      X[:, 8] = polaris_arr[:,:,0].reshape(-1)[:]
      X[:, 9] = polaris_arr[:,:,6].reshape(-1)[:]
      X[:, 10] = polaris_arr[:,:,4].reshape(-1)[:]
      X[:, 11] = polaris_arr[:,:,2].reshape(-1)[:]
      # Columns 12-19: weighted ERA5 bands from the short/mid/long windows.
      X[:, 12] = era5hour_short_arr[:,:,1].reshape(-1)[:]
      X[:, 13] = era5hour_short_arr[:,:,0].reshape(-1)[:]
      X[:, 14] = era5hour_mid_arr[:,:,0].reshape(-1)[:]
      X[:, 15] = era5hour_mid_arr[:,:,1].reshape(-1)[:]
      X[:, 16] = era5hour_long_arr[:,:,0].reshape(-1)[:]
      X[:, 17] = era5hour_short_arr[:,:,2].reshape(-1)[:]
      X[:, 18] = era5hour_short_arr[:,:,3].reshape(-1)[:]
      X[:, 19] = era5hour_long_arr[:,:,1].reshape(-1)[:][:]
      # Columns 20-22: the first three Sentinel-1 bands as downloaded.
      X[:, 20] = sentinel1_arr[:, :, 0].reshape(-1)
      X[:, 21] = sentinel1_arr[:, :, 1].reshape(-1)
      X[:, 22] = sentinel1_arr[:, :, 2].reshape(-1)
      # Columns 23-25: combinations of bands 0 and 1 (call them b0, b1):
      #   23: b0 / b1
      #   24: (b0**2 + b1**2) / sqrt(2)
      #   25: (b0 + b1) / (b0 - b1)
      # Zero denominators yield inf/NaN, which the validity mask below filters out.
      X[:, 23] = sentinel1_arr[:, :, 0].reshape(-1) / sentinel1_arr[:, :, 1].reshape(-1)
      X[:, 24] = (sentinel1_arr[:, :, 0].reshape(-1)**2 + sentinel1_arr[:, :, 1].reshape(-1)**2)/np.sqrt(2)
      X[:, 25] = (sentinel1_arr[:, :, 0].reshape(-1) + sentinel1_arr[:, :, 1].reshape(-1)) / (sentinel1_arr[:, :, 0].reshape(-1) - sentinel1_arr[:, :, 1].reshape(-1))


      # ---- Surface-layer prediction ----
      startt = time.time()

      # Keep only rows with no NaN/inf in any feature that are also cropland.
      X_mask = ~np.any(np.isnan(X), axis=1) & ~np.any(np.isinf(X), axis=1) & LCmask
      print("surface layer")
      print(f'valid pixels: {np.count_nonzero(X_mask)}/{len(X_mask)}')
      # No file is written when there are no valid pixels.
      if np.count_nonzero(X_mask) != 0:
        # Invalid pixels stay NaN in the output raster.
        ML1_sly_y_hat = np.full((X.shape[0]), np.nan)
        # ML1_sly_y_up_hat = np.full((X.shape[0]), np.nan)
        # ML1_sly_y_low_hat = np.full((X.shape[0]), np.nan)
        ML1_sly_y_hat[X_mask] = ML1_sly_lgb_mean.predict(ML1_sly_scaler.transform(X[X_mask, :]))
        # Disabled: interval bounds from the ML1_sly_mapie_cqr model.
        # y_pred_cqr, y_pis_cqr = ML1_sly_mapie_cqr.predict_interval(ML1_sly_scaler.transform(X[X_mask, :]))
        # ML1_sly_y_up_hat[X_mask] = y_pis_cqr[:, 1, 0]
        # ML1_sly_y_low_hat[X_mask] = y_pis_cqr[:, 0, 0]

        ML1_sly_y_hat_2d = ML1_sly_y_hat.reshape(rrrows, cccols)
        arr_2_tif(ML1_sly_y_hat_2d, lat_arr, lon_arr, f'Sentinel1_surface_{ul_lon*10:.0f}{br_lon*10:.0f}{br_lat*10:.0f}{ul_lat*10:.0f}_{dt_str_4_filename}.tif')

      endt = time.time()
      print(f"Elapsed time: {endt - startt:.4f} seconds")

      # ---- Rootzone prediction: same X with columns 8-11 replaced ----
      X[:, 8] = polaris_arr[:,:,1].reshape(-1)[:]
      X[:, 9] = polaris_arr[:,:,7].reshape(-1)[:]
      X[:, 10] = polaris_arr[:,:,5].reshape(-1)[:]
      X[:, 11] = polaris_arr[:,:,3].reshape(-1)[:]


      startt = time.time()

      # The mask is recomputed because the replaced columns may change which rows are valid.
      X_mask = ~np.any(np.isnan(X), axis=1) & ~np.any(np.isinf(X), axis=1) & LCmask
      print("rootzone")
      print(f'valid pixels: {np.count_nonzero(X_mask)}/{len(X_mask)}')
      if np.count_nonzero(X_mask) != 0:
        ML1_rly_y_hat = np.full((X.shape[0]), np.nan)
        # ML1_rly_y_up_hat = np.full((X.shape[0]), np.nan)
        # ML1_rly_y_low_hat = np.full((X.shape[0]), np.nan)
        ML1_rly_y_hat[X_mask] = ML1_rly_lgb_mean.predict(ML1_rly_scaler.transform(X[X_mask, :]))
        # Disabled: interval bounds from the ML1_rly_mapie_cqr model.
        # y_pred_cqr, y_pis_cqr = ML1_rly_mapie_cqr.predict_interval(ML1_rly_scaler.transform(X[X_mask, :]))
        # ML1_rly_y_up_hat[X_mask] = y_pis_cqr[:, 1, 0]
        # ML1_rly_y_low_hat[X_mask] = y_pis_cqr[:, 0, 0]

        ML1_rly_y_hat_2d = ML1_rly_y_hat.reshape(rrrows, cccols)
        arr_2_tif(ML1_rly_y_hat_2d, lat_arr, lon_arr, f'Sentinel1_rootzone_{ul_lon*10:.0f}{br_lon*10:.0f}{br_lat*10:.0f}{ul_lat*10:.0f}_{dt_str_4_filename}.tif')

      endt = time.time()
      print(f"Elapsed time: {endt - startt:.4f} seconds")

      # print(ML1_sly_y_hat.shape)
      # print(ML1_sly_y_hat)


  # Advance to the next hour (also reached when the hour has no Sentinel-1 image).
  current_dt += timedelta(hours=1)

**Searching files:**

Search for all GeoTIFF (`.tif`) files exported by the ML models above for the current bounding box and store their file names in a list.

In [None]:
import glob
import os

# Collect every GeoTIFF in the working directory whose name carries this bounding
# box's coordinate tag (the same tag the export loops put in their file names).
# glob returns matches in arbitrary, filesystem-dependent order, so the list is
# sorted to make the "first"/"last" report below meaningful and reproducible.
tif_files = sorted(
    glob.glob(f'*{ul_lon*10:.0f}{br_lon*10:.0f}{br_lat*10:.0f}{ul_lat*10:.0f}_*.tif')
)

# Report the first and last file names (alphabetical order)
if tif_files:
    print("First .tif file:", tif_files[0])
    print("Last .tif file:", tif_files[-1])
else:
    print("No .tif files found.")

# Report how many matching .tif files were found
print("Total number of .tif files:", len(tif_files))

**Downloading files:**

To download the files, run the following code cell. It bundles all the `.tif` files found above into a single zip archive and then downloads that archive to your local machine.

In [None]:
import glob
import os
from google.colab import files

import zipfile

if tif_files:
    # Bundle the .tif files found by the search cell into one archive.
    # The archive is built with the standard library rather than `!zip ...`:
    # opening it in "w" mode recreates it on every run, so entries added by an
    # earlier run do not linger, and no file names are interpolated into a
    # shell command line.
    zip_filename = 'tif_files.zip'
    with zipfile.ZipFile(zip_filename, 'w', compression=zipfile.ZIP_DEFLATED) as archive:
        for tif_path in tif_files:
            archive.write(tif_path)
    print(f"Zipped {len(tif_files)} file(s) into {zip_filename}")

    # Download the zip file through the Colab browser session
    files.download(zip_filename)
else:
    print("No specific .tif files found to download.")