In [2]:
import sys
sys.path.append('/home/potzschf/repos/')
from helperToolz.helpsters import *
from helperToolz.dicts_and_lists import *
from helperToolz.guzinski import * 
import geopandas as gpd
from collections import defaultdict
from joblib import Parallel, delayed
from datetime import datetime, timedelta
import time

In [3]:
np.random.seed(42)

comp = 'maxLST'
pathi = '/data/Aldhani/eoagritwin/et/Sentinel3/LST/SharpEvap/Brandenburg/FirstShot/evap/'
vrt_folder= f'{pathi}{comp}/vrt/'
median_folder = f'{pathi}{comp}/median_9day/ET_sum'

pattern = r"\d{4}_\d{2}_\d{1,2}"
Landsat_folder = '/data/Aldhani/eoagritwin/et/Landsat/daily_extracts/Brandenburg/2019/'

In [4]:
med_comps = [file for file in getFilelist(median_folder, '.tif')]
all_dates = re.findall(pattern, " ".join(med_comps))

starts = [datetime.strptime(str(compDate), '%Y_%m_%d') for compDate in all_dates]
ends = [datetime.strptime(str(compDate), '%Y_%m_%d') + timedelta(days=8) for compDate in all_dates]

year

In [9]:
def make_ET_median_Landsat(Landsat_master_folder, date_s, date_e, pattern, outFold, mask_arr=False):
    """
    Compute median ET from Landsat images within a date range and optionally apply a mask.

    Landsat_master_folder : str
        Path to the folder containing Landsat ET GeoTIFFs.
    date_s, date_e : datetime
        Start and end dates for filtering Landsat files.
    pattern : str
        Regex pattern to extract date (e.g. r'20\d{6}' or r'20\d{2}_\d{2}_\d{2}').
    outFold : str
        Output directory for saving median composites.
    mask_arr : np.ndarray, optional
        Array mask (same shape as target raster) to apply before saving.
    """
    files = getFilelist(Landsat_master_folder, '.tif', deep=True)
    dicti = defaultdict(list)
    reg_search = re.compile(pattern)

    for file in files:
            m = reg_search.search(file).group()
            year, month, day = m.split('_')
            day = day.zfill(2)  # "1" → "01", "13" → "13"
            date_key = f"{year}{month}{day}"
            
            if date_s <= datetime.strptime(date_key, '%Y%m%d') <= date_e:
                dicti[datetime.strftime(date_s, '%Y_%m_%d')].append(file)
        
    for k, v in dicti.items():
        arrL = []
        for file in v:
            ds = gdal.Open(file)
            arr = ds.GetRasterBand(1).ReadAsArray()
            arr[arr<=0] = np.nan
            arr[arr>12] = np.nan
            arrL.append(arr)
        median_arr = np.nanmedian(np.dstack(arrL),axis=2)

        if mask_arr is not False:
            warped_arr = warp_np_to_reference(median_arr, file,
                                target_tif_path=med_comps[0])
            warped_masked = warped_arr * mask_arr
            npTOdisk(warped_masked, med_comps[0], outPath=f'{outFold}Landsat_ET_9daycomp_{k}.tif')
        else:
            warp_np_to_reference(median_arr, file,
                                target_tif_path=med_comps[0],
                                output_path=f'{outFold}Landsat_ET_9daycomp_{k}.tif')

  Regex pattern to extract date (e.g. r'20\d{6}' or r'20\d{2}_\d{2}_\d{2}').


In [10]:
mask_ds = gdal.Open('/data/Aldhani/eoagritwin/fields/Auxiliary/grid_search/Brandenburg/quick_n_dirty/Fields_as_mask.tif')
mask_arr = mask_ds.GetRasterBand(1).ReadAsArray()
mask_arr[mask_arr>0] = 1


for date_start, date_end in zip(starts, ends):
    make_ET_median_Landsat(Landsat_folder, date_start, date_end, pattern, '/data/Aldhani/eoagritwin/et/Landsat/composites/for_evap_comparison/', mask_arr=mask_arr)

  median_arr = np.nanmedian(np.dstack(arrL),axis=2)


In [5]:
Landsat_evap = getFilelist('/data/Aldhani/eoagritwin/et/Landsat/composites/for_evap_comparison/', 'tif')

In [6]:
for file in med_comps:
    results = []
    ds = gdal.Open(file)
    arr = ds.GetRasterBand(1).ReadAsArray()
    valid_arr = np.where((~np.isnan(arr)) & (arr != 0), arr, np.nan)

    # Number of strata and samples per stratum
    n_strata = 4
    samples_per_stratum = 2500

    # Compute percentile thresholds
    percentiles = np.nanpercentile(valid_arr, np.linspace(0, 100, n_strata + 1))


    for land_file in Landsat_evap:
        if land_file.split('9daycomp_')[-1].split('.tif')[0] == '_'.join(file.split('/')[-1].split('_')[:3]):
            ds_l = gdal.Open(land_file)
            arr_l = ds_l.GetRasterBand(1).ReadAsArray()
            if arr.shape == arr_l.shape:
                
                for i in range(n_strata):
        
                    lower, upper = percentiles[i], percentiles[i + 1]
                    stratum_mask = (valid_arr >= lower) & (valid_arr < upper) & (arr_l != 0) & (~np.isnan(arr_l))

                    idx = np.argwhere(stratum_mask)

                    chosen_idx = idx[np.random.choice(len(idx), min(samples_per_stratum, len(idx)), replace=False)]

                    chosen_landsat = arr_l[chosen_idx[:,0], chosen_idx[:,1]]
                    chosen_evapo_est = valid_arr[chosen_idx[:,0], chosen_idx[:,1]]
                    
                    for pix in range(len(chosen_landsat)):
                    
                        results.append({
                            'Date': '_'.join(file.split('/')[-1].split('_')[:3]),
                            'Stratum': f'{i}_{i+1}th',
                            'Landsat': chosen_landsat[pix],
                            'Guzinski': chosen_evapo_est[pix],
                            'row': chosen_idx[pix][0],
                            'col': chosen_idx[pix][1]
                        })

            else:
                raise ValueError('Evap estimate raster does not match the Landsat composite shape')
    # convert and export
    df = pd.DataFrame(results)
    df.to_csv(f'Landsat_vs_Guzinski/extract_{'_'.join(file.split('/')[-1].split('_')[:3])}.csv', index=False)

