In [1]:
import sys
sys.path.append('/home/potzschf/repos/')
from helperToolz.helpsters import *
from helperToolz.dicts_and_lists import *
from helperToolz.guzinski import * 
import geopandas as gpd
from collections import defaultdict
from joblib import Parallel, delayed
from datetime import datetime, timedelta
import time

In [2]:
# Compositing variant whose evaporation rasters are processed in this notebook.
comp = 'minVZA'
# Root folder holding the per-scene evaporation GeoTIFFs for that variant.
evap_outFolder = f'/data/Aldhani/eoagritwin/et/Sentinel3/LST/SharpEvap/Brandenburg/evap/{comp}/'
# Daily VRT mosaics are written below the evaporation folder; deriving the path
# from evap_outFolder keeps the two locations from drifting apart.
vrt_folder = f'{evap_outFolder}vrt/'

# exist_ok=True replaces the exists()/makedirs() pair: no check-then-create race
# and the same idiom the VRT-building cell already uses.
os.makedirs(vrt_folder, exist_ok=True)

In [3]:
# Split the evaporation GeoTIFFs into the four products:
# soil vs. canopy, each with ssrd "calc" vs. ssrd "func".
# NOTE(review): the search starts at evap_outFolder with deep=True, so median
# rasters written later below median_9day/ presumably show up here as well;
# they only drop out downstream because their names lack a month-name date.
all_files = getFilelist(evap_outFolder, '.tif', deep=True)

soil_func_files = list(filter(lambda path: 'Soil_func' in path, all_files))
soil_calc_files = list(filter(lambda path: 'Soil_calc' in path, all_files))

canopy_func_files = list(filter(lambda path: 'Canopy_func' in path, all_files))
canopy_calc_files = list(filter(lambda path: 'Canopy_calc' in path, all_files))


In [4]:
# Get the maximum extent for the VRTs. It is taken from the field mask, which
# itself was derived from a VRT that was created and then deleted (some scenes
# have a smaller extent, so a single scene cannot serve as the reference).
mask_ds = gdal.Open('/data/Aldhani/eoagritwin/fields/Auxiliary/grid_search/Brandenburg/quick_n_dirty/Fields_as_mask.tif')
if mask_ds is None:
    # Without exceptions enabled gdal.Open signals failure by returning None;
    # fail here with a clear message instead of an AttributeError further down.
    raise FileNotFoundError('GDAL could not open the field mask raster (Fields_as_mask.tif)')

gt = mask_ds.GetGeoTransform()
mask_proj = mask_ds.GetProjection()

# Upper-left corner and pixel size from the geotransform.
xmin = gt[0]
ymax = gt[3]
px_size_x = gt[1]
px_size_y = abs(gt[5])  # abs() because gt[5] is used as a positive pixel height below

# NOTE: despite their names, xres / yres hold the raster size in pixels.
xres = mask_ds.RasterXSize
yres = mask_ds.RasterYSize

# Lower-right corner = upper-left corner + size * pixel size.
xmax = xmin + xres * px_size_x
ymin = ymax - yres * px_size_y

# Every daily VRT is forced onto the mask grid (bounds, resolution, SRS) so all
# products can later be stacked pixel by pixel.
vrt_options = gdal.BuildVRTOptions(
    outputBounds=[xmin, ymin, xmax, ymax],
    xRes=px_size_x,
    yRes=px_size_y,
    outputSRS=mask_proj,
    resampleAlg='nearest',
    separate=False
)



In [5]:
# Months expected in the scene file names.
months = ['April', 'May', 'June', 'July', 'August', 'September', 'October']

# date_reg  : "<year>_<Month>_<day>_" inside the per-scene GeoTIFF names.
# date_reg2 : "<year>_<Month>_<day>." directly in front of the VRT extension.
# The dot is escaped so it only matches a literal "." and not any character.
month_alternatives = '|'.join(months)
date_reg = re.compile(r'20\d{2}_(?:' + month_alternatives + r')_\d{1,2}_')
date_reg2 = re.compile(r'20\d{2}_(?:' + month_alternatives + r')_\d{1,2}\.')

filesList = [soil_func_files, soil_calc_files, canopy_func_files, canopy_calc_files]
filesNames = ['Soil_func', 'Soil_calc', 'Canopy_func', 'Canopy_calc']

for fname, fileL in zip(filesNames, filesList):
    # Group the scenes of this product by acquisition date.
    # NOTE: `dicti` of the last product stays defined after the loop and is
    # read by the cell that derives the compositing windows.
    dicti = defaultdict(list)
    for file in fileL:
        match = date_reg.search(file)
        if match is None:
            # no "<year>_<Month>_<day>_" stamp in the name -> not a daily scene
            continue
        year, month, day = match.group()[:-1].split('_')  # [:-1] drops the trailing "_"
        day = day.zfill(2)  # "1" → "01", "13" → "13"
        date_key = f"{year}_{month}_{day}"
        dicti[date_key].append(file)

    # One output folder per product; created once instead of once per date.
    outfolder = f'{vrt_folder}{fname}/'
    os.makedirs(outfolder, exist_ok=True)

    for key, files in dicti.items():
        vrt_path = f'{outfolder}{fname}_{key}.vrt'
        vrt = gdal.BuildVRT(vrt_path, files, options=vrt_options)
        if vrt is None:
            # Without exceptions enabled BuildVRT reports failure as None.
            print(f'WARNING: could not build {vrt_path}')
        vrt = None  # release the dataset so the VRT is flushed to disk

In [6]:
# Load the compositing dates and derive one window per date (4 days before to
# 4 days after). TODO from the original author: add a sanity check on the dates.
compDates = pd.read_csv('/data/Aldhani/eoagritwin/et/Sentinel3/LST/SharpEvap/Brandenburg/evap/compdates.csv')['compdates'].tolist()

half_window = timedelta(days=4)
comp_days = [datetime.strptime(str(compDate), '%Y%m%d') for compDate in compDates]
starts = [comp_day - half_window for comp_day in comp_days]
ends = [comp_day + half_window for comp_day in comp_days]

# Stretch the first / last window so that the earliest and latest VRT date fall
# inside them.
# NOTE(review): `dicti` is whatever the VRT-building loop left behind, i.e. the
# dates of the last product processed there only; this cell fails on a fresh
# kernel unless that cell ran first.
# NOTE(review): adjusting starts[0] / ends[-1] presumes compdates.csv is sorted
# ascending — confirm.
vrt_dates = list(dicti.keys())
vrt_datesL = [
    f'{year}{MONTH_TO_02D[month]}{day}'
    for year, month, day in (vrt_date.split('_') for vrt_date in vrt_dates)
]
vrt_start = datetime.strptime(min(vrt_datesL), '%Y%m%d')
vrt_end = datetime.strptime(max(vrt_datesL), '%Y%m%d')

starts[0] = min(starts[0], vrt_start)
ends[-1] = max(ends[-1], vrt_end)

In [22]:
# Turn the field mask into a 0/1 multiplier: every positive value becomes 1,
# all other values are left as they are.
mask_arr = mask_ds.GetRasterBand(1).ReadAsArray()
np.putmask(mask_arr, mask_arr > 0, 1)

# Output folders for the window medians and for the summed ET rasters.
for out_dir in (f'{evap_outFolder}median_9day/', f'{evap_outFolder}median_9day/ET_sum/'):
    os.makedirs(out_dir, exist_ok=True)

In [16]:
def make_ET_median(vrt_folder, ET_var, date_s, date_e, reg_search, outFold, mask,
                   include_end=False, valid_min=0, valid_max=12):
    """Write the masked per-pixel median of one ET product over one date window.

    All VRTs found in ``{vrt_folder}{ET_var}/`` whose file-name date lies in the
    window are read (band 1), implausible values are set to NaN, and the
    NaN-aware median across the stack is multiplied with ``mask`` and written
    to ``{outFold}median_9day/{ET_var}_{YYYY_MM_DD of date_s}_median_ET.tif``.
    Nothing is written when no VRT falls into the window.

    Parameters
    ----------
    vrt_folder : str
        Folder (with trailing slash) containing one sub-folder per product.
    ET_var : str
        Product name; used as sub-folder name and as output file prefix.
    date_s, date_e : datetime
        Start and end of the window.
    reg_search : compiled regex
        Must match "<year>_<MonthName>_<day>" plus ONE trailing separator
        character in the file name; that last character is stripped.
    outFold : str
        Output root (with trailing slash); ``median_9day/`` is created below it
        if missing, so the function does not depend on another cell.
    mask : numpy array
        Multiplied element-wise with the median before writing.
    include_end : bool, default False
        False keeps the original half-open window ``date_s <= d < date_e``.
        NOTE(review): with ``date_e = centre + 4 days`` this drops the last day
        (8 days instead of 9); pass True for a closed window if that is not
        intended.
    valid_min, valid_max : number, default 0 and 12
        Values ``<= valid_min`` or ``> valid_max`` are treated as invalid.

    Raises
    ------
    RuntimeError
        If GDAL cannot open one of the selected VRTs.
    """
    # --- select the VRTs whose date lies inside the window -------------------
    window_files = []
    for file in getFilelist(f'{vrt_folder}{ET_var}/', '.vrt'):
        found = reg_search.search(file)
        if found is None:
            # file name carries no date stamp -> skip instead of crashing on
            # None.group()
            continue
        year, month, day = found.group()[:-1].split('_')
        # MONTH_TO_02D presumably maps the month name to its two-digit number
        date_key = f"{year}{MONTH_TO_02D[month]}{day.zfill(2)}"
        file_date = datetime.strptime(date_key, '%Y%m%d')

        before_end = file_date <= date_e if include_end else file_date < date_e
        if date_s <= file_date and before_end:
            window_files.append(file)

    if not window_files:
        return

    # --- read, clean and stack ----------------------------------------------
    arrL = []
    for file in window_files:
        ds = gdal.Open(file)
        if ds is None:
            raise RuntimeError(f'GDAL could not open {file}')
        arr = ds.GetRasterBand(1).ReadAsArray()
        ds = None  # ReadAsArray returned a copy; release the dataset right away
        if not np.issubdtype(arr.dtype, np.floating):
            # NaN cannot be stored in integer arrays
            arr = arr.astype(np.float32)
        arr[(arr <= valid_min) | (arr > valid_max)] = np.nan
        arrL.append(arr)
    median_arr = np.nanmedian(np.dstack(arrL), axis=2)

    # --- write ---------------------------------------------------------------
    os.makedirs(f'{outFold}median_9day/', exist_ok=True)
    stamp = datetime.strftime(date_s, '%Y_%m_%d')
    # The last VRT of the window serves as the template raster; the trailing 0
    # is passed through unchanged (presumably the no-data value — confirm).
    makeTif_np_to_matching_tif(median_arr * mask, window_files[-1],
                               f'{outFold}median_9day/{ET_var}_{stamp}_median_ET.tif', 0)


In [17]:

# One job per (ET product, compositing window); each entry is the positional
# argument list for make_ET_median.
joblist = [
    [vrt_folder, et_var, date_start, date_end, date_reg2, evap_outFolder, mask_arr]
    for et_var in ['Soil_func', 'Soil_calc', 'Canopy_func', 'Canopy_calc']
    for date_start, date_end in zip(starts, ends)
]

print(f'\n{len(joblist)} jobs will be processed\n')



56 jobs will be processed



In [18]:
# Number of worker processes used by joblib.
ncores = 8


if __name__ == '__main__':
    stamp_format = "%a, %d %b %Y %H:%M:%S"

    starttime = time.strftime(stamp_format, time.localtime())
    print("--------------------------------------------------------")
    print("Starting process, time:" + starttime)
    print("")

    # Each job is the positional argument list of make_ET_median.
    median_tasks = (delayed(make_ET_median)(*job) for job in joblist)
    Parallel(n_jobs=ncores)(median_tasks)

    print("")
    endtime = time.strftime(stamp_format, time.localtime())
    print("--------------------------------------------------------")
    print("--------------------------------------------------------")
    print("start : " + starttime)
    print("end: " + endtime)
    print("")



--------------------------------------------------------
Starting process, time:Tue, 14 Oct 2025 12:59:42






--------------------------------------------------------
--------------------------------------------------------
start : Tue, 14 Oct 2025 12:59:42
end: Tue, 14 Oct 2025 13:21:29



In [23]:
# Sum the soil and canopy window medians into one total-ET raster per window.

# ssrd variant -> output file-name ending. Only "calc" is produced for now; add
# 'func': 'median_func_ET.tif' to also write the sums of the "func" rasters.
sum_variants = {'calc': 'median_ET.tif'}
# Sums at or above this value are discarded as implausible.
et_sum_max = 10
# We cut off the highest values at the Polish border for now: everything above
# this percentile of the remaining sums is discarded too.
cutoff_percentile = 99.9

medList = getFilelist(f'{evap_outFolder}median_9day/', '.tif')
for date_start in starts:

    dt = datetime.strftime(date_start, '%Y_%m_%d')

    for variant, out_ending in sum_variants.items():
        component_paths = [med_path for med_path in medList if dt in med_path and variant in med_path]
        if len(component_paths) != 2:
            # A window without VRTs yields no median raster; skip it instead of
            # failing with an IndexError on the missing component.
            print(f'skipping {dt} ({variant}): expected 2 median rasters, found {len(component_paths)}')
            continue

        arrL = []
        for pathi in component_paths:
            ds = gdal.Open(pathi)
            arrL.append(ds.GetRasterBand(1).ReadAsArray())
            ds = None  # release the dataset once the array is in memory

        # nansum: a pixel that is NaN in one component keeps the other value.
        arr_sum = np.nansum(np.dstack(arrL), axis=2)
        arr_sum[arr_sum >= et_sum_max] = np.nan
        cutOff = np.nanpercentile(arr_sum, [cutoff_percentile])[0]
        arr_sum[arr_sum > cutOff] = np.nan

        # The last component raster serves as the template for the output.
        makeTif_np_to_matching_tif(arr_sum, component_paths[-1],
                                   f'{evap_outFolder}median_9day/ET_sum/{dt}_{out_ending}', 0)

In [None]:
# Inspect the median raster paths collected by the ET-sum cell.
medList

In [None]:
filesNames = ['Soil_func', 'Soil_calc', 'Canopy_func', 'Canopy_calc']

# Serial (single-process) alternative to the joblib run: compute the window
# medians one after another. This delegates to make_ET_median instead of
# repeating its body, so both paths write identically named files
# ("<product>_<YYYY_MM_DD>_median_ET.tif"). The former inline copy stamped its
# outputs as YYYYMMDD, which the ET-sum cell (matching on YYYY_MM_DD) would not
# have picked up.
for fname in filesNames:
    for start_d, end_d in zip(starts, ends):
        make_ET_median(vrt_folder, fname, start_d, end_d, date_reg2, evap_outFolder, mask_arr)


In [None]:
# Create a raster version of the field polygons with the dimensions of the VRTs.
# NOTE(review): this overwrites Fields_as_mask.tif, the same file the extent
# cell opens as `mask_ds`; run it only when the mask has to be regenerated and
# reload the mask afterwards.

mask_path = '/data/Aldhani/eoagritwin/fields/Auxiliary/grid_search/Brandenburg/quick_n_dirty/Fields_polygons.shp'
mask_raster_path = '/data/Aldhani/eoagritwin/fields/Auxiliary/grid_search/Brandenburg/quick_n_dirty/Fields_as_mask.tif'

# Any existing VRT serves as the template for size, geotransform and projection.
vrt_candidates = getFilelist(vrt_folder, '.vrt', deep=True)
if not vrt_candidates:
    raise FileNotFoundError(f'no .vrt found below {vrt_folder}; build the VRTs first')
dummy_path = vrt_candidates[0]

aoi_ds = gdal.Open(dummy_path)
if aoi_ds is None:
    raise RuntimeError(f'GDAL could not open template VRT {dummy_path}')
geo_transform = aoi_ds.GetGeoTransform()
projection = aoi_ds.GetProjection()
x_res = aoi_ds.RasterXSize  # raster width in pixels
y_res = aoi_ds.RasterYSize  # raster height in pixels

# === Create empty target raster ===
target_ds = gdal.GetDriverByName('GTiff').Create(
    mask_raster_path,
    x_res,
    y_res,
    1,                     # one band
    gdal.GDT_Int32
)
if target_ds is None:
    raise RuntimeError(f'GDAL could not create {mask_raster_path}')
target_ds.SetGeoTransform(geo_transform)
target_ds.SetProjection(projection)

# === Open shapefile ===
shp_ds = ogr.Open(mask_path)
if shp_ds is None:
    raise FileNotFoundError(f'OGR could not open {mask_path}')
layer = shp_ds.GetLayer()

# === Rasterize ===
# Use attribute "FieldID" from the shapefile for pixel values
rasterize_err = gdal.RasterizeLayer(
    target_ds,
    [1],             # which band(s) to burn into
    layer,
    options=["ATTRIBUTE=FieldID"]
)
if rasterize_err not in (None, 0):
    # a non-zero return code signals a GDAL error when exceptions are disabled
    raise RuntimeError(f'gdal.RasterizeLayer failed with error code {rasterize_err}')

# === Clean up ===
target_ds.FlushCache()
target_ds = None
layer = None     # drop the layer before its datasource
shp_ds = None
aoi_ds = None