In [2]:
import sys
sys.path.append('/home/potzschf/repos/')
from helperToolz.helpsters import *
from helperToolz.guzinski import *
from helperToolz.dicts_and_lists import INT_TO_MONTH

# input directories: Sentinel-3 LST (.nc), viewing zenith angle (VZA) tiffs and ERA5 2 m air temperature tiffs
LST_path = '/data/Aldhani/eoagritwin/et/Sentinel3/raw_LST/'
VZA_path = '/data/Aldhani/eoagritwin/et/Sentinel3/VZA/monthly_tiff_values/'
AirTemp_path = '/data/Aldhani/eoagritwin/et/Auxiliary/ERA5/tiff/low_res/2m_temperature/'

# set storPath for exported tiffs
storPath_base = '/data/Aldhani/eoagritwin/et/Sentinel3/LST/Analytics/guzinski_masking/'
# for year in [2019]:
year = 2019

# get a temporal subset of LST, VZA and air temp files for that year
# (the LST directory is listed and sorted once; the year filter reuses that list)
files = sorted(getFilelist(LST_path, '.nc'))
# the year is read from the first four characters of the last '_'-separated part of the file name
yearFiles_LST = [file for file in files if int(file.split('/')[-1].split('_')[-1][0:4]) == year]
yearFiles_VZA = getFilelist(f'{VZA_path}{year}/', 'tif')
yearFiles_2mT = getFilelist(f'{AirTemp_path}{year}', '.tif')

# fail with a readable message instead of a bare IndexError on yearFiles_LST[0] below
if not yearFiles_LST:
    raise FileNotFoundError(f'no LST .nc files for {year} found in {LST_path}')

# mask built from the GADM level-0 shapefile of Germany and the first LST file of the year
# (presumably the LST file serves as grid reference -- confirm in makeGermanyMaskforNC)
mask = makeGermanyMaskforNC('/data/Aldhani/eoagritwin/misc/gadm41_DEU_shp/gadm41_DEU_0.shp', yearFiles_LST[0])



In [3]:
yearList = []
for month in (f'{month_number:02d}' for month_number in range(1, 13)):
    # debugging restriction: only July is processed, and only if it counts as growing season
    if month == '07' and growingSeasonChecker(int(month)):

        # monthly LST file: the part after the last '-' of the path must be '<month>.nc'
        file_LST = [path for path in yearFiles_LST if path.split('-')[-1] == f'{month}.nc'][0]

        # acquisition time stamps of all observations and the number of observations per calendar day
        accDateTimes = getAllDatesS3(file_LST)
        df = pd.Series(accDateTimes)
        counts_per_day = (
            df.dt.floor("D")
            .value_counts()
            .sort_index()
        )

        # running totals give the end index of each day; shifted by a leading 0 they give the start index
        cumulative_day_counts_end = counts_per_day.cumsum().to_numpy()
        cumulative_day_counts_start = np.insert(cumulative_day_counts_end, 0, 0)

        # load data (all observations for that month)
        dat_LST = getDataFromNC_LST(file_LST)

        # apply the temperature threshold: values below 273.15 are set to NaN in place
        dat_LST[dat_LST < 273.15] = np.nan # LST_MASKING check!

        # VZA stack of the same month (file name ends in '_<month>.tif')
        file_VZA = [path for path in yearFiles_VZA if path.split('_')[-1] == f'{month}.tif'][0]
        dat_VZA = stackReader(file_VZA)

In [4]:
# the month loop iterates over all twelve values, so after it has run `month` is left at '12';
# reset it to July so the stand-alone cells below address the same month that was loaded
month='07'

In [5]:
# ERA5 2 m air temperature: pick the stack of the current month and read it together with its band time stamps
file_2mT = [path for path in yearFiles_2mT if path.split('_')[-1] == f'{INT_TO_MONTH[month]}.tif'][0]
dat_2mT, time_2mT = stackReader(file_2mT, bands=True)

#### get ERA5 AirTemp (interpolated from both modelled values that are closest to LST)
bands_low = []  # band index of the hourly value before each acquisition
bands_up = []   # band index of the hourly value after each acquisition
minutes = []    # minute of each acquisition, used as interpolation weight
for accDT in accDateTimes: # search for each LST observation
    acq_hour = accDT.astype('datetime64[h]')  # acquisition time truncated to the full hour
    acq_minute = pd.Timestamp(accDT).minute
    for count, air_time in enumerate(time_2mT): # the two neighbouring ERA5 air temp values
        if acq_hour == pd.Timestamp(air_time):
            bands_low.append(count)
            bands_up.append(count + 1)
            minutes.append(acq_minute)

# interpolate linearly to the minute of observation; the weight gets two leading axes for broadcasting
minute_fraction = (np.array(minutes, dtype=np.float32) / 60).reshape(1, 1, -1)
air_temp_intpol = dat_2mT[:, :, bands_low] - (dat_2mT[:, :, bands_low] - dat_2mT[:, :, bands_up]) * minute_fraction

# apply air threshold: LST more than 2 below the interpolated air temperature becomes NaN
dat_LST = np.where((dat_LST - air_temp_intpol) < -2, np.nan, dat_LST)

# now get composites (minVZA, maxLST, single scenes)

count_list = []
doyL = [] # for band names when exporting

In [None]:
yearList = []
for month in [f'{i:02d}' for i in range(1,13)]:
    # NOTE(review): debugging filter -- only July is processed; remove it to run every growing-season month
    if month != '07':
        continue
    if growingSeasonChecker(int(month)):

        # subset LST to month and get acquisition time and calculate observations per day
        file_LST = [yearfile_LST for yearfile_LST in yearFiles_LST if f'{month}.nc' == yearfile_LST.split('-')[-1]][0]
        accDateTimes = getAllDatesS3(file_LST)
        df = pd.Series(accDateTimes)
        counts_per_day = df.dt.floor("D").value_counts().sort_index()
        # make iterables from counts per day that catch starting and ending indices to subset all obs per day
        cumulative_day_counts_end = np.asarray(np.cumsum(counts_per_day))
        cumulative_day_counts_start = np.insert(cumulative_day_counts_end, 0 ,0)

        # load data (/all observations for that month)
        dat_LST = getDataFromNC_LST(file_LST)

        # apply the temperature threshold
        dat_LST[dat_LST<273.15] = np.nan # LST_MASKING check!

        # get VZA stack and 2m airtemperature mask
        file_VZA = [yearfile_VZA for yearfile_VZA in yearFiles_VZA if f'{month}.tif' == yearfile_VZA.split('_')[-1]][0]
        dat_VZA = stackReader(file_VZA)

        # sanity check: LST and VZA stacks must line up; report the skip instead of dropping the month silently
        if dat_LST.shape != dat_VZA.shape:
            print(f'skipping month {month}: LST shape {dat_LST.shape} != VZA shape {dat_VZA.shape}')
            continue

        # check air temperature (2m ERA5)
        file_2mT = [yearFile_2mT for yearFile_2mT in yearFiles_2mT if f'{INT_TO_MONTH[month]}.tif' == yearFile_2mT.split('_')[-1]][0]
        dat_2mT, time_2mT = stackReader(file_2mT, bands=True)

        #### get ERA5 AirTemp (interpolated from both modelled values that are closest to LST)
        bands_low = []
        minutes = [] # get the minutes to interpolate ERA5 temp values to the exact minute of LST acquisition
        for accDT in accDateTimes: # search for each LST observation
            for count, air_time in enumerate(time_2mT): # the two neighbouring ERA5 air temp values
                if accDT.astype('datetime64[h]')== pd.Timestamp(air_time): # this will get the hourly value before the acquisition
                    bands_low.append(count)
                    minutes.append(pd.Timestamp(accDT).minute)
        bands_up = [band + 1 for band in bands_low]# this get the hourly value after the acquisition

        # interpolate to the minute of observation
        air_temp_intpol = dat_2mT[:,:,bands_low] - (dat_2mT[:,:,bands_low] - dat_2mT[:,:,bands_up]) * (np.array(minutes, dtype=np.float32) / 60).reshape(1,1,-1) # add one dimension for broadcasting

        # apply air threshold
        dat_LST = np.where((dat_LST - air_temp_intpol) < -2, np.nan, dat_LST)

        # now get composites (minVZA, maxLST, single scenes)

        count_list = []
        doyL = [] # for band names when exporting

        # file name stem of the monthly LST file, reused for every exported day
        scene_name = file_LST.split('.nc')[0].split('/')[-1]

        for l in range(len(counts_per_day)):

            ################## LST values
            # Select the slices for the day:
            LST_slice = dat_LST[:, :, cumulative_day_counts_start[l]:cumulative_day_counts_end[l]]  # shape (X,Y,Z)
            VZA_slice = dat_VZA[:, :, cumulative_day_counts_start[l]:cumulative_day_counts_end[l]]  # shape (X,Y,Z)

            # Create mask where LST is valid and VZA < 45
            valid_mask = (~np.isnan(LST_slice)) & (VZA_slice < 45)
            valid_obs = np.sum(valid_mask, axis=2) * mask
            count_list.append(valid_obs)

            # NOTE(review): l+1 is the running index of days that have acquisitions in this month, not a day of year
            doyL.append(f'DOY_{l+1}')
            yearList.append(valid_obs)

            lst_to_export = np.where(valid_mask, LST_slice, np.nan)

            # one file per day: without the day index in the name every day would overwrite the previous export.
            # bandnames is handed over as a plain list because npTOdisk truth-tests it, which fails for a pandas Series.
            npTOdisk(
                lst_to_export,
                f'{storPath_base}Daily_Observation_Guzinski_mask_{year}_{INT_TO_MONTH[month]}.tif',
                f"/data/Aldhani/eoagritwin/et/Sentinel3/temp/guzinski_masked_single_scenes/{scene_name}_day_{l+1}.tif",
                bands=lst_to_export.shape[2],
                bandnames=df[cumulative_day_counts_start[l]:cumulative_day_counts_end[l]].tolist(),
            )
                

#         exportNCarrayDerivatesInt(file_LST, storPath_base, f'Daily_Observation_Guzinski_mask_{year}_{INT_TO_MONTH[month]}.tif',
#                           doyL, np.dstack(count_list), numberOfBands=len(count_list))
            
# exportNCarrayDerivatesInt(file_LST, storPath_base, f'April-October_Cumulative_Observation_Guzinski_mask_{year}_{INT_TO_MONTH[month]}.tif',
#                           f'Cumulative_Observations in growing season {year}', np.sum(np.dstack(yearList),axis=2), numberOfBands=1)

In [10]:
for l in range(len(counts_per_day)):

    ################## LST values
    # index range of the observations that belong to the day at position l
    day_obs = slice(cumulative_day_counts_start[l], cumulative_day_counts_end[l])
    LST_slice = dat_LST[:, :, day_obs]  # shape (X,Y,Z)
    VZA_slice = dat_VZA[:, :, day_obs]  # shape (X,Y,Z)

    # an observation counts when its LST is not NaN and its VZA is below 45
    valid_mask = np.logical_and(~np.isnan(LST_slice), VZA_slice < 45)
    # number of valid observations per pixel, multiplied with the Germany mask
    valid_obs = valid_mask.sum(axis=2) * mask
    count_list.append(valid_obs)
    yearList.append(valid_obs)
    doyL.append(f'DOY_{l+1}')

    # keep LST only where the observation is valid
    lst_to_export = np.where(valid_mask, LST_slice, np.nan)

    # write one tif per day, one band per observation, with the acquisition time stamps as band names
    scene_name = file_LST.split('.nc')[0].split('/')[-1]
    npTOdisk(
        lst_to_export,
        f'{storPath_base}Daily_Observation_Guzinski_mask_{year}_{INT_TO_MONTH[month]}.tif',
        f"/data/Aldhani/eoagritwin/et/Sentinel3/temp/guzinski_masked_single_scenes/{scene_name}_day_{l+1}.tif",
        bands=lst_to_export.shape[2],
        bandnames=df[day_obs].tolist(),
    )



In [12]:
l = 8
################# LST values
# first and one-past-last observation index of the day at position l
day_start = cumulative_day_counts_start[l]
day_end = cumulative_day_counts_end[l]
LST_slice = dat_LST[:, :, day_start:day_end]  # shape (X,Y,Z)
VZA_slice = dat_VZA[:, :, day_start:day_end]  # shape (X,Y,Z)

# valid = LST is not NaN and VZA < 45
lst_is_valid = ~np.isnan(LST_slice)
vza_is_low = VZA_slice < 45
valid_mask = lst_is_valid & vza_is_low

# per-pixel count of valid observations, multiplied with the Germany mask
valid_obs = valid_mask.sum(axis=2) * mask
for collector in (count_list, yearList):
    collector.append(valid_obs)
doyL.append(f'DOY_{l+1}')

# LST where valid, NaN elsewhere
lst_to_export = np.where(valid_mask, LST_slice, np.nan)

In [17]:
# quick check: sum of all non-NaN values in band index 3 of the masked LST slice
np.nansum(lst_to_export[..., 3])

np.float32(26136040.0)

In [9]:
def npTOdisk(arr, reference_path, outPath, bands = False, bandnames = False):
    """exports a numpy array to a tif that is stored on disk

    Raster size, geotransform, projection and band data type are copied from
    the reference tif.

    Args:
        arr (numpy array): the array to be exported. 2D for a single band, or 3D with the
            bands on the last axis. A 3D array with exactly one band is accepted when bands == 1.
        reference_path (str): path to the reference tif. The extent and dimensions must fit!!!!
        outPath (str): path to exported tif on disk
        bands (int, optional): number of bands to write. When left at False, the band count
            of the reference tif is used.
        bandnames (str or sequence, optional): band description(s). A single string for one
            band, otherwise one entry per band (list, numpy array or pandas Series). Every
            entry is converted with str(). When left at False, no descriptions are set.

    Raises:
        ValueError: if fewer band names than bands are given, or if bands == 1 and arr is
            3D with more than one layer.
        RuntimeError: if the output tif cannot be created.
    """
    ref_ds = checkPath(reference_path)
    ref_band = ref_ds.GetRasterBand(1)
    if not bands:
        bands = ref_ds.RasterCount

    # Normalise bandnames to a list of strings (or None). Truth-testing the argument directly
    # raises "truth value is ambiguous" for a pandas Series or a numpy array.
    if bandnames is None or bandnames is False:
        names = None
    elif isinstance(bandnames, str):
        names = [bandnames] if bandnames else None
    else:
        names = [str(name) for name in bandnames] or None
    if names is not None and len(names) < bands:
        raise ValueError(f'{len(names)} band name(s) given for {bands} band(s)')

    # Collect one 2D array per band; WriteArray only accepts 2D input.
    if bands == 1:
        if arr.ndim == 3:
            if arr.shape[2] != 1:
                raise ValueError(f'bands == 1 but arr has {arr.shape[2]} layers on its last axis')
            band_arrays = [arr[:, :, 0]]  # single band delivered as (rows, cols, 1)
        else:
            band_arrays = [arr]
    else:
        band_arrays = [arr[:, :, i] for i in range(bands)]

    out_ds = gdal.GetDriverByName('GTiff').Create(outPath, ref_ds.RasterXSize, ref_ds.RasterYSize, bands, ref_band.DataType)
    if out_ds is None:
        # without GDAL exceptions enabled, Create signals failure by returning None
        raise RuntimeError(f'could not create output tif {outPath}')
    out_ds.SetGeoTransform(ref_ds.GetGeoTransform())
    out_ds.SetProjection(ref_ds.GetProjection())

    for i, band_array in enumerate(band_arrays):
        out_band = out_ds.GetRasterBand(i+1)
        out_band.WriteArray(band_array)
        if names is not None:
            out_band.SetDescription(names[i])
    out_ds.FlushCache()


In [None]:
# ystack = np.dstack(yearList)
# ystack[ystack>0] = 1

# exportNCarrayDerivatesInt(file_LST, storPath_base, f'April-October_Amount_days_with_Observation_Guzinski_mask_{year}_{INT_TO_MONTH[month]}.tif',
#                           f'Amount_days_with_Observations in growing season {year}', np.sum(ystack,axis=2), numberOfBands=1)