In [100]:
import sys
sys.path.append('/home/potzschf/repos/')
from helperToolz.helpsters import *
from helperToolz.evapo import *
import geopandas as gpd
import tarfile
from pyproj import CRS
from collections import Counter
from joblib import Parallel, delayed
workhorse = True

if workhorse:
    origin = 'Aldhani/eoagritwin/'
else:
    origin = ''

utm_to_epsg = {
    '28': 32628,  # Western Portugal, Azores
    '29': 32629,  # Western Spain, Portugal
    '30': 32630,  # Spain, France, UK
    '31': 32631,  # France, Benelux, Germany, Western Norway
    '32': 32632,  # Germany, Denmark, Switzerland, Italy, Austria
    '33': 32633,  # Central Europe: Poland, Czechia, Hungary, Croatia, Sweden, Norway
    '34': 32634,  # Eastern Europe: Finland, Baltic States, Romania
    '35': 32635,  # Western Russia, Ukraine
    '36': 32636,  # Russia, Black Sea region
}

def get_union_bounds(bounds_list):
    """Compute the union of multiple bounding boxes."""
    minx = min([b[0] for b in bounds_list])
    miny = min([b[1] for b in bounds_list])
    maxx = max([b[2] for b in bounds_list])
    maxy = max([b[3] for b in bounds_list])
    return (minx, miny, maxx, maxy)

In [101]:
#### get path and rows of scenes that have data for the chosen AOI (e.g. Brandenburg)
aoi_set_man = 'Brandenburg'
res = 30

# load shapefiles and check projections
ger = gpd.read_file(f'/data/{origin}misc/gadm41_DEU_shp/gadm41_DEU_1.shp')
aoi = ger[ger['NAME_1'] == aoi_set_man]

orbits = gpd.read_file(f'/data/{origin}misc/WRS2_descending_0/WRS2_descending.shp')

if aoi.crs != orbits.crs:
    aoi = aoi.to_crs(orbits.crs)

# find overlapping paths/rows
#intersecting = orbits[orbits.intersects(aoi.unary_union)]
intersecting = gpd.sjoin(orbits, aoi, how="inner", predicate="intersects")
path_rows = [[p, r] for p, r in zip(intersecting['PATH'], intersecting['ROW'])]
# make sure, paths and rows have the correct format
path_rows = [f'{str(p).zfill(3)}{str(r).zfill(3)}' for p, r in path_rows]

In [None]:
import matplotlib.pyplot as plt

# Ensure same CRS
orbits = orbits.to_crs(aoi.crs)

# Find intersecting WRS-2 paths
intersecting = gpd.sjoin(orbits, aoi, how="inner", predicate="intersects")

# Plot
fig, ax = plt.subplots(figsize=(10, 10))

# Plot orbits that intersect
intersecting.boundary.plot(ax=ax, color='blue', linewidth=1, label='Intersecting WRS-2 Paths')

# Plot AOI
aoi.boundary.plot(ax=ax, color='red', linewidth=2, label='AOI (Brandenburg)')

# Style
ax.set_title("Landsat WRS-2 Paths intersecting Brandenburg", fontsize=14)
ax.legend()
ax.set_axis_off()
plt.tight_layout()
plt.show()

In [102]:
# get all paths from downloaded products --> subsetted to paths and rows
landsat_files = getFilelist(f'/data/{origin}et/Landsat/raw/', '.tar.gz', deep=True)

# create a look-up dictionary for time subsets
lookUp = LandsatETFileManager(landsat_files)

In [103]:
#### do the compositing monthly
year = 2024
month = 7
# check if temp_folder is empty and delete everything if not
tempF = f'/data/{origin}et/Landsat/extracts/'
if len(getFilelist(tempF, '.nc')) > 0:
    for file in getFilelist(tempF, '.nc'):
        os.remove(file)
    print('kill complete')

# subset data and extract
year_month = lookUp.get_by_year_and_month(year, month)
year_month_path_row = [scene for scene in year_month for pr in path_rows if pr in scene]

for landsat_file in year_month_path_row:
    with tarfile.open(landsat_file, 'r:gz') as tar:
        tar.extractall(tempF)

kill complete


  tar.extractall(tempF)


In [104]:
# List of file paths (NetCDF or GeoTIFF)
files_nc = getFilelist(f'/data/{origin}et/Landsat/extracts/', '.nc')
files_xml = getFilelist(f'/data/{origin}et/Landsat/extracts/', '.xml')
datasets = []

for f_nc in files_nc:
    
    utm_zone, w, e, n, s = get_UTM_zone_and_corners_from_xml(f_nc, files_xml)
    ds = xr.open_dataset(f_nc)
    da = ds['ETA']
    da.rio.set_spatial_dims(x_dim="XDim_ETA", y_dim="YDim_ETA", inplace=True)
    da.rio.write_crs(f'EPSG:{utm_to_epsg[utm_zone]}', inplace=True)
    # da.rio.write_nodata(-9999, inplace=True)
    datasets.append(da)

# Reproject to common CRS 
target_crs = 'EPSG:32633'
datasets_reprojected = [ds.rio.reproject(target_crs) for ds in datasets]

# get common bounds
all_bounds = [ds.rio.bounds() for ds in datasets_reprojected]
union_bounds = get_union_bounds(all_bounds)

# define resolution
res = datasets_reprojected[0].rio.resolution()
res_x, res_y = abs(res[0]), abs(res[1])

In [None]:
# Create output grid shape
minx, miny, maxx, maxy = union_bounds
width = int((maxx - minx) / res_x)
height = int((maxy - miny) / res_y)

# Create a template with correct transform
from affine import Affine
transform = Affine.translation(minx, maxy) * Affine.scale(res_x, -res_y)

# Reproject all datasets to this common grid
from rasterio.enums import Resampling

aligned = []
for da in datasets:
    aligned_da = da.rio.reproject(
        dst_crs=target_crs,
        shape=(height, width),
        transform=transform,
        resampling=Resampling.cubic  # or bilinear/cubic
    )
    aligned.append(aligned_da)

# Stack and composite (e.g., using nanmean for mosaic)
stacked = xr.concat(aligned, dim="stack")
composite = stacked.mean(dim="stack", skipna=True)

# mask composite
aoi_m = aoi.to_crs(composite.rio.crs)
composite_clipped = composite.rio.clip(aoi.geometry.values, aoi.crs, drop=True, invert=False)
composite_clipped.rio.to_raster(f'/data/{origin}et/Landsat/comp.tif')

In [None]:
tempF = f'/data/{origin}et/Landsat/extracts/'
if len(getFilelist(tempF, '.nc')) > 0:
    for end in ['.nc', '.xml', '.txt']:
        for file in getFilelist(tempF, end):
            os.remove(file)
    print('kill complete')

In [None]:
files_nc = getFilelist(f'/data/{origin}et/Landsat/extracts/', '.nc')
files_xml = getFilelist(f'/data/{origin}et/Landsat/extracts/', '.xml')

w,e,n,s = [],[],[],[]

for f_nc in files_nc:
  
    utm_zone, w, e, n, s = get_UTM_zone_and_corners_from_xml(f_nc, files_xml)
    ds = xr.open_dataset(f_nc)
    da = ds['ETA']
    da.rio.set_spatial_dims(x_dim="XDim_ETA", y_dim="YDim_ETA", inplace=True)
    da.rio.write_crs(f'EPSG:{utm_to_epsg[utm_zone]}', inplace=True)  # or detect from file
    da.rio.to_raster(f'/data/{origin}et/Landsat/tiff/{f_nc.rsplit('/', maxsplit=1)[-1].split('.')[0]}.tif')

# if w > aoi.bounds['minx']
# aoi.bounds['miny']
# aoi.bounds['max']
# aoi.bounds['maxy']
#utm_to_epsg[utm_zone)

In [None]:
# # get maximum bounding box based on aoi and nc data
# nc_files = getFilelist(f'/data/{origin}et/Landsat/extracts/', '.nc')
# # min-maxs for data
# file_Xmin = []
# file_Xmax = []
# file_Ymin = []
# file_Ymax = []

# for nc_file in nc_files:
#     file_Xmin.append(xr.open_dataset(nc_file).coords['XDim_ETA'][0])
#     file_Xmax.append(xr.open_dataset(nc_file).coords['XDim_ETA'][-1])
#     file_Ymin.append(xr.open_dataset(nc_file).coords['YDim_ETA'][-1])
#     file_Ymax.append(xr.open_dataset(nc_file).coords['YDim_ETA'][0])

# dXmin = min(file_Xmin) # western border
# dXmax = max(file_Xmax) # eastern border
# dYmin = min(file_Ymin) # southern border
# dYmax = max(file_Ymax) # northern border

# # min-max aoi
# aXmin, aYmin, aXmax, aYmax = aoi.to_crs(CRS.from_epsg(landsat_epsg)).total_bounds

# # get bounding box for array and later export
# nXmin = int(math.floor((aXmin - dXmin) / res) * res + dXmin)
# nXmax = int(dXmax - (math.floor((dXmax - aXmax) / res) * res))
# nYmin = int(math.floor((aYmin - dYmin) / res) * res + dYmin)
# nYmax = int(dYmax - (math.floor((dYmax - aYmax) / res) * res))

# # create empty np array
# rows = int(((nYmax - nYmin) / res) + 1)
# cols = int(((nXmax - nXmin) / res) + 1)
# bands = len(year_month_path_row)
# block = np.empty((rows, cols, bands), dtype=np.float32)

In [None]:
# # fill it
# for i, nc_file in enumerate(nc_files):
#     print(i)
#     values = xr.open_dataset(nc_file).data_vars['ETA'].values
#     fXmin = int(file_Xmin[i])
#     fXmax = int(file_Xmax[i])
#     fYmin = int(file_Ymin[i])
#     fYmax = int(file_Ymax[i])

#     if nXmin - fXmin < 0: 
#         start_col_aoi = int((fXmin - nXmin) / 30)
#         start_col_dat = 0
#     elif nXmin - fXmin > 0:
#         start_col_aoi = 0
#         start_col_dat = int((nXmin - fXmin) / 30)
#     else:
#         start_col_aoi = 0
#         start_col_dat = 0


#     if nXmax - fXmax < 0:
#         end_col_dat = values.shape[1] - int((fXmax-nXmax)/30)
#         end_col_aoi = cols
#     elif nXmax - fXmax > 0:
#         end_col_dat = values.shape[1]
#         end_col_aoi = cols - int((nXmax - fXmax) / 30)
#     else:
#         end_col_dat = values.shape[1]
#         end_col_aoi = cols


#     if nYmax - fXmax < 0:
#         start_row_dat = int((fYmax - nYmax) / 30)
#         start_row_aoi = 0
#     elif nYmax - fXmax > 0:
#         start_row_dat = 0
#         start_row_aoi = int((nYmax - fYmax) / 30)
#     else:
#         start_row_dat = 0
#         start_row_aoi = 0


#     if nYmin - fYmin < 0:
#         end_row_dat = values.shape[0]
#         end_row_aoi = rows - int((nYmin - fYmin) / 30)
#     elif nYmin - fYmin > 0:
#         end_row_dat = values.shape[0] - int((nYmin - fYmin) / 30)
#         end_row_aoi = rows 
#     else:
#         end_row_dat = values.shape[0]
#         end_row_aoi = rows
    
#     pList = [start_row_dat, end_row_dat, start_col_dat, end_col_dat, start_row_aoi, end_row_aoi, start_col_aoi, end_col_aoi]
#     if any(x < 0 for x in pList):
#         print('shit')
#         continue
#     else:
#         block[start_row_aoi:end_row_aoi, start_col_aoi:end_col_aoi, i] = values[start_row_dat:end_row_dat, start_col_dat:end_col_dat]

In [None]:
# #### take care of overlap due due multiple scenes per day
# day_counts = Counter(year_month_path_row)

# dummy = 0
# cumulative_day_counts_end = []

# for _, count in sorted(day_counts.items()):
#     dummy += count
#     cumulative_day_counts_end.append(dummy)

# cumulative_day_counts_start = np.insert(cumulative_day_counts_end, 0 ,0)

# # try fancy list aggregation
# cumulative_day_counts_start = np.array(cumulative_day_counts_start)
# cumulative_day_counts_end = np.array(cumulative_day_counts_end)

# ### too slow
# stack_list = [
#     np.nanmedian(block[:, :, start:end], axis=2)
#     for start, end in zip(cumulative_day_counts_start[:-1], cumulative_day_counts_end)
# ]

# fin_block = np.dstack(stack_list)


In [None]:
# # Define the function to compute median for one slice
# def compute_daily_median(start, end):
#     return np.nanmedian(block[:, :, start:end], axis=2)

# # Run in parallel
# stack_list = Parallel(n_jobs=31)(  # -1 uses all available CPU cores
#     delayed(compute_daily_median)(start, end)
#     for start, end in zip(cumulative_day_counts_start[:-1], cumulative_day_counts_end)
# )

# fin_block = np.dstack(stack_list)

In [None]:
# # tiff export
# import rasterio
# from rasterio.transform import from_origin

# # Assuming the dataset has spatial resolution info
# transform = from_origin(nXmin, nYmax, res, res)

# # Define output GeoTIFF filename
# output_filename = f'/data/{origin}et/Landsat/check.tif'

# # Open a new GeoTIFF file for writing
# with rasterio.open(output_filename, 'w', driver='GTiff', 
#                    height=fin_block.shape[0], width=fin_block.shape[1],
#                    count=1,#fin_block.shape[2], 
#                    dtype=fin_block.dtype,
#                    crs=f'EPSG:{landsat_epsg}', transform=transform) as dst:
#     dst.write(fin_block, 1)#fin_block.transpose(2, 0, 1))