# In [None]:
import os
import pandas as pd
from loguru import logger
import geopandas as gpd
from glob import glob
import json
from rasterstats import zonal_stats
from datetime import datetime
import xarray as xr
import rasterio
import numpy as np
from shapely.geometry import mapping
import rioxarray as rxr
from datetime import datetime
from datetime import timezone

# --- Configuration -----------------------------------------------------------
# Folder holding the NUTS shapefile used for the zonal statistics.
workflow_folder = 'C:/Users/artuso/Documents/CLIMAXX/preprocessing/New-folder'

# Root folder of the NetCDF inputs and the file-name stem of each variable;
# '{}' is filled with the variable name from ``nc_vars``.
nc_directory = "P:/watxene/ISIMIP/ISIMIP3a/InputData/climate/obsclim_updated/GSWP3-W5E5"
file_starts = 'gswp3-w5e5_obsclim_{}_global_daily_'
nc_vars = ['pr']

# The discovered paths are relative to ``nc_directory`` and are opened later
# on, so the working directory has to stay there.
os.chdir(nc_directory)
# ``**`` only descends into nested folders when ``recursive=True`` is given;
# without it the pattern silently behaves like ``*/*.nc``. Sorting keeps the
# file order stable between runs.
ncs = sorted(glob('**/*.nc', recursive=True))

logger.info(f'Found {len(ncs)} files.')

# Load the NUTS region polygons.
print('Load NUTS2 map')
shapefile = os.path.join(workflow_folder, "NUTS_RG_20M_2021_4326.shp")
nuts = gpd.read_file(shapefile)

def _cell_edge_transform(lons, lats):
    """Return the north-up affine transform of a regular lon/lat grid.

    ``lons`` and ``lats`` are 1-D arrays of cell-centre coordinates, whereas
    ``rasterio.transform.from_bounds`` expects the outer edges of the raster.
    The extent is therefore padded by half a cell on every side; using the
    centre coordinates directly would shrink every pixel and shift the grid.

    Raises:
        ValueError: if an axis has fewer than two cells, because the cell
            size cannot be inferred from a single coordinate.
    """
    if lons.size < 2 or lats.size < 2:
        raise ValueError(
            'Need at least two grid cells per axis to infer the cell size.'
        )
    half_dx = (lons.max() - lons.min()) / (lons.size - 1) / 2
    half_dy = (lats.max() - lats.min()) / (lats.size - 1) / 2
    return rasterio.transform.from_bounds(
        lons.min() - half_dx,
        lats.min() - half_dy,
        lons.max() + half_dx,
        lats.max() + half_dy,
        lons.size,
        lats.size,
    )


# Names of the dimensions in the NetCDF files and the GeoTIFF output folder.
time_var = 'time'
lat_var = 'lat'
lon_var = 'lon'
out_dir = 'C:/Users/artuso/Documents/CLIMAXX/preprocessing/New-folder/data'
os.makedirs(out_dir, exist_ok=True)

# ``outputs`` holds the zonal statistics (one list per month) of the variable
# processed last; ``outputs_by_var`` keeps the results of every variable.
outputs = []
outputs_by_var = {}

for nc_var in nc_vars:
    # If opening fails, install the optional dependencies netcdf4, h5netcdf
    # and dask.
    nc_file_pattern = file_starts.format(nc_var)
    varfiles = [f for f in ncs if nc_file_pattern in f]
    if not varfiles:
        raise FileNotFoundError(
            f'No NetCDF files matching {nc_file_pattern!r} were found.'
        )
    ds = xr.open_mfdataset(varfiles)

    # Aggregate to monthly means.
    # NOTE(review): recent pandas releases deprecate the 'M' alias in favour
    # of 'ME' -- confirm against the installed version before changing it.
    ds = ds.resample(time='M').mean()
    if ds.rio.crs is None:
        ds.rio.write_crs(4326, inplace=True)
    ds_c = ds.rio.clip(nuts.geometry.apply(mapping), nuts.crs)

    # Everything below is identical for all time steps, so compute it once.
    field = ds_c[nc_var].transpose(time_var, lat_var, lon_var)
    lats = ds_c[lat_var].values
    lons = ds_c[lon_var].values
    transform = _cell_edge_transform(lons, lats)
    # The transform is north-up, so row 0 must be the northernmost row.
    flip_rows = lats[0] < lats[-1]
    timestamps = pd.to_datetime(ds_c[time_var].values)

    outputs = []
    for t, stamp in enumerate(timestamps):
        # 2-D (lat, lon) slice for this month.
        data = field.isel({time_var: t}).values
        if flip_rows:
            data = np.ascontiguousarray(data[::-1, :])

        filename = os.path.join(
            out_dir, f'{stamp.year}-{stamp.month:02d}_{nc_var}.tif'
        )
        # Write the month as a single-band GeoTIFF.
        with rasterio.open(
            filename,
            'w',
            driver='GTiff',
            height=data.shape[0],
            width=data.shape[1],
            count=1,
            dtype=data.dtype,
            crs='epsg:4326',
            transform=transform,
        ) as dst:
            dst.write(data, 1)

        # The array and transform are already in memory, so the statistics
        # are computed from them directly instead of re-reading the file.
        ds_zonal_stats = zonal_stats(
            nuts, data, affine=transform, stats=['sum']
        )
        outputs.append(ds_zonal_stats)
    outputs_by_var[nc_var] = outputs
print (outputs)


# print(ds.rio.crs)
# print(nuts.crs)