In [None]:
import rioxarray as xr
import os
import glob
import pandas as pd
import xarray
import time
import numpy as np
from datetime import datetime

## Extract features of daily mean temperature

In [None]:
# Folder that holds the daily mean temperature NetCDF files.
nc_folder_features = "./features/daily_mean_temperature/"
# Every *.nc file located directly inside that folder.
nc_features_paths = glob.glob(f"{nc_folder_features}*.nc")
nc_features_paths

['./features/daily_mean_temperature/tg_ens_mean_0.1deg_reg_2011-2022_v26.0e.nc',
 './features/daily_mean_temperature/tg_ens_mean_0.1deg_reg_1995-2010_v26.0e.nc']

In [None]:
# Convert every NetCDF file listed in `nc_features_paths` into a GeoTIFF that
# is written next to it (same name, ".tif" extension).
for path_name in nc_features_paths:
    # The context manager closes the NetCDF file once the raster has been
    # written, instead of leaving every dataset open for the kernel's lifetime.
    with xarray.open_dataset(path_name) as dataset:
        # Keep only the 'tg' variable of the dataset.
        features = dataset['tg']
        # Swap the trailing extension only; str.replace would also rewrite any
        # other ".nc" occurring earlier in the path (e.g. in a folder name).
        features.rio.to_raster(os.path.splitext(path_name)[0] + ".tif")

## Extract features of daily precipitation sum

In [None]:
# Folder that holds the daily precipitation sum NetCDF files.
nc_folder_features = "./features/daily_precipitation_sum/"
# Every *.nc file located directly inside that folder.
nc_features_paths = glob.glob(f"{nc_folder_features}*.nc")
nc_features_paths

In [None]:
# Convert every NetCDF file listed in `nc_features_paths` into a GeoTIFF that
# is written next to it (same name, ".tif" extension).
for path_name in nc_features_paths:
    # The context manager closes the NetCDF file once the raster has been
    # written, instead of leaving every dataset open for the kernel's lifetime.
    with xarray.open_dataset(path_name) as dataset:
        # Keep only the 'rr' variable of the dataset.
        features = dataset['rr']
        # Swap the trailing extension only; str.replace would also rewrite any
        # other ".nc" occurring earlier in the path (e.g. in a folder name).
        features.rio.to_raster(os.path.splitext(path_name)[0] + ".tif")

## Crop tif files in the 10 regions

In [None]:
# Folder with one shapefile per region.
shape_files_dir = "./bacini_shp/"
# Every *.shp file located directly inside that folder.
shape_files = glob.glob(f"{shape_files_dir}*.shp")
shape_files

In [None]:
tif_files_dir = "./features/rasters/"
# List only the source rasters. The cropping cell writes its per-region
# outputs into this same folder with a ".tif" extension, so a plain '*.tif'
# glob would feed already-cropped files back into the cropping loop whenever
# the notebook is re-run. The cropping cell strips "_ens_mean_0.1deg_reg" from
# the names it writes, so that token identifies the not-yet-cropped rasters.
# Sorting makes the processing order deterministic.
tif_files = sorted(glob.glob(tif_files_dir + '*_ens_mean_0.1deg_reg*.tif'))
tif_files

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from shapely.geometry import mapping
import geopandas as gpd

# Clip every raster in `tif_files` to every region polygon in `shape_files`
# and write one GeoTIFF per (raster, region) pair next to the source raster.

# Read and re-project each shapefile once up front: the polygons do not depend
# on the raster, so reloading them for every raster is wasted I/O.
crop_extents = {}
for shape_name in shape_files:
    # Region name = shapefile name without folder and extension. Deriving it
    # from the path itself avoids relying on a hard-coded folder prefix.
    region_name = os.path.splitext(os.path.basename(shape_name))[0]
    crop_extents[region_name] = gpd.read_file(shape_name).to_crs(epsg=4326)

for tif_name in tif_files:
    raster = xr.open_rasterio(tif_name)
    # Tag the raster as EPSG:4326 once per file (previously repeated for every
    # region inside the inner loop).
    raster = raster.rio.set_crs('epsg:4326')

    # Shorten the output name by dropping the product and version tokens.
    output_prefix = tif_name.replace("_ens_mean_0.1deg_reg", "").replace("_v26.0e.tif", "")

    for region_name, crop_extent in crop_extents.items():
        tiff_clipped = raster.rio.clip(crop_extent.geometry.apply(mapping), crop_extent.crs)
        tiff_clipped.rio.to_raster(output_prefix + "_" + region_name + '.tif')

### Create csv files from cropped tif files (mean values)

In [None]:
# Region names; they are matched against the cropped raster file names.
regions = [
    'Adda',
    'Dora',
    'Emiliani1',
    'Piemonte_Sud',
    'Piemonte_Nord',
    'Oglio_Iseo',
    'Ticino',
    'Garda_Mincio',
    'Lambro_Olona',
    'Emiliani2',
]

# Input folder of the cropped rasters and output folder of the CSV files.
tif_files_dir = "./features/rasters/"
csv_files_dir = "./features/csv/"

# ranges : ['1995-01-01', '2010-12-31'], ['2011-01-01', '2022-06-30']
dates = ['1995-01-01', '2022-06-30']
# First day kept when the daily series is aggregated into 8-day blocks.
start = '2001-01-05'

# One timestamp per day over the whole range covered by the rasters.
days = pd.date_range(dates[0], dates[1], freq='D')

# First day of every 8-day block, with its calendar year and ISO week number.
dates_8days = pd.date_range(start, dates[1], freq='8D')
years = dates_8days.year.tolist()
weeks = [int(week) for week in dates_8days.isocalendar().week]

In [None]:
def feature_tifs_to_csv(feature):
    """Write, for every region, a CSV with the 8-day spatial means of `feature`.

    For each name in the module-level `regions` list, the cropped GeoTIFFs
    matching ``tif_files_dir + feature + '*' + region + '*.tif'`` are loaded in
    sorted order and placed side by side (one column per band). The columns
    are relabelled with the daily dates in `days`, and columns before `start`
    are discarded. Each remaining day is averaged over all grid cells, and the
    daily series is then averaged in consecutive blocks of 8 days (the last
    block may be shorter). The result is saved as
    ``csv_files_dir + region + "_" + feature + ".csv"`` with columns
    'mean', 'year' and 'week', indexed by `dates_8days`.

    Parameters
    ----------
    feature : str
        File-name prefix of the rasters to process (the notebook calls this
        with 'tg' and 'rr').

    Raises
    ------
    FileNotFoundError
        If no cropped raster is found for a region. Previously this case
        either raised a NameError (first region) or silently reused the data
        of the preceding region.
    """
    # DataFrame.to_csv does not create missing folders.
    os.makedirs(csv_files_dir, exist_ok=True)

    for region in regions:
        cropped_tif_files = sorted(glob.glob(tif_files_dir + feature + '*' + region + '*.tif'))
        if not cropped_tif_files:
            raise FileNotFoundError(
                f"No cropped raster found for feature '{feature}' and region '{region}' "
                f"in '{tif_files_dir}'"
            )

        per_file_frames = []
        for tif_path in cropped_tif_files:
            raster = xr.open_rasterio(tif_path).drop_vars(["spatial_ref"])
            dataframe = raster.to_dataset('band').to_dataframe()

            # -9999 is treated as the missing-value marker; rows (cells) that
            # contain any missing value are dropped.
            # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
            dataframe = dataframe.replace(-9999, np.nan).dropna()
            per_file_frames.append(dataframe)

        # NOTE(review): unlike feature_tifs_to_csv_allcoord, the values are not
        # multiplied by raster.attrs['scale_factor'] here - confirm whether the
        # means are meant to stay in the raster's stored units.
        complete_dataframe = pd.concat(per_file_frames, axis=1)

        # convert dates in readable ones and remove useless range
        complete_dataframe.columns = days.strftime('%Y-%m-%d')
        # ISO-formatted date strings compare correctly as plain strings.
        complete_dataframe = complete_dataframe.loc[:, complete_dataframe.columns >= start]

        # create dataframe with mean values for each 8 days and save it as csv
        means = complete_dataframe.mean(axis=0)
        means_8days = [means.iloc[i:i + 8].mean() for i in range(0, len(means), 8)]

        statistics = pd.DataFrame({'mean': means_8days, 'year': years, 'week': weeks},
                                  index=dates_8days)

        statistics.to_csv(csv_files_dir + region + "_" + feature + ".csv")

In [None]:
# Export the 8-day mean CSV files for both features.
for feature_name in ('tg', 'rr'):
    feature_tifs_to_csv(feature_name)

### Create csv files from cropped tif files (with coordinates; cyclostationary mean computed on the training set)

In [None]:
# Output folder for the per-cell (all coordinates) CSV files.
# NOTE: this rebinds the module-level `csv_files_dir`, which
# feature_tifs_to_csv also reads at call time. Re-running that function after
# this cell writes its files into this folder, under the same
# "<region>_<feature>.csv" names used by feature_tifs_to_csv_allcoord.
csv_files_dir = "./features/csv_allvalues/"

In [None]:
# Preview: cropped 'tg' rasters of the Emiliani2 region, in sorted order.
cropped_tif_files = sorted(glob.glob(f"{tif_files_dir}tg*Emiliani2*.tif"))
cropped_tif_files

['./features/rasters/tg_1995-2010_Emiliani2.tif',
 './features/rasters/tg_2011-2022_Emiliani2.tif']

In [None]:
# Load the first cropped raster listed above and flatten it into a DataFrame.
# The read used to be commented out, so this cell silently depended on
# whatever `raster` an earlier cell had left in the kernel (and failed on a
# fresh kernel if that cell had not been run).
raster = xr.open_rasterio(cropped_tif_files[0]).drop_vars(["spatial_ref"])
dataframe = raster.to_dataset('band').to_dataframe()

In [None]:
def feature_tifs_to_csv_allcoord(feature, train_fraction=0.6):
    """Write, for every region, a per-cell CSV of 8-day means and anomalies.

    For each name in the module-level `regions` list, the cropped GeoTIFFs
    matching ``tif_files_dir + feature + '*' + region + '*.tif'`` are loaded in
    sorted order, multiplied by their 'scale_factor' attribute and placed side
    by side (one column per band). The columns are relabelled with the daily
    dates in `days`, and columns before `start` are discarded. For every grid
    cell the daily values are averaged in consecutive blocks of 8 days (the
    last block may be shorter).

    The output, saved as ``csv_files_dir + region + "_" + feature + ".csv"``,
    is indexed by (date, y, x) and has the columns:

    - 'mean': 8-day mean of the cell;
    - 'year', 'week': taken from the module-level `years` and `weeks`;
    - 'cyclostationary_mean': 'mean' minus the average 'mean' of the same
      cell and week number, computed on the training part of the series only.

    Parameters
    ----------
    feature : str
        File-name prefix of the rasters to process (the notebook calls this
        with 'tg' and 'rr').
    train_fraction : float, default 0.6
        Share of the 8-day blocks, counted from the beginning, used to compute
        the week-of-year means. Must be in (0, 1]. The default reproduces the
        previously hard-coded value.

    Raises
    ------
    ValueError
        If `train_fraction` is outside (0, 1], or if the number of 8-day
        blocks does not match `dates_8days`.
    FileNotFoundError
        If no cropped raster is found for a region. Previously this case
        either raised a NameError (first region) or silently reused the data
        of the preceding region.
    """
    if not 0 < train_fraction <= 1:
        raise ValueError(f"train_fraction must be in (0, 1], got {train_fraction!r}")

    # DataFrame.to_csv does not create missing folders.
    os.makedirs(csv_files_dir, exist_ok=True)

    for region in regions:
        cropped_tif_files = sorted(glob.glob(tif_files_dir + feature + '*' + region + '*.tif'))
        if not cropped_tif_files:
            raise FileNotFoundError(
                f"No cropped raster found for feature '{feature}' and region '{region}' "
                f"in '{tif_files_dir}'"
            )

        per_file_frames = []
        for tif_path in cropped_tif_files:
            raster = xr.open_rasterio(tif_path).drop_vars(["spatial_ref"])
            dataframe = raster.to_dataset('band').to_dataframe()

            # -9999 is treated as the missing-value marker; rows (cells) that
            # contain any missing value are dropped.
            # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
            dataframe = dataframe.replace(-9999, np.nan).dropna()

            # fix the scale factor
            dataframe = dataframe * raster.attrs['scale_factor']
            per_file_frames.append(dataframe)

        complete_dataframe = pd.concat(per_file_frames, axis=1)
        complete_dataframe.columns = days.strftime('%Y-%m-%d')
        # ISO-formatted date strings compare correctly as plain strings.
        complete_dataframe = complete_dataframe.loc[:, complete_dataframe.columns >= start]

        # One Series of per-cell means for every block of 8 consecutive days.
        block_means = [
            complete_dataframe.iloc[:, i:i + 8].mean(axis=1)
            for i in range(0, len(complete_dataframe.columns), 8)
        ]
        if len(block_means) != len(dates_8days):
            raise ValueError(
                f"Got {len(block_means)} blocks of 8 days for region '{region}', "
                f"expected {len(dates_8days)}"
            )

        # Stack the blocks under a (date, y, x) index. Concatenating these small
        # per-block Series builds the same index as before without materialising
        # len(dates_8days) copies of the full daily frame.
        cells_means_8days = pd.concat(block_means, keys=dates_8days, names=['date'])

        n_cells = len(complete_dataframe)
        statistics = pd.DataFrame(
            {
                'mean': cells_means_8days.values,
                'year': np.repeat(years, n_cells),
                'week': np.repeat(weeks, n_cells),
            },
            index=cells_means_8days.index,
        )

        # Training part: every block dated before the block at the split position.
        split_position = round(len(dates_8days) * train_fraction)
        if split_position < len(dates_8days):
            block_dates = statistics.index.get_level_values(0)
            train_df = statistics[block_dates < dates_8days[split_position]]
        else:
            # The whole series is used for training.
            train_df = statistics

        # Week-of-year mean of every cell, from the training part only.
        weekoftheyear_mean = train_df.groupby(['week', 'y', 'x'])['mean'].mean()

        # The merged frame is re-labelled with the original (date, y, x) index;
        # this relies on the left merge keeping the row order of `statistics`.
        index = statistics.index
        statistics = pd.merge(statistics, weekoftheyear_mean, how='left', on=['week', 'y', 'x'],
                              suffixes=('', '_weekoftheyear')).set_index(index)
        statistics['cyclostationary_mean'] = statistics['mean'] - statistics['mean_weekoftheyear']
        statistics = statistics.drop(columns="mean_weekoftheyear")

        statistics.to_csv(csv_files_dir + region + "_" + feature + ".csv")

In [None]:
# Export the per-cell CSV files for both features.
for feature_name in ('tg', 'rr'):
    feature_tifs_to_csv_allcoord(feature_name)