# SMA

In [5]:
import os
import glob
import matplotlib.pyplot as plt
import numpy as np
from shapely.geometry import mapping
import rioxarray as rxr
import geopandas as gpd
import xarray as xr
import pandas as pd


In [6]:
def tifToDataframe(tifPath):
    """Load a raster file and flatten it into a pandas DataFrame.

    The raster's x/y coordinates are passed through ``round_coord`` and the
    ``spatial_ref`` variable is dropped before the conversion.

    Parameters
    ----------
    tifPath : path of the raster handed to ``rioxarray.open_rasterio``.

    Returns
    -------
    The DataFrame produced by ``to_dataset('band').to_dataframe()``.
    """
    raster = rxr.open_rasterio(tifPath)
    snapped_coords = {"x": round_coord(raster.x), "y": round_coord(raster.y)}
    raster = raster.assign_coords(**snapped_coords).drop_vars(["spatial_ref"])
    # Splitting on 'band' turns each band into its own data variable
    # (original author's note: to_dataset inverts the x and y order).
    return raster.to_dataset('band').to_dataframe()

# filter land use ids
def cultivable_areas_only(data):
    """Return the rows of ``data`` whose ``band`` value is one of the ids 11-16."""
    is_cultivable = data.band.isin((11, 12, 13, 14, 15, 16))
    return data.loc[is_cultivable]

# round the coordinates to the nearest 10
def round_coord(x):
    """Snap the values of ``x`` to the nearest multiple of 10.

    ``x`` must support division and expose a ``.round()`` method
    (e.g. numpy arrays, pandas Series, xarray objects).
    """
    tens = x / 10
    return tens.round() * 10

### Extraction of tiffs related to each subbasin

In [20]:
# Basins to process; each name is used to locate "<basin>.shp" and the output sub-folder.
basins = ['Adda','Ticino','Lambro_Olona','Piemonte_Nord','Piemonte_Sud','Garda_Mincio','Oglio_Iseo','Emiliani2','Emiliani1','Dora']
# The SMA rasters are stored in one folder per year.
years = ['2001','2002','2003','2004','2005','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015','2016','2017','2018','2019']

for basin in basins:
    # The basin outline does not depend on the year: read and reproject it once per basin
    # instead of once per (basin, year) pair.
    crop_extent = gpd.read_file("/Users/paolo/Desktop/prova Qgis/altro /Bacini/"+basin+".shp")
    crop_extent = crop_extent.to_crs(epsg=3035)
    crop_shapes = crop_extent.geometry.apply(mapping)

    for year in years:
        year_folder = "smian_m_euu_"+year+"0101_"+year+"1221_t/"
        raster_folder = "/Users/paolo/Desktop/prova Qgis/altro /SMA/"+year_folder
        output_folder = "/Users/paolo/Desktop/prova Qgis/altro /SMA_cropped/"+basin+"/"+year_folder
        # Make sure the destination exists so the first run does not fail on a missing folder.
        os.makedirs(output_folder, exist_ok=True)

        rasters_files = glob.glob(raster_folder+'*.tif')
        for rasters_file in rasters_files:
            # Use the real file name rather than a fixed-width slice of the path.
            # NOTE(review): later cells slice these names with fixed offsets, so the
            # source rasters are presumably all 26 characters long — confirm.
            rasters_name = os.path.basename(rasters_file)
            raster = rxr.open_rasterio(rasters_file)
            tiff_clipped = raster.rio.clip(crop_shapes, crop_extent.crs)
            tiff_clipped.rio.to_raster(output_folder+rasters_name[:-4]+"_cropped.tif")


### Land use 

In [23]:
# Land-use raster of the Adda basin with the "spatial_ref" variable dropped.
area = (
    rxr.open_rasterio('/Users/paolo/Desktop/prova Qgis/altro /land_usage/Adda.tif')
    .drop_vars(["spatial_ref"])
)


### together

In [162]:
for area in basins:
    # Land-use raster of the basin, flattened to one row per pixel.
    cult_df = rxr.open_rasterio('/Users/paolo/Desktop/prova Qgis/altro /land_usage/'+area+'.tif')
    cult_df = cult_df.assign_coords(x = (cult_df.x/10).round(3)*10, y = (cult_df.y/10).round(3)*10).drop_vars(["spatial_ref"])
    cult_df = cult_df.to_dataset('band').to_dataframe()
    cult_df = cult_df.rename(columns={1:'band'})

    # Keep only the pixels whose value is 1 (reported below as "cultivable").
    # This single filter also discards the 255 value that used to be removed separately.
    cult_df = cult_df.loc[cult_df.band.isin([1])]
    print(f"Cultivable points for {area}, extracted {cult_df.shape}")

    # Make sure the destination exists before writing any raster into it.
    output_folder = '/Users/paolo/Desktop/prova Qgis/altro /SMA_cropped_cult/'+area+'/'
    os.makedirs(output_folder, exist_ok=True)

    for year in years:
        raster_folder = "/Users/paolo/Desktop/prova Qgis/altro /SMA_cropped/"+area+"/smian_m_euu_"+year+"0101_"+year+"1221_t/"
        rasters_files = glob.glob(raster_folder+'*.tif')

        for rasters_file in rasters_files:

            # Reproject, then round the coordinates with the same expression used for
            # the land-use frame so that both frames share identical index values.
            raster = rxr.open_rasterio(rasters_file).rio.reproject("EPSG:4326")
            raster = raster.assign_coords(x = (raster.x/10).round(3)*10, y = (raster.y/10).round(3)*10).drop_vars(["spatial_ref"])
            # 'band' : name of the data variable (!to dataset inverts x and y order)
            dataframe = raster.to_dataset('band').to_dataframe()
            dataframe = dataframe.rename(columns={1:'band'})

            dataframe = dataframe.dropna()

            # Inner join on the coordinate index keeps only the pixels present in cult_df.
            join = dataframe.join(cult_df, how = 'inner', lsuffix = '_vhi', rsuffix = '_id')
            # Drop the last column (the land-use value); only the raster value is written out.
            df = join.iloc[: , :-1]
            df = df.reset_index().drop_duplicates()
            df = df.set_index(['y','x'])
            xarr = df.to_xarray()
            xarr = xarr.rio.set_crs('epsg:4326')
            # The fixed-width slice of the input name is kept on purpose: the aggregation
            # cell reads the date back from the resulting file name with fixed offsets.
            xarr.rio.to_raster(output_folder+rasters_file[-26:-4]+"_croppedCult.tif")
    

Cultivable points for Adda, extracted (26900, 1)
Cultivable points for Ticino, extracted (17046, 1)
Cultivable points for Lambro_Olona, extracted (57356, 1)
Cultivable points for Piemonte_Nord, extracted (52115, 1)
Cultivable points for Piemonte_Sud, extracted (74110, 1)
Cultivable points for Garda_Mincio, extracted (60563, 1)
Cultivable points for Oglio_Iseo, extracted (61372, 1)
Cultivable points for Emiliani2, extracted (121964, 1)
Cultivable points for Emiliani1, extracted (169977, 1)
Cultivable points for Dora, extracted (4011, 1)


### Final aggregations into averages

In [193]:
for area in basins:
    raster_folder = "/Users/paolo/Desktop/prova Qgis/altro /SMA_cropped_cult/"+area+"/"
    # Sorted so that the rows of the output table follow file-name order.
    rasters_files = sorted(glob.glob(raster_folder+'*.tif'))
    data = []

    for rasters_file in rasters_files:
        pixels = (
            rxr.open_rasterio(rasters_file)
            .rio.reproject("EPSG:4326")
            .drop_vars(["spatial_ref"])
            .to_dataset('band')
            .to_dataframe()
            .rename(columns={1:'band'})
            .dropna()
        )
        # `.iloc[0]` picks the first column's mean explicitly; the previous `[0]` relied on
        # an integer key being treated as a position on a label-indexed Series.
        mean = pixels.mean().iloc[0]
        # Characters [-34:-26] of the path are stored as the 'time' value of the row.
        time = rasters_file[-34:-26]
        data.append([time,mean])
    final_df = pd.DataFrame(data,columns=['time','mean'])
    # Make sure the destination exists before writing the CSV.
    os.makedirs("/Users/paolo/Desktop/prova Qgis/altro /final_dataframes/", exist_ok=True)
    final_df.to_csv("/Users/paolo/Desktop/prova Qgis/altro /final_dataframes/"+area+'.csv',index=False)

### comparisons with VHI

In [274]:
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression

# Folder holding one VHI target CSV per basin (constant, so defined once).
path_VHI = "/Users/paolo/Documents/Droughts/Veronica/VHI_target/"

for area in basins:
    print(f'######## {area} #########')

    df_VHI = pd.read_csv(path_VHI+area+'.csv')
    # 'Unnamed: 0' is compared as a string against an ISO-formatted date.
    df_VHI = df_VHI.loc[df_VHI['Unnamed: 0']<='2019-12-31']

    df_SMA = pd.read_csv("/Users/paolo/Desktop/prova Qgis/altro /final_dataframes/"+area+'.csv')
    # Parse the YYYYMMDD stamps in one vectorised call instead of two row-wise applies;
    # only the first 8 characters are used, as in the original slicing.
    df_SMA['time'] = pd.to_datetime(df_SMA['time'].astype(str).str[:8], format='%Y%m%d')
    iso_calendar = df_SMA['time'].dt.isocalendar()
    df_SMA['week'] = iso_calendar.week
    df_SMA['year'] = iso_calendar.year

    # Match the two series on (ISO week, ISO year).
    df_SMA = df_SMA.set_index(['week','year'])
    df_VHI = df_VHI.set_index(['week','year'])
    joint = df_VHI.join(df_SMA, how = 'inner', lsuffix = '_vhi', rsuffix = '_SMA')

    # Flatten the index once and split: years before 2015 fit the model, 2015 onwards test it.
    joint_flat = joint.reset_index()
    joint_test = joint_flat.loc[joint_flat.year>=2015]
    joint_trainVal = joint_flat.loc[joint_flat.year<2015]

    # Univariate linear regression of the VHI mean on the SMA mean; R2 on the test years.
    regr = LinearRegression()
    regr.fit(joint_trainVal.mean_SMA.values.reshape(-1,1),joint_trainVal.mean_vhi)
    preds = regr.predict(joint_test.mean_SMA.values.reshape(-1,1))
    print(r2_score(joint_test.mean_vhi, preds))



######## Adda #########
0.008163028576268716
######## Ticino #########
0.06207029821186205
######## Lambro_Olona #########
0.08137829294377497
######## Piemonte_Nord #########
0.024473442800698075
######## Piemonte_Sud #########
0.12443466319648488
######## Garda_Mincio #########
-0.0528589081175912
######## Oglio_Iseo #########
0.04765528278321374
######## Emiliani2 #########
-0.1396770462765733
######## Emiliani1 #########
-0.025113477435316334
######## Dora #########
-0.11332682147947537


In [3]:
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
basins = ['Adda','Ticino','Lambro_Olona','Piemonte_Nord','Piemonte_Sud','Garda_Mincio','Oglio_Iseo','Emiliani2','Emiliani1','Dora']
# Folder holding one VHI target CSV per basin (constant, so defined once).
path_VHI = "/Users/paolo/Documents/Droughts/Veronica/VHI_target/"

for area in basins:
    print(f'######## {area} #########')

    df_VHI = pd.read_csv(path_VHI+area+'.csv')
    # 'Unnamed: 0' is compared as a string against an ISO-formatted date.
    df_VHI = df_VHI.loc[df_VHI['Unnamed: 0']<='2019-12-31']

    df_SMA = pd.read_csv("/Users/paolo/Desktop/prova Qgis/altro /final_dataframes/"+area+'.csv')
    # Parse the YYYYMMDD stamps in one vectorised call instead of two row-wise applies;
    # only the first 8 characters are used, as in the original slicing.
    df_SMA['time'] = pd.to_datetime(df_SMA['time'].astype(str).str[:8], format='%Y%m%d')
    iso_calendar = df_SMA['time'].dt.isocalendar()
    df_SMA['week'] = iso_calendar.week
    df_SMA['year'] = iso_calendar.year

    # Match the two series on (ISO week, ISO year).
    df_SMA = df_SMA.set_index(['week','year'])
    df_VHI = df_VHI.set_index(['week','year'])
    joint = df_VHI.join(df_SMA, how = 'inner', lsuffix = '_vhi', rsuffix = '_SMA')

    # Flatten the index once and split: years before 2015 fit the model, 2015 onwards test it.
    joint_flat = joint.reset_index()
    joint_test = joint_flat.loc[joint_flat.year>=2015]
    joint_trainVal = joint_flat.loc[joint_flat.year<2015]

    # Univariate linear regression of the VHI mean on the SMA mean; R2 on the test years.
    regr = LinearRegression()
    regr.fit(joint_trainVal.mean_SMA.values.reshape(-1,1),joint_trainVal.mean_vhi)
    preds = regr.predict(joint_test.mean_SMA.values.reshape(-1,1))
    print(r2_score(joint_test.mean_vhi, preds))



######## Adda #########
0.008163028576268716
######## Ticino #########
0.06207029821186205
######## Lambro_Olona #########
0.08137829294377497
######## Piemonte_Nord #########
0.024473442800698075
######## Piemonte_Sud #########
0.12443466319648488
######## Garda_Mincio #########
-0.0528589081175912
######## Oglio_Iseo #########
0.04765528278321374
######## Emiliani2 #########
-0.1396770462765733
######## Emiliani1 #########
-0.025113477435316334
######## Dora #########
-0.11332682147947537


In [8]:
basins = ['Adda','Ticino','Lambro_Olona','Piemonte_Nord','Piemonte_Sud','Garda_Mincio','Oglio_Iseo','Emiliani2','Emiliani1','Dora']

# Collect one "<basin>_SMA_mean" column per basin and concatenate once at the end,
# instead of growing a DataFrame inside the loop.
sma_mean_columns = []
for area in basins:
    df_SMA = pd.read_csv("/Users/paolo/Desktop/prova Qgis/altro /final_dataframes/"+area+'.csv')
    # Vectorised YYYYMMDD parsing (first 8 characters only, as in the original slicing).
    df_SMA['time'] = pd.to_datetime(df_SMA['time'].astype(str).str[:8], format='%Y%m%d')
    iso_calendar = df_SMA['time'].dt.isocalendar()
    df_SMA['week'] = iso_calendar.week
    df_SMA['year'] = iso_calendar.year

    df_SMA = df_SMA.add_prefix(area+'_SMA_')
    sma_mean_columns.append(df_SMA[area+'_SMA_mean'])

df_SMA_full = pd.concat(sma_mean_columns, axis=1)
df_SMA_full

Unnamed: 0,Adda_SMA_mean,Ticino_SMA_mean,Lambro_Olona_SMA_mean,Piemonte_Nord_SMA_mean,Piemonte_Sud_SMA_mean,Garda_Mincio_SMA_mean,Oglio_Iseo_SMA_mean,Emiliani2_SMA_mean,Emiliani1_SMA_mean,Dora_SMA_mean
0,0.849422,1.311388,1.274795,1.365120,0.983377,0.708695,0.634947,0.731970,0.839872,2.142798
1,0.863587,1.290474,1.282282,1.399889,1.036304,0.439819,0.655273,0.713870,0.796714,2.213835
2,0.863816,1.257027,1.249491,1.353262,0.869196,0.932197,0.681310,0.809809,0.845765,2.279394
3,0.924851,1.325049,1.253892,1.319815,0.693507,0.790508,0.722208,0.742659,0.803679,2.381575
4,0.878896,1.317425,1.168883,1.380952,0.718440,0.530051,0.631324,0.583605,0.752648,2.497736
...,...,...,...,...,...,...,...,...,...,...
679,0.684699,0.921448,0.670132,0.512691,0.880514,0.315443,0.620895,0.457354,0.214329,-0.135127
680,0.759513,1.290747,1.066558,1.061680,1.186315,0.181804,0.629178,0.652344,0.470714,0.852722
681,0.645140,1.197498,0.982258,0.982553,1.135482,0.119567,0.508179,0.596114,0.407331,0.950974
682,0.680269,1.148829,0.974342,0.944071,1.125849,0.441563,0.538040,0.581679,0.440245,0.973167


In [1]:
import tefs

# NOTE(review): scratch cell, not runnable as written. The `for` header below is
# fused with an assignment (a SyntaxError) and is indented without an enclosing
# block; `df1`, `temp_cmi` and `k` are not defined anywhere in the visible
# notebook, and `df1[selected_feat]` indexes with a DataFrame rather than with
# column labels. It looks like the intent was to store one
# `tefs.estimation.estimate_cmi(...)` result per column in `temp_cmi` —
# confirm the intended loop variable and inputs before rewriting.
selected_feat = df1.iloc[:,1:3]

    for col in temp_cmi[col] = tefs.estimation.estimate_cmi(df1.iloc[:,3:].to_numpy(),         # X
                                                            df1.iloc[:,0].to_numpy(),      # Y
                                                            df1[selected_feat].to_numpy(), # Z
                                                            k=k) # Z
print(temp_cmi)

In [33]:
# Defined here so the cell no longer depends on `path_VHI` leaking from another cell.
path_VHI = "/Users/paolo/Documents/Droughts/Veronica/VHI_target/"

# Collect one VHI mean column per basin (named after the basin) and concatenate once.
vhi_mean_columns = []
for area in basins:
    df_VHI = pd.read_csv(path_VHI+area+'.csv')
    df_VHI = df_VHI.loc[df_VHI['Unnamed: 0']<='2019-12-31']

    df_VHI = pd.DataFrame(df_VHI["mean"]).rename(columns={"mean": area})
    vhi_mean_columns.append(df_VHI[area])

df_VHI_full = pd.concat(vhi_mean_columns, axis=1)
# Keep only the last 684 rows and renumber them from 0.
# NOTE(review): 684 presumably matches the number of rows in df_SMA_full; this pairs rows
# by position rather than by date — confirm that the two tables really line up.
df_VHI_full = df_VHI_full.iloc[-684:,:].reset_index(drop=True)
df_VHI_full


Unnamed: 0,Adda,Ticino,Lambro_Olona,Piemonte_Nord,Piemonte_Sud,Garda_Mincio,Oglio_Iseo,Emiliani2,Emiliani1,Dora
0,0.323763,0.323654,0.407123,0.384664,0.354659,0.294448,0.343456,0.337493,0.380403,0.131541
1,0.317909,0.294593,0.337260,0.361902,0.376762,0.210074,0.267571,0.361583,0.346538,0.195843
2,0.461866,0.395222,0.400677,0.374789,0.341483,0.460568,0.578838,0.432732,0.552533,0.241536
3,0.369182,0.369648,0.393716,0.433280,0.378707,0.340419,0.388670,0.328709,0.452272,0.276412
4,0.248531,0.190191,0.211652,0.192972,0.239732,0.223180,0.308625,0.249552,0.356328,0.136069
...,...,...,...,...,...,...,...,...,...,...
679,0.615317,0.663974,0.690351,0.705207,0.739893,0.707366,0.640075,0.707366,0.562210,0.463120
680,0.415682,0.457430,0.467781,0.508869,0.624494,0.478972,0.440248,0.478972,0.416967,0.293608
681,0.335109,0.353519,0.353293,0.412298,0.452255,0.369868,0.354495,0.369868,0.327186,0.233287
682,0.471742,0.469796,0.466860,0.513418,0.435795,0.496676,0.485338,0.496676,0.519451,0.287428


In [51]:
# for linear regression
from sklearn.metrics import r2_score,mean_absolute_error

def _stack_features_with_basin_onehot(features, clust_basins):
    """Repeat ``features`` once per basin, adding one 0/1 indicator column per basin."""
    blocks = []
    for position in range(len(clust_basins)):
        block = features.copy()
        for other_position, other_basin in enumerate(clust_basins):
            block[other_basin] = int(other_position == position)
        blocks.append(block)
    return pd.concat(blocks, axis=0)


def _unfold_targets(targets_df, clust_basins):
    """Concatenate the target columns of the given basins into one flat array."""
    return np.concatenate([targets_df[basin].to_numpy() for basin in clust_basins])


def MTL_scores(clust_basins, df_train, df_val, df_test, targets_df_train, targets_df_val, targets_df_test):
    """Fit one linear model for a cluster of basins and print per-basin test scores.

    The feature columns are those of ``df_train`` whose name starts with one of
    the basin names. The feature table is repeated once per basin with a
    one-hot basin indicator appended, and the matching target columns are
    stacked in the same basin order. The model is fitted on train + validation.

    Parameters
    ----------
    clust_basins : list of basin names forming the cluster.
    df_train, df_val, df_test : feature DataFrames for the three splits.
    targets_df_train, targets_df_val, targets_df_test : target DataFrames with
        one column per basin name.

    Returns
    -------
    None. For every basin it prints, in order: the basin name, the R2 score,
    the Pearson correlation and the mean absolute error on the test split.
    """
    prefixes = tuple(clust_basins)
    colnames = [x for x in df_train.columns if x.startswith(prefixes)]

    # One block of rows per basin; the indicator columns tell the model which
    # basin a row's target belongs to. Built with plain column assignment
    # instead of a row-wise apply.
    clusterdf_train_withClass = _stack_features_with_basin_onehot(df_train[colnames], clust_basins)
    clusterdf_val_withClass = _stack_features_with_basin_onehot(df_val[colnames], clust_basins)
    clusterdf_test_withClass = _stack_features_with_basin_onehot(df_test[colnames], clust_basins)

    # Targets are stacked as flat arrays in the same basin order as the feature
    # blocks. This avoids concatenating an empty DataFrame with named Series,
    # whose resulting column layout depends on the pandas version.
    fit_features = pd.concat((clusterdf_train_withClass, clusterdf_val_withClass)).values
    fit_targets = np.concatenate((
        _unfold_targets(targets_df_train, clust_basins),
        _unfold_targets(targets_df_val, clust_basins),
    ))

    model_ohe = LinearRegression()
    model_ohe.fit(fit_features, fit_targets)

    for basin in clust_basins:
        print(basin)
        # Score only the test rows whose indicator marks this basin.
        res = model_ohe.predict(clusterdf_test_withClass.loc[clusterdf_test_withClass[basin]==1].values)
        observed = targets_df_test[basin].values.ravel()
        print(r2_score(observed, res))
        print(np.corrcoef(observed, res)[0][1])
        print(mean_absolute_error(observed, res))


In [52]:
# Score every single basin and every multi-basin cluster.
# Rows 0-229 train, 230-457 validate, 458 onwards test.
for clust_basins in [
    ['Adda'],
    ['Ticino'],
    ['Lambro_Olona'],
    ['Oglio_Iseo'],
    ['Adda','Ticino','Lambro_Olona','Oglio_Iseo'],
    ['Garda_Mincio'],
    ['Emiliani1'],
    ['Emiliani2'],
    ['Garda_Mincio','Emiliani1','Emiliani2'],
    ['Dora'],
    ['Piemonte_Sud'],
    ['Piemonte_Nord'],
    ['Dora','Piemonte_Sud','Piemonte_Nord'],
]:
    MTL_scores(
        clust_basins=clust_basins,
        df_train=df_SMA_full.iloc[:230,:].reset_index(drop=True),
        df_val=df_SMA_full.iloc[230:458,:].reset_index(drop=True),
        df_test=df_SMA_full.loc[458:,:].reset_index(drop=True),
        targets_df_train=df_VHI_full.iloc[:230,:].reset_index(drop=True),
        targets_df_val=df_VHI_full.iloc[230:458,:].reset_index(drop=True),
        targets_df_test=df_VHI_full.loc[458:,:].reset_index(drop=True),
    )

    

Adda
-0.04856669225899446
-0.0642064555562701
0.1030896195282505
Ticino
-0.0023765412868939073
0.02765305461202426
0.09640548917839399
Lambro_Olona
-0.02642158594010935
-0.040367785464065026
0.10022186933173359
Oglio_Iseo
-0.04006829284992697
-0.18511629686299566
0.1121428349165992
Adda
-0.07782992920519871
-0.16214107493724442
0.10448174130530972
Ticino
-0.03932733389129739
-0.08672012943058795
0.097618167920354
Lambro_Olona
-0.04717596063245866
-0.08564534216838657
0.10080606245575222
Oglio_Iseo
-0.0516284721061091
-0.20010330192972867
0.11276208314159292
Garda_Mincio
-0.05180291505853796
-0.06003929695437554
0.11026623127124598
Emiliani1
-0.01464501960310538
0.08035649218104327
0.1036676325463515
Emiliani2
-0.008280948304708824
0.09957950482780868
0.10803431119284634
Garda_Mincio
-0.04434017641306287
0.2125978539097036
0.1094569100133854
Emiliani1
-0.04603300354314821
0.09365564735098249
0.10486097482259273
Emiliani2
0.007197175429164315
0.21259785390970362
0.10698088945932058
Dora


In [76]:
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
basins = ['Adda','Ticino','Lambro_Olona','Piemonte_Nord','Piemonte_Sud','Garda_Mincio','Oglio_Iseo','Emiliani2','Emiliani1','Dora']
# Folder holding one VHI target CSV per basin (constant, so defined once).
path_VHI = "/Users/paolo/Documents/Droughts/Veronica/VHI_target/"

# Per-basin two-column frames (VHI mean, SMA mean), collected and concatenated
# once after the loop instead of growing three DataFrames inside it.
joint_frames = []
joint_test_frames = []
joint_trainVal_frames = []

for area in basins:
    print(f'######## {area} #########')

    df_VHI = pd.read_csv(path_VHI+area+'.csv')
    # 'Unnamed: 0' is compared as a string against an ISO-formatted date.
    df_VHI = df_VHI.loc[df_VHI['Unnamed: 0']<='2019-12-31']

    df_SMA = pd.read_csv("/Users/paolo/Desktop/prova Qgis/altro /final_dataframes/"+area+'.csv')
    # Vectorised YYYYMMDD parsing (first 8 characters only, as in the original slicing).
    df_SMA['time'] = pd.to_datetime(df_SMA['time'].astype(str).str[:8], format='%Y%m%d')
    iso_calendar = df_SMA['time'].dt.isocalendar()
    df_SMA['week'] = iso_calendar.week
    df_SMA['year'] = iso_calendar.year

    # Join the two 'mean' columns on (ISO week, ISO year); the VHI column is renamed
    # to the bare basin name and the SMA one becomes "<basin>_mean_SMA".
    df_SMA = df_SMA.set_index(['week','year']).loc[:,'mean']
    df_VHI = df_VHI.set_index(['week','year']).loc[:,['mean']]
    joint = df_VHI.join(df_SMA, how = 'inner', rsuffix = '_SMA').add_prefix(area+'_').rename(columns={area+'_mean':area})

    # Split by year: before 2015 is train/validation, 2015 onwards is test.
    row_years = joint.reset_index().year
    joint_values = joint.reset_index(drop=True)
    joint_test = joint_values.loc[row_years>=2015]
    joint_trainVal = joint_values.loc[row_years<2015]

    joint_frames.append(joint_values)
    joint_test_frames.append(joint_test.reset_index(drop=True))
    joint_trainVal_frames.append(joint_trainVal.reset_index(drop=True))

joint_full = pd.concat(joint_frames, axis=1)
joint_test_full = pd.concat(joint_test_frames, axis=1)
joint_trainVal_full = pd.concat(joint_trainVal_frames, axis=1)
joint_trainVal_full


######## Adda #########
######## Ticino #########
######## Lambro_Olona #########
######## Piemonte_Nord #########
######## Piemonte_Sud #########
######## Garda_Mincio #########
######## Oglio_Iseo #########
######## Emiliani2 #########
######## Emiliani1 #########
######## Dora #########


Unnamed: 0,Adda,Adda_mean_SMA,Ticino,Ticino_mean_SMA,Lambro_Olona,Lambro_Olona_mean_SMA,Piemonte_Nord,Piemonte_Nord_mean_SMA,Piemonte_Sud,Piemonte_Sud_mean_SMA,Garda_Mincio,Garda_Mincio_mean_SMA,Oglio_Iseo,Oglio_Iseo_mean_SMA,Emiliani2,Emiliani2_mean_SMA,Emiliani1,Emiliani1_mean_SMA,Dora,Dora_mean_SMA
0,0.039373,0.849422,0.264043,1.311388,0.369625,1.274795,0.278983,1.365120,0.278060,0.983377,0.102270,0.708695,0.243674,0.634947,0.214281,0.731970,0.379890,0.839872,0.010645,2.142798
1,0.177358,0.849422,0.237810,1.311388,0.224397,1.274795,0.248288,1.365120,0.187961,0.983377,0.220697,0.708695,0.195853,0.634947,0.172254,0.731970,0.170410,0.839872,0.120195,2.142798
2,0.120212,0.737806,0.212735,1.140080,0.207965,1.171900,0.460528,1.351690,0.397668,0.998228,0.017754,0.963069,0.119366,0.634193,0.240512,0.949743,0.281244,1.045687,0.200888,1.975805
3,0.036163,0.737806,0.065804,1.140080,0.033713,1.171900,0.004753,1.351690,0.017034,0.998228,0.000000,0.963069,0.036146,0.634193,0.011346,0.949743,0.012742,1.045687,0.000000,1.975805
4,0.360632,-0.275170,0.376017,-0.578595,0.434309,-0.422248,0.444263,-0.432795,0.475798,-0.581498,0.272415,-0.446908,0.365597,-0.072119,0.348981,-0.203850,0.341569,-0.729762,0.221679,-0.674974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
440,0.324728,0.582604,0.353636,0.371264,0.447916,0.135517,0.387530,-0.251399,0.457440,0.442481,0.043947,0.379397,0.307880,0.250749,0.507978,0.347683,0.295704,0.129382,0.099246,-0.582066
441,0.415684,-0.234582,0.321518,-0.523416,0.325755,-0.816502,0.374563,-0.597414,0.321975,-0.222833,0.151873,-1.031078,0.450752,0.029254,0.151873,-0.883862,0.319530,-2.146938,0.216306,-0.798986
442,0.409271,-0.234582,0.380277,-0.523416,0.389156,-0.816502,0.391392,-0.597414,0.302949,-0.222833,0.403820,-1.031078,0.445226,0.029254,0.403820,-0.883862,0.407227,-2.146938,0.198335,-0.798986
443,0.270012,0.202987,0.331444,-0.200214,0.376098,-0.175042,0.367717,-0.394414,0.326592,-0.186159,0.334720,0.138064,0.314533,0.345844,0.328501,0.019416,0.418832,0.534049,0.101100,-0.136459


In [77]:
# In the joint tables the columns alternate: even positions hold the VHI
# targets, odd positions hold the SMA features (ten basins, twenty columns).
sma_positions = list(range(1, 20, 2))
vhi_positions = list(range(0, 20, 2))

# The first 200 train/validation rows train, the remaining ones validate.
for clust_basins in [
    ['Adda'],
    ['Ticino'],
    ['Lambro_Olona'],
    ['Oglio_Iseo'],
    ['Adda','Ticino','Lambro_Olona','Oglio_Iseo'],
    ['Garda_Mincio'],
    ['Emiliani1'],
    ['Emiliani2'],
    ['Garda_Mincio','Emiliani1','Emiliani2'],
    ['Dora'],
    ['Piemonte_Sud'],
    ['Piemonte_Nord'],
    ['Dora','Piemonte_Sud','Piemonte_Nord'],
]:
    MTL_scores(
        clust_basins=clust_basins,
        df_train=joint_trainVal_full.iloc[:200,sma_positions].reset_index(drop=True),
        df_val=joint_trainVal_full.iloc[200:,sma_positions].reset_index(drop=True),
        df_test=joint_test_full.iloc[:,sma_positions].reset_index(drop=True),
        targets_df_train=joint_trainVal_full.iloc[:200,vhi_positions].reset_index(drop=True),
        targets_df_val=joint_trainVal_full.iloc[200:,vhi_positions].reset_index(drop=True),
        targets_df_test=joint_test_full.iloc[:,vhi_positions].reset_index(drop=True),
    )

    

Adda
0.008163028576268716
0.24326153255436148
0.10592144729010196
Ticino
0.06207029821186205
0.34466001215698494
0.09365195620199497
Lambro_Olona
0.08137829294377497
0.3147961656890251
0.09704881424565945
Oglio_Iseo
0.04765528278321374
0.3214017216749031
0.1137016154550359
Adda
0.01168983009894542
0.23060857645290903
0.10559033068829114
Ticino
-0.0046445579719545105
0.20597385637870158
0.10022815219145569
Lambro_Olona
0.004474729529013621
0.21522900709839962
0.10336035692246834
Oglio_Iseo
0.06336266914689037
0.2743844052816775
0.11378091826740508
Garda_Mincio
-0.0528589081175912
0.3212661922976312
0.11534814254358922
Emiliani1
-0.025113477435316334
0.3512933407268807
0.10610671183039966
Emiliani2
-0.1396770462765733
0.37480858880442497
0.11930510767430287
Garda_Mincio
-0.2601266627545269
0.3923618047466804
0.12506034155854434
Emiliani1
-0.22616965583991977
0.3078354688616028
0.11585921750791141
Emiliani2
-0.05242579469861086
0.3923618047466804
0.1151790913528481
Dora
-0.113326821479475

In [2]:
basins = ['Adda','Ticino','Lambro_Olona','Piemonte_Nord','Piemonte_Sud','Garda_Mincio','Oglio_Iseo','Emiliani2','Emiliani1','Dora']
path_VHI = "/Users/paolo/Documents/Droughts/Veronica/VHI_target/"

# Start from the first basin's table, then attach one "mean_<basin>" column per basin.
df_VHI = pd.read_csv(f"{path_VHI}{basins[0]}.csv")
for area in basins:
    path_VHI = "/Users/paolo/Documents/Droughts/Veronica/VHI_target/"
    df_VHI_curr = pd.read_csv(f"{path_VHI}{area}.csv")
    df_VHI[f"mean_{area}"] = df_VHI_curr['mean']

In [3]:
import sys
sys.path.append("/Users/paolo/Documents/methods/MultiLinCFA")
from MultiLinCFA import NonLinCTA

In [4]:
# Show the first 867 rows of the last ten columns of df_VHI.
df_VHI.iloc[:867,-10:]

Unnamed: 0,mean_Adda,mean_Ticino,mean_Lambro_Olona,mean_Piemonte_Nord,mean_Piemonte_Sud,mean_Garda_Mincio,mean_Oglio_Iseo,mean_Emiliani2,mean_Emiliani1,mean_Dora
0,0.039373,0.264043,0.369625,0.278983,0.278060,0.102270,0.243674,0.214281,0.379890,0.010645
1,0.380618,0.354618,0.429563,0.494910,0.445159,0.454431,0.424116,0.484737,0.482679,0.206769
2,0.341985,0.427990,0.470784,0.496092,0.488982,0.323514,0.393786,0.466071,0.516259,0.267313
3,0.322044,0.339495,0.370358,0.427992,0.362487,0.301661,0.314939,0.417470,0.434421,0.240836
4,0.354954,0.324134,0.372263,0.400512,0.430732,0.394733,0.464902,0.492202,0.494805,0.193417
...,...,...,...,...,...,...,...,...,...,...
862,0.615317,0.663974,0.690351,0.705207,0.739893,0.707366,0.640075,0.707366,0.562210,0.463120
863,0.415682,0.457430,0.467781,0.508869,0.624494,0.478972,0.440248,0.478972,0.416967,0.293608
864,0.335109,0.353519,0.353293,0.412298,0.452255,0.369868,0.354495,0.369868,0.327186,0.233287
865,0.471742,0.469796,0.466860,0.513418,0.435795,0.496676,0.485338,0.496676,0.519451,0.287428


In [5]:
# Folder with the per-basin feature tables, already split into train / val / test.
features_dir = "/Users/paolo/Documents/Droughts/Veronica/NonLinCFA_final_features/temp_prec/"

# For every basin, stack train, validation and test rows back into one table, then
# place the basins side by side. All basins go through the same loop; the first one
# used to be handled by a copy-pasted block with a hard-coded 'Adda'.
basin_frames = []
for basin in basins:
    df_test = pd.read_csv(features_dir+basin+"_nonLinCFA_CMI_test.csv")
    df_train = pd.read_csv(features_dir+basin+"_nonLinCFA_CMI_train.csv")
    df_val = pd.read_csv(features_dir+basin+"_nonLinCFA_CMI_val.csv")
    df_curr = pd.concat((df_train,df_val,df_test),axis=0).reset_index(drop=True)
    basin_frames.append(df_curr)

df = pd.concat(basin_frames, axis=1)
df

Unnamed: 0,cyclostationary_mean_tg_3,cyclostationary_mean_rr_8w_0,cyclostationary_mean_rr_1w_0,cyclostationary_mean_tg_0,cyclostationary_mean_rr_4w_0,cyclostationary_mean_tg_2,cyclostationary_mean_tg_1w_5,cyclostationary_mean_rr_1w_1,cyclostationary_mean_tg_3.1,cyclostationary_mean_rr_8w_1,...,cyclostationary_mean_rr_1w_16,cyclostationary_mean_rr_24w_2,cyclostationary_mean_tg_0.1,cyclostationary_mean_tg_12w_0,cyclostationary_mean_tg_12w_1,cyclostationary_mean_rr_12w_0,cyclostationary_mean_tg_24w_0,cyclostationary_mean_tg_16w_0,cyclostationary_mean_tg_8w_0,cyclostationary_mean_rr_8w_0.1
0,0.723001,1.814900,1.002579,-0.410998,0.611605,-0.447071,-0.817809,0.983063,-0.002354,1.202459,...,0.441886,1.293583,-0.660721,-1.687475,-0.852681,-0.273561,-2.048114,-1.864772,-1.408862,-0.225478
1,1.550127,3.486901,1.976372,0.803930,1.691336,0.647375,-0.156472,1.221730,0.998370,2.256409,...,1.258048,0.519673,0.414456,-0.244265,0.435183,1.339306,-0.206067,-0.280341,-0.153497,1.167725
2,0.089708,2.176553,1.351690,-0.911268,0.832271,-0.540914,-0.410786,0.601920,-0.402219,1.375422,...,0.894924,0.668629,-1.224984,-1.443935,-0.318581,0.336223,-1.642723,-1.544090,-1.132374,0.302564
3,1.303410,2.160372,0.465520,0.538884,0.859041,0.453677,-0.597913,0.263048,0.730195,1.423248,...,1.080434,-0.008145,0.137561,-0.933379,-0.128341,0.580213,-1.039207,-0.996179,-0.721693,0.513097
4,0.581894,1.585804,0.405690,-0.689709,0.647203,0.012920,-0.359136,0.136126,-0.081608,1.006237,...,1.342126,1.773844,-0.950332,-1.292032,-0.243374,0.449412,-1.479871,-1.397019,-1.021249,0.398925
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,1.038318,1.492722,2.782558,-0.340786,2.254330,0.761692,0.149462,2.869661,0.511250,1.537346,...,2.060772,0.890943,-0.651021,0.281817,0.880369,1.079805,0.449477,0.493028,0.543802,1.483194
863,2.200521,2.012044,2.269419,0.665902,1.612833,1.995967,1.333519,3.114018,1.434027,2.249431,...,1.504021,0.202589,0.568564,0.411345,1.131859,1.615746,0.685111,0.607913,0.225517,2.451912
864,1.461189,2.047807,0.641456,0.477127,1.718020,1.479329,1.568005,1.879804,0.761165,2.271385,...,-0.575270,-0.066425,0.543103,0.499786,1.125477,1.595520,0.952740,0.508892,0.380147,2.352041
865,1.459577,1.999738,-0.125536,0.345003,1.590115,1.686775,1.165843,0.158606,0.885731,2.291789,...,-1.321619,-0.318737,0.132557,0.385093,1.038797,1.953082,1.061539,0.549521,0.412396,2.701856


In [27]:
# Cluster the targets: the first 867 rows of the last ten columns of df_VHI.
myFun = NonLinCTA(
    df=df,
    targets_df=df_VHI.iloc[:867,-10:],
    eps=0,
    n_val=-1,
    neigh=0,
)
myFun.compute_target_clusters()

[0.04427312] [0.04373885] [[0.0434724]] [0.18184053] [0.18573121] [[0.17678441]] [[0.18067509]]
[0.04348539] [0.04260217] [[0.0430907]] [0.18752498] [0.19351414] [[0.18803841]] [[0.19402756]]
[0.04348539] [0.04208701] [[0.04245786]] [0.18752498] [0.19681986] [[0.18433854]] [[0.19363343]]
[0.04244393] [0.04260217] [[0.04241754]] [0.19454424] [0.19351414] [[0.19347263]] [[0.19244252]]
[0.04241742] [0.04104374] [[0.04183814]] [0.19471559] [0.2030911] [[0.19597093]] [[0.20434644]]
[0.04241742] [0.04067611] [[0.04159345]] [0.19471559] [0.20516592] [[0.19540976]] [[0.20586009]]
[0.04241742] [0.04442236] [[0.04247691]] [0.19471559] [0.1807272] [[0.18562059]] [[0.1716322]]
[0.04249561] [0.04104374] [[0.04195965]] [0.19420924] [0.2030911] [[0.19633866]] [[0.20522052]]
[0.04249561] [0.04067611] [[0.04182823]] [0.19420924] [0.20516592] [[0.19698632]] [[0.207943]]
[0.04249561] [0.03936555] [[0.0417269]] [0.19420924] [0.21198967] [[0.20342016]] [[0.22120059]]
[0.04249561] [0.03553339] [[0.04085765]

[['mean_Adda',
  'mean_Ticino',
  'mean_Piemonte_Nord',
  'mean_Lambro_Olona',
  'mean_Oglio_Iseo',
  'mean_Dora'],
 ['mean_Piemonte_Sud', 'mean_Garda_Mincio', 'mean_Emiliani2'],
 ['mean_Emiliani1']]

In [6]:
# Same clustering call as in the earlier cell, re-run on the same inputs.
myFun = NonLinCTA(
    df=df,
    targets_df=df_VHI.iloc[:867,-10:],
    eps=0,
    n_val=-1,
    neigh=0,
)
myFun.compute_target_clusters()


[0.04427312] [0.04373885] [[0.0434724]] [0.18184053] [0.18573121] [[0.17678441]] [[0.18067509]]
[0.04348539] [0.04260217] [[0.0430907]] [0.18752498] [0.19351414] [[0.18803841]] [[0.19402756]]
[0.04348539] [0.04208701] [[0.04245786]] [0.18752498] [0.19681986] [[0.18433854]] [[0.19363343]]
[0.04244393] [0.04260217] [[0.04241754]] [0.19454424] [0.19351414] [[0.19347263]] [[0.19244252]]
[0.04241742] [0.04104374] [[0.04183814]] [0.19471559] [0.2030911] [[0.19597093]] [[0.20434644]]
[0.04241742] [0.04067611] [[0.04159345]] [0.19471559] [0.20516592] [[0.19540976]] [[0.20586009]]
[0.04241742] [0.04442236] [[0.04247691]] [0.19471559] [0.1807272] [[0.18562059]] [[0.1716322]]
[0.04249561] [0.04104374] [[0.04195965]] [0.19420924] [0.2030911] [[0.19633866]] [[0.20522052]]
[0.04249561] [0.04067611] [[0.04182823]] [0.19420924] [0.20516592] [[0.19698632]] [[0.207943]]
[0.04249561] [0.03936555] [[0.0417269]] [0.19420924] [0.21198967] [[0.20342016]] [[0.22120059]]
[0.04249561] [0.03553339] [[0.04085765]

[['mean_Adda',
  'mean_Ticino',
  'mean_Piemonte_Nord',
  'mean_Lambro_Olona',
  'mean_Oglio_Iseo',
  'mean_Dora'],
 ['mean_Piemonte_Sud', 'mean_Garda_Mincio', 'mean_Emiliani2'],
 ['mean_Emiliani1']]

In [None]:
Hi, I am trying to download the SPI index from the following link: https://edo.jrc.ec.europa.eu/gdo/php/index.php?id=2112. However, when I select "Europe" and "Standardized Precipitation Index 01 month accumulation period (SPI-1) MARSMet" and choose the years from 2001 to 2019, either one at a time or all together, the download does not start in either GeoTIFF or NetCDF format. I thought it might be an issue with my browser, but I was able to download the data for the Soil Moisture Index Anomaly (SMA) without problems. Could you check that the download of the SPI index is working on the website? Thanks a lot!