In [14]:
## DESCRIPTION
# stats on kriging

In [15]:
import sys

lib_dir = "/home/daniele/documents/github/ftt01/phd/share/lib"
sys.path.insert( 0, lib_dir )

In [16]:
from lib import *

In [17]:
import logging

In [18]:
wdir = "/home/daniele/documents/github/ftt01/phd/projects/era5_evaluation/"
input_path = "/media/windows/projects/era5_bias/xAlberto-exchange/Risultati/Cross_Validation/Ott12-Set18/Semivariogramma2(AA12-18)/16_stations/"
output_path = "/media/windows/projects/era5_bias/05_plots/"

In [19]:
## SETUP
basin = 'alto_adige'
output_path = output_path + "meteo/" + basin + "/kriging/"
mkNestedDir(output_path)
output_log = output_path + "stats_kriging.log"

In [20]:
logging.basicConfig(
    filename=output_log,
    format='%(asctime)s - %(message)s', level=logging.INFO)

start_date_str = "2010-01-01T00:00:00"
end_date_str = "2019-12-31T23:00:00"
start_date = dt.datetime.strptime( start_date_str, '%Y-%m-%dT%H:%M:%S' )
end_date = dt.datetime.strptime( end_date_str, '%Y-%m-%dT%H:%M:%S' )
dates = pd.date_range(start_date, end_date, freq='h')

In [21]:
## KRIGING DATASET INPUT ##
precipitation_kr_path = "/media/windows/projects/era5_bias/OLD/comparison/scripts/kriging/AltoAdige/P/"
         
precipitation_df_kr = pd.DataFrame()

t_file_tot = glob.glob( precipitation_kr_path + '*.csv')

for t_file in t_file_tot:
    data_tot = pd.read_csv(t_file,index_col=0,parse_dates=True,skiprows=4,names=[ os.path.basename(t_file)[:-4] ])
    data_tot = data_tot[start_date:end_date]
    data_tot[data_tot == -999] = None
    precipitation_df_kr = pd.concat([precipitation_df_kr, data_tot],axis=1)

precipitation_df_kr = precipitation_df_kr[start_date:end_date]
precipitation_yearly_mean_input = precipitation_df_kr.resample('Y').sum()
precipitation_yearly_mean_mean_input = precipitation_yearly_mean_input.mean()
logging.info("Precipitation yearly INPUT average: " + str(precipitation_yearly_mean_mean_input.mean()))

In [22]:
### KR1x1 OUTPUTS
model = 'KR1x1'
kriging_precipitation_file = "/media/windows/projects/era5_bias/kriging/old/kriging/AltoAdige/P_AltoAdige.krig"
grid_metadata = "/media/windows/projects/era5_bias/kriging/old/grid_1x1km_Adige_river.csv"

# Reading kriging grid metadata - all Adige river cells
df_grid = pd.read_csv( grid_metadata, index_col=0 )
df_elev = df_grid['Elevation']

# df_p = pd.DataFrame(index=dates,columns=['Tinter','Tslope'])
# df_P = pd.DataFrame(index=dates,columns=IDsubbs)

### data entire Alto Adige
df_p = pd.read_csv( kriging_precipitation_file, parse_dates=True, index_col=0 )
df_p['dates'] = dates
df_p.set_index( 'dates',inplace=True )

krig_p = df_p

precipitation_yearly_mean = df_p.resample('Y').sum()
precipitation_yearly_mean_mean = precipitation_yearly_mean.mean()
logging.info("Precipitation yearly OUTPUT average: " + str(precipitation_yearly_mean_mean.mean()))

In [23]:
#### cross-validation results
start_date_str = "2012-10-01T00:00:00"
end_date_str = "2018-09-30T23:00:00"
start_date = dt.datetime.strptime( start_date_str, '%Y-%m-%dT%H:%M:%S' )
end_date = dt.datetime.strptime( end_date_str, '%Y-%m-%dT%H:%M:%S' )
dates = pd.date_range(start_date, end_date, freq='h')

station_paths = glob.glob(input_path+"*/")

In [24]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [112]:
comparison_metrics_p = pd.DataFrame(index=[1,2,3,4,5,6,7,8,9,10,11,12])
comparison_metrics_t = pd.DataFrame(index=[1,2,3,4,5,6,7,8,9,10,11,12])

for station_path in station_paths:
    st_id = station_path.split('/')[-2]
    logging.info( st_id )

    comparison_df_p = pd.DataFrame(index=dates)
    comparison_df_t = pd.DataFrame(index=dates)

    ## add kriging cross-validation result
    krig_p_result = pd.read_csv( station_path + "P_AltoAdige.krig" )
    krig_t_result = pd.read_csv( station_path + "TMEAN_AltoAdige.krig" )
    comparison_df_p['kriging'] = krig_p_result[krig_p_result.columns[0]].values
    comparison_df_t['kriging'] = krig_t_result[krig_t_result.columns[0]].values

    ## add metered data at the same station
    ### retrieve station id
    id_station_p = str(pd.read_csv( station_path + "statistics_precipitation_.txt" )['IDstation'][0])
    id_station_t = str(pd.read_csv( station_path + "statistics_temperature_.txt" )['IDstation'][0])
    metered_p_data = pd.read_csv( 
        "/media/lacie2022/data/meteo/eu/it/taa/aa/older/data/precipitation/" + id_station_p + ".txt", 
        index_col=0, skiprows=4, parse_dates=True, header=None)[start_date:end_date]
    metered_p_data[metered_p_data == -999] = None
    metered_t_data = pd.read_csv( 
        "/media/lacie2022/data/meteo/eu/it/taa/aa/older/data/temperature/" + id_station_t + ".txt", 
        index_col=0, skiprows=4, parse_dates=True, header=None)[start_date:end_date]
    metered_t_data[metered_t_data == -999] = None
    comparison_df_p['metered'] = metered_p_data[1].values
    comparison_df_t['metered'] = metered_t_data[1].values

    comparison_df_p['bias'] = comparison_df_p['metered'] - comparison_df_p['kriging']
    comparison_df_t['bias'] = comparison_df_t['metered'] - comparison_df_t['kriging']

    ## monthly aggregation
    comparison_df_p = comparison_df_p.resample('d').sum()
    comparison_df_t = comparison_df_t.resample('d').mean()

    comparison_df_p.dropna(inplace=True)
    comparison_df_t.dropna(inplace=True)

    logging.info( "MAE: " + str(round(mean_absolute_error( comparison_df_p['metered'], comparison_df_p['kriging'] ),2) ))
    logging.info( "MAPE: " + str(round(mean_squared_error( comparison_df_p['metered'], comparison_df_p['kriging'] ),2) ))
    logging.info( "R2: " + str(round(r2_score( comparison_df_p['metered'], comparison_df_p['kriging'] ),2) ))
    
    logging.info( "MAE: " + str(round(mean_absolute_error( comparison_df_t['metered'], comparison_df_t['kriging'] ),2) ))
    logging.info( "MAPE: " + str(round(mean_squared_error( comparison_df_t['metered'], comparison_df_t['kriging'] ),2) ))
    logging.info( "R2: " + str(round(r2_score( comparison_df_t['metered'], comparison_df_t['kriging'] ),2) ))

    mounth_mae_p = []
    mounth_mape_p = []
    mounth_r2_p = []
    mounth_bias_p = []

    mounth_mae_t = []
    mounth_mape_t = []
    mounth_r2_t = []
    mounth_bias_t = []

    for i in range(1,13,1):
        # logging.info(i)
        df_p_month = comparison_df_p.loc[(comparison_df_p.index.month==i)]
        df_t_month = comparison_df_t.loc[(comparison_df_t.index.month==i)]

        a = df_p_month['metered']
        b = df_p_month['kriging']
        mounth_mae_p.append(round(mean_absolute_error(a,b),2))
        mounth_mape_p.append(round(mean_squared_error(a,b),2))
        mounth_r2_p.append(round(r2_score(a,b),2))
        mounth_bias_p.append(df_p_month['bias'].mean())

        c = df_t_month['metered']
        d = df_t_month['kriging']
        mounth_mae_t.append(round(mean_absolute_error(c,d),2))
        mounth_mape_t.append(round(mean_squared_error(c,d),2))
        mounth_r2_t.append(round(r2_score(c,d),2))
        mounth_bias_t.append(df_t_month['bias'].mean())

    comparison_metrics_p['mae_'+st_id] = mounth_mae_p
    comparison_metrics_p['mape_'+st_id] = mounth_mape_p
    comparison_metrics_p['r2_'+st_id] = mounth_r2_p
    comparison_metrics_p['bias_'+st_id] = mounth_bias_p

    comparison_metrics_t['mae_'+st_id] = mounth_mae_t
    comparison_metrics_t['mape_'+st_id] = mounth_mape_t
    comparison_metrics_t['r2_'+st_id] = mounth_r2_t
    comparison_metrics_t['bias_'+st_id] = mounth_bias_t

comparison_df_p.to_csv(output_path+'precipitation_comparison.csv')
comparison_metrics_p.to_csv(output_path+'precipitation_metrics_comparison.csv')
comparison_df_t.to_csv(output_path+'temperature_comparison.csv')
comparison_metrics_t.to_csv(output_path+'temperature_metrics_comparison.csv')

In [113]:
comparison_metrics_p.filter(like='mae', axis=1).mean(axis=1)

1     0.83250
2     0.90250
3     0.75375
4     0.98625
5     1.60000
6     1.86000
7     2.18250
8     2.15625
9     1.02000
10    1.21000
11    1.41250
12    0.66125
dtype: float64

In [114]:
comparison_metrics_p.filter(like='mape', axis=1).mean(axis=1)

1      7.96625
2      5.43375
3      5.06125
4      6.67125
5     15.64000
6     16.50750
7     24.67000
8     22.59250
9      6.87625
10    14.41750
11    26.75375
12    10.11875
dtype: float64

In [115]:
comparison_metrics_p.filter(like='r2', axis=1).mean(axis=1)

1      -0.46500
2      -0.18625
3       0.28000
4       0.73375
5       0.64250
6       0.63375
7       0.57500
8       0.66750
9       0.76875
10      0.46750
11     -0.52250
12   -528.79250
dtype: float64

In [116]:
comparison_metrics_p.filter(like='bias', axis=1).mean(axis=1)

1    -0.016329
2    -0.115228
3    -0.070411
4    -0.072446
5     0.051048
6     0.100885
7     0.164628
8     0.026084
9    -0.024637
10    0.125991
11   -0.064452
12   -0.075857
dtype: float64

In [117]:
comparison_metrics_p

Unnamed: 0,mae_Alpe_di_Siusi,mape_Alpe_di_Siusi,r2_Alpe_di_Siusi,bias_Alpe_di_Siusi,mae_Anterselva,mape_Anterselva,r2_Anterselva,bias_Anterselva,mae_Bolzano,mape_Bolzano,...,r2_Selva_Gardena,bias_Selva_Gardena,mae_Silandro,mape_Silandro,r2_Silandro,bias_Silandro,mae_Vernago,mape_Vernago,r2_Vernago,bias_Vernago
1,1.39,22.57,-7.99,-0.129169,1.1,6.89,0.7,0.172723,0.33,1.36,...,-0.88,0.032283,0.49,1.74,0.87,-0.227533,0.77,2.49,0.88,-0.303695
2,1.44,13.03,-3.88,-0.069225,1.19,6.91,0.13,0.099335,0.44,1.21,...,-1.61,-0.171626,0.64,2.43,0.58,-0.424741,0.66,1.35,0.78,-0.431901
3,1.26,14.98,-2.31,-0.120995,1.18,6.85,0.5,0.489864,0.36,0.84,...,0.2,-0.045754,0.44,1.3,0.54,-0.379506,0.58,1.6,0.7,-0.494412
4,1.14,6.95,0.52,0.005404,1.81,17.45,0.24,0.346464,0.36,0.78,...,0.88,-0.146521,0.85,3.87,0.78,-0.695632,0.97,5.03,0.84,-0.32939
5,2.45,41.73,0.07,0.561253,2.92,31.28,0.18,0.850703,0.83,3.67,...,0.8,-0.413941,1.31,7.18,0.7,-1.095446,1.25,5.34,0.86,-0.199295
6,2.73,34.91,0.3,0.47025,3.22,36.52,0.33,1.627918,1.27,7.31,...,0.77,-0.001867,1.42,8.46,0.51,-0.981479,1.4,7.04,0.77,-0.454227
7,3.4,56.02,0.44,0.899448,3.65,48.62,0.23,1.500567,1.12,5.7,...,0.61,-0.125696,1.56,9.08,0.64,-0.375076,1.43,7.18,0.71,-0.29463
8,3.33,43.38,0.51,0.326152,3.64,46.27,0.29,1.054992,1.11,5.2,...,0.79,0.273568,1.7,10.32,0.61,-0.624394,1.58,9.17,0.74,-0.284915
9,1.2,8.25,0.82,0.047735,2.03,24.91,0.37,1.132961,0.57,1.25,...,0.93,0.067311,0.8,3.53,0.66,-0.683017,0.93,3.8,0.72,-0.566871
10,2.29,47.4,-1.59,0.151859,1.89,19.17,0.55,1.196164,0.51,2.41,...,0.61,-0.083885,0.66,2.48,0.88,-0.418196,0.76,3.55,0.83,-0.454053


In [118]:
comparison_metrics_t.filter(like='mae', axis=1).mean(axis=1)

1     1.58250
2     1.84500
3     1.92875
4     2.04750
5     1.96500
6     1.98375
7     1.91625
8     1.79875
9     1.79250
10    1.64125
11    1.57500
12    1.60000
dtype: float64

In [119]:
comparison_metrics_t.filter(like='mape', axis=1).mean(axis=1)

1     4.04625
2     5.23625
3     6.02250
4     6.56625
5     5.77125
6     5.81125
7     5.29250
8     4.74000
9     4.89375
10    4.13375
11    3.89375
12    4.36000
dtype: float64

In [120]:
comparison_metrics_t.filter(like='r2', axis=1).mean(axis=1)

1     0.60000
2     0.28750
3     0.43250
4     0.29375
5     0.32000
6     0.30125
7     0.09625
8     0.38000
9     0.34125
10    0.55875
11    0.65500
12    0.60875
dtype: float64

In [121]:
comparison_metrics_t.filter(like='bias', axis=1).mean(axis=1)

1     1.023122
2     1.337892
3     1.488105
4     1.583763
5     1.643842
6     1.593911
7     1.548146
8     1.455712
9     1.439606
10    1.221237
11    1.074234
12    0.883098
dtype: float64