In [11]:
import numpy as np
import xarray as xr
import isodisreg 
from isodisreg import idr
import pandas as pd
from helpers import *

In [12]:
# Experiment configuration: both values are embedded in the names of the
# forecast files that are read and of the score files that are written.
feature_set, season = 'v2+time', 'JAS'

In [13]:
# Root folder of the input data; score files are written below `data_save`.
data_dir = "../precip_data"
data_save = data_dir + '/results/'

# 2-D mask read from a text file; the evaluation loops skip every grid
# cell whose mask value equals 0.
lsm = np.loadtxt(
    data_dir  + "/lsm.txt"
)

In [14]:
# Fold indices 0..8 and one score slot per fold (filled by the evaluation loops).
folds = np.arange(9)
bs_fold = np.zeros(folds.size)
crps_fold = np.zeros(folds.size)

# CNN model

In [5]:
# Evaluate the IDR-post-processed CNN forecasts.
# For every fold and every unmasked grid point, IDR is fitted on the training
# forecast/observation pairs and the seasonal slice of the validation forecasts
# is scored with the mean CRPS and the mean Brier score at BS_THRESHOLD.
# The per-fold spatial means are stored in `crps_fold` / `bs_fold` and saved.
BS_THRESHOLD = 0.2
cnn_dir = data_dir + '/forecasts/cnn_fct/'
grid_lats = range(19)             # lat coordinate labels; position == row index into lsm
grid_lons = np.arange(-25, 35.5)  # lon coordinate labels -25, ..., 35; position == column index

for fold in folds:
    file_tail = '_' + str(feature_set) + '_fold' + str(fold) + '.nc'

    # Context managers release the NetCDF file handles once the fold is done;
    # previously four datasets per fold were opened and never closed.
    with xr.open_dataset(cnn_dir + 'subset_val_preds' + file_tail) as val_preds, \
         xr.open_dataset(cnn_dir + 'subset_val_target' + file_tail) as val_tar, \
         xr.open_dataset(cnn_dir + 'subset_train_preds' + file_tail) as train_prds, \
         xr.open_dataset(cnn_dir + 'subset_train_target' + file_tail) as train_tar:

        # Masked grid points keep their NaN and are ignored by nanmean below.
        crps_cv = np.full((len(grid_lats), len(grid_lons)), np.nan)
        bs_cv = np.full((len(grid_lats), len(grid_lons)), np.nan)

        # `.sizes` replaces the deprecated mapping access `Dataset.dims[...]`.
        year_dim = val_tar.sizes['time']
        ix0, ix1 = sel_season_indx(season, year_dim)

        for i, lat in enumerate(grid_lats):
            for j, lon in enumerate(grid_lons):
                if lsm[i, j] == 0:
                    continue

                train_fct = train_prds.sel(lat = lat, lon = lon).train_preds.values
                train_obs = train_tar.sel(lat = lat, lon = lon).train_tar.values
                # Only the [ix0:ix1) slice of the validation series is scored.
                val_fct = val_preds.sel(lat = lat, lon = lon).val_preds.values[ix0:ix1]
                val_obs = val_tar.sel(lat = lat, lon = lon).val_tar.values[ix0:ix1]

                idr_output = idr(train_obs, pd.DataFrame({'fore': train_fct}, columns = ['fore']))
                pred_idr = idr_output.predict(pd.DataFrame({'fore': val_fct}, columns = ['fore']))

                crps_cv[i, j] = np.mean(pred_idr.crps(val_obs))
                bs_cv[i, j] = np.mean(pred_idr.bscore(thresholds = BS_THRESHOLD, y = val_obs))

    bs_fold[fold] = np.nanmean(bs_cv)
    crps_fold[fold] = np.nanmean(crps_cv)

np.savetxt(data_save +  'cnn_crps_'+ str(feature_set)+   '_' + season + '.txt', crps_fold)
np.savetxt(data_save +  'cnn_bs_'+ str(feature_set)+  '_' + season + '.txt', bs_fold)

In [6]:
# Show the per-fold mean CRPS values currently held in `crps_fold`
# (they come from whichever evaluation loop was executed last).
crps_fold

array([2.91792025, 3.03324061, 2.74807059, 2.76811912, 2.74798613,
       2.8035649 , 2.67704742, 2.87727204, 2.85275223])

In [7]:
# Show the per-fold mean Brier scores currently held in `bs_fold`
# (they come from whichever evaluation loop was executed last).
bs_fold

array([0.12969351, 0.11999359, 0.13019998, 0.12519427, 0.13408735,
       0.11823619, 0.12017427, 0.11808887, 0.11527252])

# Hybrid model: CNN + HRES 

In [15]:
import xarray as xr
# HRES forecast dataset; the hybrid evaluation selects from it by time, lat
# and lon and reads its `lsp` variable.
hres_data = xr.open_dataset(
    data_dir + '/forecasts/hres_fct/HRES24_multi1000_precip_init00_66_reconcut_years_2001_2019.nc'
)

In [16]:
def cscore(preds_average, grid_test_cv, grid_idr, mean = True):
    """CRPS of step-function predictive CDFs given on a common grid.

    Parameters
    ----------
    preds_average : 2-D array
        One row per forecast case; row k holds the predictive CDF evaluated
        at the points in `grid_idr`.
    grid_test_cv : 1-D array
        Observation for each forecast case (indexed like the rows of
        `preds_average`).
    grid_idr : 1-D array
        Grid points at which the CDF values are given.
    mean : bool, default True
        If true, return the average score over all cases; otherwise return
        the array of per-case scores.

    Returns
    -------
    float or numpy.ndarray
        Mean score, or one score per row of `preds_average`.
    """
    def _case_score(cdf_row, obs):
        # Probability mass attached to each grid point: first CDF value,
        # then the successive CDF increments.
        mass = np.hstack([cdf_row[0], np.diff(cdf_row)])
        exceeds = np.array((obs < grid_idr))
        return 2 * np.sum(mass * (exceeds - cdf_row + 0.5 * mass) * np.array(grid_idr - obs))

    n_cases = preds_average.shape[0]
    scores = np.asarray(
        [_case_score(preds_average[k, :], grid_test_cv[k]) for k in range(n_cases)]
    )
    return np.mean(scores) if mean else scores

In [17]:

# Evaluate the hybrid forecast: the equally weighted average of the
# IDR-calibrated CNN predictive CDF and the IDR-calibrated HRES predictive CDF.
# For every fold and unmasked grid point the averaged CDF is scored with the
# CRPS (via `cscore`) and the Brier score at BS_THRESHOLD; per-fold spatial
# means go to `crps_fold` / `bs_fold` and are saved to text files.
BS_THRESHOLD = 0.2
cnn_dir = data_dir + '/forecasts/cnn_fct/'
grid_lats = range(19)             # lat coordinate labels; position == row index into lsm
grid_lons = np.arange(-25, 35.5)  # lon coordinate labels -25, ..., 35; position == column index

for fold in folds:
    print(fold)
    file_tail = '_' + str(feature_set) + '_fold' + str(fold) + '.nc'

    # HRES training / test dates for this fold (same for every grid point).
    year = fold + 11
    date_train, date_test = hres_season_time(season, year)
    hres_data_train = hres_data.sel(time = date_train)
    hres_data_test = hres_data.sel(time = date_test)

    # Context managers release the NetCDF file handles once the fold is done;
    # previously four datasets per fold were opened and never closed.
    with xr.open_dataset(cnn_dir + 'subset_val_preds' + file_tail) as val_preds, \
         xr.open_dataset(cnn_dir + 'subset_val_target' + file_tail) as val_tar, \
         xr.open_dataset(cnn_dir + 'subset_train_preds' + file_tail) as train_prds, \
         xr.open_dataset(cnn_dir + 'subset_train_target' + file_tail) as train_tar:

        # Masked grid points keep their NaN and are ignored by nanmean below.
        crps_cv = np.full((len(grid_lats), len(grid_lons)), np.nan)
        bs_cv = np.full((len(grid_lats), len(grid_lons)), np.nan)

        # `.sizes` replaces the deprecated mapping access `Dataset.dims[...]`.
        year_dim = val_tar.sizes['time']
        ix0, ix1 = sel_season_indx(season, year_dim)

        for i, lat in enumerate(grid_lats):
            for j, lon in enumerate(grid_lons):
                if lsm[i, j] == 0:
                    continue

                train_fct = train_prds.sel(lat = lat, lon = lon).train_preds.values
                train_obs = train_tar.sel(lat = lat, lon = lon).train_tar.values
                # Only the [ix0:ix1) slice of the validation series is scored.
                val_fct = val_preds.sel(lat = lat, lon = lon).val_preds.values[ix0:ix1]
                val_obs = val_tar.sel(lat = lat, lon = lon).val_tar.values[ix0:ix1]

                train_hres = hres_data_train.sel(lat = lat, lon = lon).lsp.values
                val_hres = hres_data_test.sel(lat = lat, lon = lon).lsp.values

                # The HRES training forecasts are paired with the LAST
                # len(train_hres) training observations. A negative offset
                # would silently select the wrong slice, so fail loudly.
                train_ix0 = len(train_obs) - len(train_hres)
                if train_ix0 < 0:
                    raise ValueError(
                        'fold %s, lat %s, lon %s: %d HRES training forecasts but only %d training observations'
                        % (fold, lat, lon, len(train_hres), len(train_obs)))
                # Both predictive CDFs are averaged case by case, so the HRES
                # test forecasts must line up with the validation slice.
                if len(val_hres) != len(val_obs):
                    raise ValueError(
                        'fold %s, lat %s, lon %s: %d HRES test forecasts but %d validation observations'
                        % (fold, lat, lon, len(val_hres), len(val_obs)))

                idr_output = idr(train_obs, pd.DataFrame({'fore': train_fct}, columns = ['fore']))
                pred_idr = idr_output.predict(pd.DataFrame({'fore': val_fct}, columns = ['fore']))

                idr_output2 = idr(train_obs[train_ix0:], pd.DataFrame({'fore': train_hres}, columns = ['fore']))
                pred_idr2 = idr_output2.predict(pd.DataFrame({'fore': val_hres}, columns = ['fore']))

                # np.unique already returns its result sorted.
                grid_idr = np.unique(train_obs)
                preds_average = 0.5*(pred_idr.cdf(grid_idr) + pred_idr2.cdf(grid_idr))
                # Averaged CDF at the threshold, flattened to one value per case.
                # NOTE(review): the shape returned by `cdf` for a scalar threshold
                # is not visible here; ravel is a no-op for a 1-D result and
                # prevents an (n, 1) - (n,) broadcast to an n x n matrix otherwise.
                p0 = np.ravel(0.5*(pred_idr.cdf(BS_THRESHOLD) + pred_idr2.cdf(BS_THRESHOLD)))

                crps_cv[i, j] = cscore(preds_average, val_obs, grid_idr, mean = True)
                # Brier score of the exceedance probability 1 - F(threshold).
                val_obs_bin = val_obs > BS_THRESHOLD
                bs_cv[i, j] = np.mean(((1 - p0) - val_obs_bin)**2)

    bs_fold[fold] = np.nanmean(bs_cv)
    crps_fold[fold] = np.nanmean(crps_cv)

np.savetxt(data_save +  'hybrid_crps_'+ str(feature_set)+   '_' + season + '.txt', crps_fold)
np.savetxt(data_save +  'hybrid_bs_'+ str(feature_set)+  '_' + season + '.txt', bs_fold)

0
1
2
3
4
5
6
7
8
