In [1]:
import xarray as xr
import numpy as np
import pandas as pd
from helpers import *
from isodisreg import idr
import isodisreg
import properscoring as ps

In [2]:
# Root directory of the precipitation data. It has no trailing slash, so any
# suffix appended to it needs a leading '/'.
# NOTE(review): machine-specific absolute path; the repository-relative
# alternative that used to be assigned (and immediately overwritten) here was
# "../precip_data/".
data_dir = '/Volumes/My Passport for Mac/cnn/data_update/with_precip/precip_data'
# Season key handed to the season helpers and embedded in the output file names.
season = 'JAS'

In [3]:
# Output directories. data_dir has no trailing slash, so every suffix must start
# with '/'; without it the per-fold summaries would be written to
# '<data_dir>results/' instead of '<data_dir>/results/'.
data_save = data_dir + '/results/'
data_save2 = data_dir + '/results/full_models/'
# Grid mask read from lsm.txt; the evaluation loops skip grid points whose
# mask value is 0 (presumably a land-sea mask -- confirm against the data).
lsm = np.loadtxt(data_dir + "/lsm.txt")

In [4]:
# When True, each evaluation loop also writes the per-grid-point score fields
# of every fold to data_save2 as NetCDF files; the per-fold means are handled
# separately by each loop.
save_full = False

# HRES

In [6]:
# HRES forecast data set; the evaluation loop selects it by time/lat/lon and
# reads its `lsp` variable as the single IDR covariate.
hres_data =  xr.open_dataset(data_dir + '/forecasts/hres_fct/HRES24_multi1000_precip_init00_66_reconcut_years_2001_2019.nc')

In [5]:
# Cross-validated evaluation of the IDR-postprocessed HRES forecast.
# For each of the 9 folds an IDR model is fitted per grid point on the training
# period and scored on the evaluation season with the CRPS and the Brier score
# at threshold 0.2; the grid means per fold are written to text files.
folds = np.arange(9)
crps_fold = np.zeros(len(folds))
bs_fold = np.zeros(len(folds))

for fold in folds:
    print(fold)

    ytrain = load_obs(data_dir, fold, mode = "train")
    dim_train = ytrain.shape[0]

    # The last fold is scored against the test observations, every other fold
    # against its validation observations.
    yval = load_obs(data_dir, fold, mode = "test" if fold == 8 else "val")

    year = fold + 11
    date_train, date_test = hres_season_time(season, year)
    hres_data_train = hres_data.sel(time = date_train)
    hres_data_test = hres_data.sel(time = date_test)

    # Per-grid-point scores of this fold (19 latitudes x 61 longitudes).
    crps_cv = np.zeros((19, 61))
    bs_cv = np.zeros((19, 61))

    # Index window of the season inside the evaluation observations.
    year_dim = yval.shape[0]
    ix0, ix1 = sel_season_indx(season, year_dim)

    if save_full:
        # Templates carrying the lat/lon coordinates; their values are replaced
        # by the score fields before they are written.
        model_bs = xr.DataArray(
            np.random.rand(19, 61),
            coords=[np.arange(19), np.arange(-25, 35.5)],
            dims=["lat", "lon"],
            name='var',
        )
        model_crps = xr.DataArray(
            np.random.rand(19, 61),
            coords=[np.arange(19), np.arange(-25, 35.5)],
            dims=["lat", "lon"],
            name='var',
        )

    for lat in range(19):
        for lon in np.arange(-25, 35.5):
            # Array indices that correspond to the lat/lon coordinate values.
            i, j = int(lat), int(lon + 25)

            if lsm[i, j] == 0:
                # Masked grid point: excluded from the grid means via NaN.
                crps_cv[i, j] = np.nan
                bs_cv[i, j] = np.nan
                continue

            train_hres = hres_data_train.sel(lat = lat, lon = lon)['lsp'].values
            val_hres = hres_data_test.sel(lat = lat, lon = lon)['lsp'].values
            # Keep only the last len(train_hres) training observations so that
            # targets and forecasts have the same length.
            train_ix0 = dim_train - len(train_hres)

            idr_output = idr(
                ytrain[train_ix0:, i, j],
                pd.DataFrame({'fore': train_hres}, columns = ['fore']),
            )
            pred_idr = idr_output.predict(
                pd.DataFrame({'fore': val_hres}, columns = ['fore'])
            )

            obs_season = yval[ix0:ix1, i, j]
            crps_cv[i, j] = np.mean(pred_idr.crps(obs_season))
            bs_cv[i, j] = np.mean(pred_idr.bscore(y = obs_season, thresholds = 0.2))

    bs_fold[fold] = np.nanmean(bs_cv)
    crps_fold[fold] = np.nanmean(crps_cv)

    if save_full:
        model_bs[:, :] = bs_cv
        model_bs.to_netcdf(data_save2 + 'hres_bs_' + season + '_' + str(fold) + '.nc')
        model_crps[:, :] = crps_cv
        model_crps.to_netcdf(data_save2 + 'hres_crps_' + season + '_' + str(fold) + '.nc')

np.savetxt(data_save + 'hres_crps_' + season + '.txt', crps_fold)
np.savetxt(data_save + 'hres_bs_' + season + '.txt', bs_fold)

0
1
2
3
4
5
6
7
8


# ECMWF ensemble

In [47]:
# Evaluation of the raw ECMWF ensemble.
# For each of the 9 folds the ensemble is scored per grid point with the Brier
# score of the fraction of members above 0.2 and with the ensemble CRPS; the
# grid means per fold are written to text files.
ecmwf_data_dir = data_dir + '/forecasts/ensemble_fct/'
folds = np.arange(9)
bs_fold = np.zeros(len(folds))
crps_fold = np.zeros(len(folds))

for fold in folds:
    print(fold)

    # The last fold is scored against the test observations, every other fold
    # against its validation observations.
    if fold == 8:
        yval = load_obs(data_dir, fold, mode = "test")
    else:
        yval = load_obs(data_dir, fold, mode = "val")

    # Allocate the arrays the grid loop writes to. Previously differently named
    # arrays were allocated here, so the loop only worked when bs_cv / crps_cv
    # were left over from another cell.
    bs_cv = np.zeros((19, 61))
    crps_cv = np.zeros((19, 61))

    # Index window of the season inside the evaluation observations.
    year_dim = yval.shape[0]
    ix0, ix1 = sel_season_indx(season, year_dim)

    # The context manager closes the per-fold NetCDF file once the fold is done.
    with xr.open_dataset(ecmwf_data_dir + 'ens_' + str(fold) + '.nc') as ens:
        for lat in range(19):
            for lon in np.arange(-25, 35.5):
                # Array indices that correspond to the lat/lon coordinate values.
                i = int(lat)
                j = int(lon + 25)
                if lsm[i, j] != 0:
                    # Rows are restricted to the season window; the second axis
                    # is averaged over below and passed to crps_ensemble as the
                    # member axis.
                    ens_test = ens.sel(lat = lat, lon = lon)['var'].values[ix0:ix1, :]
                    # Fraction of members exceeding the 0.2 threshold.
                    pop_test = np.mean(ens_test > 0.2, axis = 1)

                    ygrid = yval[ix0:ix1, i, j]
                    yval_bin = ygrid > 0.2

                    bs_cv[i, j] = np.mean((yval_bin - pop_test)**2)
                    crps_cv[i, j] = np.mean(ps.crps_ensemble(ygrid, ens_test))
                else:
                    # Masked grid point: excluded from the grid means via NaN.
                    bs_cv[i, j] = np.nan
                    crps_cv[i, j] = np.nan

    bs_fold[fold] = np.nanmean(bs_cv)
    crps_fold[fold] = np.nanmean(crps_cv)

    if save_full:
        # Wrap the score fields with their lat/lon coordinates and write them.
        model_bs = xr.DataArray(
            bs_cv,
            coords=[np.arange(19), np.arange(-25, 35.5)],
            dims=["lat", "lon"],
            name='var',
        )
        model_bs.to_netcdf(data_save2 + 'ecmwf_bs_' + season + '_' + str(fold) + '.nc')
        model_crps = xr.DataArray(
            crps_cv,
            coords=[np.arange(19), np.arange(-25, 35.5)],
            dims=["lat", "lon"],
            name='var',
        )
        model_crps.to_netcdf(data_save2 + 'ecmwf_crps_' + season + '_' + str(fold) + '.nc')

np.savetxt(data_save + 'ecmwf_bs_' + season + '.txt', bs_fold)
np.savetxt(data_save + 'ecmwf_crps_' + season + '.txt', crps_fold)

0
1
2
3
4
5
6
7
8


# ECMWF ensemble PoP, postprocessed with IDR

In [5]:
# Data set derived from the ECMWF ensemble (file name: "pop"); the loop below
# selects it by time/lat/lon and uses its `tp` variable as the IDR covariate.
pop = xr.open_dataset(data_dir + '/forecasts/ensemble_fct/ens24_pop_reconcut_invertlat_mm_2006_2019.nc')

In [6]:
# Cross-validated evaluation of the IDR-postprocessed ensemble `pop` forecast.
# For each of the 9 folds an IDR model is fitted per grid point on the binary
# event "observation > 0.2" and scored with the Brier score. In addition, the
# score at grid index [13, 27] (stored in `niamey`) and the binary observations
# and predicted event probabilities at that grid point are collected per fold.
folds = np.arange(9)
bs_fold = np.zeros(len(folds))
niamey = np.zeros(len(folds))
obs_list = list()
pop_list = list()
for fold in folds:
    print(fold)
    ytrain = load_obs(data_dir, fold, mode = "train")
    dim_train = ytrain.shape[0]

    # The last fold is scored against the test observations, every other fold
    # against its validation observations.
    if fold == 8:
        yval = load_obs(data_dir, fold, mode = "test")
    else:
        yval = load_obs(data_dir, fold, mode = "val")

    year = fold + 11

    date_train, date_test = ecmwf_season_time(season, year)

    pop_train = pop.sel(time = date_train)
    pop_test = pop.sel(time = date_test)

    # `.sizes` is the name -> length mapping; `Dataset.dims[...]` is deprecated
    # for this purpose.
    dim_pop = pop_train.sizes['time']
    # Offset that keeps only the last dim_pop training observations, so targets
    # and forecasts have the same length.
    new_dim = dim_train - dim_pop

    # Per-grid-point Brier scores of this fold (19 latitudes x 61 longitudes).
    bs_cv = np.zeros((19, 61))

    # Index window of the season inside the evaluation observations.
    year_dim = yval.shape[0]
    ix0, ix1 = sel_season_indx(season, year_dim)

    for lat in range(19):
        for lon in np.arange(-25, 35.5):
            # Array indices that correspond to the lat/lon coordinate values.
            i = int(lat)
            j = int(lon + 25)
            if lsm[i, j] != 0:

                probs_train_grid = pop_train.sel(lat = lat, lon = lon).tp.values
                probs_test_grid = pop_test.sel(lat = lat, lon = lon).tp.values

                obs_bin_train = ytrain[new_dim:, i, j] > 0.2
                obs_bin_test = yval[ix0:ix1, i, j] > 0.2

                fit = idr(y = 1*obs_bin_train, X = pd.DataFrame(probs_train_grid), progress = False)
                preds = fit.predict(pd.DataFrame(probs_test_grid))
                bs_cv[i, j] = np.mean(preds.bscore(y = 1*obs_bin_test, thresholds = 0.2))
                # Collect the samples at the same grid point whose score goes
                # into `niamey` (index [13, 27], i.e. lat 13 / lon 2). The old
                # test `lat == 13 and lon == 27` compared the longitude
                # *coordinate* with the longitude *index* and therefore picked
                # index [13, 52] instead.
                if i == 13 and j == 27:
                    obs_list.append(1*obs_bin_test)
                    # Predicted probability of exceeding the 0.2 threshold.
                    pop_list.append(1-preds.cdf(0.2))
            else:
                # Masked grid point: excluded from the grid mean via NaN.
                bs_cv[i, j] = np.nan

    niamey[fold] = bs_cv[13, 27]

    bs_fold[fold] = np.nanmean(bs_cv)
    if save_full:
        # Wrap the score field with its lat/lon coordinates and write it.
        model_bs = xr.DataArray(
            bs_cv,
            coords=[np.arange(19), np.arange(-25, 35.5)],
            dims=["lat", "lon"],
            name='var',
        )
        model_bs.to_netcdf(data_save2 + 'ecmwf_pp_bs_' + season + '_' + str(fold) + '.nc')
# Writing the per-fold means is currently disabled:
#np.savetxt(data_save +  'ecmwf_pp_bs_'+   season + '.txt', bs_fold)

0
1
2
3
4
5
6
7
8
