# Copyright Netherlands eScience Center <br>
**Function     : Paper Fig - address uncertainty with BayesianLSTM and the related metrics** <br>
**Author       : Yang Liu** <br>
**First Built  : 2020.07.08** <br>
**Last Update  : 2020.07.08** <br>
**Library      : PyTorch, Numpy, NetCDF4, os, iris, cartopy, dlacs, matplotlib**<br>
Description     : This notebook serves to make figures for the paper.<br>

The Lorenz-84 model is described by Edward Lorenz in his 1984 paper:<br>
Lorenz, E. N. (1984). Irregularity: A fundamental property of the atmosphere. Tellus A, 36(2), 98-110.<br>

Return Values   : Time series and figures <br>

In [1]:
%matplotlib inline

import sys
import warnings
import numbers

# for data loading
import os
from netCDF4 import Dataset
# for pre-processing and machine learning
import numpy as np
import csv
#import sklearn
#import scipy
import torch
import torch.nn.functional

#sys.path.append(os.path.join('C:','Users','nosta','ML4Climate','Scripts','DLACs'))
sys.path.append("C:\\Users\\nosta\\ML4Climate\\Scripts\\DLACs")
#sys.path.append("../")
import dlacs
import dlacs.BayesConvLSTM
import dlacs.preprocess
import dlacs.function
import dlacs.saveNetCDF
import dlacs.metric

# for visualization
import dlacs.visual
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
from mpl_toolkits.mplot3d import Axes3D
import iris # also helps with regriding
import cartopy
import cartopy.crs as ccrs

# ignore all the DeprecationWarnings by pytorch
if not sys.warnoptions:
    warnings.simplefilter("ignore")
###############################################################################79

The testing device is Dell Inspiron 5680 with Intel Core i7-8700 x64 CPU and Nvidia GTX 1060 6GB GPU.<br>
Here is a benchmark about cpu v.s. gtx 1060 <br>
https://www.analyticsindiamag.com/deep-learning-tensorflow-benchmark-intel-i5-4210u-vs-geforce-nvidia-1060-6gb/

In [2]:
# physical constants, looked up by short symbol
constant = dict(
    g=9.80616,       # gravitational acceleration [m / s2]
    R=6371009,       # radius of the earth [m]
    cp=1004.64,      # heat capacity of air [J/(kg*K)]
    Lv=2500000,      # latent heat of vaporization [J/kg]
    R_dry=286.9,     # gas constant of dry air [J/(kg*K)]
    R_vap=461.5,     # gas constant for water vapour [J/(kg*K)]
    rho=1026,        # sea water density [kg/m3]
)

In [3]:
################################################################################# 
#########                           datapath                             ########
#################################################################################
ಠ_ಠ = "I'm a valid variable name!"
print(ಠ_ಠ)
# flavour of the Lorenz-84 model used in this experiment
model_type = 'lorenz84ex'
# common root of the experiment folders referenced below
_experiment_root = 'C:\\Users\\nosta\\ML4Climate\\PredictArctic\\BayesMaps\\Lorenz84\\bayes\\fullseries_ex'
# folder that receives the tables written by this notebook
output_path = _experiment_root + '\\paperFig'
# folder holding the BayesConvLSTM ensemble forecast that is evaluated here
# (earlier experiments used the sub-folders penalty100 and penalty100/longLeadTime)
pred_lorenz_bayesconvlstm_path = _experiment_root + '\\epoch3000_p100000_stdx4'

I'm a valid variable name!


In [4]:
if __name__ == "__main__":
    #################################################################################
    ###########                configure Lorenz 84 model                  ###########
    #################################################################################
    # ---- initial state ----
    # x: strength of the symmetric globally encircling westerly current
    # y: strength of the cosine phases of a chain of superposed waves (large scale eddies)
    # z: strength of the sine phases of a chain of superposed waves (large scale eddies)
    x_init, y_init, z_init = 1.0, 1.0, 1.0

    # ---- model parameters ----
    # F: temperature difference between pole and equator
    # G: thermal forcing term
    F, G = 8.0, 1.0
    # intensity of the periodic forcing
    epsilon = 0.4
    # a: mechanical damping factor for the westerly wind x
    # b: advection strength of the waves by the westerly current
    a, b = 0.25, 4.0

    # ---- time stepping ----
    # the damping time of the waves is taken as 5 days (Lorenz 1984), which is
    # one model time unit; dt is roughly 1/30 of that unit
    dt = 0.0333
    num_steps = 1500
    # number of leading steps to discard as initialization period (0 = keep all)
    cut_off = 0
    # number of steps at the end of the series used as the testing period
    test_len = 200

#### Ensemble<br>
##### Testing benchmark <br>
- Lorenz 84 model initial set-up <br>
original model<br>
**x=1.0, y=1.0, z=1.0, a=0.25, b=4.0, F=8.0, G=1.0** <br>
with periodic forcing<br>
**x=1.0, y=1.0, z=1.0, a=0.25, b=4.0, F=8.0, G=1.0, epsilon=0.4** <br>
with AA effect<br>
**x=1.0, y=1.0, z=1.0, a=0.25, b=4.0, F=8.0, G=1.0, epsilon=0.4, alpha = 0.025, beta = 0.00625** <br>

##### Testing 
- Testing initial conditions x, y, z - uncertainty of model outcome <br>
**starting point x=1.0, y=1.0, z=1.0** <br>
Ensemble variant x 50 members <br>
x = [1.0020 : 0.9980] **~0.2%**<br>
x = [1.0010 : 0.9990] **~0.1%**<br>
x = [1.00010 : 0.99990] **~0.01%**<br>
x_ex = [1.0010 : 0.9990] **~0.1%**<br>
x_ex = [1.00010 : 0.99990] **~0.01%**<br>
x_ex = [1.000010 : 0.999990] **~0.001%**<br>
========================================================================================================<br>
- Testing model parameters a & b - model uncertainty <br>
**starting point a=0.25, b=4.0** <br>
Ensemble variant a <br>
a = [0.25010 : 0.24990] **~0.04%**<br>
a_ex = [0.250010 : 0.249990] **~0.004%**<br>
a_ex = [0.2500010 : 0.2499990] **~0.0004%**<br>
========================================================================================================<br>

In [5]:
    #################################################################################
    ###########            Lorenz 84 model + periodic forcing             ###########
    #################################################################################
    def lorenz84_ex(x, y, z, t, a = 0.25, b = 4.0, F = 8.0, G = 1.0, epsilon = 0.4, T = 73):
        """
        Tendencies of the Lorenz-84 model with periodic external forcing.

        Only the right-hand side of the three model equations is evaluated
        here, for one state and one instant; the time integration is left to
        the caller.

        param x, y, z: location in a 3D space (current model state)
        param t: time at which the periodic forcing is evaluated, expressed in
                 the same unit as T
        param a, b, F, G: constants and forcing
        param epsilon: relative amplitude of the periodic modulation of F and G
                       (0 switches the modulation off)
        param T: period of the forcing. The default of 73 corresponds to one
                 year when one time unit is ~5 days (365 / 5 = 73).

        return: tuple (dx, dy, dz), the time derivatives of x, y and z

        The model is designed with a reference to the paper:
        Broer, H., Simó, C., & Vitolo, R. (2002). Bifurcations and strange
        attractors in the Lorenz-84 climate model with seasonal forcing. Nonlinearity, 15(4), 1205.

        Song, Y., Yu, Y., & Wang, H. (2011, October). The stability and chaos analysis of the
        Lorenz-84 atmosphere model with seasonal forcing. In 2011 Fourth International Workshop
        on Chaos-Fractals Theories and Applications (pp. 37-41). IEEE.
        """
        # angular frequency of the periodic forcing
        omega = 2 * np.pi / T
        # F is modulated by a cosine and G by a sine of the same frequency
        dx = - y**2 - z**2 - a * x + a * F * (1 + epsilon * np.cos(omega * t))
        dy = x * y - b * x * z - y + G * (1 + epsilon * np.sin(omega * t))
        dz = b * x * y + x * z - z

        return dx, dy, dz

In [6]:
    #################################################################################
    ###########        Launch Lorenz 84 model with periodic forcing       ###########
    #################################################################################
    # one slot per time level; slot 0 holds the initial state
    x, y, z = (np.empty(num_steps) for _ in range(3))
    x[0], y[0], z[0] = x_init, y_init, z_init
    t = 0.0

    # explicit stepping: evaluate the tendencies at the current state and
    # time, then advance state and clock by dt
    for i in range(num_steps-1):
        dx, dy, dz = lorenz84_ex(x[i], y[i], z[i], t, a, b ,F, G, epsilon)
        x[i + 1], y[i + 1], z[i + 1] = x[i] + (dx * dt), y[i] + (dy * dt), z[i] + (dz * dt)
        t += dt

In [7]:
    print ('*******************  pre-processing  *********************')
    # drop the initialization period; a cut_off of 0 leaves the series untouched
    if cut_off:
        x, y, z = x[cut_off:], y[cut_off:], z[cut_off:]
    print ('===================  normalize data  =====================')
    x_norm, y_norm, z_norm = (dlacs.preprocess.operator.normalize(series) for series in (x, y, z))
    print('================  save the normalizing factor  =================')
    # extremes of each full series
    x_min, x_max = np.amin(x), np.amax(x)
    y_min, y_max = np.amin(y), np.amax(y)
    z_min, z_max = np.amin(z), np.amax(z)
    # standard deviation over the last test_len steps only
    x_std, y_std, z_std = (np.std(series[-test_len:]) for series in (x, y, z))

    # report the extremes, name first and value on the following line
    for label, value in (('x_max', x_max), ('x_min', x_min),
                         ('y_max', y_max), ('y_min', y_min),
                         ('z_max', z_max), ('z_min', z_min)):
        print(label)
        print(value)

*******************  pre-processing  *********************
x_max
2.3660894487784483
x_min
-1.645105946824028
y_max
2.309210407670175
y_min
-1.882577993534199
z_max
3.236059042049284
z_min
-1.5378194442082769


In [8]:
    #################################################################################
    #########        perturb initial condition x and model parameter a       ########
    #################################################################################
    # human-readable size of each perturbation, used in messages and headers
    x_pool_per = ["0.001%", "0.01%", "0.1%", "1.0%"]
    a_pool_per = ["0.001%", "0.01%", "0.1%", "1.0%"]

    # the same sizes expressed as fractions
    x_pool = np.array([1e-5, 1e-4, 1e-3, 1e-2], dtype=float)
    a_pool = np.array([1e-5, 1e-4, 1e-3, 1e-2], dtype=float)

    # the initial condition x is increased, the parameter a is decreased
    x_init_pert = x_init + x_pool * x_init
    a_init_pert = a - a_pool * a

    for title, values in (("Perturb x", x_init_pert), ("Perturb a", a_init_pert)):
        print(title)
        print(values)

Perturb x
[1.00001 1.0001  1.001   1.01   ]
Perturb a
[0.2499975 0.249975  0.24975   0.2475   ]


# Postprocess forecast data <br>
**Postprocess and visualization of ensemble forecast data** <br>

In [9]:
    #######################################################################
    #######    Load forecast of Lorenz 84 model by BayesConvLSTM    #######
    #######################################################################
    print ('*******************  load ensemble data  *********************')
    # number of lead steps of the forecast that are evaluated
    # (the original note gives the unit as "week" - NOTE(review): confirm against dt)
    step_lead = 16
    # number of ensemble members; each member is stored in its own NetCDF file
    ens = 20
    # layout: [member, variable (x, y, z), test step, lead step]
    predictand = np.zeros((ens, 3, test_len, step_lead), dtype=float)
    for i in range(ens):
        member_file = os.path.join(pred_lorenz_bayesconvlstm_path,
                                   'pred_lorenz84_epoch3000_p100000_stdx4_ens_{}.nc'.format(i))
        # open each file in a context manager so its handle is released as
        # soon as the data are copied; previously all 20 files stayed open
        with Dataset(member_file) as dataset_pred:
            # keep only the first step_lead lead steps of the stored forecast
            predictand[i,:,:,:] = dataset_pred.variables['series'][:,:,:step_lead]
    print(predictand.shape)
    # median over the ensemble members
    predictand_median = np.median(predictand, 0)

*******************  load ensemble data  *********************
(20, 3, 200, 16)


In [10]:
    ###################################################################################################
    ###########        Launch Lorenz 84 model with periodic forcing with longer series      ###########
    ###################################################################################################
    # the reference run is step_lead steps longer than the original series
    num_steps_long = num_steps + step_lead

    # one slot per time level; slot 0 holds the initial state
    xx, yy, zz = (np.empty(num_steps_long) for _ in range(3))
    xx[0], yy[0], zz[0] = x_init, y_init, z_init
    tt = 0.0

    # explicit stepping: evaluate the tendencies at the current state and
    # time, then advance state and clock by dt
    for i in range(num_steps_long-1):
        dx, dy, dz = lorenz84_ex(xx[i], yy[i], zz[i], tt, a, b ,F, G, epsilon)
        xx[i + 1], yy[i + 1], zz[i + 1] = xx[i] + (dx * dt), yy[i] + (dy * dt), zz[i] + (dz * dt)
        tt += dt

In [11]:
    # Euclidean distance
    def Euclidean(obs, pred):
        """
        Mean Euclidean distance between two trajectories.

        The distance is taken across the first axis for every point of the
        series and then averaged over all points.
        ----------------------
        param obs: observation time series with the shape [dim, series]
        param pred: reference time series with the shape [dim, series]
        return: the averaged distance
        """
        squared_gap = (obs - pred)**2
        # collapse the first axis -> one distance per point of the series
        distance = np.sqrt(np.sum(squared_gap, axis=0))

        return np.mean(distance)

    # RMSE
    def RMSE(obs, pred):
        """
        Root Mean Square Error.

        param obs: observed values
        param pred: predicted values, compatible in shape with obs
        return: square root of the mean squared difference over all elements
        """
        squared_error = (obs - pred)**2

        return np.sqrt(np.mean(squared_error))

In [12]:
    ###########################################################################################
    ##########       generate model output with perturbed data and compute RMSE      ##########
    ###########################################################################################    
    def _integrate_lorenz84_ex(x0, y0, z0, a_value):
        """
        Run the periodically forced Lorenz-84 model for num_steps time levels,
        starting the clock at 0 and advancing by dt with the same explicit
        update as the reference run.

        param x0, y0, z0: initial state
        param a_value: value of the parameter a used for this run
        return: tuple (x, y, z) of arrays, each of length num_steps
        """
        xs = np.empty(num_steps)
        ys = np.empty(num_steps)
        zs = np.empty(num_steps)
        xs[0], ys[0], zs[0] = x0, y0, z0
        clock = 0.0
        for k in range(num_steps-1):
            dxk, dyk, dzk = lorenz84_ex(xs[k], ys[k], zs[k], clock, a_value, b ,F, G, epsilon)
            xs[k + 1] = xs[k] + (dxk * dt)
            ys[k + 1] = ys[k] + (dyk * dt)
            zs[k + 1] = zs[k] + (dzk * dt)
            clock += dt
        return xs, ys, zs

    def _score_against_reference(x_run, y_run, z_run):
        """
        Compare the last test_len steps of a perturbed run with the matching
        steps of the unperturbed long reference run (xx, yy, zz / obs).

        return: tuple (rmse, distance, stacked) with the RMSE of x, y and z,
                the mean Euclidean distance, and the run stacked as [3, num_steps]
        """
        # the reference run is step_lead steps longer, so the same time steps
        # sit step_lead positions before its end
        window = slice(-(test_len + step_lead), -step_lead)
        stacked = np.zeros((3, num_steps), dtype=float)
        stacked[0,:] = x_run[:]
        stacked[1,:] = y_run[:]
        stacked[2,:] = z_run[:]
        rmse = [RMSE(ref[window], run[-test_len:])
                for ref, run in zip((xx, yy, zz), (x_run, y_run, z_run))]
        distance = Euclidean(obs[:,window], stacked[:,-test_len:])
        return rmse, distance, stacked

    # one row per perturbation size, one column per variable (x, y, z)
    rmse_pert_x = np.zeros((len(x_init_pert), 3), dtype=float)
    rmse_pert_a = np.zeros((len(a_init_pert), 3), dtype=float)

    euclidean_pert_x = np.zeros((len(x_init_pert)), dtype=float)
    euclidean_pert_a = np.zeros((len(a_init_pert)), dtype=float)

    # unperturbed long reference run stacked as [variable, step]
    obs = np.zeros((3, num_steps_long), dtype=float)
    obs[0,:] = xx[:]
    obs[1,:] = yy[:]
    obs[2,:] = zz[:]

    # NOTE(review): as before, x, y and z are overwritten by every perturbed
    # run, so after this cell they no longer hold the unperturbed series.

    # --- experiment 1: perturb the initial condition x ---
    for n in range(len(x_init_pert)):
        print("Perturb the paramter x by {}".format(x_pool_per[n]))
        x, y, z = _integrate_lorenz84_ex(x_init_pert[n], y_init, z_init, a)
        rmse_pert_x[n,:], euclidean_pert_x[n], obs_perturb = _score_against_reference(x, y, z)

    print("RMSE perturbed x")
    print(rmse_pert_x)
    print("EuD perturbed x")
    print(euclidean_pert_x)

    # --- experiment 2: perturb the model parameter a ---
    for n in range(len(a_init_pert)):
        print("Perturb the paramter a by {}".format(a_pool_per[n]))
        aa = a_init_pert[n]
        x, y, z = _integrate_lorenz84_ex(x_init, y_init, z_init, aa)
        rmse_pert_a[n,:], euclidean_pert_a[n], obs_perturb = _score_against_reference(x, y, z)

    print("RMSE perturbed a")
    print(rmse_pert_a)
    print("EuD perturbed a")
    print(euclidean_pert_a)

Perturb the paramter x by 0.001%
Perturb the paramter x by 0.01%
Perturb the paramter x by 0.1%
Perturb the paramter x by 1.0%
RMSE perturbed x
[[0.13043759 0.51952454 0.5150073 ]
 [0.30267206 1.02412727 1.0275891 ]
 [0.43047495 1.13196735 1.17672187]
 [0.38873922 1.08647351 1.04677691]]
EuD perturbed x
[0.59953399 1.27969415 1.55722093 1.3480159 ]
Perturb the paramter a by 0.001%
Perturb the paramter a by 0.01%
Perturb the paramter a by 0.1%
Perturb the paramter a by 1.0%
RMSE perturbed a
[[0.06548156 0.26737304 0.26480829]
 [0.30924268 1.03608308 1.04067395]
 [0.31879982 1.05144448 1.05763518]
 [0.44599381 1.09287441 1.01849402]]
EuD perturbed a
[0.30372342 1.30031904 1.32854839 1.36703178]


In [13]:
    ##############################################################################################################
    ###########       Compute RMSE with given ensemble predictands and deterministic observation       ###########
    ##############################################################################################################
    # lead time dependent RMSE of Bayesian LSTM forecast
    # bounds of the reference window for lead 0; adding the lead index i to
    # both bounds selects the reference values for lead i
    rmse_win_start, rmse_win_stop = -(test_len + step_lead), -step_lead
    rmse_truth = (xx, yy, zz)
    rmse_bounds = ((x_min, x_max), (y_min, y_max), (z_min, z_max))

    # RMSE of every ensemble member: [member, lead, variable]
    rmse_pred = np.zeros((ens, step_lead, 3), dtype=float)
    for i in range(step_lead):
        for n in range(ens):
            for v in range(3):
                low, high = rmse_bounds[v]
                # rescale the forecast with the saved minimum and maximum
                forecast = predictand[n,v,:,i]* (high - low) + low
                rmse_pred[n,i,v] = RMSE(rmse_truth[v][rmse_win_start+i:rmse_win_stop+i], forecast)

    # average over the ensemble members
    rmse_pred_mean = np.mean(rmse_pred, 0)

    # persistence: the value one step before the window serves as the
    # forecast for every lead
    rmse_persist = np.zeros((step_lead,3), dtype=float)
    for i in range(step_lead):
        for v in range(3):
            reference = rmse_truth[v]
            rmse_persist[i,v] = RMSE(reference[rmse_win_start+i:rmse_win_stop+i],
                                     reference[rmse_win_start-1:rmse_win_stop-1])

In [14]:
    ##############################################################################################################
    ###########       Compute Euclidean distance with given ensemble predictands and observation       ###########
    ##############################################################################################################
    # lead time dependent Euclidean distance of Bayesian forecast
    # bounds of the reference window for lead 0; adding the lead index i to
    # both bounds selects the reference values for lead i
    eud_win_start, eud_win_stop = -(test_len + step_lead), -step_lead

    # rescale the forecast with the saved minimum and maximum, one variable at a time
    predictand_pred_denorm = np.zeros(predictand.shape, dtype=float)
    for v, (low, high) in enumerate(((x_min, x_max), (y_min, y_max), (z_min, z_max))):
        predictand_pred_denorm[:,v,:,:] = predictand[:,v,:,:] * (high - low) + low

    # distance of every ensemble member: [member, lead]
    euclidean_pred_ens = np.zeros((ens, step_lead), dtype=float)
    for i in range(step_lead):
        reference = obs[:,eud_win_start+i:eud_win_stop+i]
        for n in range(ens):
            euclidean_pred_ens[n,i] = Euclidean(reference, predictand_pred_denorm[n,:,:,i])

    # average over the ensemble members
    euclidean_pred = np.mean(euclidean_pred_ens, 0)

    # persistence: the state one step before the window serves as the
    # forecast for every lead
    euclidean_persist = np.zeros(step_lead, dtype=float)
    for i in range(step_lead):
        euclidean_persist[i] = Euclidean(obs[:,eud_win_start+i:eud_win_stop+i],
                                         obs[:,eud_win_start-1:eud_win_stop-1])

In [15]:
    # quick look at the sizes and a few sample values
    print(test_len)
    print(step_lead)
    print(obs.shape)
    # ten reference values per variable starting at index 1300, which is
    # num_steps - test_len for the configuration above (1500 - 200)
    for component in obs[:,1300:1310]:
        print(component)
    print(predictand_pred_denorm.shape)
    # first member, variable x, first ten test steps at the first lead
    print(predictand_pred_denorm[0,0,:10,0])
    print(euclidean_pred[:10])
    print(euclidean_pred_ens[0,:10])

200
16
(3, 1516)
[-0.25759871 -0.23993657 -0.22156954 -0.20256376 -0.18298195 -0.16288388
 -0.14232691 -0.12136624 -0.10005529 -0.07844587]
[0.91284143 0.89428431 0.87667303 0.86020739 0.84504902 0.83132377
 0.81912426 0.8085127  0.7995236  0.79216639]
[-0.18303241 -0.20668892 -0.22673568 -0.24338578 -0.25684898 -0.2673274
 -0.27501191 -0.28007951 -0.28269133 -0.28299137]
(20, 3, 200, 16)
[-0.28307041 -0.28092557 -0.2490492  -0.23326942 -0.21755215 -0.1919341
 -0.17975997 -0.16509527 -0.1286868  -0.10881816]
[0.07988185 0.15048682 0.22140164 0.29020124 0.35683655 0.42107154
 0.482868   0.54221145 0.59901891 0.65393392]
[0.07885801 0.1491775  0.22040428 0.28929625 0.35510028 0.41895215
 0.48073745 0.53996763 0.59690448 0.65103031]


In [17]:
    # Write CSV files
    # newline="" hands line-ending control to the csv module; without it each
    # row is followed by an empty line on Windows.
    with open(os.path.join(output_path, "RMSE_perturb.csv"), "w", newline="") as fp:
        writer = csv.writer(fp, delimiter=",")
        # perturbation experiments: a header row followed by the RMSE of x, y, z
        for n in range(len(x_init_pert)):
            writer.writerow(["x", "y", "z", "perturb x by {}".format(x_pool_per[n])])  # write header
            writer.writerow(rmse_pert_x[n,:].T)
        for n in range(len(a_init_pert)):
            writer.writerow(["x", "y", "z", "perturb a by {}".format(a_pool_per[n])])  # write header
            writer.writerow(rmse_pert_a[n,:].T)
        # lead-time dependent scores: a header, the lead indices, then one row
        # per variable in the order x, y, z. The arrays are [lead, variable],
        # so they are transposed and written with writerows; writerow would
        # put the text of a whole array into a single cell.
        writer.writerow(["x", "y", "z", "Forecast"])  # write header
        writer.writerow(np.arange(step_lead))  # write header
        writer.writerows(rmse_pred_mean.T)
        writer.writerow(["x", "y", "z", "Persistence"])  # write header
        writer.writerow(np.arange(step_lead))  # write header
        writer.writerows(rmse_persist.T)

    with open(os.path.join(output_path, "EuclideanD_perturb.csv"), "w", newline="") as fp:
        writer = csv.writer(fp, delimiter=",")
        # perturbation experiments: a header row followed by the single distance
        for n in range(len(x_init_pert)):
            writer.writerow(["Euclidean", "perturb x by {}".format(x_pool_per[n])])  # write header
            writer.writerow([euclidean_pert_x[n]])
        for n in range(len(a_init_pert)):
            writer.writerow(["Euclidean", "perturb a by {}".format(a_pool_per[n])])  # write header
            writer.writerow([euclidean_pert_a[n]])
        # lead-time dependent distances: a header, the lead indices, the values
        writer.writerow(["Euclidean", "Forecast"])  # write header
        writer.writerow(np.arange(step_lead))  # write header
        writer.writerow(euclidean_pred)
        writer.writerow(["Euclidean", "Persistence"])  # write header
        writer.writerow(np.arange(step_lead))  # write header
        writer.writerow(euclidean_persist)