In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
# Basics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf

# Data
import xarray as xr
import h5py
import pickle

# Helpful
import time
import datetime
import itertools
from itertools import product
from tqdm import tqdm
import os

# My Methods
import importlib
from src.utils.CRPS import *
from src.utils.data_split import *
from src.models.EMOS import *
import data.raw.load_data_raw as ldr
import data.processed.load_data_processed as ldp
from src.models.EMOS_global.EMOS_global_load_models import *

2023-05-22 13:51:50.847096: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/opt/anaconda3/lib/
2023-05-22 13:51:50.847141: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


<IPython.core.display.Javascript object>

### Goal of this notebook: Train EMOS global
1. Train one EMOS model globally on the whole grid for each lead time (1...31) and each variable (0...4), for a total of 155 models.

#### 0. Basics

In [3]:
# The five variable names; list position presumably matches the `var` index (0...4) used below.
var_names = [
    "u10",
    "v10",
    "t2m",
    "t850",
    "z500",
]

<IPython.core.display.Javascript object>

In [4]:
# Interactive-testing knobs: choose which variable (0,...,4) and which lead time (0,...,30)
# the single-model cells further down operate on.
var, lead_time = 0, 0

<IPython.core.display.Javascript object>

#### 1. Load Dataset

In [5]:
# Load the train and test datasets through the project loader module `ldp`
# (per the loader names: processed + normalised data — TODO confirm what normalisation is applied).
dat_train_proc = ldp.load_data_all_train_proc_norm()
dat_test_proc = ldp.load_data_all_test_proc_norm()

<IPython.core.display.Javascript object>

#### 2. Data Split

In [6]:
# Split each dataset into features (X) and targets (y); the results are indexed as
# [var][lead_time] by the cells below.
# NOTE(review): the test features are bound to `X_test_lead_all` (no `_var_`), unlike the
# train counterpart; later cells depend on this exact spelling, so it is left unchanged.
X_train_var_lead_all, y_train_var_lead_all = split_var_lead(dat_train_proc)
X_test_lead_all, y_test_var_lead_all = split_var_lead(dat_test_proc)

<IPython.core.display.Javascript object>

In [None]:
# Quick look at the test features for var=0, lead_time=0 (full slice along four axes).
X_test_lead_all[0][0].loc[:,:,:,:]

#### 3. Load Baseline Scores

In [38]:
# Flatten the var=0 / lead_time=0 test features and targets into 1-D arrays.
# Index 0 / 1 along `mean_std` are bound to `mu` / `sigma` — presumably the ensemble
# mean and standard deviation; confirm against the data-processing code.
mu = X_test_lead_all[0][0].isel(mean_std=0).values.flatten()
sigma = X_test_lead_all[0][0].isel(mean_std=1).values.flatten()
y = y_test_var_lead_all[0][0].values.flatten()

<IPython.core.display.Javascript object>

In [39]:
from scipy.stats import norm
from scipy.special import erf


def crps_trunc(mu, sigma, y):
    """
    Mean CRPS of a truncated-normal forecast, evaluated with Keras/TensorFlow ops.

    The expression appears to match the closed-form CRPS of a normal distribution
    truncated at zero (Thorarinsdottir & Gneiting, 2010) — TODO confirm this is the
    intended reference.

    Args:
        mu: location parameter(s) of the forecast distribution
        sigma: scale parameter(s) of the forecast distribution
        y: observed value(s)
        (all three are combined element-wise; shapes must be compatible)
    Returns:
        Tensor holding the mean of the element-wise CRPS values
    """
    root_two = np.sqrt(2.0)

    # The scale is taken as sqrt(sigma^2), i.e. |sigma|, for the standardisation and the
    # leading factor; the mu / sigma ratios below use sigma as given.
    scale = K.sqrt(K.square(sigma))
    z = (y - mu) / scale

    # Standard normal pdf at z
    pdf_z = 1.0 / np.sqrt(2.0 * np.pi) * K.exp(-K.square(z) / 2.0)

    # Standard normal cdf at mu/sigma, at z, and at sqrt(2)*mu/sigma
    cdf_ms = 0.5 * (1.0 + tf.math.erf(mu / sigma / root_two))
    cdf_z = 0.5 * (1.0 + tf.math.erf(z / root_two))
    cdf_2ms = 0.5 * (1.0 + tf.math.erf(np.sqrt(2) * mu / sigma / root_two))

    bracket = (
        z * cdf_ms * (2.0 * cdf_z + cdf_ms - 2.0)
        + 2.0 * pdf_z * cdf_ms
        - 1.0 / np.sqrt(np.pi) * cdf_2ms
    )
    pointwise = scale / K.square(cdf_ms) * bracket
    return K.mean(pointwise)

<IPython.core.display.Javascript object>

In [15]:
# Load all scores:
# (benchmark CRPS scores on the test set, judging by the file name — TODO confirm contents)
# NOTE(review): pickle.load can execute arbitrary code from the file; only load files
# produced by this project.
with open(
    "/Data/Delong_BA_Data/scores/crps_benchmark_scores/crps_var_lead_test.pkl", "rb"
) as f:
    crps_var_lead_test_scores = pickle.load(f)

<IPython.core.display.Javascript object>

In [16]:
# Load mean scores:
# (mean benchmark CRPS scores on the test set, judging by the file name — TODO confirm contents)
# NOTE(review): pickle.load can execute arbitrary code from the file; only load files
# produced by this project.
with open(
    "/Data/Delong_BA_Data/scores/crps_benchmark_scores/crps_var_lead_mean_test.pkl",
    "rb",
) as f:
    crps_var_lead_mean_test_scores = pickle.load(f)

<IPython.core.display.Javascript object>

#### 4. Load EMOS Models

In [28]:
# Load the global EMOS models via the project helper; the result is indexed as
# [var][lead_time] by the cells below.
EMOS_global_var_lead_models = EMOS_global_load_models()

<IPython.core.display.Javascript object>

#### 5. Use models to make predictions

In [None]:
def EMOS_global_predict():
    """
    Make predictions on the test dataset with the globally trained EMOS models,
    one model per variable and lead time.

    Args:
        None
    Returns:
        nested_list: 5x31, predictions on the test dataset; the outer index is the
        variable and the inner index is the lead_time of the model that produced them
    """
    n_vars, n_lead_times = 5, 31

    # 1. Load dataset
    dat_test_proc = ldp.load_data_all_test_proc_norm()
    # 2. Split dataset (the targets are not needed for prediction)
    X_test_lead_all, _ = split_var_lead(dat_test_proc)
    # 3. Load trained models
    EMOS_global_var_lead_models = EMOS_global_load_models()
    # 4. Predict based on trained models
    EMOS_global_var_lead_preds = [[] for _ in range(n_vars)]
    for var in range(n_vars):
        for lead_time in range(n_lead_times):
            # Running counter over all (var, lead_time) pairs, 0...154
            print("Round:" + str(var * n_lead_times + lead_time))
            features = X_test_lead_all[var][lead_time]
            preds = EMOS_global_var_lead_models[var][lead_time].predict(
                [
                    features.isel(mean_std=0).values.flatten(),
                    features.isel(mean_std=1).values.flatten(),
                ],
                verbose=1,
            )
            # Fix: the original appended the undefined name `pred`, which raised a
            # NameError on the very first iteration.
            EMOS_global_var_lead_preds[var].append(preds)
    return EMOS_global_var_lead_preds

In [46]:
def main():
    """
    Compute and save the mean scores of EMOS_global.

    Loads and splits the test dataset, gets the predictions of every model from
    EMOS_global_predict(), scores them against the test targets with
    crps_var_lead_preds, takes the mean score per variable and lead time, and
    pickles the resulting 5x31 nested list.

    Returns:
        None. Side effect: writes EMOS_global_mean_scores.pkl.
    """
    # 1. Load dataset
    dat_test_proc = ldp.load_data_all_test_proc_norm()
    # 2. Split dataset (only the targets are used here; EMOS_global_predict loads
    #    the features itself)
    _, y_test_var_lead_all = split_var_lead(dat_test_proc)
    # 3. Get predictions
    EMOS_global_var_lead_preds = EMOS_global_predict()
    # 4. Calculate all mean scores
    EMOS_global_scores = crps_var_lead_preds(
        EMOS_global_var_lead_preds, y_test_var_lead_all
    )
    EMOS_global_mean_score = [
        [EMOS_global_scores[var][lead_time].mean() for lead_time in range(31)]
        for var in range(5)
    ]
    # 5. Pickle EMOS Global Mean score
    # Fix: the original dumped `crps_var_lead_mean_test` (a name not defined in this
    # function) instead of the scores computed above. The `with` block closes the
    # file, so the trailing f.close() was dropped.
    with open(
        "/Data/Delong_BA_Data/scores/EMOS_global_scores/EMOS_global_mean_scores.pkl",
        "wb",
    ) as f:  # binary mode, as required by pickle
        pickle.dump(EMOS_global_mean_score, f)  # serialize the nested list

<IPython.core.display.Javascript object>

In [43]:
# Inspect the test targets for var=0, lead_time=0.
y_test_var_lead_all[0][0]

<IPython.core.display.Javascript object>

In [33]:
# Predict with the single model selected by `var` / `lead_time`; the two model inputs are the
# flattened `mean_std=0` and `mean_std=1` slices of the matching test features.
preds = EMOS_global_var_lead_models[var][lead_time].predict(
    [
        X_test_lead_all[var][lead_time].isel(mean_std=0).values.flatten(),
        X_test_lead_all[var][lead_time].isel(mean_std=1).values.flatten(),
    ],
    verbose=1,
)



<IPython.core.display.Javascript object>

In [41]:
# Display the predictions: one row per sample with two columns; column 0 is used as mu and
# column 1 as sigma when scoring below.
preds

array([[-3.6209137 ,  0.09500542],
       [-3.5981712 ,  0.10037337],
       [-3.5486622 ,  0.10353722],
       ...,
       [ 1.0502589 ,  0.17530304],
       [ 1.0290506 ,  0.18566719],
       [ 0.9829548 ,  0.19252296]], dtype=float32)

<IPython.core.display.Javascript object>

In [37]:
# Persist the predictions of the single (var, lead_time) model above as a .npy file.
np.save("/Data/Delong_BA_Data/preds/EMOS_global_preds/test.npy", preds)

<IPython.core.display.Javascript object>

In [111]:
# Score the predictions against the flattened test targets with `crps_normal`
# (not defined in this notebook — presumably provided by the star-imported src.utils.CRPS; confirm).
crps_emos = crps_normal(
    mu=preds[:, 0],
    sigma=preds[:, 1],
    y=y_test_var_lead_all[var][lead_time].values.flatten(),
)

<IPython.core.display.Javascript object>

In [112]:
# Mean of the CRPS values computed above for the selected (var, lead_time) model.
crps_emos.mean()

0.010947947142388477

<IPython.core.display.Javascript object>