In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
# Basics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Data
import xarray as xr
import h5py
import pickle

# Helpful
import time
import datetime
import itertools
from itertools import product
from tqdm import tqdm

# My Methods
import importlib
from src.utils.CRPS import *
from src.utils.data_split import *
from src.models.EMOS import *
import data.raw.load_data_raw as ldr
import data.processed.load_data_processed as ldp

2023-05-20 14:19:02.969436: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/opt/anaconda3/lib/
2023-05-20 14:19:02.969456: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


<IPython.core.display.Javascript object>

### Goal of this notebook: Train global EMOS models
1. Train one EMOS model globally on the whole grid for each lead time (1...31) and each variable (0...4), for a total of 155 models.

#### 0. Basics

In [3]:
# Short names of the five variables; the list position is the variable index
# (0,...,4) used to address the per-variable data in the cells below.
var_names = [
    "u10",
    "v10",
    "t2m",
    "t850",
    "z500",
]

<IPython.core.display.Javascript object>

In [4]:
# Selection used by the single-model test cells further down:
#   var       -> variable index, 0,...,4
#   lead_time -> lead-time index, 0,...,30
var, lead_time = 0, 0

<IPython.core.display.Javascript object>

#### 1. Load Dataset

In [5]:
# Load the processed train and test datasets through the project loader module
# (imported above as `ldp`).
# NOTE(review): presumably already normalized, judging by the loader names — confirm.
dat_train_proc = ldp.load_data_all_train_proc_norm()
dat_test_proc = ldp.load_data_all_test_proc_norm()

<IPython.core.display.Javascript object>

#### 2. Data Split

In [6]:
# Split each dataset into features (X) and targets (y). The results are indexed
# as [var][lead_time] in the cells below.
# NOTE(review): the test features are named `X_test_lead_all` (no `_var_`), unlike
# the other three names; later cells rely on this exact name.
X_train_var_lead_all, y_train_var_lead_all = split_var_lead(dat_train_proc)
X_test_lead_all, y_test_var_lead_all = split_var_lead(dat_test_proc)

<IPython.core.display.Javascript object>

#### 3. Load Baseline Scores

In [7]:
# Load all scores:
# NOTE(review): pickle.load can execute arbitrary code from the file — only use
# it on files from a trusted source.
# NOTE(review): absolute path; this cell only runs on a machine where
# /Data/Delong_BA_Data is available.
with open(
    "/Data/Delong_BA_Data/scores/crps_benchmark_scores/crps_var_lead_test.pkl", "rb"
) as f:
    crps_var_lead_test_scores = pickle.load(f)

<IPython.core.display.Javascript object>

In [8]:
# Load mean scores:
# NOTE(review): pickle.load can execute arbitrary code from the file — only use
# it on files from a trusted source.
# NOTE(review): absolute path; this cell only runs on a machine where
# /Data/Delong_BA_Data is available.
with open(
    "/Data/Delong_BA_Data/scores/crps_benchmark_scores/crps_var_lead_mean_test.pkl",
    "rb",
) as f:
    crps_var_lead_mean_test_scores = pickle.load(f)

<IPython.core.display.Javascript object>

#### 4. Train global EMOS

In [None]:
def main(
    batch_size=5000,
    epochs=5,
    lr=0.1,
    validation_split=0.2,
    n_lead_times=31,
    save_dir="/home/dchen/BA_CH_EN/models/EMOS_global_models",
):
    """Train and save one global EMOS model per (variable, lead time) pair.

    Iterates over every entry of the notebook-level ``var_names`` and over
    ``n_lead_times`` lead-time indices. For each pair a fresh network is built
    with ``build_EMOS_network_keras``, fitted on the flattened training data
    taken from the notebook-level ``X_train_var_lead_all`` /
    ``y_train_var_lead_all``, and written to
    ``<save_dir>/EMOS_glob_<var_name>_<lead_time>.h5``.

    Args:
        batch_size: Batch size forwarded to ``fit``.
        epochs: Number of epochs forwarded to ``fit``.
        lr: Learning rate forwarded to ``build_EMOS_network_keras``.
        validation_split: Validation fraction forwarded to ``fit``.
        n_lead_times: Number of lead-time indices (0-based) to train per variable.
        save_dir: Directory the ``.h5`` files are written to (no trailing slash).

    Returns:
        None. The trained models are only persisted to disk.
    """
    num = 0
    # With the five-entry var_names defined above this is the same as range(5).
    for var, var_name in enumerate(var_names):
        for lead_time in range(n_lead_times):
            start_time = time.time()
            num += 1
            # f-string instead of `str + int`, which raised TypeError.
            print(f"Iteration: {num}")

            features = X_train_var_lead_all[var][lead_time]
            targets = y_train_var_lead_all[var][lead_time]

            EMOS_glob = build_EMOS_network_keras(compile=True, lr=lr)
            EMOS_glob.fit(
                # Two model inputs: the mean_std=0 and mean_std=1 slices.
                # NOTE(review): presumably ensemble mean and std — confirm.
                [
                    features.isel(mean_std=0).values.flatten(),
                    features.isel(mean_std=1).values.flatten(),
                ],
                targets.values.flatten(),
                batch_size=batch_size,
                epochs=epochs,
                validation_split=validation_split,
            )
            # lead_time is an int, so it must be formatted, not concatenated.
            EMOS_glob.save(f"{save_dir}/EMOS_glob_{var_name}_{lead_time}.h5")

            # Printing out time
            time_difference = time.time() - start_time
            hours = int(time_difference // 3600)
            minutes = int((time_difference % 3600) // 60)
            seconds = int(time_difference % 60)
            formatted_time = f" Round {num} finished in:{hours} hours, {minutes} minutes, {seconds} seconds"
            # The message was previously built but never shown.
            print(formatted_time)

In [13]:
# Build one compiled EMOS network for the single-model test below.
# `lr` is not passed here, so the builder's own default applies.
EMOS_glob = build_EMOS_network_keras(compile=True)

2023-05-20 15:02:47.309205: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/opt/anaconda3/lib/
2023-05-20 15:02:47.309365: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/opt/anaconda3/lib/
2023-05-20 15:02:47.309481: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/opt/anaconda3/lib/
2023-05-20 15:02:47.309592: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcufft.so.10'; dlerror: libcufft.so.10: cannot open shared object file:

<IPython.core.display.Javascript object>

In [70]:
# Display-only: inspect the mean_std=0 slice of the training features for the
# currently selected (var, lead_time).
X_train_var_lead_all[var][lead_time].isel(mean_std=0)

<IPython.core.display.Javascript object>

In [106]:
# Fit the single EMOS model on the currently selected (var, lead_time) slice.
EMOS_glob.fit(
    # Two model inputs: the mean_std=0 and mean_std=1 slices, each flattened.
    [
        X_train_var_lead_all[var][lead_time].isel(mean_std=idx).values.flatten()
        for idx in (0, 1)
    ],
    y_train_var_lead_all[var][lead_time].values.flatten(),
    validation_split=0.2,
    epochs=5,
    batch_size=5000,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fc304473cd0>

<IPython.core.display.Javascript object>

In [108]:
# Predict on the test features for the selected (var, lead_time), feeding the
# same two flattened inputs (mean_std=0, mean_std=1) that were used for fitting.
preds = EMOS_glob.predict(
    [
        X_test_lead_all[var][lead_time].isel(mean_std=idx).values.flatten()
        for idx in (0, 1)
    ],
    verbose=1,
)



<IPython.core.display.Javascript object>

In [111]:
# Score the predictions against the flattened test targets with the project's
# crps_normal helper: column 0 of `preds` is passed as mu, column 1 as sigma.
crps_emos = crps_normal(
    mu=preds[:, 0],
    sigma=preds[:, 1],
    y=y_test_var_lead_all[var][lead_time].values.flatten(),
)

<IPython.core.display.Javascript object>

In [112]:
# Mean of the CRPS values computed above for the selected (var, lead_time).
crps_emos.mean()

0.010947947142388477

<IPython.core.display.Javascript object>