In [5]:
%load_ext nb_black

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [8]:
# Basics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Data
import xarray as xr
import h5py

# Helpful
import time
import datetime
import itertools
from itertools import product

# My Methods
import importlib
from src.utils.CRPS import *
from src.models.EMOS import *
import data.raw.load_data_raw as ldr
import data.processed.load_data_processed as ldp

<IPython.core.display.Javascript object>

### Goal of this notebook: Train global EMOS models
1. Train one EMOS model globally on the whole grid for each lead time (1...31) and each variable (0...4), for a total of 155 models.

#### 0. Basics

In [16]:
# Short names of the five variables. A name's position in this list is used as
# the variable index (0..4) when indexing the loaded datasets further down.
var_names = ["u10", "v10", "t2m", "t850", "z500"]

<IPython.core.display.Javascript object>

#### 1. Load Dataset

In [13]:
# Load the processed train and test data through the project loader module (ldp).
# Both results are indexed by variable position (0..4) in the cells below.
# NOTE(review): the "_norm" suffix suggests the data are normalised - confirm in ldp.
dat_train_proc = ldp.load_data_all_train_proc_norm()
dat_test_proc = ldp.load_data_all_test_proc_norm()

<IPython.core.display.Javascript object>

#### 2. Data Split

In [74]:
def X_lead_all(dat, lead_times=range(1, 32)):
    """
    Split every variable's dataset into per-lead-time X and y slices.

    Args:
        dat (list): One dataset per variable (five in this notebook). Each
            dataset must expose ``data_vars``; its first entry is used as the
            X values (predictions) and its second entry as the y values
            (truth), so the split relies on that ordering.
        lead_times (iterable of int): Positional ``lead_time`` indices that
            are extracted with ``isel``. Defaults to 1..31.

    Returns:
        list: Nested list of length len(dat) x len(lead_times) (5 x 31 by
            default) with the X slice of every variable and lead time.
        list: Nested list of the same layout with the y slices.
    """
    # Materialise once so a one-shot iterable can be reused for every variable.
    lead_times = list(lead_times)

    dat_X_lead_all = []
    dat_y_lead_all = []
    for var in range(len(dat)):
        dataset = dat[var]
        # The data-variable names do not depend on the lead time, so resolve
        # them once per dataset instead of once per (variable, lead) pair.
        names = list(dataset.data_vars.keys())
        x_name, y_name = names[0], names[1]
        dat_X_lead_all.append(
            [dataset[x_name].isel(lead_time=lead) for lead in lead_times]
        )
        dat_y_lead_all.append(
            [dataset[y_name].isel(lead_time=lead) for lead in lead_times]
        )
    return dat_X_lead_all, dat_y_lead_all

<IPython.core.display.Javascript object>

In [75]:
# Split train and test data into nested [variable][lead] lists of X and y slices.
# NOTE(review): the test X list is named without "_var", unlike the other three;
# the name is kept because later cells reference it.
X_train_var_lead_all, y_train_var_lead_all = X_lead_all(dat_train_proc)
X_test_lead_all, y_test_var_lead_all = X_lead_all(dat_test_proc)

<IPython.core.display.Javascript object>

In [80]:
# Display the training y slice for variable index 0 at list position 0
# (the first extracted lead time).
y_train_var_lead_all[0][0]

<IPython.core.display.Javascript object>

In [78]:
# Display the test X slice for variable index 0 at list position 0
# (the first extracted lead time).
X_test_lead_all[0][0]

<IPython.core.display.Javascript object>

In [32]:
# Split the training data by variable and lead time: for each of the five
# variables, collect the 31 slices of its "<name>_train" field taken at the
# positional lead_time indices 1..31.
dat_train_lead_all = [[] for _ in range(5)]  # one list(31) per variable
for lead in range(1, 32):
    for var in range(5):
        dat_train_lead_all[var].append(
            dat_train_proc[var][var_names[var] + "_train"].isel(lead_time=lead)
        )

# Expose the per-variable lists under their own names; these are the very same
# list objects that dat_train_lead_all holds.
(
    dat_train_lead_u10,
    dat_train_lead_v10,
    dat_train_lead_t2m,
    dat_train_lead_t850,
    dat_train_lead_z500,
) = dat_train_lead_all

<IPython.core.display.Javascript object>

In [None]:
# NOTE(review): this cell repeats the preceding train-split cell verbatim and
# was never executed; one of the two can be dropped.
# For each of the five variables, collect the 31 slices of its "<name>_train"
# field taken at the positional lead_time indices 1..31.
dat_train_lead_all = [[] for _ in range(5)]  # one list(31) per variable
for lead in range(1, 32):
    for var in range(5):
        dat_train_lead_all[var].append(
            dat_train_proc[var][var_names[var] + "_train"].isel(lead_time=lead)
        )

# Expose the per-variable lists under their own names; these are the very same
# list objects that dat_train_lead_all holds.
(
    dat_train_lead_u10,
    dat_train_lead_v10,
    dat_train_lead_t2m,
    dat_train_lead_t850,
    dat_train_lead_z500,
) = dat_train_lead_all