In [139]:
%load_ext nb_black

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [170]:
# Basics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

#Data
import xarray as xr
import h5py

#Helpful
import itertools
from itertools import product

#My Methods
import importlib
import CRPS
import EMOS
from CRPS import *
from EMOS import *

<IPython.core.display.Javascript object>

### Notebook Content: 
1. Make smaller DataArray for one variable, with ensemble mean and variance
2. Pandas Dataframe of Variance, on first coordinate on Forecast_Date 1
3. Test implementation EMOS on one coordinate for one lead time for one year
4. Global mean and stds

### Notes on the Dataset
What must be inside: longitude, latitude, forecast_variable, ensemble, lead_time, forecast_time<br>

0. phony_dim_0: 357 -> Forecast_date
1. phony_dim_1: 32 -> Lead_time (6 * 1,...,32)
2. phony_dim_2: 5 -> Different forecasted variables, {"u10":, "v10":1, "t2m":2, "t850":3, "z500":4}
3. phony_dim_3: 120 -> latitude
4. phony_dim_4: 130 -> longitude
5. phony_dim_5: 51 -> Ensemble Forecast

6. ground_truth: ERA 5 dataset, does not need an ensemble -> 5 = ensemble
7. predictions: 

8. Variables: <br>
    8.1 u10: zonal wind velocity 10m from the surface <br>
    8.2 v10: meridonal wind velocity 10m from the surface <br>
    8.3 t2m: Temperature at 2m from the surface <br>
    8.4 t850: Temperature at 850m from the surface <br>
    8.5 z500: geopotential at 500m from the surface

In [55]:
# Test things out with data for 2018
path = "/mnt/sda/Data2/fourcastnet/data/predictions/ensemble_2018.h5"
dat_2018 = xr.open_dataset(path,)
preds_2018 = dat_2018.predictions.isel(
    phony_dim_5=slice(1, 51)
)  # Only select 50 ensemble, as first one is control
truth_2018 = dat_2018.ground_truth

<IPython.core.display.Javascript object>

#### 1. Make smaller DataArray for one variable, with ensemble mean and variance

In [56]:
start_time = time.time()
ens_mean_2018 = preds_2018.isel(phony_dim_2=2).mean(dim="phony_dim_5")
ens_std_2018 = preds_2018.isel(phony_dim_2=2).std(dim="phony_dim_5")
end_time = time.time()
print(f"Elapsed time: {start_time - end_time} seconds")

Elapsed time: -513.880823135376 seconds


<IPython.core.display.Javascript object>

In [57]:
ens_mean_std_u10_2018 = xr.concat([ens_mean_2018, ens_std_2018], dim="mean_std")

<IPython.core.display.Javascript object>

In [58]:
ens_mean_std_u10_2018 = ens_mean_std_u10_2018.transpose(
    "phony_dim_0", "phony_dim_1", "phony_dim_3", "phony_dim_4", "mean_std"
)

<IPython.core.display.Javascript object>

In [59]:
print(f"{ens_mean_std_u10_2018.nbytes / 10 ** 9} GB")

1.4257152 GB


<IPython.core.display.Javascript object>

In [60]:
# How much GB is to be expected
print(f"{(ens_mean_std_u10_2018.nbytes / 10 ** 9)* 25} GB")

35.64288 GB


<IPython.core.display.Javascript object>

#### 2. Pandas Dataframe of Variance, on first coordinate on Forecast_Date 1

In [61]:
std_u10_day_1 = ens_mean_std_u10_2018.isel(
    phony_dim_0=0, phony_dim_1=0, mean_std=1
)

<IPython.core.display.Javascript object>

In [62]:
df_std_u10 = std_u10_day_1.to_pandas()

<IPython.core.display.Javascript object>

In [63]:
df_std_u10.rename_axis(columns={"phony_dim_4": "lon"}, inplace=True)
df_std_u10.rename_axis(index={"phony_dim_3": "lat"}, inplace=True)

<IPython.core.display.Javascript object>

#### 3. Test implementation EMOS on one coordinate for one lead time for one year

In [75]:
ens_mean_std_loc = ens_mean_std_u10_2018.isel(
    phony_dim_3=0, phony_dim_4=0, phony_dim_1=7
)
y = truth_2018.isel(phony_dim_3=0, phony_dim_4=0, phony_dim_1=7, phony_dim_2=2)
ens_mean_std_loc_mean = ens_mean_std_loc.isel(mean_std=0)
ens_mean_std_loc_std = ens_mean_std_loc.isel(mean_std=1)

<IPython.core.display.Javascript object>

In [76]:
df_mean_std = ens_mean_std_loc.to_pandas()

<IPython.core.display.Javascript object>

In [77]:
df_mean_std.rename_axis(index={"phony_dim_0": ""}, inplace=True)
df_mean_std.rename(columns={0: "mean", 1: "std"}, inplace=True)

<IPython.core.display.Javascript object>

##### 3.1 Baseline Model

In [118]:
crps_baseline = crps_normal(
    mu=df_mean_std["mean"], sigma=df_mean_std["std"], y=y
)

<IPython.core.display.Javascript object>

##### 3.2 EMOS Model

In [173]:
EMOS_model = build_EMOS_network_keras(compile=True)

<IPython.core.display.Javascript object>

In [175]:
EMOS_model.summary()

Model: "model_12"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_25 (InputLayer)          [(None, 1)]          0           []                               
                                                                                                  
 input_26 (InputLayer)          [(None, 1)]          0           []                               
                                                                                                  
 dense_24 (Dense)               (None, 1)            2           ['input_25[0][0]']               
                                                                                                  
 dense_25 (Dense)               (None, 1)            2           ['input_26[0][0]']               
                                                                                           

<IPython.core.display.Javascript object>

In [176]:
EMOS_model.fit([df_mean_std["mean"], df_mean_std["std"]], y, epochs=10)





Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7faca02a25e0>

<IPython.core.display.Javascript object>

In [178]:
EMOS_model

<keras.engine.functional.Functional at 0x7faca023db20>

<IPython.core.display.Javascript object>

#### 4. Global mean and stds
1. Variable Index Positions:<br>
    u10: 0 <br>
    v10: 1 <br>
    t2m: 2 <br>
    t850: 5 <br>
    z500: 14 <br>

In [4]:
path_global_means = "/mnt/sda/Data2/fourcastnet/data/stats_v0/global_means.npy"
path_global_stds = "/mnt/sda/Data2/fourcastnet/data/stats_v0/global_stds.npy"

<IPython.core.display.Javascript object>

In [17]:
global_means = np.load(path_global_means).flatten()
global_stds = np.load(path_global_stds).flatten()

<IPython.core.display.Javascript object>