In [1]:
# Load the nb_black notebook extension before any other cell runs.
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
# Basics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf

# Data
import xarray as xr
import h5py
import pickle

# Helpful
import time
import datetime
import itertools
from itertools import product
from tqdm import tqdm
import os

# My Methods
import importlib
from src.utils.CRPS import *
from src.utils.data_split import *
from src.models.EMOS import *
import data.raw.load_data_raw as ldr
import data.processed.load_data_processed as ldp
from src.models.EMOS_global.EMOS_global_load_models import *

2023-05-24 12:19:19.598340: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/opt/anaconda3/lib/
2023-05-24 12:19:19.598368: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


<IPython.core.display.Javascript object>

## Goal: Make ws10 feature

### 0. Basics

In [3]:
# Names of the forecast variables; the list position is the index used along
# the "var" dimension elsewhere in this notebook (0 -> u10, 1 -> v10, ...).
var_names = [
    "u10",
    "v10",
    "t2m",
    "t850",
    "z500",
]

<IPython.core.display.Javascript object>

In [4]:
# Integer indices used to pick a single sample while experimenting:
# which variable, lead time, forecast date and grid point to look at.
var = 0  # index into var_names: 0,...,4
lead_time = 1  # 0,...,30
forecast_date = 0  # index of the forecast date within a year
lat = 0  # latitude index of the grid point
lon = 0  # longitude index of the grid point

<IPython.core.display.Javascript object>

### 1. Load Dataset

In [5]:
# Raw data: one entry per year.
dat_raw = ldr.load_data_raw()  # list length 5 with 2018 - 2022

# Processed data (train and test splits); indexed per variable below,
# e.g. dat_train_proc_norm[0].u10_train.
# NOTE(review): the "_norm" suffix suggests these are normalized -- confirm in ldp.
dat_train_proc_norm = ldp.load_data_all_train_proc_norm()
dat_test_proc_norm = ldp.load_data_all_test_proc_norm()

<IPython.core.display.Javascript object>

### 2. Test whether the mean is enough, or whether everything has to be done from scratch

In [6]:
# Shared indexer for the single forecast date / lead time / grid point that
# every quantity below is extracted at (values come from the config cell).
point_sel = dict(forecast_date=forecast_date, lead_time=lead_time, lat=lat, lon=lon)

# Raw ensemble forecasts for u10 (var=0) and v10 (var=1).
# The selection goes through `.predictions`, as in the other cells: calling
# `.values` on the year-level container itself would not return the array
# if that container is an xarray Dataset (there `.values` is the mapping method).
u10_test_ens = dat_raw[0].predictions.isel(var=0, **point_sel).values
v10_test_ens = dat_raw[0].predictions.isel(var=1, **point_sel).values

# Processed ensemble mean (mean_std=0) at the same point.
u10_test_mean = dat_train_proc_norm[0].u10_train.isel(mean_std=0, **point_sel).values
v10_test_mean = dat_train_proc_norm[1].v10_train.isel(mean_std=0, **point_sel).values

# Processed ensemble standard deviation (mean_std=1) at the same point.
u10_test_std = dat_train_proc_norm[0].u10_train.isel(mean_std=1, **point_sel).values
v10_test_std = dat_train_proc_norm[1].v10_train.isel(mean_std=1, **point_sel).values

<IPython.core.display.Javascript object>

### 3. From raw data make ws10

In [9]:
# Shape of one (forecast_date, lead_time, var) slice of the first year's
# predictions, i.e. the dimensions that remain after fixing those three.
dat_raw[0].predictions.isel(forecast_date=0, lead_time=0, var=0).shape

(50, 120, 130)

<IPython.core.display.Javascript object>

In [10]:
# Magnitude of the (var=0, var=1) = (u10, v10) prediction components of the
# first year, at lead_time=0 and for all forecast dates.
# NOTE(review): section 4 denormalizes the components before taking the
# magnitude; this cell does not -- confirm that is intended here.
ws10_2018 = np.hypot(
    dat_raw[0].predictions.isel(lead_time=0, var=0),
    dat_raw[0].predictions.isel(lead_time=0, var=1),
)

<IPython.core.display.Javascript object>

In [11]:
# Magnitude of the (var=0, var=1) = (u10, v10) ground-truth components of the
# first year, for forecast_date=0.
# NOTE(review): the prediction cell above fixes lead_time=0 while this one
# fixes forecast_date=0, so the two results cover different slices -- confirm.
ws10_2018_truth = np.hypot(
    dat_raw[0].ground_truth.isel(forecast_date=0, var=0),
    dat_raw[0].ground_truth.isel(forecast_date=0, var=1),
)

<IPython.core.display.Javascript object>

In [13]:
# Number of yearly entries selected by the [0:4] slice. Reuses the list that
# was already loaded into `dat_raw` instead of loading the raw data again.
len(dat_raw[0:4])

4

<IPython.core.display.Javascript object>

### 4. Make denormalized ws10 data

In [25]:
def denormalize(mean, std, x):
    """Undo a standardization by scaling ``x`` with ``std`` and shifting by ``mean``.

    Args:
        mean: Offset that is added back.
        std: Scale factor that ``x`` is multiplied with.
        x: Standardized value(s); anything supporting ``*`` and ``+``.

    Returns:
        ``x * std + mean``.
    """
    return x * std + mean


def make_ws10_train_denormed(
    path="/Data/Delong_BA_Data/mean_ens_std_denorm/ws10_train_denorm.h5",
):
    """Write denormalized ws10 (wind speed) training data to an h5 file.

    For each of the first four yearly entries returned by
    ``ldr.load_data_raw()`` and each forecast date in that year, the u10
    (var=0) and v10 (var=1) fields are denormalized with the global
    means/stds and combined with ``np.hypot``. Two datasets are written:

    * ``"ws10_train"``: ensemble mean and standard deviation of the predicted
      wind speed, laid out as (forecast_date, lead_time, lat, lon, mean_std),
      with mean at ``mean_std=0`` and std at ``mean_std=1``.
    * ``"ws10_truth"``: the ground-truth wind speed, laid out as
      (forecast_date, <ground-truth dims in their stored order>).

    All years are appended along the first axis, in year order. Existing
    datasets with these names in the file are replaced.

    Args:
        path: Location of the h5 file to create or update (opened in "a" mode).

    Returns:
        None. The result is written to ``path``.
    """
    name_train = "ws10_train"
    name_truth = "ws10_truth"

    # Global means and stds, restricted to the five variables used here.
    means = np.load(
        "/mnt/sda/Data2/fourcastnet/data/stats_v0/global_means.npy"
    ).flatten()[[0, 1, 2, 5, 14]]
    stds = np.load(
        "/mnt/sda/Data2/fourcastnet/data/stats_v0/global_stds.npy"
    ).flatten()[[0, 1, 2, 5, 14]]

    # Load the raw data once instead of once per year.
    dat_raw_all = ldr.load_data_raw()

    # The context manager closes the file even if an error occurs midway.
    with h5py.File(path, "a") as f:
        # Drop stale datasets once, up front. Doing this inside the year loop
        # would throw away every year except the last one.
        for name in (name_train, name_truth):
            if name in f:
                del f[name]

        train = None
        truth = None
        n_written = 0  # number of forecast dates already stored from earlier years

        for year in tqdm(range(4)):
            predictions = dat_raw_all[year].predictions
            ground_truth = dat_raw_all[year].ground_truth
            # Look the size up by dimension name rather than assuming axis 0.
            n_days = predictions.sizes["forecast_date"]

            if train is None:
                # Per-date block shapes must match what is written below:
                # the train block has no ensemble axis but gains a trailing
                # mean_std axis of length 2.
                train_block_shape = tuple(
                    predictions.sizes[dim] for dim in ("lead_time", "lat", "lon")
                ) + (2,)
                truth_block_shape = ground_truth.isel(var=0, forecast_date=0).shape

                # maxshape=None on the first axis lets later years be appended.
                train = f.create_dataset(
                    name_train,
                    shape=(n_days, *train_block_shape),
                    maxshape=(None, *train_block_shape),
                    dtype=np.float32,
                    compression="gzip",
                    compression_opts=9,
                )
                truth = f.create_dataset(
                    name_truth,
                    shape=(n_days, *truth_block_shape),
                    maxshape=(None, *truth_block_shape),
                    dtype=np.float32,
                    compression="gzip",
                    compression_opts=9,
                )
            else:
                # Make room for this year's forecast dates.
                train.resize(n_written + n_days, axis=0)
                truth.resize(n_written + n_days, axis=0)

            for forecast_date in tqdm(range(n_days)):
                pred_date = predictions.isel(forecast_date=forecast_date)
                truth_date = ground_truth.isel(forecast_date=forecast_date)

                # Wind speed = magnitude of the denormalized (u10, v10) vector.
                # u10 is var=0 with means[0]/stds[0], v10 is var=1 with
                # means[1]/stds[1], for predictions and ground truth alike.
                ws10_pred = np.hypot(
                    denormalize(means[0], stds[0], pred_date.isel(var=0)),
                    denormalize(means[1], stds[1], pred_date.isel(var=1)),
                )
                ws10_tru = np.hypot(
                    denormalize(means[0], stds[0], truth_date.isel(var=0)),
                    denormalize(means[1], stds[1], truth_date.isel(var=1)),
                )

                # Collapse the ensemble into mean and standard deviation and
                # stack them along a new trailing 'mean_std' dimension.
                ws_train = xr.concat(
                    [ws10_pred.mean(dim="ens"), ws10_pred.std(dim="ens")],
                    dim="mean_std",
                ).transpose("lead_time", "lat", "lon", "mean_std")

                # Store this date behind the dates of the previous years.
                row = n_written + forecast_date
                train[row, ...] = ws_train.values
                truth[row, ...] = ws10_tru.values

            n_written += n_days


<IPython.core.display.Javascript object>