In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
# Basics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import keras.backend as K
import tensorflow as tf

# Data
import xarray as xr
import h5py
import pickle

# Helpful
import time
import datetime
import itertools
from itertools import product
from tqdm import tqdm
import os

# My Methods
import importlib
from src.utils.CRPS import *
from src.utils.data_split import *
from src.models.EMOS import *
import data.raw.load_data_raw as ldr
import data.processed.load_data_processed as ldp
from src.models.EMOS_global.EMOS_global_load_models import *

2023-05-24 12:19:19.598340: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/opt/anaconda3/lib/
2023-05-24 12:19:19.598368: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


<IPython.core.display.Javascript object>

## Goal: Build the ws10 (wind speed) feature from u10 and v10

### 0. Basics

In [3]:
# Names of the five forecast variables. The list order is taken to match the
# integer `var` index used with `.isel(var=...)` below (0 -> u10, 1 -> v10)
# — TODO confirm against the loader.
var_names = ["u10", "v10", "t2m", "t850", "z500"]

<IPython.core.display.Javascript object>

In [4]:
# Variables to make testing easier --> determines variable and lead_time
# All values are positional indices passed to `.isel(...)` in the spot check of
# section 2, i.e. they pick one forecast at one grid point.
# NOTE(review): `var` is not referenced by the cells visible in this notebook;
# the spot check hard-codes var=0 and var=1 instead.
var = 0  # 0,...,4
lead_time = 1  # 0,...,30
forecast_date = 0  # index along the forecast_date axis
lat = 0  # index along the lat axis
lon = 0  # index along the lon axis

<IPython.core.display.Javascript object>

### 1. Load Dataset

In [5]:
# Raw data: a list of per-year datasets; each entry exposes `predictions` and
# `ground_truth` (both are accessed further down in this notebook).
dat_raw = ldr.load_data_raw()  # list length 5 with 2018 - 2022

# processed data
# Indexed per variable below: entry 0 exposes `u10_train`, entry 1 `v10_train`.
# NOTE(review): `dat_test_proc_norm` is not used in the cells visible here.
dat_train_proc_norm = ldp.load_data_all_train_proc_norm()
dat_test_proc_norm = ldp.load_data_all_test_proc_norm()

<IPython.core.display.Javascript object>

### 2. Test whether the processed ensemble mean is enough, or whether ws10 has to be computed from scratch

In [6]:
# One shared positional indexer for the spot check, so that all six extractions
# are guaranteed to look at the same forecast and the same grid point.
test_point = dict(forecast_date=forecast_date, lead_time=lead_time, lat=lat, lon=lon)

# Raw ensemble members at the test point.
# The `predictions` variable has to be selected explicitly: `dat_raw[0]` is a
# Dataset, and `.values` on a Dataset is the dict-style `values()` method, not a
# numpy array. Only a DataArray's `.values` returns the underlying data.
u10_test_ens = dat_raw[0].predictions.isel(**test_point, var=0).values
v10_test_ens = dat_raw[0].predictions.isel(**test_point, var=1).values

# Processed statistics at the same point. Index 0 of `mean_std` is read as the
# ensemble mean and index 1 as the ensemble standard deviation.
u10_test_mean = dat_train_proc_norm[0].u10_train.isel(**test_point, mean_std=0).values
v10_test_mean = dat_train_proc_norm[1].v10_train.isel(**test_point, mean_std=0).values
u10_test_std = dat_train_proc_norm[0].u10_train.isel(**test_point, mean_std=1).values
v10_test_std = dat_train_proc_norm[1].v10_train.isel(**test_point, mean_std=1).values

<IPython.core.display.Javascript object>

### 3. Compute ws10 from the raw data

In [8]:
# Inspect the raw `predictions` variable of the first year in the list.
dat_raw[0].predictions

<IPython.core.display.Javascript object>

In [9]:
# Shape left after fixing forecast_date, lead_time and var; the three remaining
# axes are presumably (ens, lat, lon) — TODO confirm the axis order.
dat_raw[0].predictions.isel(forecast_date=0, lead_time=0, var=0).shape

(50, 120, 130)

<IPython.core.display.Javascript object>

In [10]:
# Wind speed as the Euclidean norm of the two wind components, for the first
# year at lead_time index 0 (var 0 is taken to be u10, var 1 v10 — TODO confirm).
ws10_2018 = np.hypot(
    dat_raw[0].predictions.isel(lead_time=0, var=0),
    dat_raw[0].predictions.isel(lead_time=0, var=1),
)

<IPython.core.display.Javascript object>

In [11]:
# Same norm applied to the ground truth of the first year.
# NOTE(review): this fixes forecast_date=0, whereas `ws10_2018` fixes
# lead_time=0, so the two arrays are not the same slice — confirm this is intended.
ws10_2018_truth = np.hypot(
    dat_raw[0].ground_truth.isel(forecast_date=0, var=0),
    dat_raw[0].ground_truth.isel(forecast_date=0, var=1),
)

<IPython.core.display.Javascript object>

In [13]:
# Sanity check that slicing the first four entries yields four yearly datasets.
# NOTE(review): this calls the loader again instead of reusing `dat_raw`.
len(ldr.load_data_raw()[0:4])

4

<IPython.core.display.Javascript object>

In [22]:
# Inspect the predictions of the first forecast date of the first year.
# NOTE(review): this calls the loader again instead of reusing `dat_raw`.
ldr.load_data_raw()[0].isel(forecast_date=0).predictions

<IPython.core.display.Javascript object>

In [14]:
def create_test_data(
    path="/Data/Delong_BA_Data/Mean_ens_std/ws10_test.h5", year_index=4
):
    """
    Compute ws10 for one raw year and store its ensemble statistics in an h5 file.

    For every forecast date the wind speed is computed as hypot(var 0, var 1) for
    both the ensemble predictions and the ground truth. The predictions are then
    reduced over the ``ens`` dimension to a mean and a standard deviation.

    Two datasets are (re)created in the file:

    * ``ws10_test``: shape (n_days, lead_time, lat, lon, 2); the last axis holds
      [mean, std] in that order.
    * ``ws10_test_truth``: shape (n_days, *per-day ground-truth shape).

    Args:
        path: Location of the h5 file; opened in append mode, so other datasets
            already stored in the file are kept.
        year_index: Index into the list returned by ``ldr.load_data_raw()``;
            the default 4 is the entry used as the test year.

    Returns:
        None. The result is written to ``path``.
    """
    name_test = "ws10_test"
    name_truth = "ws10_test_truth"

    # Load raw data for the year following the train data years
    raw_year = ldr.load_data_raw()[year_index]
    predictions = raw_year.predictions
    ground_truth = raw_year.ground_truth
    n_days = predictions.sizes["forecast_date"]

    # Per-day layout of the stored statistics. The dataset shape must be derived
    # from this layout and not from the raw predictions, which still carry the
    # `ens` axis and have no `mean_std` axis.
    day_dims = ("lead_time", "lat", "lon", "mean_std")
    test_shape = (
        n_days,
        predictions.sizes["lead_time"],
        predictions.sizes["lat"],
        predictions.sizes["lon"],
        2,  # mean and std
    )
    truth_shape = (n_days, *ground_truth.isel(var=0, forecast_date=0).shape)

    # The context manager closes the file even if a day fails half-way through.
    with h5py.File(path, "a") as f:
        # Drop stale datasets so that the function can be re-run safely
        for name in (name_test, name_truth):
            if name in f:
                del f[name]

        test = f.create_dataset(
            name_test,
            test_shape,
            dtype=np.float32,
            compression="gzip",
            compression_opts=9,
        )
        truth = f.create_dataset(
            name_truth,
            truth_shape,
            dtype=np.float32,
            compression="gzip",
            compression_opts=9,
        )

        # Work one forecast date at a time to keep the memory footprint small
        for day in tqdm(range(n_days)):
            preds_day = predictions.isel(forecast_date=day)
            truth_day = ground_truth.isel(forecast_date=day)

            # Wind speed magnitude from the two wind components
            ws10_pred = np.hypot(preds_day.isel(var=0), preds_day.isel(var=1))
            ws10_tru = np.hypot(truth_day.isel(var=0), truth_day.isel(var=1))

            # Stack ensemble mean and std along a new trailing `mean_std` axis
            ws_test = xr.concat(
                [ws10_pred.mean(dim="ens"), ws10_pred.std(dim="ens")],
                dim="mean_std",
            ).transpose(*day_dims)

            test[day, ...] = ws_test.values
            truth[day, ...] = ws10_tru.values


<IPython.core.display.Javascript object>

In [48]:
# Length of the leading axis of the first year's predictions (used elsewhere in
# this notebook as the number of forecast days).
dat_raw[0].predictions.shape[0]

357

<IPython.core.display.Javascript object>

In [50]:
# Number of forecast dates in each of the first four yearly datasets.
# The loader is called once, and the comprehension uses a local name so the
# notebook-level `dat_raw` list is not overwritten by a single yearly dataset
# (that would break every later `dat_raw[0]` access).
n_days_shape = [
    year_data.predictions.shape[0] for year_data in ldr.load_data_raw()[:4]
]

<IPython.core.display.Javascript object>

In [51]:
# Display the per-year day counts collected above.
n_days_shape

[357, 357, 358, 357]

<IPython.core.display.Javascript object>

In [62]:
import dask.array as da

<IPython.core.display.Javascript object>

In [None]:
def maini():
    """
    Function to process and save wind speed prediction and ground truth data for test dataset.

    The wind speed is computed for the whole year at once as hypot(var 0, var 1)
    on the underlying arrays. The predictions are reduced over the ``ens``
    dimension to a mean and a standard deviation, and the result is written day
    by day to the h5 file.

    Datasets written (existing ones with the same name are replaced):

    * ``ws10_test``: (forecast_date, lead_time, lat, lon, mean_std), with
      mean_std ordered as [mean, std].
    * ``ws10_test_truth``: (n_days, n_lead_times, lat, lon), filled from the
      ground-truth wind speed in its native axis order.

    Returns:
        None. Prints the time needed to set up the computation.
    """

    # Initialize timer for performance tracking
    start_time = time.time()

    # Define path and file names for the h5 file to be created
    path = "/Data/Delong_BA_Data/Mean_ens_std/ws10_test.h5"
    name_test = "ws10_test"
    name_truth = "ws10_test_truth"

    # Load raw data for the year 2022
    dat_raw = ldr.load_data_raw()[4]
    preds_u = dat_raw.predictions.isel(var=0)
    preds_v = dat_raw.predictions.isel(var=1)
    truth_u = dat_raw.ground_truth.isel(var=0)
    truth_v = dat_raw.ground_truth.isel(var=1)

    # Compute the magnitude (absolute value) of wind speed predictions and truths.
    # The raw arrays are combined directly and then re-wrapped with the original
    # dims and coords.
    ws10_preds = xr.DataArray(
        da.hypot(preds_u.data, preds_v.data),
        dims=preds_u.dims,
        coords=preds_u.coords,
    )
    ws10_truth = xr.DataArray(
        da.hypot(truth_u.data, truth_v.data),
        dims=truth_u.dims,
        coords=truth_u.coords,
    )

    # Reduce the ensemble to its mean and standard deviation. These two arrays
    # were referenced but never defined before, which raised a NameError.
    ws10_preds_mean = ws10_preds.mean(dim="ens")
    ws10_preds_std = ws10_preds.std(dim="ens")

    # Concatenate mean and standard deviation data along new 'mean_std' dimension
    ws_test = xr.concat([ws10_preds_mean, ws10_preds_std], dim="mean_std")
    ws_test = ws_test.transpose("forecast_date", "lead_time", "lat", "lon", "mean_std")

    # Truth data
    y_test = ws10_truth

    # Extract shape of the data
    n_days, n_lead_times, n_lat, n_lon, n_stats = ws_test.shape

    # Calculate elapsed time and print it
    half_time = time.time()
    time_difference_half = half_time - start_time
    hours = int(time_difference_half // 3600)
    minutes = int((time_difference_half % 3600) // 60)
    seconds = int(time_difference_half % 60)
    formatted_time_half = f" finished concatenation in:{hours} hours, {minutes} minutes, {seconds} seconds"
    print(f"{formatted_time_half}")

    # The context manager closes the file even if writing fails half-way through.
    with h5py.File(path, "a") as f:
        # Remove stale datasets explicitly. A bare `except:` around
        # create_dataset would also hide unrelated errors such as a bad shape.
        for name in (name_test, name_truth):
            if name in f:
                del f[name]

        # Create the datasets within the h5 file for 'test' and 'truth' data
        test = f.create_dataset(
            name_test,
            shape=(n_days, n_lead_times, n_lat, n_lon, n_stats),
            dtype=np.float32,
            compression="gzip",
            compression_opts=9,
        )
        truth = f.create_dataset(
            name_truth,
            shape=(n_days, n_lead_times, n_lat, n_lon),
            dtype=np.float32,
            compression="gzip",
            compression_opts=9,
        )

        # Populate the h5 file one day at a time to bound the memory needed
        for i in range(n_days):
            test[i, ...] = ws_test[i, ...].values
            truth[i, ...] = y_test[i, ...].values
    

In [49]:
# Scratch check of the built-in `sum` on a small list.
test = [0, 1, 2, 3, 4, 5]
sum(test)

15

<IPython.core.display.Javascript object>

In [60]:
# Sum over an empty slice is 0; presumably the day offset of the first year in
# a concatenated day axis — TODO confirm the intended use.
sum(n_days_shape[0:0])

0

<IPython.core.display.Javascript object>

In [61]:
# Day count of the first year only; presumably the day offset of the second
# year in a concatenated day axis — TODO confirm the intended use.
sum(n_days_shape[0:1])

357

<IPython.core.display.Javascript object>