In [3]:
import os

# If this notebook file is listed in the current working directory, the kernel
# was started next to the notebook: move one directory up before continuing.
# NOTE(review): presumably this is so the relative config/data paths used in
# later cells resolve from the parent folder — confirm the intended layout.
notebook_in_cwd = "datasets.ipynb" in os.listdir()
if notebook_in_cwd:
    os.chdir("../")

# Report the directory the remaining cells will run from.
print("Running in {}".format(os.getcwd()))

Running in /data/hpcdata/users/anddon76/icenet/icenet-experimental


# datasets.ipynb

In this notebook, we create datasets for experimentation with different backbone models.

## 1. Single-Month Dataset

In [2]:
import pandas as pd
# Inclusive (first day, last day) bounds for each split of January 2020.
# The three ranges are contiguous and do not overlap.
split_bounds = {
    "train": ("2020-01-01", "2020-01-20"),
    "val": ("2020-01-21", "2020-01-25"),
    "test": ("2020-01-26", "2020-01-31"),
}

# Expand every split into an explicit list of daily timestamps,
# keeping the train -> val -> test key order.
processing_dates = {
    split: [pd.to_datetime(day) for day in pd.date_range(first_day, last_day)]
    for split, (first_day, last_day) in split_bounds.items()
}

# Identifier handed to the pre-processors below for this dataset.
processed_name = "single_month"

In [3]:
from icenet.data.processors.era5 import IceNetERA5PreProcessor
from icenet.data.processors.meta import IceNetMetaPreProcessor
from icenet.data.processors.osi import IceNetOSIPreProcessor

# Arguments shared by the processors below: the date splits are passed
# positionally in train / val / test order, and only the southern hemisphere
# flag is switched on.
split_dates = tuple(processing_dates[split] for split in ("train", "val", "test"))
hemisphere = dict(north=False, south=True)

# ERA5 processor.
# NOTE(review): the first list is presumably the variables kept as absolute
# values and the second the variables processed as anomalies — confirm against
# the icenet constructor signature.
pp = IceNetERA5PreProcessor(
    ["uas", "vas"],
    ["tas", "zg500", "zg250"],
    processed_name,
    *split_dates,
    linear_trends=(),
    **hemisphere,
)

# OSI processor: same argument layout, with "siconca" in the first list and an
# empty second list.
osi = IceNetOSIPreProcessor(
    ["siconca"],
    [],
    processed_name,
    *split_dates,
    linear_trends=(),
    **hemisphere,
)

# Meta processor: only needs the dataset name and the hemisphere flags.
meta = IceNetMetaPreProcessor(processed_name, **hemisphere)

2024-01-24 15:09:29.841177: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-24 15:09:30.250593: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-24 15:09:30.250678: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-24 15:09:30.263815: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-24 15:09:30.291260: I tensorflow/core/platform/cpu_feature_guar

In [4]:
# The ERA5 and OSI processors follow the same two-step routine: initialise the
# source data with a one-day lag, then process. ERA5 runs first, then OSI.
for processor in (pp, osi):
    processor.init_source_data(lag_days=1)
    processor.process()

# The meta processor has no source-data initialisation step here; it is
# processed last.
meta.process()

Cannot find the ecCodes library


## n. Test Dataset with Dataloader

In [11]:
from icenet.data.loaders import IceNetDataLoaderFactory

# Positional settings for the loader factory.
# NOTE(review): "loader.single_month.json" is presumably produced by the
# pre-processing in section 1 — confirm it exists before running this cell.
implementation = "dask"
loader_config = "loader.single_month.json"
dataset_name = "single_month"
lag = 1

# Keyword options: one forecast day, southern hemisphere only, output batches
# of four, eight generation workers.
loader_options = dict(
    n_forecast_days=1,
    north=False,
    south=True,
    output_batch_size=4,
    generate_workers=8,
)

loader_factory = IceNetDataLoaderFactory()
dl = loader_factory.create_data_loader(
    implementation,
    loader_config,
    dataset_name,
    lag,
    **loader_options,
)

In [12]:
# Run the data loader's generation step for the configured dataset.
# NOTE(review): the near-simultaneous TensorFlow start-up messages in the
# output suggest the work is spread over several processes (the loader was
# created with generate_workers=8) — confirm; this cell may take a while.
dl.generate()

2024-01-24 17:21:00.580507: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-24 17:21:00.580671: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-24 17:21:00.580879: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-24 17:21:00.581099: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly 

In [28]:
# Sanity check: build a single sample for a date inside the training range
# and inspect the shapes of the three returned arrays.
# NOTE(review): x / y / sw are presumably inputs, targets and sample weights —
# confirm against the loader's documentation.
sample_date = pd.Timestamp("2020-01-04")
x, y, sw = dl.generate_sample(sample_date)
tuple(array.shape for array in (x, y, sw))

((432, 432, 9), (432, 432, 1, 1), (432, 432, 1, 1))

## n+1. Test PyTorch Dataset Implementation

In [5]:
from utils import IceNetDataSetPyTorch
# Settings for the PyTorch dataset wrapper. Only `dataset_config` is passed to
# the constructors in this cell; `implementation`, `dataset_name` and `lag`
# are assigned but not used here.
implementation = "dask"
dataset_config = "dataset_config.single_month.json"
dataset_name = "single_month"
lag = 1

# One dataset per split, all with a batch size of four. Shuffling is enabled
# for the training split only.
train_ds, val_ds, test_ds = (
    IceNetDataSetPyTorch(
        dataset_config,
        split,
        batch_size=4,
        shuffling=(split == "train"),
    )
    for split in ("train", "val", "test")
)

In [8]:
# Sanity check: fetch the first item of the training dataset and inspect the
# shapes of its three components.
first_item = train_ds[0]
x, y, sw = first_item
tuple(array.shape for array in (x, y, sw))

((432, 432, 9), (432, 432, 1, 1), (432, 432, 1, 1))