In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

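# Quickstart

This notebook loads WeatherBench ERA5 data at 5.625° resolution, sets up a forecasting task, and evaluates the climatology and persistence baselines with the trainer's test loop.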

In [2]:
import climate_learn as cl
from climate_learn.data.climate_dataset.args import ERA5Args
from climate_learn.data.task.args import ForecastingArgs
from climate_learn.data.dataset.args import MapDatasetArgs

## Load the data

In [3]:
# Directory holding the ERA5 files, and the variables to read from it.
root = "/home/data/datasets/weatherbench/era5/5.625deg/"
variables = ["geopotential_500", "temperature_850", "2m_temperature"]

# The task refers to variables with an "era5:" prefix; the same list object
# serves as both the input and the output variable set.
in_vars = out_vars = [f"era5:{v}" for v in variables]

# Years assigned to each split (the end of each range is exclusive).
train_years = range(1979, 2016)
val_years = range(2016, 2017)
test_years = range(2017, 2019)

# One task description shared by all three splits.
# NOTE(review): units of pred_range / subsample are presumably hours -- confirm.
forecasting_args = ForecastingArgs(in_vars, out_vars, pred_range=6, subsample=6)

# Pair each split's ERA5 source arguments with the shared forecasting task.
train_era5_args = ERA5Args(root, variables, train_years)
train_dataset_args = MapDatasetArgs(train_era5_args, forecasting_args)

val_era5_args = ERA5Args(root, variables, val_years)
val_dataset_args = MapDatasetArgs(val_era5_args, forecasting_args)

test_era5_args = ERA5Args(root, variables, test_years)
test_dataset_args = MapDatasetArgs(test_era5_args, forecasting_args)

# Bundle the three splits into the data module used by every later cell.
dm = cl.data.DataModule(
    train_dataset_args,
    val_dataset_args,
    test_dataset_args,
    batch_size=32,
    num_workers=8,
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:24<00:00,  1.53it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.69it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00,  1.49it/s]


## Load the models

In [4]:
# Baseline 1: climatology, i.e. the average value over the training period.
climatology = cl.load_forecasting_module(
    data_module=dm,
    preset="climatology",
)

# Baseline 2: persistence, which returns its input as its prediction.
persistence = cl.load_forecasting_module(
    data_module=dm,
    preset="persistence",
)

# Optional learned model: Rasp & Thuerey (2020), the SoTA on WeatherBench.
# The preset key is left exactly as originally written; uncomment to load it.
# resnet = cl.load_forecasting_module(data_module=dm, preset="rasp-theurey-2020")

Loading preset: climatology
Using preset optimizer
Using preset learning rate scheduler
Loading training loss: lat_mse
Loading validation loss: lat_rmse
Loading validation loss: lat_acc
Loading test loss: lat_rmse
Loading test loss: lat_acc
Loading validation transform: denormalize
Loading validation transform: denormalize
Loading test transform: denormalize
Loading test transform: denormalize
Loading preset: persistence
Using preset optimizer
Using preset learning rate scheduler
Loading training loss: lat_mse
Loading validation loss: lat_rmse
Loading validation loss: lat_acc
Loading test loss: lat_rmse
Loading test loss: lat_acc
Loading validation transform: denormalize
Loading validation transform: denormalize
Loading test transform: denormalize
Loading test transform: denormalize


## Train the models

Climatology and persistence don't require training, so `trainer.fit` is only needed for learned models such as the commented-out ResNet preset.

In [5]:
# Index of the GPU device to use. It is only referenced by the commented-out
# `devices` option below, so it has no effect until that option is enabled.
gpu_num = 0

trainer = cl.Trainer(
    # Every option below is commented out, so the trainer is created with no
    # arguments. Uncomment the ones you need.
    #
    # stop when latitude-weighted RMSE, a validation metric, stops improving
    # early_stopping="lat_rmse:aggregate",
    # wait for 1 epoch of no improvement
    # patience=1,
    # uncomment both lines to use gpu acceleration on device `gpu_num`
    # accelerator="gpu",
    # devices=[gpu_num],
    # maximum number of training epochs
    # max_epochs=2
)

Global seed set to 0
  warn("In interactive environment: cannot use DDP spawn strategy")


In [6]:
# trainer.fit(resnet, dm)

## Test the models

In [7]:
# Run the trainer's test loop for the climatology baseline on the data module.
trainer.test(climatology, dm)

Output()

In [8]:
# Run the trainer's test loop for the persistence baseline on the data module.
trainer.test(persistence, dm)

Output()

In [None]:
# trainer.test(resnet, dm)