In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import numpy as np

from bsd_dataset import get_dataset
from bsd_dataset.common.dataloaders import get_dataloader
from bsd_dataset.common.metrics import rmse, bias, pearson_correlation_coefficient
from bsd_dataset.regions import Region

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Input (predictor) options, keyed by dataset identifier.
# The CMIP5 request is assembled from named pieces so each part is easy to tweak.
cmip5_variables = [
    'mean_precipitation_flux',
    'near_surface_specific_humidity',
]

# Date-range chunks (YYYYMMDD-YYYYMMDD) spanning 1980 through 2005.
cmip5_periods = [
    '19800101-19841231',
    '19850101-19891231',
    '19900101-19941231',
    '19950101-19991231',
    '20000101-20041231',
    '20050101-20051231',
]

input_datasets = {
    'cds:cmip5-single-levels:gfdl_cm3': {
        'ensemble_member': 'r1i1p1',
        'variable': cmip5_variables,
        'period': cmip5_periods,
    },
    # No options are passed for the elevation dataset.
    'gmted2010_0250': {},
}

In [4]:
# Study region, given as a bounding box by its two opposite corners.
# NOTE(review): the values look like (longitude, latitude) in degrees — confirm
# against the Region documentation.
spain_top_left = (-12, 45)
spain_bottom_right = (2, 35)

Spain = Region(top_left=spain_top_left, bottom_right=spain_bottom_right)

In [5]:
# Build the dataset: the inputs configured in `input_datasets`, the 'chirps_25'
# target, and train/val/test splits that share the same region but use
# disjoint date ranges (1981-2003, 2004, 2005).
#
# NOTE: download=True / extract=True are passed here, and the log below shows
# this call requesting and downloading an 833M archive from the CDS, so a run
# can take several minutes.
# NOTE(review): presumably both flags can be set to False once the data is
# already under DATA_ROOT — confirm against the bsd_dataset documentation.
DATA_ROOT = '/u/scratch/j/jkjewik/data'  ## CHANGE ME ##

dataset = get_dataset(
    input_datasets=input_datasets,
    target_dataset='chirps_25',
    train_region=Spain,
    val_region=Spain,
    test_region=Spain,
    train_dates=('1981-01-01', '2003-12-31'),
    val_dates=('2004-01-01', '2004-12-31'),
    test_dates=('2005-01-01', '2005-12-31'),
    download=True,
    extract=True,
    root=DATA_ROOT,
)



2022-04-30 14:53:50,744 INFO Welcome to the CDS
2022-04-30 14:53:50,748 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/projections-cmip5-daily-single-levels
2022-04-30 14:53:51,029 INFO Request is queued
2022-04-30 14:53:52,207 INFO Request is running
2022-04-30 14:56:44,087 INFO Request is completed
2022-04-30 14:56:44,088 INFO Downloading https://download-0015-clone.copernicus-climate.eu/cache-compute-0015/cache/data1/dataset-projections-cmip5-daily-single-levels-d574344b-97b5-4715-ad9a-8cb2bca41a7d.tar.gz to /u/scratch/j/jkjewik/data/cds/cmip5-single-levels.gfdl_cm3.tar.gz (833M)
2022-04-30 14:58:25,773 INFO Download rate 8.2M/s


In [6]:
# Get the training subset (WARNING: this mutates the dataset!)
# You can also get the validation set with get_subset('val')
# and the testing set with get_subset('test')
train_ds = dataset.get_subset('train')

# Build the loader from the subset that was just selected rather than from
# `dataset`: because get_subset mutates `dataset`, a loader built on `dataset`
# would depend on whichever get_subset call happened to run last.
# NOTE(review): assumes get_subset returns the selected subset — confirm.
train_loader = get_dataloader(train_ds, batch_size=16)

In [7]:
# Peek at the first batch only; x, y and mask stay bound for the cells below.
for batch in train_loader:
    x, y, mask = batch
    # x:    batch size x channels x longitude x latitude
    # y:    batch size x longitude x latitude (no channel because it's just the precipitation)
    # mask: same shape as the target
    print(
        f'Input shape: {x.shape}',
        f'Target shape: {y.shape}',
        f'Mask shape: {mask.shape}',
        sep='\n',
    )
    break

Input shape: torch.Size([16, 3, 56, 40])
Target shape: torch.Size([16, 56, 40])
Mask shape: torch.Size([16, 56, 40])


There are three channels in the input (dimension 1 of the input shape above), corresponding to (1) mean precipitation flux, (2) near-surface specific humidity, and (3) GMTED2010 elevation data.

In [8]:
# "mask" is True wherever the target data is NaN and
# False wherever it is not NaN
# Display the mask of the first sample in the batch.
mask[0]

tensor([[ True,  True,  True,  ...,  True,  True,  True],
        [ True,  True,  True,  ...,  True,  True,  True],
        [ True,  True,  True,  ...,  True,  True,  True],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])

In [15]:
# Create a synthetic "prediction": ground truth + Gaussian noise.
# Target NaNs (where mask is True) are set to 1 first so the noise is added to
# finite values. y_true itself is left untouched.
NOISE_STD = 10  # standard deviation of the added noise

# Seeded, local generator: the noise (and therefore the metrics below) is the
# same on every run for a given batch, without touching torch's global RNG.
# NOTE(review): the batch itself may still vary between runs if the loader
# shuffles — confirm.
noise_rng = torch.Generator().manual_seed(0)

y_true = y[0]
y_true_filled = torch.where(mask[0], torch.ones_like(y_true), y_true)
y_pred = y_true_filled + NOISE_STD * torch.randn(y_true_filled.shape, generator=noise_rng)

In [16]:
# Score the noisy prediction against the target with the library's rmse metric.
# NOTE(review): the `torch.tensor(...)` lines in the output appear to be warning
# text emitted from inside the metric, not part of the result.
rmse(y_pred, y_true)

  y_pred = torch.tensor(y_pred)
  y_true = torch.tensor(y_true)


tensor(7.3691)

In [17]:
# Correlation between the noisy prediction and the target.
# NOTE(review): the recorded output is nan; presumably this comes from the NaN
# entries still present in y_true — confirm whether the metric expects masked
# or NaN-free input.
pearson_correlation_coefficient(y_pred, y_true)

  y_pred = torch.tensor(y_pred)
  y_true = torch.tensor(y_true)


nan

In [18]:
# Bias of the noisy prediction relative to the target.
bias(y_pred, y_true)  # bias in downscaling is (predictions - truth)

  y_pred = torch.tensor(y_pred)
  y_true = torch.tensor(y_true)


tensor(-55.3295)