In [16]:
# Enable IPython's autoreload extension in mode 2: modules are reloaded before
# each cell executes, so edits to imported source files take effect without
# restarting the kernel.
# (Comments stay on their own lines: text after a line magic is passed to it.)
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [26]:
import os

import numpy as np
import torch

from bsd_dataset import get_dataset
from bsd_dataset.common.dataloaders import get_dataloader
from bsd_dataset.common.metrics import rmse, bias, pearson_correlation_coefficient
from bsd_dataset.regions import Region

In [18]:
# Define input options.
# Each key names an input source; its value holds the options for that source.
input_datasets = {
    'cds:cmip5-single-levels:gfdl_cm3': {
        'ensemble_member': 'r1i1p1',
        'variable': [
            'mean_precipitation_flux',
            'near_surface_specific_humidity'
        ],
        # Consecutive 'YYYYMMDD-YYYYMMDD' chunks. Together they must cover every
        # date requested from the dataset (1981-2005 in this notebook). The
        # 1990-1994 chunk was previously missing, leaving a five-year hole in
        # the middle of the training range.
        # NOTE(review): data fetched with the old list may lack the 1990-1994
        # file on disk — download it before running with download=False.
        'period': [
            '19800101-19841231', '19850101-19891231',
            '19900101-19941231', '19950101-19991231',
            '20000101-20041231', '20050101-20051231'
        ]
    },
    # No options are set for this source.
    'gmted2010_0250': {},
}

In [19]:
# Study region: a rectangular box around Spain, described by two corners.
# NOTE(review): the corner tuples look like (longitude, latitude) in degrees —
# confirm against the Region definition.
lon_west, lat_north = -12, 45
lon_east, lat_south = 2, 35
Spain = Region(
    top_left=(lon_west, lat_north),
    bottom_right=(lon_east, lat_south),
)

In [21]:
# Build the dataset from files already on disk (no download, no extraction).
# The same region is used for every split; the splits differ only by date range.
#
# The data directory defaults to the original scratch path, but can be set per
# machine through the BSD_DATASET_ROOT environment variable instead of editing
# this cell.
data_root = os.environ.get('BSD_DATASET_ROOT', '/u/scratch/j/jkjewik/data')

dataset = get_dataset(
    input_datasets=input_datasets,
    target_dataset='chirps_25',
    train_region=Spain,
    val_region=Spain,
    test_region=Spain,
    train_dates=('1981-01-01', '2003-12-31'),
    val_dates=('2004-01-01', '2004-12-31'),
    test_dates=('2005-01-01', '2005-12-31'),
    download=False,
    extract=False,
    root=data_root,
)

In [22]:
# Get the training subset (WARNING: get_subset mutates `dataset` itself!)
# The validation and test sets are obtained the same way, with
# get_subset('val') and get_subset('test').
train_ds = dataset.get_subset('train')

# Build the loader from the subset that was just requested. Previously `dataset`
# was passed and `train_ds` went unused, which only yielded training batches
# because of the mutation side effect above.
# NOTE(review): assumes get_subset returns the subset object — confirm.
train_loader = get_dataloader(train_ds, batch_size=16)

In [23]:
# Pull a single batch to inspect tensor shapes. next(iter(...)) raises
# StopIteration on an empty loader instead of silently leaving x, y and mask
# undefined (or stale from an earlier run) for the later cells that reuse them.
x, y, mask = next(iter(train_loader))

print(f'Input shape: {x.shape}')  # batch size x channels x longitude x latitude
print(f'Target shape: {y.shape}')  # batch size x longitude x latitude (no channel because it's just the precipitation)
print(f'Mask shape: {mask.shape}')  # same shape as the target

Input shape: torch.Size([16, 3, 56, 40])
Target shape: torch.Size([16, 56, 40])
Mask shape: torch.Size([16, 56, 40])


In [25]:
# "mask" is True wherever the target data is NaN and
# False wherever it is not NaN.
# Display the mask belonging to the first sample of the batch.
mask[0]

tensor([[ True,  True,  True,  ...,  True,  True,  True],
        [ True,  True,  True,  ...,  True,  True,  True],
        [ True,  True,  True,  ...,  True,  True,  True],
        ...,
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False],
        [False, False, False,  ..., False, False, False]])

In [44]:
# Create a synthetic prediction: ground truth + Gaussian noise (with NaNs set to 1 first).
# y_true itself keeps its NaNs; only the prediction has them replaced.
y_true = y[0]
y_pred = torch.where(mask[0], torch.ones_like(y_true), y_true)

# Draw the noise from a dedicated, seeded generator so the metric values computed
# from y_pred are reproducible across runs without touching the global RNG state.
noise_rng = torch.Generator().manual_seed(0)
y_pred = y_pred + torch.randn(y_pred.shape, generator=noise_rng)

In [45]:
# Root-mean-square error between the noisy prediction and the ground truth.
# NOTE(review): the warning shown in the output quotes torch.tensor(...) calls on
# these inputs, which are already tensors — presumably raised inside the metric;
# confirm it is harmless.
rmse(y_pred, y_true)

  y_pred = torch.tensor(y_pred)
  y_true = torch.tensor(y_true)


tensor(0.7321)

In [46]:
# Pearson correlation between the noisy prediction and the ground truth
# (displayed as a plain number rather than a tensor).
pearson_correlation_coefficient(y_pred, y_true)

0.9129681765691058

In [47]:
# Bias in downscaling is (predictions - truth), so its sign tells whether the
# prediction sits above or below the target.
bias(y_pred, y_true)

tensor(-15.4797)