In [25]:
# Enable autoreload (mode 2) so edits to imported project modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
import torch
from bsd_dataset import get_dataset, regions, DatasetRequest

In [34]:
# Report how many CUDA devices PyTorch can see; nothing is printed when CUDA
# is unavailable.
cuda_ready = torch.cuda.is_available()
if cuda_ready:
    visible_gpus = torch.cuda.device_count()
    print(visible_gpus)

8


# Experiment Settings

All latitudes and longitudes are provided **unnormalized**. See the bottom of this notebook for details.

✅ = ready to use. ❌ = work in progress.

## All experiments
- Training period: 1983 - 2010.
- Validation period: 2011 - 2012.
- Testing period: 2013 - 2014.

## Experiment 0: Default (CHIRPS target)

**Experiment 0.1** ✅
- Train/val/test region: South America
- Input
    - Low-res precipitation, latitude, longitude (source: CMIP6, GFDL-ESM4)
- Target
    - Hi-res precipitation, latitude, longitude (source: CHIRPS, 0.25 deg res)

**Experiment 0.2** ❌
- Train/val/test region: Europe
- Input
    - Low-res precipitation, latitude, longitude (source: CMIP6, GFDL-ESM4)
- Target
    - Hi-res precipitation, latitude, longitude (source: CHIRPS, 0.25 deg res)

## Experiment 2: PERSIANN-CDR target

**Experiment 2.1** ✅
- Train/val/test region: South America
- Input
    - Low-res precipitation, latitude, longitude (source: CMIP6, GFDL-ESM4)
- Target 
    - Hi-res precipitation, latitude, longitude (source: PERSIANN-CDR)
    
**Experiment 2.2** ❌
- Train/val/test region: Europe
- Input
    - Low-res precipitation, latitude, longitude (source: CMIP6, GFDL-ESM4)
- Target 
    - Hi-res precipitation, latitude, longitude (source: PERSIANN-CDR)

In [35]:
### CHANGE ME! ###
# Experiment id written as <major>.<minor>. The "Experiment Settings" section
# above lists 0.1, 0.2 (CHIRPS target) and 2.1, 2.2 (PERSIANN-CDR target).
experiment = 0.1

In [40]:
# Decode the experiment id (e.g. 2.1) into integer major/minor parts.
# round() absorbs binary floating-point error: 1.1 * 10 evaluates to
# 11.000000000000002, whose fractional remainder would match no experiment.
_scaled_id = round(experiment * 10)
if abs(experiment * 10 - _scaled_id) > 1e-6:
    raise ValueError(
        f'Experiment id must have exactly one decimal digit, got {experiment!r}'
    )
exp_maj, exp_min = divmod(_scaled_id, 10)

# Major number -> keyword arguments of the hi-res target DatasetRequest.
TARGET_REQUEST_KWARGS = {
    0: {'dataset': 'chirps', 'resolution': 0.25},
    2: {'dataset': 'persiann-cdr'},
}

# Minor number -> attribute of `regions` shared by the train, val and test
# splits. Stored by name so that only the selected region is looked up.
REGION_NAMES = {
    1: 'SouthAmerica',
    2: 'Europe',
}

# Fail loudly on an unknown id. Without this check no branch would run and the
# variables below would silently keep whatever a previously executed
# experiment left in the kernel.
if exp_maj not in TARGET_REQUEST_KWARGS or exp_min not in REGION_NAMES:
    raise ValueError(
        f'Unknown experiment {experiment!r}: major must be one of '
        f'{sorted(TARGET_REQUEST_KWARGS)} and minor one of {sorted(REGION_NAMES)}'
    )

root = f'/home/data/BSDD/experiment-{experiment}'

# Every experiment uses the same low-res input request.
input_datasets = [
    DatasetRequest(
        dataset='projections-cmip6',
        model='gfdl_esm4',
        variable='precipitation',
    )
]
target_dataset = DatasetRequest(**TARGET_REQUEST_KWARGS[exp_maj])

# All three splits use the same region within a given experiment.
train_region = val_region = test_region = getattr(regions, REGION_NAMES[exp_min])

In [41]:
# Region per split, as chosen by the experiment setup cell.
split_regions = {
    'train_region': train_region,
    'val_region': val_region,
    'test_region': test_region,
}

# Date range per split; these match the periods listed under "All experiments".
split_dates = {
    'train_dates': ('1983-01-01', '2010-12-31'),
    'val_dates': ('2011-01-01', '2012-12-31'),
    'test_dates': ('2013-01-01', '2014-12-31'),
}

# Build the dataset from the data under `root`; downloading and extracting
# are both switched off.
dataset = get_dataset(
    input_datasets,
    target_dataset,
    **split_regions,
    **split_dates,
    download=False,
    extract=False,
    root=root,
    device='cuda:0',
)

In [42]:
# Fetch the three splits in train / val / test order.
train_dataset, val_dataset, test_dataset = (
    dataset.get_split(split_name) for split_name in ('train', 'val', 'test')
)

In [43]:
# Report the number of samples in each split, one per line.
print(
    f'Training samples: {len(train_dataset)}',
    f'Validation samples: {len(val_dataset)}',
    f'Testing samples: {len(test_dataset)}',
    sep='\n',
)

Training samples: 10220
Validation samples: 730
Testing samples: 730


All tensors are latitude by longitude.

In [10]:
# Unpack the first training sample into its input, its target, and the
# accompanying `info` mapping.
x, y, info = train_dataset[0]

print(f'Input shape: {x.shape} ({x.device})')
# Report the target's own device; this line previously printed x.device.
print(f'Target shape: {y.shape} ({y.device})')

Input shape: torch.Size([1, 75, 48]) (cuda:0)
Target shape: torch.Size([240, 300]) (cuda:0)


In [11]:
# List every entry of `info` with its shape and device, one line per entry.
print('INFO SUMMARY')
summary_lines = [
    f' - {k} shape: {v.shape} ({v.device})' for k, v in info.items()
]
if summary_lines:
    print('\n'.join(summary_lines))

INFO SUMMARY
 - x_lat shape: torch.Size([75, 48]) (cuda:0)
 - x_lon shape: torch.Size([75, 48]) (cuda:0)
 - y_lat shape: torch.Size([75, 48]) (cuda:0)
 - y_lon shape: torch.Size([75, 48]) (cuda:0)
 - y_mask shape: torch.Size([240, 300]) (cuda:0)


Latitudes and longitudes are provided unnormalized. Latitudes are in the range \[-90, 90\], and longitudes are in the range \[0, 360\]. The functions below perform normalization (I will eventually migrate this into the dataset itself).

In [12]:
# Display the unnormalized latitude grid that accompanies the input.
info['x_lat']

tensor([[-54.5000, -54.5000, -54.5000,  ..., -54.5000, -54.5000, -54.5000],
        [-53.5000, -53.5000, -53.5000,  ..., -53.5000, -53.5000, -53.5000],
        [-52.5000, -52.5000, -52.5000,  ..., -52.5000, -52.5000, -52.5000],
        ...,
        [ 17.5000,  17.5000,  17.5000,  ...,  17.5000,  17.5000,  17.5000],
        [ 18.5000,  18.5000,  18.5000,  ...,  18.5000,  18.5000,  18.5000],
        [ 19.5000,  19.5000,  19.5000,  ...,  19.5000,  19.5000,  19.5000]],
       device='cuda:0', dtype=torch.float64)

In [13]:
# Display the unnormalized longitude grid that accompanies the input.
info['x_lon']

tensor([[ 90.6250,  91.8750,  93.1250,  ..., 146.8750, 148.1250, 149.3750],
        [ 90.6250,  91.8750,  93.1250,  ..., 146.8750, 148.1250, 149.3750],
        [ 90.6250,  91.8750,  93.1250,  ..., 146.8750, 148.1250, 149.3750],
        ...,
        [ 90.6250,  91.8750,  93.1250,  ..., 146.8750, 148.1250, 149.3750],
        [ 90.6250,  91.8750,  93.1250,  ..., 146.8750, 148.1250, 149.3750],
        [ 90.6250,  91.8750,  93.1250,  ..., 146.8750, 148.1250, 149.3750]],
       device='cuda:0', dtype=torch.float64)

In [14]:
def normalize_latitudes(lats):
    """Rescale latitudes from the range [-90, 90] onto [0, 1].

    Args:
        lats: A number, or any object supporting `+` and `/` with numbers
            (the notebook passes a tensor).

    Returns:
        `lats` shifted by 90 and divided by 180, so -90 maps to 0 and 90 to 1.
    """
    shifted = lats + 90
    return shifted / 180

def normalize_longitudes(lons):
    """Rescale longitudes onto [0, 1), accepting either degree convention.

    The original formula `(lons + 180) / 360` assumed inputs in [-180, 180],
    but this notebook states that longitudes are given in [0, 360]; for those
    inputs it produced values up to 1.5. Wrapping the shifted value modulo 360
    keeps the result in [0, 1) and maps the same meridian to the same value in
    both conventions (e.g. 270 and -90 both give 0.25).

    Args:
        lons: A number, or any object supporting `+`, `%` and `/` with numbers
            (the notebook passes a tensor).

    Returns:
        `((lons + 180) % 360) / 360`. For inputs in [-180, 180) this equals the
        previous `(lons + 180) / 360`. An input of exactly 180 now gives 0.0,
        the same as -180, instead of 1.0.
    """
    return ((lons + 180) % 360) / 360

In [15]:
# Display the input latitude grid after normalization.
normalize_latitudes(info['x_lat'])

tensor([[0.1972, 0.1972, 0.1972,  ..., 0.1972, 0.1972, 0.1972],
        [0.2028, 0.2028, 0.2028,  ..., 0.2028, 0.2028, 0.2028],
        [0.2083, 0.2083, 0.2083,  ..., 0.2083, 0.2083, 0.2083],
        ...,
        [0.5972, 0.5972, 0.5972,  ..., 0.5972, 0.5972, 0.5972],
        [0.6028, 0.6028, 0.6028,  ..., 0.6028, 0.6028, 0.6028],
        [0.6083, 0.6083, 0.6083,  ..., 0.6083, 0.6083, 0.6083]],
       device='cuda:0', dtype=torch.float64)

In [16]:
# Display the input longitude grid after normalization.
normalize_longitudes(info['x_lon'])

tensor([[0.7517, 0.7552, 0.7587,  ..., 0.9080, 0.9115, 0.9149],
        [0.7517, 0.7552, 0.7587,  ..., 0.9080, 0.9115, 0.9149],
        [0.7517, 0.7552, 0.7587,  ..., 0.9080, 0.9115, 0.9149],
        ...,
        [0.7517, 0.7552, 0.7587,  ..., 0.9080, 0.9115, 0.9149],
        [0.7517, 0.7552, 0.7587,  ..., 0.9080, 0.9115, 0.9149],
        [0.7517, 0.7552, 0.7587,  ..., 0.9080, 0.9115, 0.9149]],
       device='cuda:0', dtype=torch.float64)