In [1]:
# Enable IPython's autoreload extension in mode 2 so imported modules are
# re-imported before each cell runs and edits to local source are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from bsd_dataset import get_dataset, regions, DatasetRequest

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Print the number of visible CUDA devices; prints nothing when CUDA is unavailable.
if torch.cuda.is_available():
    print(torch.cuda.device_count())

8


# Experiment Settings

All latitudes and longitudes are provided **unnormalized**. See the "Latitude and longitude information" section at the bottom of this notebook for details.

✅ = ready to use. ❌ = work in progress.

## All experiments
- Training period: 1983 - 2010.
- Validation period: 2011 - 2012.
- Testing period: 2013-2014.

## Experiment 0: Default (CHIRPS target)

**Experiment 0.1** ✅
- Train/val/test region: South America
- Input (source: CMIP6, GFDL-ESM4)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target (source: CHIRPS, 0.25 deg res)
    - Hi-res precipitation
    - Latitude and longitude are also available

**Experiment 0.2** ✅
- Train/val/test region: Europe
- Input (source: CMIP6, GFDL-ESM4)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target (source: CHIRPS, 0.25 deg res)
    - Hi-res precipitation
    - Latitude and longitude are also available
    
## Experiment 1: Multiple input variables (same resolution)

**Experiment 1.1** ✅
- Train/val/test region: South America
- Input (source: CMIP6, GFDL-ESM4 & FGOALS-f3-L)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target
    - Hi-res precipitation, latitude, longitude (source: CHIRPS, 0.25 deg res)
    
**Experiment 1.2** ✅
- Train/val/test region: Europe
- Input (source: CMIP6, GFDL-ESM4 & FGOALS-f3-L)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target
    - Hi-res precipitation, latitude, longitude (source: CHIRPS, 0.25 deg res)

## Experiment 2: PERSIANN-CDR target

**Experiment 2.1** ✅
- Train/val/test region: South America
- Input (source: CMIP6, GFDL-ESM4)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target (source: PERSIANN-CDR)
    - Hi-res precipitation
    - Latitude and longitude are also available
    
**Experiment 2.2** ✅
- Train/val/test region: Europe
- Input (source: CMIP6, GFDL-ESM4)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target (source: PERSIANN-CDR)
    - Hi-res precipitation
    - Latitude and longitude are also available
    
## Experiment 3: Distribution shift across geographies

**Experiment 3.1** ✅
- Train/val region: South America
- Test region: Europe
- Input (source: CMIP6, GFDL-ESM4)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target (source: CHIRPS, 0.25 deg res)
    - Hi-res precipitation
    - Latitude and longitude are also available
    
**Experiment 3.2** ✅
- Train/val region: Europe
- Test region: South America
- Input (source: CMIP6, GFDL-ESM4)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target (source: CHIRPS, 0.25 deg res)
    - Hi-res precipitation
    - Latitude and longitude are also available
   
## Experiment 4: Lower input resolution
**Experiment 4.1** ✅
- Train/val/test region: South America
- Input (source: CMIP6, MPI-ESM1-2-LR)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target (source: CHIRPS, 0.25 deg res)
    - Hi-res precipitation
    - Latitude and longitude are also available
    
**Experiment 4.2** ✅
- Train/val/test region: Europe
- Input (source: CMIP6, MPI-ESM1-2-LR)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target (source: CHIRPS, 0.25 deg res)
    - Hi-res precipitation
    - Latitude and longitude are also available
    
## Experiment 5: Higher input resolution
**Experiment 5.1** ✅
- Train/val/test region: South America
- Input (source: CMIP6, CNRM-CM6-1-HR)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target (source: CHIRPS, 0.25 deg res)
    - Hi-res precipitation
    - Latitude and longitude are also available
    
**Experiment 5.2** ✅
- Train/val/test region: Europe
- Input (source: CMIP6, CNRM-CM6-1-HR)
    - Max near-surface air temp
    - Min near-surface air temp
    - Near-surface air temp
    - Near-surface specific humidity
    - Near-surface wind speed
    - Precipitation
    - Sea level pressure
    - Latitude and longitude are also available
- Target (source: CHIRPS, 0.25 deg res)
    - Hi-res precipitation
    - Latitude and longitude are also available

In [58]:
### CHANGE ME! ###
# Experiment ID written as <major>.<minor>; the supported combinations are the
# ones described in the "Experiment Settings" section above (0.1 through 5.2).
experiment = 2.2

In [59]:
# Translate the experiment ID selected above into the dataset requests and the
# train/val/test regions consumed by the `get_dataset` call.
root = f'/home/data/BSDD/experiment-{experiment}'

# Split an ID such as 2.2 into its major (2) and minor (2) parts. The product
# `experiment * 10` is a float and is not guaranteed to land exactly on a whole
# number, so it is rounded to an int before the exact comparisons below.
_scaled_id = experiment * 10
if abs(_scaled_id - round(_scaled_id)) > 1e-6:
    raise ValueError(
        f'Experiment ID must have exactly one decimal digit, got {experiment!r}'
    )
exp_maj, exp_min = divmod(round(_scaled_id), 10)

# CMIP6 variable requested from every input model. Experiments 1, 4 and 5 pass
# `variable=variable`, so the name has to be assigned here for those
# experiments to run on a fresh kernel; it is pinned to the same value the
# other experiments request.
# NOTE(review): the experiment descriptions list seven input variables --
# confirm whether DatasetRequest should be given all of them.
variable = 'precipitation'

# Major experiment number -> CMIP6 model(s) used as input.
_input_models = {
    0: ['gfdl_esm4'],
    1: ['gfdl_esm4', 'fgoals_f3_l'],
    2: ['gfdl_esm4'],
    3: ['gfdl_esm4'],
    4: ['mpi_esm1_2_lr'],
    5: ['cnrm_cm6_1_hr'],
}

# Minor experiment number -> region used for training and validation.
_train_regions = {
    1: regions.SouthAmerica,
    2: regions.Europe,
}

# Fail loudly on an unsupported ID instead of silently leaving the names below
# undefined (or stale from a previous run of this cell).
if exp_maj not in _input_models or exp_min not in _train_regions:
    raise ValueError(
        f'Unknown experiment {experiment!r}; expected <major>.<minor> with '
        f'major in {sorted(_input_models)} and minor in {sorted(_train_regions)}'
    )

input_datasets = [
    DatasetRequest(
        dataset='projections-cmip6',
        model=model,
        variable=variable,
    )
    for model in _input_models[exp_maj]
]

# Experiment 2 targets PERSIANN-CDR; every other experiment targets CHIRPS at
# 0.25 degree resolution.
if exp_maj == 2:
    target_dataset = DatasetRequest(dataset='persiann-cdr')
else:
    target_dataset = DatasetRequest(dataset='chirps', resolution=0.25)

train_region = _train_regions[exp_min]
val_region = train_region

# Experiment 3 (distribution shift) tests on the region that was not used for
# training; all other experiments test on the training region.
if exp_maj == 3:
    test_region = _train_regions[3 - exp_min]
else:
    test_region = train_region

In [60]:
# Build the dataset for the selected experiment from the requests, regions and
# root directory resolved in the previous cell. The date ranges match the
# training / validation / testing periods listed under "All experiments".
dataset = get_dataset(
    input_datasets,
    target_dataset,
    train_region=train_region,
    val_region=val_region,
    test_region=test_region,
    train_dates=('1983-01-01', '2010-12-31'),
    val_dates=('2011-01-01', '2012-12-31'),
    test_dates=('2013-01-01', '2014-12-31'),
    # NOTE(review): presumably these two flags skip fetching/unpacking and
    # expect the data to already exist under `root` -- confirm against the
    # bsd_dataset documentation.
    download=False,
    extract=False, 
    root=root,
    # NOTE(review): hard-coded device index; this will likely fail on hosts
    # with fewer than two CUDA devices -- confirm the intended device.
    device='cuda:1'
)

Download https://www.ncei.noaa.gov/data/precipitation-persiann/access/1983/ to /home/data/BSDD/experiment-2.2/tmp/persiann.1983.html
Download https://www.ncei.noaa.gov/data/precipitation-persiann/access/1984/ to /home/data/BSDD/experiment-2.2/tmp/persiann.1984.html
Download https://www.ncei.noaa.gov/data/precipitation-persiann/access/1985/ to /home/data/BSDD/experiment-2.2/tmp/persiann.1985.html
Download https://www.ncei.noaa.gov/data/precipitation-persiann/access/1986/ to /home/data/BSDD/experiment-2.2/tmp/persiann.1986.html
Download https://www.ncei.noaa.gov/data/precipitation-persiann/access/1987/ to /home/data/BSDD/experiment-2.2/tmp/persiann.1987.html
Download https://www.ncei.noaa.gov/data/precipitation-persiann/access/1988/ to /home/data/BSDD/experiment-2.2/tmp/persiann.1988.html
Download https://www.ncei.noaa.gov/data/precipitation-persiann/access/1989/ to /home/data/BSDD/experiment-2.2/tmp/persiann.1989.html
Download https://www.ncei.noaa.gov/data/precipitation-persiann/access

In [61]:
# Fetch the three splits in train -> val -> test order.
train_dataset, val_dataset, test_dataset = map(
    dataset.get_split, ('train', 'val', 'test')
)

In [62]:
# Report the number of samples in each split, one line per split.
print(
    f'Training samples: {len(train_dataset)}',
    f'Validation samples: {len(val_dataset)}',
    f'Testing samples: {len(test_dataset)}',
    sep='\n',
)

Training samples: 10220
Validation samples: 730
Testing samples: 730


All tensors are indexed as latitude by longitude in their last two dimensions.

## Training Data
Validation data is the same shape, even for experiment 3 (where only the test region differs).

In [63]:
# Pull the first training sample: input tensor, target tensor and an info dict.
x, y, info = train_dataset[0]
print(f'Input shape: {x.shape} ({x.device})')
# Report the target's own device rather than the input's.
print(f'Target shape: {y.shape} ({y.device})')

Input shape: torch.Size([7, 20, 40]) (cuda:1)
Target shape: torch.Size([80, 200]) (cuda:1)


In [64]:
# Header first, then one line per entry of the sample's info dict.
print('INFO SUMMARY')
for name, tensor in info.items():
    print(f' - {name} shape: {tensor.shape} ({tensor.device})')

INFO SUMMARY
 - x_lat shape: torch.Size([20, 40]) (cuda:1)
 - x_lon shape: torch.Size([20, 40]) (cuda:1)
 - y_lat shape: torch.Size([80, 200]) (cuda:1)
 - y_lon shape: torch.Size([80, 200]) (cuda:1)
 - y_mask shape: torch.Size([80, 200]) (cuda:1)


## Testing Data
Generally the same shape as the training and validation data, except in experiment 3, where the test region differs from the train/val region.

In [32]:
# Pull the first testing sample: input tensor, target tensor and an info dict.
x, y, info = test_dataset[0]
print(f'Input shape: {x.shape} ({x.device})')
# Report the target's own device rather than the input's.
print(f'Target shape: {y.shape} ({y.device})')

Input shape: torch.Size([7, 20, 40]) (cuda:1)
Target shape: torch.Size([80, 200]) (cuda:1)


In [33]:
# Header first, then one line per entry of the sample's info dict.
print('INFO SUMMARY')
for name, tensor in info.items():
    print(f' - {name} shape: {tensor.shape} ({tensor.device})')

INFO SUMMARY
 - x_lat shape: torch.Size([20, 40]) (cuda:1)
 - x_lon shape: torch.Size([20, 40]) (cuda:1)
 - y_lat shape: torch.Size([80, 200]) (cuda:1)
 - y_lon shape: torch.Size([80, 200]) (cuda:1)
 - y_mask shape: torch.Size([80, 200]) (cuda:1)


## Latitude and longitude information
Latitudes and longitudes are provided unnormalized. Latitudes are in the range \[-90, 90\], and longitudes are in the range \[-180, 180\]. The functions below perform normalization to \[0, 1\] (I will eventually migrate this into the dataset itself).

In [17]:
# Display the unnormalized latitude grid stored under 'x_lat' in the info dict.
info['x_lat']

tensor([[30.5000, 30.5000, 30.5000,  ..., 30.5000, 30.5000, 30.5000],
        [31.5000, 31.5000, 31.5000,  ..., 31.5000, 31.5000, 31.5000],
        [32.5000, 32.5000, 32.5000,  ..., 32.5000, 32.5000, 32.5000],
        ...,
        [62.5000, 62.5000, 62.5000,  ..., 62.5000, 62.5000, 62.5000],
        [63.5000, 63.5000, 63.5000,  ..., 63.5000, 63.5000, 63.5000],
        [64.5000, 64.5000, 64.5000,  ..., 64.5000, 64.5000, 64.5000]],
       device='cuda:0', dtype=torch.float64)

In [18]:
# Display the unnormalized longitude grid stored under 'x_lon' in the info dict.
info['x_lon']

tensor([[-179.3750, -178.1250, -176.8750,  ...,  176.8750,  178.1250,
          179.3750],
        [-179.3750, -178.1250, -176.8750,  ...,  176.8750,  178.1250,
          179.3750],
        [-179.3750, -178.1250, -176.8750,  ...,  176.8750,  178.1250,
          179.3750],
        ...,
        [-179.3750, -178.1250, -176.8750,  ...,  176.8750,  178.1250,
          179.3750],
        [-179.3750, -178.1250, -176.8750,  ...,  176.8750,  178.1250,
          179.3750],
        [-179.3750, -178.1250, -176.8750,  ...,  176.8750,  178.1250,
          179.3750]], device='cuda:0', dtype=torch.float64)

In [19]:
def normalize_latitudes(lats):
    """Rescale latitudes from the range [-90, 90] to the range [0, 1].

    Uses only `+` and `/`, so it accepts a plain number or any array-like
    object that supports those operators elementwise (it is called on a
    tensor later in this notebook).
    """
    shifted = lats + 90  # now in [0, 180]
    return shifted / 180

def normalize_longitudes(lons):
    """Rescale longitudes from the range [-180, 180] to the range [0, 1].

    Uses only `+` and `/`, so it accepts a plain number or any array-like
    object that supports those operators elementwise (it is called on a
    tensor later in this notebook).
    """
    shifted = lons + 180  # now in [0, 360]
    return shifted / 360

In [20]:
# Display the 'x_lat' grid after rescaling it with normalize_latitudes.
normalize_latitudes(info['x_lat'])

tensor([[0.6694, 0.6694, 0.6694,  ..., 0.6694, 0.6694, 0.6694],
        [0.6750, 0.6750, 0.6750,  ..., 0.6750, 0.6750, 0.6750],
        [0.6806, 0.6806, 0.6806,  ..., 0.6806, 0.6806, 0.6806],
        ...,
        [0.8472, 0.8472, 0.8472,  ..., 0.8472, 0.8472, 0.8472],
        [0.8528, 0.8528, 0.8528,  ..., 0.8528, 0.8528, 0.8528],
        [0.8583, 0.8583, 0.8583,  ..., 0.8583, 0.8583, 0.8583]],
       device='cuda:0', dtype=torch.float64)

In [21]:
# Display the 'x_lon' grid after rescaling it with normalize_longitudes.
normalize_longitudes(info['x_lon'])

tensor([[0.0017, 0.0052, 0.0087,  ..., 0.9913, 0.9948, 0.9983],
        [0.0017, 0.0052, 0.0087,  ..., 0.9913, 0.9948, 0.9983],
        [0.0017, 0.0052, 0.0087,  ..., 0.9913, 0.9948, 0.9983],
        ...,
        [0.0017, 0.0052, 0.0087,  ..., 0.9913, 0.9948, 0.9983],
        [0.0017, 0.0052, 0.0087,  ..., 0.9913, 0.9948, 0.9983],
        [0.0017, 0.0052, 0.0087,  ..., 0.9913, 0.9948, 0.9983]],
       device='cuda:0', dtype=torch.float64)