In [1]:
from torch.utils.data import DataLoader
import numpy as np
import torch
from glob import glob

import deep_snow.dataset

In [2]:
# directories holding the training and validation subsets
train_data_dir = '/mnt/Backups/gbrench/repos/deep-snow/data/subsets_v4/train'
val_data_dir = '/mnt/Backups/gbrench/repos/deep-snow/data/subsets_v4/val'

# collect every NetCDF file in each split whose name starts with ASO_50M_SD
train_path_list = glob(train_data_dir + '/ASO_50M_SD*.nc')
val_path_list = glob(val_data_dir + '/ASO_50M_SD*.nc')

In [3]:
# define data to be returned by dataloader
# Channels are grouped by data source below and then joined, in this order,
# into the single `all_channels` list.

# ASO products
aso_channels = [
    'aso_sd',       # ASO lidar snow depth (target dataset)
    'aso_gap_map',  # gaps in ASO data
]

# Sentinel-1 products: backscatter in dB.
# Names without a suffix use the closest acquisition to the ASO acquisition;
# *_mean names are the mean of acquisitions in a 4 week period around the
# ASO acquisition.
sentinel1_channels = [
    'snowon_vv',         # snow on, VV polarization
    'snowon_vh',         # snow on, VH polarization
    'snowoff_vv',        # snow off, VV polarization
    'snowoff_vh',        # snow off, VH polarization
    'snowon_vv_mean',    # snow on, VV polarization, 4 week mean
    'snowon_vh_mean',    # snow on, VH polarization, 4 week mean
    'snowoff_vv_mean',   # snow off, VV polarization, 4 week mean
    'snowoff_vh_mean',   # snow off, VH polarization, 4 week mean
    'snowon_cr',         # cross ratio, snowon_vh - snowon_vv
    'snowoff_cr',        # cross ratio, snowoff_vh - snowoff_vv
    'delta_cr',          # change in cross ratio, snowon_cr - snowoff_cr
    'rtc_gap_map',       # gaps in Sentinel-1 data
    'rtc_mean_gap_map',  # gaps in Sentinel-1 mean data
]

# Sentinel-2 products (snow on)
sentinel2_channels = [
    'aerosol_optical_thickness',  # aerosol optical thickness band
    'coastal_aerosol',            # coastal aerosol band
    'blue',                       # blue band
    'green',                      # green band
    'red',                        # red band
    'red_edge1',                  # red edge 1 band
    'red_edge2',                  # red edge 2 band
    'red_edge3',                  # red edge 3 band
    'nir',                        # near infrared band
    'water_vapor',                # water vapor
    'swir1',                      # shortwave infrared band 1
    'swir2',                      # shortwave infrared band 2
    'scene_class_map',            # scene classification product
    'water_vapor_product',        # water vapor product
    'ndvi',                       # Normalized Difference Vegetation Index
    'ndsi',                       # Normalized Difference Snow Index
    'ndwi',                       # Normalized Difference Water Index
    's2_gap_map',                 # gaps in Sentinel-2 data
]

# SNODAS
snodas_channels = [
    'snodas_sd',  # snodas snow depth
]

# PROBA-V global land cover dataset (Buchhorn et al., 2020)
land_cover_channels = [
    'fcf',  # fractional forest cover
]

# COP30 digital elevation model
terrain_channels = [
    'elevation',
    'slope',
    'aspect',
    'curvature',
    'tpi',
    'tri',
]

# latitude and longitude
location_channels = [
    'latitude',
    'longitude',
]

# day of water year
time_channels = [
    'dowy',
]

all_channels = [
    *aso_channels,
    *sentinel1_channels,
    *sentinel2_channels,
    *snodas_channels,
    *land_cover_channels,
    *terrain_channels,
    *location_channels,
    *time_channels,
]

# build the training and validation datasets over every channel
# (norm=False: presumably returns un-normalized values -- confirm in deep_snow.dataset)
train_data = deep_snow.dataset.Dataset(train_path_list, all_channels, norm=False)
val_data = deep_snow.dataset.Dataset(val_path_list, all_channels, norm=False)

# wrap each dataset in a fixed-order loader that yields batches of 2000 samples
train_loader = DataLoader(train_data, batch_size=2000, shuffle=False)
val_loader = DataLoader(val_data, batch_size=2000, shuffle=False)

In [4]:
# find dataset min and max for normalization
# norm_dict maps channel name -> [min, max], accumulated over all training batches
norm_dict = {}
for i, outputs in enumerate(train_loader):
    print(f'loop {i+1}')
    for j, item in enumerate(outputs):
        # The training Dataset was built from `all_channels`; this assumes each
        # batch holds one tensor per channel in that same order -- TODO confirm.
        data_name = all_channels[j]

        # reduce once per tensor instead of re-running min()/max() for every comparison
        batch_min = item.min().item()
        batch_max = item.max().item()

        if i == 0:
            # first batch seeds the running extremes
            norm_dict[data_name] = [batch_min, batch_max]
            continue

        if batch_max > norm_dict[data_name][1]:
            norm_dict[data_name][1] = batch_max

        # A batch minimum of exactly 0 never lowers the running minimum.
        # NOTE(review): the first batch's minimum is stored even when it is 0,
        # so zeros are only skipped from the second batch on -- confirm intended.
        if batch_min != 0 and batch_min < norm_dict[data_name][0]:
            norm_dict[data_name][0] = batch_min

loop 1
loop 2
loop 3
loop 4
loop 5
loop 6
loop 7


In [5]:
# display the per-channel [min, max] pairs accumulated from the training loader
norm_dict

{'aso_sd': [0.0, 397.2589111328125],
 'aso_gap_map': [0.0, 1.0],
 'snowon_vv': [-57.02630615234375, 29.14653778076172],
 'snowon_vh': [-64.75164794921875, 15.596705436706543],
 'snowoff_vv': [-52.30192184448242, 26.94390869140625],
 'snowoff_vh': [-61.0316047668457, 15.569765090942383],
 'snowon_vv_mean': [-57.02630615234375, 29.60448455810547],
 'snowon_vh_mean': [-64.75164794921875, 16.35344696044922],
 'snowoff_vv_mean': [-54.76593780517578, 26.17017364501953],
 'snowoff_vh_mean': [-60.62683868408203, 14.829648971557617],
 'snowon_cr': [-38.32542419433594, 13.106416702270508],
 'snowoff_cr': [-37.60618209838867, 11.941126823425293],
 'delta_cr': [-32.43565368652344, 23.977340698242188],
 'rtc_gap_map': [0.0, 1.0],
 'rtc_mean_gap_map': [0.0, 1.0],
 'aerosol_optical_thickness': [0.0, 572.0],
 'coastal_aerosol': [0.0, 24304.0],
 'blue': [0.0, 23371.0],
 'green': [0.0, 26440.0],
 'red': [0.0, 21994.0],
 'red_edge1': [0.0, 21321.0],
 'red_edge2': [0.0, 21131.0],
 'red_edge3': [0.0, 20978

In [None]:
# find dataset 1st and 99th percentiles for normalization
def channel_percentiles(path_list, channel, percentiles=(1, 99), batch_size=2000):
    """Return percentiles of the non-zero values of a single channel.

    A one-channel Dataset and DataLoader are created locally, so the
    notebook-level ``train_loader`` / ``val_loader`` objects are not replaced.
    All batches are gathered in memory, exact zeros are discarded, and the
    percentiles are computed over what remains. The gathered array is a local
    and is released when the function returns.

    Parameters
    ----------
    path_list : list of str
        Files passed to ``deep_snow.dataset.Dataset``.
    channel : str
        Name of the channel to load.
    percentiles : sequence of float, optional
        Percentiles to compute, in the range 0-100. Defaults to ``(1, 99)``.
    batch_size : int, optional
        Number of samples per DataLoader batch. Defaults to 2000.

    Returns
    -------
    list of float
        One value per requested percentile, in the same order. Every entry is
        NaN when the channel holds no non-zero values.
    """
    data = deep_snow.dataset.Dataset(path_list, [channel], norm=False)
    loader = torch.utils.data.DataLoader(dataset=data, batch_size=batch_size, shuffle=False)

    # collect flattened batches in a list and concatenate once at the end;
    # concatenating inside the loop would re-copy the growing array every batch
    chunks = []
    for i, outputs in enumerate(loader):
        print(f'loop {i+1}')
        # iterate the batch rather than index it, so every tensor it yields is gathered
        for item in outputs:
            chunks.append(item.flatten().numpy())

    values = np.concatenate(chunks) if chunks else np.array([])
    values = values[values != 0]  # exact zeros are left out of the statistics

    if values.size == 0:
        # nothing to rank: report NaN instead of failing inside np.percentile
        return [float('nan')] * len(percentiles)

    return [float(p) for p in np.percentile(values, list(percentiles))]


# channel name -> [1st percentile, 99th percentile]
train_norm_dict = {}
val_norm_dict = {}

for channel in all_channels:
    train_norm_dict[channel] = channel_percentiles(train_path_list, channel)
    val_norm_dict[channel] = channel_percentiles(val_path_list, channel)

In [None]:
# display the per-channel [1st, 99th] percentile pairs computed for the training set
train_norm_dict