### Data 

In [1]:
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import torch
from glob import glob
from PIL import Image
import seaborn as sns
import math
import random
import xarray as xr
from torch.masked import masked_tensor, as_masked_tensor

In [2]:
# Directory containing the training netCDF files.
data_dir = '/home/jovyan/shared-public/crunchy-snow/data/subsetsv1/train'
# glob() returns matches in arbitrary, filesystem-dependent order; sort them so that a
# given index always refers to the same file across runs and machines.
path_list = sorted(glob(f'{data_dir}/ASO_50M_SD*.nc'))

In [3]:
# [min, max] bounds per layer key, used for min-max normalization.
# The bounds were set by finding the min and max across the entire dataset.
norm_dict = {
    # snow depth layer
    "aso_sd": [0, 24.9],
    # shared by the snow-on / snow-off vv and vh layers
    "vv": [0, 13523.8],
    "vh": [0, 43.2],
    # AOT layer
    "AOT": [0, 572.1],
    # optical bands (read from B01-B12 in the netCDF files)
    "coastal": [0, 23459.1],
    "blue": [0, 23004.1],
    "green": [0, 26440.1],
    "red": [0, 21576.1],
    "red_edge1": [0, 20796.1],
    "red_edge2": [0, 20432.1],
    "red_edge3": [0, 20149.1],
    "nir": [0, 21217.1],
    "water_vapor": [0, 18199.1],
    "swir1": [0, 17549.1],
    "swir2": [0, 17314.1],
    # SCL and WVP layers
    "scene_class_map": [0, 15],
    "water_vapor_product": [0, 6517.5],
    # elevation layer
    "elevation": [-100, 9000],
}

In [4]:
def calc_norm(tensor, minmax_list):
    '''
    min-max scale `tensor` with the bounds stored in `minmax_list`

    minmax_list[0] is the lower bound and minmax_list[1] the upper bound, so the
    bounds themselves map to 0 and 1; values outside the bounds land outside [0, 1].
    '''
    lower = minmax_list[0]
    span = minmax_list[1] - lower
    return (tensor - lower) / span

In [5]:
# define dataset
class dataset(torch.utils.data.Dataset):
    '''
    Dataset that reads one netCDF file per item and returns its layers as tensors.

    path_list: sequence of netCDF file paths; item idx is read from path_list[idx]
    norm_dict: maps a layer key (e.g. 'vv', 'green') to [min, max], consumed by calc_norm
    norm: when truthy, min-max normalize every layer except fcf and the gap maps
    '''
    def __init__(self, path_list, norm_dict, norm=True):
        self.path_list = path_list
        self.norm_dict = norm_dict
        self.norm = norm

    # dataset length
    def __len__(self):
        # filelength is still stored on the instance, exactly as before, in case
        # other code reads it
        self.filelength = len(self.path_list)
        return self.filelength

    # load images
    def __getitem__(self, idx):
        '''
        Read the file at path_list[idx] and return a 17-tuple of tensors:

        (aso_sd, snowon_vv, snowon_vh, snowoff_vv, snowoff_vh, blue, green, red, nir,
         swir1, swir2, fcf, elevation, aso_gap_map, rtc_gap_map, ndsi, ndvi)

        ndsi and ndvi are derived from the raw (un-normalized) bands and are never
        normalized themselves.
        '''
        # Open the file in a context manager so its handle is released once every
        # variable has been read. `.values` materializes each variable as a numpy array,
        # so the tensors built from them stay valid after the file is closed.
        with xr.open_dataset(self.path_list[idx]) as ds:
            # optional spatial coarsening, currently disabled:
            #ds = ds.coarsen(x = 6, boundary = 'trim').mean().coarsen(y = 6, boundary = 'trim').mean()
            # convert to tensors
            aso_sd = torch.from_numpy(ds.aso_sd.values)
            snowon_vv = torch.from_numpy(ds.snowon_vv.values)
            snowon_vh = torch.from_numpy(ds.snowon_vh.values)
            snowoff_vv = torch.from_numpy(ds.snowoff_vv.values)
            snowoff_vh = torch.from_numpy(ds.snowoff_vh.values)
            snowon_vv_mean = torch.from_numpy(ds.snowon_vv_mean.values)
            snowon_vh_mean = torch.from_numpy(ds.snowon_vh_mean.values)
            snowoff_vv_mean = torch.from_numpy(ds.snowoff_vv_mean.values)
            snowoff_vh_mean = torch.from_numpy(ds.snowoff_vh_mean.values)
            aerosol_optical_thickness = torch.from_numpy(ds.AOT.values)
            coastal_aerosol = torch.from_numpy(ds.B01.values)
            blue = torch.from_numpy(ds.B02.values)
            green = torch.from_numpy(ds.B03.values)
            red = torch.from_numpy(ds.B04.values)
            red_edge1 = torch.from_numpy(ds.B05.values)
            red_edge2 = torch.from_numpy(ds.B06.values)
            red_edge3 = torch.from_numpy(ds.B07.values)
            nir = torch.from_numpy(ds.B08.values)
            water_vapor = torch.from_numpy(ds.B09.values)
            swir1 = torch.from_numpy(ds.B11.values)
            swir2 = torch.from_numpy(ds.B12.values)
            scene_class_map = torch.from_numpy(ds.SCL.values)
            water_vapor_product = torch.from_numpy(ds.WVP.values)
            fcf = torch.from_numpy(ds.fcf.values)
            elevation = torch.from_numpy(ds.elevation.values)
            aso_gap_map = torch.from_numpy(ds.aso_gap_map.values)
            rtc_gap_map = torch.from_numpy(ds.rtc_gap_map.values)
            rtc_mean_gap_map = torch.from_numpy(ds.rtc_mean_gap_map.values)

        # feature engineering / derived features
        # These ratios are computed before normalization, i.e. from the raw band values.
        # NOTE(review): pixels where the denominator is 0 come out as NaN/inf — confirm
        # whether downstream code masks them (e.g. via the gap maps).
        # NDSI
        ndsi = green.subtract(swir1)/green.add(swir1)
        # NDVI
        ndvi = nir.subtract(red)/nir.add(red)
        # TODO: vh/vv ratio, Heat Load Index, slopes

        # normalize layers (except gap maps and fcf)
        if self.norm:
            aso_sd = calc_norm(aso_sd, self.norm_dict['aso_sd'])
            snowon_vv = calc_norm(snowon_vv, self.norm_dict['vv'])
            snowon_vh = calc_norm(snowon_vh, self.norm_dict['vh'])
            snowoff_vv = calc_norm(snowoff_vv, self.norm_dict['vv'])
            snowoff_vh = calc_norm(snowoff_vh, self.norm_dict['vh'])
            snowon_vv_mean = calc_norm(snowon_vv_mean, self.norm_dict['vv'])
            snowon_vh_mean = calc_norm(snowon_vh_mean, self.norm_dict['vh'])
            snowoff_vv_mean = calc_norm(snowoff_vv_mean, self.norm_dict['vv'])
            snowoff_vh_mean = calc_norm(snowoff_vh_mean, self.norm_dict['vh'])
            aerosol_optical_thickness = calc_norm(aerosol_optical_thickness, self.norm_dict['AOT'])
            coastal_aerosol = calc_norm(coastal_aerosol, self.norm_dict['coastal'])
            blue = calc_norm(blue, self.norm_dict['blue'])
            green = calc_norm(green, self.norm_dict['green'])
            red = calc_norm(red, self.norm_dict['red'])
            red_edge1 = calc_norm(red_edge1, self.norm_dict['red_edge1'])
            red_edge2 = calc_norm(red_edge2, self.norm_dict['red_edge2'])
            red_edge3 = calc_norm(red_edge3, self.norm_dict['red_edge3'])
            nir = calc_norm(nir, self.norm_dict['nir'])
            water_vapor = calc_norm(water_vapor, self.norm_dict['water_vapor'])
            swir1 = calc_norm(swir1, self.norm_dict['swir1'])
            swir2 = calc_norm(swir2, self.norm_dict['swir2'])
            scene_class_map = calc_norm(scene_class_map, self.norm_dict['scene_class_map'])
            water_vapor_product = calc_norm(water_vapor_product, self.norm_dict['water_vapor_product'])
            elevation = calc_norm(elevation, self.norm_dict['elevation'])

        # return only selected bands, for now; the remaining layers above are loaded and
        # normalized but not yet part of the returned tuple
        # return green, swir1
        # return ndsi, ndvi
        return aso_sd, snowon_vv, snowon_vh, snowoff_vv, snowoff_vh, blue, green, red, nir, swir1, swir2, fcf, elevation, aso_gap_map, rtc_gap_map, ndsi, ndvi


In [6]:
# build the training dataset (normalization on) and wrap it in a loader that
# shuffles and yields one sample per batch
train_data = dataset(path_list, norm_dict, norm=True)
train_loader = DataLoader(train_data, batch_size=1, shuffle=True)

### Boilerplate CNN
##### (hopefully with model architecture flexibility)
Most of the following code is adapted from the U-Net implementation at https://github.com/nikhilroxtomar/Semantic-Segmentation-Architecture/blob/main/PyTorch/unet.py

In [7]:
import torch.nn as nn

In [8]:
# double convolution block
class cnn_block(nn.Module):
    '''
    two 3x3 convolutions, each followed by a ReLU

    Both convolutions use padding=1 with the default stride, so the spatial size of the
    input is preserved; only the channel count changes (in_channel -> out_channel).
    '''
    def __init__(self, in_channel, out_channel):
        super().__init__()

        conv_kwargs = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv2d(in_channel, out_channel, **conv_kwargs)
        self.conv2 = nn.Conv2d(out_channel, out_channel, **conv_kwargs)
        self.relu = nn.ReLU()

    def forward(self, x_in):
        # first conv + relu
        hidden = self.relu(self.conv1(x_in))
        # second conv + relu
        return self.relu(self.conv2(hidden))

# cnn + max pooling block
class conv_pool_block(nn.Module):
    '''
    encoder step: apply a cnn_block, then 2x2 max pooling

    forward returns both the un-pooled feature map (used as the skip connection) and
    the pooled feature map (fed to the next encoder step).
    '''
    def __init__(self, in_channel, out_channel):
        super().__init__()

        self.conv = cnn_block(in_channel, out_channel)
        self.pool = nn.MaxPool2d((2, 2))

    def forward(self, x_in):
        # Run the input through the conv block created in __init__. The previous code
        # built a new cnn_block here from names that do not exist in this scope, which
        # raised NameError and never touched x_in.
        x_out = self.conv(x_in)
        p_out = self.pool(x_out)

        return x_out, p_out


# decoder step: upsample, merge with the skip connection, convolve
class inverse_conv_block(nn.Module):
    '''
    transposed convolution (kernel 2, stride 2) followed by a cnn_block

    The upsampled map and the skip tensor are concatenated along the channel axis, so
    the conv block receives 2 * out_channel channels and emits out_channel.
    '''
    def __init__(self, in_channel, out_channel):
        super().__init__()

        self.upsample = nn.ConvTranspose2d(in_channel, out_channel, kernel_size=2, stride=2)
        self.conv = cnn_block(2 * out_channel, out_channel)

    def forward(self, x_in, skip):
        upsampled = self.upsample(x_in)
        # skip must match the upsampled map in every dimension except channels
        merged = torch.cat((upsampled, skip), dim=1)
        return self.conv(merged)

In [9]:
# building the u-net
class build_cnn(nn.Module):
    '''
    U-Net style network: four conv+pool encoder steps, a bottleneck conv block, four
    upsampling decoder steps with skip connections, and a final 1x1 convolution.

    in_channels: number of channels in the input tensor (default 3)
    out_channels: number of channels produced by the final 1x1 convolution (default 1)
    base_channels: width of the first encoder step; each deeper step doubles it
                   (default 32, giving 32-64-128-256 and a 512-channel bottleneck)

    The defaults reproduce the previously hard-coded architecture.
    '''
    def __init__(self, in_channels=3, out_channels=1, base_channels=32):
        super().__init__()

        # channel widths for the four encoder levels and the bottleneck
        c1, c2, c3, c4, c5 = (base_channels * 2 ** level for level in range(5))

        # convolutions + pooling
        self.cp1 = conv_pool_block(in_channels, c1)
        self.cp2 = conv_pool_block(c1, c2)
        self.cp3 = conv_pool_block(c2, c3)
        self.cp4 = conv_pool_block(c3, c4)

        # convolution (bottleneck)
        self.b = cnn_block(c4, c5)

        # inverse convolutions
        self.ic1 = inverse_conv_block(c5, c4)
        self.ic2 = inverse_conv_block(c4, c3)
        self.ic3 = inverse_conv_block(c3, c2)
        self.ic4 = inverse_conv_block(c2, c1)

        # output
        self.outputs = nn.Conv2d(c1, out_channels, kernel_size=1, padding=0)

    def forward(self, inputs):
        # NOTE(review): with the 2x2 pooling / stride-2 upsampling blocks defined in this
        # notebook, input height and width presumably need to be multiples of 16 for the
        # skip connections to line up — confirm against the tile size in use.

        # conv + pool: keep each un-pooled map for the matching decoder step
        x1, p1 = self.cp1(inputs)
        x2, p2 = self.cp2(p1)
        x3, p3 = self.cp3(p2)
        x4, p4 = self.cp4(p3)

        # conv
        b = self.b(p4)

        # upsample, pairing each step with the encoder map of the same level
        u1 = self.ic1(b, x4)
        u2 = self.ic2(u1, x3)
        u3 = self.ic3(u2, x2)
        u4 = self.ic4(u3, x1)

        # 1x1 output convolution
        outputs = self.outputs(u4)

        return outputs

In [10]:
# instantiate the network with build_cnn's default constructor arguments
model = build_cnn()

### Hyperparameters