In [1]:
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
import torch
from glob import glob
import seaborn as sns
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm
import pickle
import random
import time

import deep_snow.models
import deep_snow.dataset

In [2]:
# def set_seed(seed: int = 43):
#     random.seed(seed)
#     np.random.seed(seed)
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed(seed)
#     torch.backends.cudnn.deterministic = True
#     torch.backends.cudnn.benchmark = False

# set_seed()

In [3]:
def sample_lognormal(center, sigma):
    """Draw a single random value from a lognormal distribution.

    The underlying normal distribution has mean ``log(center)`` and standard
    deviation ``sigma``, so ``center`` is the median of the sampled values.
    The draw uses NumPy's global random state.

    Args:
        center: positive value the distribution is centred on (its median).
        sigma: standard deviation of the underlying normal distribution.

    Returns:
        One sampled value.
    """
    log_center = np.log(center)
    draw = np.random.lognormal(log_center, sigma)
    return draw

# def visualize_lognormal(center, sigmas, n_samples=10000):
#     f, ax = plt.subplots(figsize=(10, 6))
#     for sigma in sigmas:
#         mu = np.log(center)
#         samples = np.random.lognormal(mean=mu, sigma=sigma, size=n_samples)
#         sns.kdeplot(samples, label=f'sigma={sigma}', ax=ax)
#     ax.set_xlabel('Sampled Value')
#     ax.set_ylabel('Density')
#     ax.set_title(f'Lognormal Samples Centered on {center}')
#     f.legend()
#     ax.grid(True, which='both', linestyle='--')
#     ax.set_xlim(0, 0.0005)
#     ax.set_ylim(0)

# # Example usage
# visualize_lognormal(center=1e-4, sigmas=[1.0])

In [4]:
# get paths to data
# glob() returns matches in arbitrary, filesystem-dependent order, so the lists
# are sorted to give the same file order on every run and every machine.
train_data_dir = '/mnt/Backups/gbrench/repos/deep-snow/data/subsets_v4/train'
train_path_list = sorted(glob(f'{train_data_dir}/ASO_50M_SD*.nc'))

val_data_dir = '/mnt/Backups/gbrench/repos/deep-snow/data/subsets_v4/val'
val_path_list = sorted(glob(f'{val_data_dir}/ASO_50M_SD*.nc'))

In [5]:
# # to test code with a small sample of the data
# n_imgs = 512

# train_path_list = random.sample(train_path_list, n_imgs)
# val_path_list = random.sample(val_path_list, n_imgs)

In [6]:
# Reference list of the channel names described for this dataset, grouped by source.
# NOTE: in the cells visible in this notebook, the channels actually requested
# from the dataset are the ones in `return_channels` (built from
# `input_channels`), not this list.
all_channels = [
    # ASO products
    'aso_sd', # ASO lidar snow depth (target dataset)
    'aso_gap_map', # gaps in ASO data

    # Sentinel-1 products
    'snowon_vv', # snow on Sentinel-1 VV polarization backscatter in dB, closest acquisition to ASO acquisition
    'snowon_vh', # snow on Sentinel-1 VH polarization backscatter in dB, closest acquisition to ASO acquisition
    'snowoff_vv', # snow off Sentinel-1 VV polarization backscatter in dB, closest acquisition to ASO acquisition
    'snowoff_vh', # snow off Sentinel-1 VH polarization backscatter in dB, closest acquisition to ASO acquisition
    'snowon_vv_mean', # snow on Sentinel-1 VV polarization backscatter in dB, mean of acquisition in 4 week period around ASO acquisition
    'snowon_vh_mean', # snow on Sentinel-1 VH polarization backscatter in dB, mean of acquisition in 4 week period around ASO acquisition
    'snowoff_vv_mean', # snow off Sentinel-1 VV polarization backscatter in dB, mean of acquisition in 4 week period around ASO acquisition
    'snowoff_vh_mean', # snow off Sentinel-1 VH polarization backscatter in dB, mean of acquisition in 4 week period around ASO acquisition
    'snowon_cr', # cross ratio, snowon_vh - snowon_vv
    'snowoff_cr', # cross ratio, snowoff_vh - snowoff_vv
    'delta_cr', # change in cross ratio, snowon_cr - snowoff_cr
    'rtc_gap_map', # gaps in Sentinel-1 data
    'rtc_mean_gap_map', # gaps in Sentinel-1 mean data

    # Sentinel-2 products
    'aerosol_optical_thickness', # snow on Sentinel-2 aerosol optical thickness band 
    'coastal_aerosol', # snow on Sentinel-2 coastal aerosol band
    'blue', # snow on Sentinel-2 blue band
    'green', # snow on Sentinel-2 green band
    'red', # snow on Sentinel-2 red band
    'red_edge1', # snow on Sentinel-2 red edge 1 band
    'red_edge2', # snow on Sentinel-2 red edge 2 band
    'red_edge3', # snow on Sentinel-2 red edge 3 band
    'nir', # snow on Sentinel-2 near infrared band
    'water_vapor', # snow on Sentinel-2 water vapor
    'swir1', # snow on Sentinel-2 shortwave infrared band 1
    'swir2', # snow on Sentinel-2 shortwave infrared band 2
    'scene_class_map', # snow on Sentinel-2 scene classification product
    'water_vapor_product', # snow on Sentinel-2 water vapor product
    'ndvi', # Normalized Difference Vegetation Index from Sentinel-2
    'ndsi', # Normalized Difference Snow Index from Sentinel-2
    'ndwi', # Normalized Difference Water Index from Sentinel-2
    's2_gap_map', # gaps in Sentinel-2 data

    # SNODAS dataset
    'snodas_sd', # snow depth

    # PROBA-V global land cover dataset (Buchhorn et al., 2020)
    'fcf', # fractional forest cover

    # COP30 digital elevation model
    'elevation',
    'slope',
    'aspect',
    'northness',
    'eastness',
    'curvature',
    'tpi',
    'tri',

    # latitude and longitude
    'latitude',
    'longitude',

    # day of water year
    'dowy'
                    ]

In [7]:
def _masked_batch_loss(model, loss_fn, data_tuple, input_channels, return_channels, device):
    """Run `model` on one batch and return the loss on gap-masked snow depth.

    Pixels where the three gap maps ('aso_gap_map', 'rtc_gap_map',
    's2_gap_map') do not sum to zero are set to zero in both the prediction
    and the target before `loss_fn` is applied, so they add no error. With a
    mean-reducing loss they still count towards the denominator.
    """
    # read data into dictionary keyed by channel name
    data_dict = dict(zip(return_channels, data_tuple))
    # prepare inputs by concatenating along channel dimension
    inputs = torch.cat([data_dict[channel] for channel in input_channels], dim=1).to(device)

    # generate prediction
    pred_sd = model(inputs)

    # build the validity mask once and reuse it for prediction and target
    valid = (data_dict['aso_gap_map'].to(device)
             + data_dict['rtc_gap_map'].to(device)
             + data_dict['s2_gap_map'].to(device)) == 0
    zeros = torch.zeros_like(pred_sd)
    pred_sd = torch.where(valid, pred_sd, zeros)
    aso_sd = torch.where(valid, data_dict['aso_sd'].to(device), zeros)

    return loss_fn(pred_sd, aso_sd)


def train_model(input_channels, return_channels, epochs, lr, weight_decay, n_layers=5):
    """Train a ResDepth model and report its best validation loss.

    Batches are read from the notebook-level `train_loader` and `val_loader`,
    which must be defined before this function is called. After every epoch the
    training and validation loss histories are pickled under ``../../../loss/``.
    Model weights are written under ``../../../weights/`` each time the
    validation loss reaches a new minimum at a zero-based epoch index greater
    than 30. Training stops early once the validation loss has not improved for
    30 consecutive epochs. Runs on CUDA when available, otherwise on CPU.

    Args:
        input_channels: channel names concatenated, in this order, into the
            model input.
        return_channels: names of the tensors in each batch tuple, in the order
            the loaders yield them. Must contain every input channel plus
            'aso_sd', 'aso_gap_map', 'rtc_gap_map' and 's2_gap_map'.
        epochs: maximum number of epochs to train.
        lr: learning rate passed to Adam.
        weight_decay: weight decay passed to Adam.
        n_layers: passed to ResDepth as `depth`.

    Returns:
        ``[min_val_loss_epoch, min_val_loss]``: the zero-based index of the
        epoch with the lowest mean validation loss, and that loss. If no epoch
        ever improved on the initial value (e.g. ``epochs == 0`` or NaN losses)
        this is ``[None, inf]``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = deep_snow.models.ResDepth(n_input_channels=len(input_channels), depth=n_layers)
    model_name = f'ResDepth_lr{lr}_weightdecay{weight_decay}'
    model.to(device)

    # Define optimizer, LR scheduler and loss function
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    loss_fn = nn.MSELoss()

    train_loss = []
    val_loss = []
    # Start from +inf (not an arbitrary threshold) so the first epoch always
    # registers as the best so far and the return value is always defined.
    min_val_loss = float('inf')
    min_val_loss_epoch = None
    patience = 0
    patience_limit = 30
    # Checkpoints are only written for zero-based epoch indices above this value.
    # NOTE(review): a best epoch at index <= 30 is never checkpointed; if no
    # later epoch improves on it, the run ends without any saved weights.
    save_after_epoch = 30

    # training and validation loop
    for epoch in range(epochs):
        epoch_start_time = time.time()
        print(f'\nStarting epoch {epoch+1}')
        train_epoch_loss = []
        val_epoch_loss = []

        # Loop through training data with tqdm progress bar
        model.train()
        pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", unit="batch", ncols=130)
        for data_tuple in pbar:
            optimizer.zero_grad()

            train_batch_loss = _masked_batch_loss(
                model, loss_fn, data_tuple, input_channels, return_channels, device)
            batch_loss_value = train_batch_loss.item()
            train_epoch_loss.append(batch_loss_value)

            # Update tqdm progress bar with batch loss
            pbar.set_postfix({'batch loss': batch_loss_value, 'mean epoch loss': np.mean(train_epoch_loss)})

            train_batch_loss.backward()  # Propagate the gradients in backward pass
            optimizer.step()

        mean_train_loss = np.mean(train_epoch_loss)
        train_loss.append(mean_train_loss)
        print(f'Training loss: {mean_train_loss}')

        # Run model on validation data with tqdm progress bar
        model.eval()
        with torch.no_grad():
            for data_tuple in tqdm(val_loader, desc="Validation", unit="batch"):
                val_batch_loss = _masked_batch_loss(
                    model, loss_fn, data_tuple, input_channels, return_channels, device)
                val_epoch_loss.append(val_batch_loss.item())

        mean_val_loss = np.mean(val_epoch_loss)
        val_loss.append(mean_val_loss)
        print(f'Validation loss: {mean_val_loss}')
        scheduler.step(mean_val_loss)

        # save loss histories after every epoch so an interrupted run keeps its curves
        with open(f'../../../loss/{model_name}_val_loss.pkl', 'wb') as f:
            pickle.dump(val_loss, f)

        with open(f'../../../loss/{model_name}_train_loss.pkl', 'wb') as f:
            pickle.dump(train_loss, f)

        # Early stopping check
        if mean_val_loss < min_val_loss:
            min_val_loss = mean_val_loss
            min_val_loss_epoch = epoch
            patience = 0
            if epoch > save_after_epoch:
                torch.save(model.state_dict(), f'../../../weights/{model_name}_epochs{epoch}_minvalloss{min_val_loss:.5f}')
        else:
            patience += 1

        if patience >= patience_limit:
            print(f"\nEarly stopping at epoch {epoch + 1}. No improvement in validation loss for {patience_limit} epochs.")
            break

        epoch_end_time = time.time()
        print(f'epoch time: {epoch_end_time - epoch_start_time:.4f} seconds')

    #plot_loss(train_loss, val_loss)
    return [min_val_loss_epoch, min_val_loss]

In [8]:
# Channels fed to the model. Order matters: it is the order in which the
# channels are concatenated into the model input.
input_channels = [
    'snodas_sd',                                      # SNODAS snow depth
    'blue', 'swir1', 'ndsi',                          # Sentinel-2
    'elevation', 'northness', 'slope', 'curvature',   # COP30 DEM terrain
    'dowy',                                           # day of water year
    'delta_cr',                                       # Sentinel-1 change in cross ratio
    'fcf',                                            # fractional forest cover
]

In [None]:
# Random search over learning rate and weight decay.
num_trials = 20
epochs = 500
exp_dict = {}  # trial index -> [lr, weight_decay, best epoch index, best validation loss]
# besides the model inputs, each batch must carry the target and the gap maps used to mask the loss
return_channels = input_channels + ['aso_sd', 'aso_gap_map', 'rtc_gap_map', 's2_gap_map']

# prepare training and validation dataloaders
train_data = deep_snow.dataset.Datasetv2(train_path_list, return_channels, norm=True)
train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
val_data = deep_snow.dataset.Datasetv2(val_path_list, return_channels, norm=True, augment=False)
# Validation is not shuffled: train_model averages per-batch losses, so if the
# last batch is smaller, a changing sample order would change the epoch mean
# and make validation losses not comparable across epochs.
val_loader = DataLoader(dataset=val_data, batch_size=16, shuffle=False)

for trial in range(num_trials):

    print('---------------------------------------------------------')
    print(f'starting trial {trial}')
    # sample hyperparameters from lognormal distributions centred on the given values
    lr = sample_lognormal(center=1e-4, sigma=1.0)
    weight_decay = sample_lognormal(center=3e-5, sigma=1.0)
    print(f'lr: {lr}, weight decay: {weight_decay}')
    min_val_loss_epoch, min_val_loss = train_model(input_channels, return_channels, epochs=epochs, lr=lr, weight_decay=weight_decay)
    print(f'lr: {lr}, weight decay: {weight_decay}, final epoch: {min_val_loss_epoch}, final val loss: {min_val_loss}')
    exp_dict[trial] = [lr, weight_decay, min_val_loss_epoch, min_val_loss]
    # save experiments after every trial so completed trials survive an interrupted run
    with open('../../../loss/ResDepth_lr_tuning_loss_v3.pkl', 'wb') as f:
        pickle.dump(exp_dict, f)

---------------------------------------------------------
starting trial 0
lr: 1.9744292861009897e-05, weight decay: 9.961094014252058e-06

Starting epoch 1


Epoch 1/500: 100%|█████████████████████████████| 774/774 [50:37<00:00,  3.92s/batch, batch loss=0.000459, mean epoch loss=0.00292]


Training loss: 0.0029189745416216195


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [10:24<00:00,  4.63s/batch]


Validation loss: 0.0014672098416162447
epoch time: 3661.7201 seconds

Starting epoch 2


Epoch 2/500: 100%|██████████████████████████████| 774/774 [01:06<00:00, 11.71batch/s, batch loss=0.00202, mean epoch loss=0.00194]


Training loss: 0.001939668178962043


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.31batch/s]


Validation loss: 0.0013599274376045085
epoch time: 69.0982 seconds

Starting epoch 3


Epoch 3/500: 100%|███████████████████████████████| 774/774 [01:04<00:00, 12.03batch/s, batch loss=0.0018, mean epoch loss=0.00183]


Training loss: 0.0018324526115961358


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.82batch/s]


Validation loss: 0.0013542951537399656
epoch time: 67.3107 seconds

Starting epoch 4


Epoch 4/500: 100%|██████████████████████████████| 774/774 [01:04<00:00, 12.04batch/s, batch loss=0.00123, mean epoch loss=0.00176]


Training loss: 0.001763266807188008


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.47batch/s]


Validation loss: 0.0012261059303354058
epoch time: 67.2534 seconds

Starting epoch 5


Epoch 5/500: 100%|████████████████████████████████| 774/774 [01:04<00:00, 11.91batch/s, batch loss=0.0017, mean epoch loss=0.0017]


Training loss: 0.0017018101797562492


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.51batch/s]


Validation loss: 0.0012340591638348997
epoch time: 67.9481 seconds

Starting epoch 6


Epoch 6/500: 100%|██████████████████████████████| 774/774 [01:03<00:00, 12.22batch/s, batch loss=0.00322, mean epoch loss=0.00167]


Training loss: 0.0016663927154044575


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.76batch/s]


Validation loss: 0.0011551146558270342
epoch time: 66.3099 seconds

Starting epoch 7


Epoch 7/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.02batch/s, batch loss=0.000255, mean epoch loss=0.00162]


Training loss: 0.0016179723519862278


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.51batch/s]


Validation loss: 0.0011866655689017434
epoch time: 67.3589 seconds

Starting epoch 8


Epoch 8/500: 100%|████████████████████████████████| 774/774 [01:04<00:00, 12.03batch/s, batch loss=0.0011, mean epoch loss=0.0016]


Training loss: 0.001601347204244236


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.50batch/s]


Validation loss: 0.0011045339379321647
epoch time: 67.3352 seconds

Starting epoch 9


Epoch 9/500: 100%|██████████████████████████████| 774/774 [01:03<00:00, 12.21batch/s, batch loss=0.00126, mean epoch loss=0.00157]


Training loss: 0.0015729626954878496


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.47batch/s]


Validation loss: 0.001157454367084601
epoch time: 66.3647 seconds

Starting epoch 10


Epoch 10/500: 100%|█████████████████████████████| 774/774 [01:05<00:00, 11.87batch/s, batch loss=0.00122, mean epoch loss=0.00155]


Training loss: 0.0015459846679553827


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.56batch/s]


Validation loss: 0.0010675148622357046
epoch time: 68.1622 seconds

Starting epoch 11


Epoch 11/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.13batch/s, batch loss=0.00129, mean epoch loss=0.00153]


Training loss: 0.0015262935064272482


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.80batch/s]


Validation loss: 0.0010624568598965804
epoch time: 66.7689 seconds

Starting epoch 12


Epoch 12/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.03batch/s, batch loss=0.00236, mean epoch loss=0.00151]


Training loss: 0.0015116291753224276


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.50batch/s]


Validation loss: 0.0010396969937978106
epoch time: 67.3359 seconds

Starting epoch 13


Epoch 13/500: 100%|██████████████████████████████| 774/774 [01:03<00:00, 12.23batch/s, batch loss=0.00101, mean epoch loss=0.0015]


Training loss: 0.001504101576382843


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.80batch/s]


Validation loss: 0.0010280397871543687
epoch time: 66.2479 seconds

Starting epoch 14


Epoch 14/500: 100%|██████████████████████████████| 774/774 [01:04<00:00, 11.99batch/s, batch loss=0.0017, mean epoch loss=0.00147]


Training loss: 0.001471330674381106


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.45batch/s]


Validation loss: 0.0010068121006177669
epoch time: 67.5506 seconds

Starting epoch 15


Epoch 15/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.08batch/s, batch loss=0.00193, mean epoch loss=0.00146]


Training loss: 0.0014630138174245959


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.62batch/s]


Validation loss: 0.0010455512356539085
epoch time: 67.0289 seconds

Starting epoch 16


Epoch 16/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 11.98batch/s, batch loss=0.000638, mean epoch loss=0.00145]


Training loss: 0.0014480472441121349


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.45batch/s]


Validation loss: 0.0010271906456047737
epoch time: 67.5977 seconds

Starting epoch 17


Epoch 17/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.06batch/s, batch loss=0.00235, mean epoch loss=0.00144]


Training loss: 0.001438898020368298


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.88batch/s]


Validation loss: 0.000978916508352591
epoch time: 67.1243 seconds

Starting epoch 18


Epoch 18/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.07batch/s, batch loss=0.000854, mean epoch loss=0.00143]


Training loss: 0.0014253969002358353


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.79batch/s]


Validation loss: 0.0009959855655863605
epoch time: 67.0859 seconds

Starting epoch 19


Epoch 19/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.22batch/s, batch loss=0.00157, mean epoch loss=0.00142]


Training loss: 0.0014194331098560278


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.65batch/s]


Validation loss: 0.0009978494187601393
epoch time: 66.2941 seconds

Starting epoch 20


Epoch 20/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 11.99batch/s, batch loss=0.000863, mean epoch loss=0.0014]


Training loss: 0.001404837632924198


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.33batch/s]


Validation loss: 0.0010245208622008148
epoch time: 67.5372 seconds

Starting epoch 21


Epoch 21/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.08batch/s, batch loss=0.000513, mean epoch loss=0.0014]


Training loss: 0.0014036123193222885


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.60batch/s]


Validation loss: 0.0009568178225351773
epoch time: 67.0521 seconds

Starting epoch 22


Epoch 22/500: 100%|██████████████████████████████| 774/774 [01:04<00:00, 11.92batch/s, batch loss=0.00205, mean epoch loss=0.0014]


Training loss: 0.0013989202319632358


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.45batch/s]


Validation loss: 0.000949496278190054
epoch time: 67.9090 seconds

Starting epoch 23


Epoch 23/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.05batch/s, batch loss=0.000861, mean epoch loss=0.00139]


Training loss: 0.0013870538484934748


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.67batch/s]


Validation loss: 0.0009318965388013532
epoch time: 67.1903 seconds

Starting epoch 24


Epoch 24/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 11.94batch/s, batch loss=0.000899, mean epoch loss=0.00139]


Training loss: 0.0013863305763053607


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.36batch/s]


Validation loss: 0.0009922840591537318
epoch time: 67.8003 seconds

Starting epoch 25


Epoch 25/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 11.99batch/s, batch loss=0.00249, mean epoch loss=0.00138]


Training loss: 0.0013776205132883259


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.38batch/s]


Validation loss: 0.0009372413300702141
epoch time: 67.5379 seconds

Starting epoch 26


Epoch 26/500: 100%|████████████████████████████| 774/774 [01:03<00:00, 12.10batch/s, batch loss=0.000908, mean epoch loss=0.00137]


Training loss: 0.0013708550862785824


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.56batch/s]


Validation loss: 0.0009240930432160557
epoch time: 66.9171 seconds

Starting epoch 27


Epoch 27/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 11.92batch/s, batch loss=0.00154, mean epoch loss=0.00136]


Training loss: 0.001362993988942912


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.83batch/s]


Validation loss: 0.0009418432256939856
epoch time: 67.8856 seconds

Starting epoch 28


Epoch 28/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.13batch/s, batch loss=0.00164, mean epoch loss=0.00135]


Training loss: 0.001354192447040479


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.70batch/s]


Validation loss: 0.0010257424683206612
epoch time: 66.7809 seconds

Starting epoch 29


Epoch 29/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.06batch/s, batch loss=0.000842, mean epoch loss=0.00136]


Training loss: 0.0013553895799185026


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.42batch/s]


Validation loss: 0.0009670231907404269
epoch time: 67.1313 seconds

Starting epoch 30


Epoch 30/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.00batch/s, batch loss=0.00289, mean epoch loss=0.00135]


Training loss: 0.0013460121915067453


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.42batch/s]


Validation loss: 0.0009082290252317088
epoch time: 67.4905 seconds

Starting epoch 31


Epoch 31/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.15batch/s, batch loss=0.00142, mean epoch loss=0.00134]


Training loss: 0.0013429948045998194


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.84batch/s]


Validation loss: 0.000944313004233495
epoch time: 66.6522 seconds

Starting epoch 32


Epoch 32/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 11.99batch/s, batch loss=0.00051, mean epoch loss=0.00134]


Training loss: 0.0013415958921015035


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.62batch/s]


Validation loss: 0.0008962859613044808
epoch time: 67.9605 seconds

Starting epoch 33


Epoch 33/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.03batch/s, batch loss=0.00419, mean epoch loss=0.00133]


Training loss: 0.0013345798614927632


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.78batch/s]


Validation loss: 0.0009315456113890158
epoch time: 67.3052 seconds

Starting epoch 34


Epoch 34/500: 100%|████████████████████████████| 774/774 [01:03<00:00, 12.10batch/s, batch loss=0.000531, mean epoch loss=0.00133]


Training loss: 0.0013278379767087231


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.88batch/s]


Validation loss: 0.0009601737263707306
epoch time: 66.9096 seconds

Starting epoch 35


Epoch 35/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.15batch/s, batch loss=0.00302, mean epoch loss=0.00132]


Training loss: 0.001324147025226418


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.84batch/s]


Validation loss: 0.0008951517318901434
epoch time: 67.0875 seconds

Starting epoch 36


Epoch 36/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.01batch/s, batch loss=0.00128, mean epoch loss=0.00132]


Training loss: 0.001324446213430754


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.84batch/s]


Validation loss: 0.0009069385152542964
epoch time: 67.3787 seconds

Starting epoch 37


Epoch 37/500: 100%|████████████████████████████| 774/774 [01:03<00:00, 12.21batch/s, batch loss=0.000628, mean epoch loss=0.00132]


Training loss: 0.0013188876569462406


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.51batch/s]


Validation loss: 0.0008923308622023022
epoch time: 66.7759 seconds

Starting epoch 38


Epoch 38/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.22batch/s, batch loss=0.00124, mean epoch loss=0.00131]


Training loss: 0.0013147638599863287


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.80batch/s]


Validation loss: 0.0009067844091785244
epoch time: 66.2710 seconds

Starting epoch 39


Epoch 39/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.16batch/s, batch loss=0.00376, mean epoch loss=0.00131]


Training loss: 0.0013113050051220003


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.42batch/s]


Validation loss: 0.0008981644020187323
epoch time: 66.6211 seconds

Starting epoch 40


Epoch 40/500: 100%|██████████████████████████████| 774/774 [01:04<00:00, 12.05batch/s, batch loss=0.00133, mean epoch loss=0.0013]


Training loss: 0.0013011470118750221


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.80batch/s]


Validation loss: 0.0008778773354909693
epoch time: 67.5960 seconds

Starting epoch 41


Epoch 41/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.05batch/s, batch loss=0.00133, mean epoch loss=0.00131]


Training loss: 0.0013065405099896969


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.61batch/s]


Validation loss: 0.0009445368156000902
epoch time: 67.2010 seconds

Starting epoch 42


Epoch 42/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 11.95batch/s, batch loss=0.000957, mean epoch loss=0.0013]


Training loss: 0.0013031293778493459


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.79batch/s]


Validation loss: 0.0009032030653475818
epoch time: 67.7197 seconds

Starting epoch 43


Epoch 43/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 11.99batch/s, batch loss=0.000783, mean epoch loss=0.0013]


Training loss: 0.0012988119433148242


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.85batch/s]


Validation loss: 0.0008775041239224029
epoch time: 67.8438 seconds

Starting epoch 44


Epoch 44/500: 100%|██████████████████████████████| 774/774 [01:05<00:00, 11.87batch/s, batch loss=0.00161, mean epoch loss=0.0013]


Training loss: 0.0012965129606184508


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.52batch/s]


Validation loss: 0.0008717839248236958
epoch time: 68.6212 seconds

Starting epoch 45


Epoch 45/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.02batch/s, batch loss=0.000564, mean epoch loss=0.00129]


Training loss: 0.001287708156548882


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.48batch/s]


Validation loss: 0.000903030825321804
epoch time: 67.3783 seconds

Starting epoch 46


Epoch 46/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.06batch/s, batch loss=0.00185, mean epoch loss=0.00129]


Training loss: 0.00129089557624934


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.72batch/s]


Validation loss: 0.0008675499354635745
epoch time: 67.5913 seconds

Starting epoch 47


Epoch 47/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.07batch/s, batch loss=0.000896, mean epoch loss=0.00128]


Training loss: 0.0012841321392892079


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.76batch/s]


Validation loss: 0.0009231996094092243
epoch time: 67.1071 seconds

Starting epoch 48


Epoch 48/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.08batch/s, batch loss=0.000765, mean epoch loss=0.00129]


Training loss: 0.0012856607541736115


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.65batch/s]


Validation loss: 0.0009265997819494698
epoch time: 67.0617 seconds

Starting epoch 49


Epoch 49/500: 100%|█████████████████████████████| 774/774 [01:02<00:00, 12.41batch/s, batch loss=0.00214, mean epoch loss=0.00128]


Training loss: 0.0012804185921231222


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.52batch/s]


Validation loss: 0.0009110212327127725
epoch time: 65.3510 seconds

Starting epoch 50


Epoch 50/500: 100%|████████████████████████████| 774/774 [01:03<00:00, 12.13batch/s, batch loss=0.000517, mean epoch loss=0.00128]


Training loss: 0.00127546694873846


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.80batch/s]


Validation loss: 0.0008723365113837645
epoch time: 66.7759 seconds

Starting epoch 51


Epoch 51/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.05batch/s, batch loss=0.000968, mean epoch loss=0.00127]


Training loss: 0.0012697226544281105


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.48batch/s]


Validation loss: 0.0008663541772101005
epoch time: 67.5743 seconds

Starting epoch 52


Epoch 52/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.20batch/s, batch loss=0.00102, mean epoch loss=0.00126]


Training loss: 0.0012638072493168446


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.58batch/s]


Validation loss: 0.0009282397160185846
epoch time: 66.4102 seconds

Starting epoch 53


Epoch 53/500: 100%|████████████████████████████| 774/774 [01:03<00:00, 12.12batch/s, batch loss=0.000136, mean epoch loss=0.00127]


Training loss: 0.0012690975245301015


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.72batch/s]


Validation loss: 0.0008928303259718059
epoch time: 66.8286 seconds

Starting epoch 54


Epoch 54/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.19batch/s, batch loss=0.00153, mean epoch loss=0.00126]


Training loss: 0.001259744829891246


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.53batch/s]


Validation loss: 0.0008746944813066403
epoch time: 66.4534 seconds

Starting epoch 55


Epoch 55/500: 100%|████████████████████████████| 774/774 [01:05<00:00, 11.85batch/s, batch loss=0.000158, mean epoch loss=0.00126]


Training loss: 0.0012640248665911596


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.73batch/s]


Validation loss: 0.0008923615188838017
epoch time: 68.2474 seconds

Starting epoch 56


Epoch 56/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 11.94batch/s, batch loss=0.000714, mean epoch loss=0.00126]


Training loss: 0.0012622620429929698


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.67batch/s]


Validation loss: 0.0008787804247630347
epoch time: 67.7849 seconds

Starting epoch 57


Epoch 57/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.13batch/s, batch loss=0.00157, mean epoch loss=0.00126]


Training loss: 0.0012585566628918964


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.84batch/s]


Validation loss: 0.0008592498291464936
epoch time: 67.2013 seconds

Starting epoch 58


Epoch 58/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.01batch/s, batch loss=0.00304, mean epoch loss=0.00126]


Training loss: 0.001256351112219618


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.78batch/s]


Validation loss: 0.0008997713492674477
epoch time: 67.4271 seconds

Starting epoch 59


Epoch 59/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.01batch/s, batch loss=0.00151, mean epoch loss=0.00126]


Training loss: 0.0012567651148094948


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.76batch/s]


Validation loss: 0.0008434997481850183
epoch time: 67.8742 seconds

Starting epoch 60


Epoch 60/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.03batch/s, batch loss=0.00151, mean epoch loss=0.00125]


Training loss: 0.001247945331084874


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.64batch/s]


Validation loss: 0.0008954516931596488
epoch time: 67.2800 seconds

Starting epoch 61


Epoch 61/500: 100%|██████████████████████████████| 774/774 [01:03<00:00, 12.25batch/s, batch loss=0.0013, mean epoch loss=0.00125]


Training loss: 0.0012462769969640534


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.33batch/s]


Validation loss: 0.0008640023624239903
epoch time: 66.1798 seconds

Starting epoch 62


Epoch 62/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.12batch/s, batch loss=0.00113, mean epoch loss=0.00125]


Training loss: 0.0012536990111549349


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.65batch/s]


Validation loss: 0.0008511467626602906
epoch time: 66.8001 seconds

Starting epoch 63


Epoch 63/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.05batch/s, batch loss=0.00027, mean epoch loss=0.00124]


Training loss: 0.0012406292212396037


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.67batch/s]


Validation loss: 0.0008447148577370509
epoch time: 67.2189 seconds

Starting epoch 64


Epoch 64/500: 100%|█████████████████████████████| 774/774 [01:03<00:00, 12.17batch/s, batch loss=0.00137, mean epoch loss=0.00124]


Training loss: 0.001241445938062533


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.76batch/s]


Validation loss: 0.0008983957556429905
epoch time: 66.5370 seconds

Starting epoch 65


Epoch 65/500: 100%|█████████████████████████████| 774/774 [01:04<00:00, 12.06batch/s, batch loss=0.00138, mean epoch loss=0.00124]


Training loss: 0.0012354956443208867


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.56batch/s]


Validation loss: 0.0008380028799702241
epoch time: 67.5043 seconds

Starting epoch 66


Epoch 66/500: 100%|██████████████████████████████| 774/774 [01:03<00:00, 12.09batch/s, batch loss=0.0013, mean epoch loss=0.00124]


Training loss: 0.0012431236697688461


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.53batch/s]


Validation loss: 0.0008249493122868309
epoch time: 67.3467 seconds

Starting epoch 67


Epoch 67/500: 100%|██████████████████████████████| 774/774 [01:03<00:00, 12.13batch/s, batch loss=0.0014, mean epoch loss=0.00123]


Training loss: 0.0012277640382651282


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.45batch/s]


Validation loss: 0.0008499445101565302
epoch time: 66.7954 seconds

Starting epoch 68


Epoch 68/500:   4%|█▏                            | 30/774 [00:02<01:02, 11.98batch/s, batch loss=0.00242, mean epoch loss=0.00118]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

Epoch 108/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.07batch/s, batch loss=0.00131, mean epoch loss=0.00116]


Training loss: 0.0011587459849515865


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.79batch/s]


Validation loss: 0.000804998532705078
epoch time: 67.0939 seconds

Starting epoch 109


Epoch 109/500: 100%|███████████████████████████| 774/774 [01:03<00:00, 12.13batch/s, batch loss=0.000847, mean epoch loss=0.00116]


Training loss: 0.0011572631054394818


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.85batch/s]


Validation loss: 0.0008073309180973512
epoch time: 66.7692 seconds

Starting epoch 110


Epoch 110/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.02batch/s, batch loss=0.00118, mean epoch loss=0.00115]


Training loss: 0.0011544861488481345


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.48batch/s]


Validation loss: 0.0008012709234780804
epoch time: 67.3423 seconds

Starting epoch 111


Epoch 111/500: 100%|███████████████████████████| 774/774 [01:03<00:00, 12.11batch/s, batch loss=0.000971, mean epoch loss=0.00115]


Training loss: 0.0011538978892543055


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.20batch/s]


Validation loss: 0.0008016997482627631
epoch time: 66.8964 seconds

Starting epoch 112


Epoch 112/500: 100%|████████████████████████████| 774/774 [01:03<00:00, 12.11batch/s, batch loss=0.00151, mean epoch loss=0.00115]


Training loss: 0.0011539844119905082


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.84batch/s]


Validation loss: 0.0007995452685497219
epoch time: 67.3232 seconds

Starting epoch 113


Epoch 113/500: 100%|███████████████████████████| 774/774 [01:04<00:00, 11.97batch/s, batch loss=0.000323, mean epoch loss=0.00115]


Training loss: 0.001153778099016201


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.82batch/s]


Validation loss: 0.0007993587435016202
epoch time: 68.0620 seconds

Starting epoch 114


Epoch 114/500: 100%|███████████████████████████| 774/774 [01:03<00:00, 12.11batch/s, batch loss=0.000321, mean epoch loss=0.00115]


Training loss: 0.0011529632548405127


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.58batch/s]


Validation loss: 0.0008002627371482689
epoch time: 66.8641 seconds

Starting epoch 115


Epoch 115/500: 100%|███████████████████████████| 774/774 [01:03<00:00, 12.17batch/s, batch loss=0.000297, mean epoch loss=0.00115]


Training loss: 0.0011529943314707329


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.79batch/s]


Validation loss: 0.0008003015154692413
epoch time: 66.5702 seconds

Starting epoch 116


Epoch 116/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.05batch/s, batch loss=0.00164, mean epoch loss=0.00115]


Training loss: 0.0011535179398757484


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.57batch/s]


Validation loss: 0.0008011727214411453
epoch time: 67.2222 seconds

Starting epoch 117


Epoch 117/500: 100%|███████████████████████████| 774/774 [01:04<00:00, 12.09batch/s, batch loss=0.000733, mean epoch loss=0.00115]


Training loss: 0.0011530251887881737


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.74batch/s]


Validation loss: 0.0007996465603894278
epoch time: 66.9850 seconds

Starting epoch 118


Epoch 118/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 11.94batch/s, batch loss=0.00419, mean epoch loss=0.00115]


Training loss: 0.0011541223974057078


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.40batch/s]


Validation loss: 0.0008002589354334675
epoch time: 67.7755 seconds

Starting epoch 119


Epoch 119/500: 100%|███████████████████████████| 774/774 [01:04<00:00, 12.03batch/s, batch loss=0.000962, mean epoch loss=0.00115]


Training loss: 0.001152809483624239


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.84batch/s]


Validation loss: 0.000801280726510514
epoch time: 67.2864 seconds

Starting epoch 120


Epoch 120/500: 100%|███████████████████████████| 774/774 [01:04<00:00, 11.99batch/s, batch loss=0.000345, mean epoch loss=0.00115]


Training loss: 0.0011524766188474958


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.77batch/s]


Validation loss: 0.000800869767920597
epoch time: 67.5049 seconds

Starting epoch 121


Epoch 121/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.03batch/s, batch loss=0.00157, mean epoch loss=0.00115]


Training loss: 0.0011533192625583345


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.37batch/s]


Validation loss: 0.0007995567841808898
epoch time: 67.3286 seconds

Starting epoch 122


Epoch 122/500: 100%|████████████████████████████| 774/774 [01:04<00:00, 12.03batch/s, batch loss=0.00133, mean epoch loss=0.00115]


Training loss: 0.0011528867266755443


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.72batch/s]


Validation loss: 0.0008027998058996336
epoch time: 67.3064 seconds

Starting epoch 123


Epoch 123/500: 100%|███████████████████████████| 774/774 [01:02<00:00, 12.29batch/s, batch loss=0.000489, mean epoch loss=0.00115]


Training loss: 0.0011527622499310974


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.53batch/s]


Validation loss: 0.0008003592130046821
epoch time: 65.9363 seconds

Starting epoch 124


Epoch 124/500: 100%|███████████████████████████| 774/774 [01:04<00:00, 11.96batch/s, batch loss=0.000951, mean epoch loss=0.00115]


Training loss: 0.001152920628990647


Validation: 100%|██████████████████████████████████████████████████████████████████| 135/135 [00:02<00:00, 45.20batch/s]


Validation loss: 0.0007991500930209575
epoch time: 68.0997 seconds

Starting epoch 125


Epoch 125/500:   3%|▊                            | 22/774 [00:01<01:02, 12.12batch/s, batch loss=0.00296, mean epoch loss=0.00105]