## Setting up in Colab

In [1]:
# Cloning repository into current folder
# First delete any copies of the repository content already present in the working
# directory (presumably left over from an earlier run of this cell), so the `mv`
# below starts from a clean state.
!rm -rf notebooks src requirements.txt README.md
!git clone https://github.com/andraspalasti/deeplearning-hw.git
# Move the repository content up into the current folder, then drop the now
# unneeded clone directory (note: the `*` glob does not match dotfiles).
!mv deeplearning-hw/* .
!rm -rf deeplearning-hw/

# Install the packages used
# requirements.txt comes from the cloned repository; gdown is installed separately
# and is used by the next cell to download the dataset from Google Drive.
%pip install -q -r requirements.txt
%pip install -q gdown

Cloning into 'deeplearning-hw'...
remote: Enumerating objects: 191, done.[K
remote: Counting objects: 100% (191/191), done.[K
remote: Compressing objects: 100% (128/128), done.[K
remote: Total 191 (delta 86), reused 149 (delta 49), pack-reused 0[K
Receiving objects: 100% (191/191), 6.14 MiB | 9.75 MiB/s, done.
Resolving deltas: 100% (86/86), done.
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.7/251.7 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Download data from google drive
# According to the recorded output the file is saved as airbus-dataset.zip
# (about 8.9 GB), so this cell takes a few minutes to finish.
!gdown 'https://drive.google.com/uc?id=1V-oxFZhctefBXL4noEgNG-ENaHoszTJE'

Downloading...
From: https://drive.google.com/uc?id=1V-oxFZhctefBXL4noEgNG-ENaHoszTJE
To: /content/airbus-dataset.zip
100% 8.88G/8.88G [02:36<00:00, 56.6MB/s]


In [3]:
# Extract the downloaded dataset archive into the current folder (-q: quiet mode).
# NOTE(review): the later cells read from data/processed, which presumably comes
# out of this archive - confirm.
!unzip -q airbus-dataset.zip

## Training the model

In [4]:
import os
from pathlib import Path
import torch
from torch.utils.data import DataLoader

from src.train import train_model
from src.unet import UNet
from src.data.datasets import AirbusDataset, AirbusTrainingset

In [5]:
# Determine where to save checkpoints
# On Colab the checkpoints go to the mounted Google Drive; everywhere else (or
# when mounting fails) they go to a local `checkpoints` folder.
try:
    from google.colab import drive
    drive.mount('/content/gdrive')
    checkpoint_dir = Path('/content/gdrive/MyDrive/checkpoints')
except ImportError:
    # google.colab is not importable, so we are not running inside Colab
    checkpoint_dir = Path('checkpoints')
except Exception as exc:
    # Mounting failed: keep the best-effort local fallback, but report why.
    # Unlike a bare `except:`, this does not swallow KeyboardInterrupt/SystemExit.
    print(f'Could not mount Google Drive ({exc!r}), saving checkpoints locally')
    checkpoint_dir = Path('checkpoints')

Mounted at /content/gdrive


In [6]:
# Create datasets and loaders
data_dir = Path('data/processed')

# The validation set is built with should_contain_ship=True; the training set
# uses its own dataset class.
val_set = AirbusDataset(data_dir / 'val_ship_segmentations.csv', data_dir / 'val', should_contain_ship=True)
train_set = AirbusTrainingset(data_dir / 'train_ship_segmentations.csv', data_dir / 'train')

# Arguments shared by both loaders
loader_args = dict(
    batch_size=20,
    # os.cpu_count() returns None when the CPU count cannot be determined; fall
    # back to 0 workers (loading in the main process) instead of passing None.
    num_workers=os.cpu_count() or 0,
    pin_memory=True,
    generator=torch.Generator().manual_seed(42) # So we have the same shuffling through each training
)
# Only the training data is shuffled; validation keeps the dataset order
train_loader = DataLoader(train_set, shuffle=True, **loader_args)
val_loader = DataLoader(val_set, shuffle=False, **loader_args)

In [7]:
# Select the device the training runs on: the GPU when CUDA is available,
# otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device {device}')

Using device cuda


In [8]:
# Build the U-Net (3 input channels, 1 output class), place it on the selected
# device and print a short summary of its configuration
model = UNet(n_channels=3, n_classes=1).to(device)
network_summary = (
    f'Network:\n'
    f'\t{model.n_channels} input channels\n'
    f'\t{model.n_classes} output channels (classes)\n'
    f'\t{"Bilinear" if model.bilinear else "Transposed conv"} upscaling'
)
print(network_summary)

Network:
	3 input channels
	1 output channels (classes)
	Transposed conv upscaling


In [9]:
# If you want you can load in previous models
# Set `load = None` to skip loading and train from freshly initialised weights.
load = Path('/content/gdrive/MyDrive/checkpoints/solar-cherry-34/checkpoint_epoch2.pth')

# Default learning rate, used when nothing is loaded or when the checkpoint does
# not store one (previously `learning_rate` stayed undefined in that case and
# the print below raised a NameError).
learning_rate = 0.0001
if load is not None:
    state_dict = torch.load(load, map_location=device)
    # The checkpoint may carry the learning rate next to the weights; take it out
    # so that only the model entries are handed to load_state_dict.
    learning_rate = state_dict.pop('learning_rate', learning_rate)
    model.load_state_dict(state_dict)
    print(f'Successfully loaded model from {load} with lr: {learning_rate}')

Successfully loaded model from /content/gdrive/MyDrive/checkpoints/solar-cherry-34/checkpoint_epoch2.pth with lr: 0.0001


In [10]:
# Log in to Weights & Biases; --relogin forces a fresh API key prompt even when
# credentials are already stored.
# NOTE(review): presumably needed because train_model logs to wandb - confirm.
!wandb login --relogin

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [11]:
# Run the training. `learning_rate` comes from the checkpoint-loading cell above
# (restored from the checkpoint or its default), so a resumed run continues with
# the stored value instead of a hard-coded one.
try:
    train_model(
        model,
        device,
        train_loader,
        val_loader,
        learning_rate=learning_rate,
        epochs=3,
        checkpoint_dir=checkpoint_dir,
        amp=True,
    )
except torch.cuda.OutOfMemoryError:
    # Release the cached GPU memory so the cell can be re-run (e.g. with a
    # smaller batch size) without restarting the runtime
    torch.cuda.empty_cache()
    print('Detected OutOfMemoryError!')

[34m[1mwandb[0m: Currently logged in as: [33mandraspalasti2[0m. Use [1m`wandb login --relogin`[0m to force relogin


Starting training:
        Epochs:          3
        Batch size:      20
        Learning rate:   0.0001
        Training size:   88997
        Validation size: 3982
        Device:          cuda
        Mixed Precision: True
    




Validation Dice score: 0.6542448401451111




Validation Dice score: 0.5747670531272888




Validation Dice score: 0.6245277523994446




Validation Dice score: 0.6295183897018433




Validation Dice score: 0.6167723536491394




Validation Dice score: 0.6032523512840271




Validation Dice score: 0.6490146517753601




Validation Dice score: 0.6574696898460388




Validation Dice score: 0.6647867560386658


Epoch 1/3: 89000img [47:48, 31.03img/s, loss (batch)=0.133]


Validation Dice score: 0.6718677878379822
Checkpoint 1 saved!




Validation Dice score: 0.6592105627059937




Validation Dice score: 0.6722691059112549




Validation Dice score: 0.6742385029792786




Validation Dice score: 0.6690824627876282




Validation Dice score: 0.6698282957077026




Validation Dice score: 0.6722084879875183




Validation Dice score: 0.6721780896186829




Validation Dice score: 0.6698583364486694




Validation Dice score: 0.677624523639679




Validation Dice score: 0.6791423559188843


Epoch 2/3: 89000img [47:44, 31.07img/s, loss (batch)=0.125]


Checkpoint 2 saved!




Validation Dice score: 0.6775093078613281




Validation Dice score: 0.6859358549118042




Validation Dice score: 0.6749242544174194




Validation Dice score: 0.6667897701263428




Validation Dice score: 0.6808034181594849




Validation Dice score: 0.676139235496521




Validation Dice score: 0.6819992065429688




Validation Dice score: 0.6719774007797241




Validation Dice score: 0.6788261532783508


Epoch 3/3: 89000img [47:46, 31.05img/s, loss (batch)=0.119]


Validation Dice score: 0.6889224052429199
Checkpoint 3 saved!
