In [1]:
# Essentials
import time
import copy
from collections import OrderedDict
import random
import os
from tifffile import TiffFile
from PIL import Image, ImageOps
from pathlib import Path
from tqdm.notebook import tqdm
# Data
import numpy as np
import pandas as pd
# Plot
import matplotlib.pyplot as plt
# Torch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, Subset
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from torch.autograd import Variable
import torch.nn.functional as F
# Torchvision
from torchvision import datasets, transforms
import torchvision.transforms.functional as TF
# segmentation_models_pytorch
import segmentation_models_pytorch as smp
# Albumentations
import albumentations as A
from albumentations.pytorch import ToTensorV2
# Local 
from unet import UNet
from LCD import LandCoverData
from dataset import *
from train import *
from utils import *
from metrics import *
from losses import *
# Project-local helper object from the LCD module, kept at notebook scope.
LCD = LandCoverData()

# Run on the first CUDA GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Device: ", device)

Device:  cpu


# ⚠️Seed everything!
It's important to seed everything for reproducibility.

In [2]:
def seed_everything(seed=42):
    """Seed every random number source used by the notebook.

    Sets the ``PYTHONHASHSEED`` environment variable, seeds the ``random``,
    NumPy and PyTorch generators with ``seed``, and puts cuDNN into
    deterministic, non-benchmarking mode.

    Args:
        seed: Integer seed applied to all generators (default 42).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The same seed goes to each library's global generator.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed):
        apply_seed(seed)

    # Trade cuDNN autotuning for run-to-run repeatability.
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False

In [3]:
# Seed used for the whole run; it is also embedded in the checkpoint `filename` below.
seed = 2021
# Seed all generators before any dataset split or model initialisation happens.
seed_everything(seed)

# 📜 Set all variables here
In this cell, we define all hyperparameters that we will be using for the rest of the notebook. It helps to group them in one place so we can track them better.

In [14]:
# Encoder (backbone) name handed to segmentation_models_pytorch as `encoder_name`.
MODEL = 'resnet18'
# Decoder architecture; `prepare` recognises 'UNET' and 'LINKNET'.
SEGMENT = 'UNET'
# Optimizer name; `prepare` recognises 'adam' and 'sgd'.
OPTIMIZER = 'adam'
# NOTE(review): in the visible cells NB_EPOCHS only feeds `filename`; the training
# cell calls trainer.run(3, ...) with a hard-coded value - confirm which is intended.
NB_EPOCHS = 40
LEARNING_RATE = 0.001
# Batch size shared by the train, validation and test DataLoaders.
BATCH_SIZE = 64
# Number of input channels, passed to both the datasets and the network.
IN_CHANNELS = 4

# Run identifier built from the hyperparameters; used as the checkpoint file stem.
filename = f"{SEGMENT}_{MODEL}_{NB_EPOCHS}_epochs_{LEARNING_RATE}_learningrate_{BATCH_SIZE}_batchsize_seed_{seed}_CHANNELS_{IN_CHANNELS}"
print(filename)

UNET_resnet18_40_epochs_0.001_learningrate_64_batchsize_seed_2021_CHANNELS_4


### Define custom transforms

In [5]:
# Normalisation settings shared by the training and evaluation pipelines.
_normalize_settings = dict(mean=(0.5, 0.5, 0.5, 0.5), std=(1, 1, 1, 1), max_pixel_value=65535)

# Training pipeline: convert to float, apply the random geometric augmentations,
# convert back from float, then normalise and turn the sample into tensors.
_train_steps = [
    A.ToFloat(max_value=65535.0),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Rotate(limit=5, p=0.5),
    A.ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
    A.FromFloat(max_value=65535.0),
    A.Normalize(**_normalize_settings),
    ToTensorV2(),
]
train_transform = A.Compose(_train_steps)

# Evaluation pipeline: no augmentation, only normalisation and tensor conversion.
_eval_steps = [
    A.Normalize(**_normalize_settings),
    ToTensorV2(),
]
test_transform = A.Compose(_eval_steps)

### Initiate datasets
Here we perform a train/validation split in order to evaluate our model. We then pass the resulting indices to `ImageSegementationDataset` to build the train and validation datasets; the test dataset is built from its own directory.

In [15]:
# Dataset roots, relative to the notebook's working directory.
train_dir='dataset/train'
test_dir = 'dataset/test'
# Split the training directory into train/validation indices (20% held out for validation).
train_idx, val_idx = train_val_dataset(train_dir, val_split=0.2)
# Train and validation sets read from the same directory, selected through `path_index`;
# only the training set receives the augmenting transform.
train_set = ImageSegementationDataset(train_dir, in_channels=IN_CHANNELS, path_index=train_idx, mode='train', transforms=train_transform)
val_set = ImageSegementationDataset(train_dir, in_channels=IN_CHANNELS, path_index=val_idx, mode='valid', transforms=test_transform)
# The test set is built without a `path_index`.
test_set = ImageSegementationDataset(test_dir, in_channels=IN_CHANNELS, mode='test', transforms=test_transform)

print("Train set contains", len(train_set), "elements")
print("Validation set contains", len(val_set), "elements")
print("Test set contains", len(test_set), "elements")

Train set contains 14792 elements
Validation set contains 3699 elements
Test set contains 5043 elements


In [16]:
# Only the training loader is shuffled. Validation and test data are iterated in a
# fixed order so that metrics and the generated submission do not depend on the
# sampling order of a particular run.
loader_train = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
loader_valid = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
loader_test = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

# len() of a DataLoader is the number of batches, not the number of samples.
data_sizes = {"train": len(loader_train), "valid": len(loader_valid)}
print("There are", data_sizes['train'], "batches in the training set")
print("There are", data_sizes['valid'], "batches in the validation set")

There are 232 batches in the training set
There are 58 batches in the validation set


# Training the Model

We start by initializing our model along with the loss, optimizer and learning-rate scheduler, and feed them to the `Trainer` class.

In [8]:
def prepare(model, segment, optimizer, nb_epochs, learning_rate, batch_size, in_channels, loss):
    """Build the network, optimizer, scheduler and loader dict used for training.

    Args:
        model: Encoder (backbone) name forwarded to segmentation_models_pytorch
            as ``encoder_name``, e.g. ``'resnet18'``.
        segment: Decoder architecture, either ``'UNET'`` or ``'LINKNET'``.
        optimizer: Optimizer name, either ``'adam'`` or ``'sgd'``.
        nb_epochs: Not used here; accepted so the full configuration can be
            passed in one call.
        learning_rate: Initial learning rate given to the optimizer.
        batch_size: Not used here; the DataLoaders are created in an earlier cell.
        in_channels: Number of input channels of the network.
        loss: Criterion object; returned unchanged.

    Returns:
        Tuple ``(model, loaders, optimizer_ft, criterion, scheduler)`` where
        ``loaders`` maps ``"train"`` and ``"val"`` to the notebook-level
        ``loader_train`` and ``loader_valid``.

    Raises:
        ValueError: If ``segment`` or ``optimizer`` is not a supported name.
    """
    # Validate the configuration first so a typo fails with a clear message
    # instead of an AttributeError/UnboundLocalError further down.
    if segment not in ('UNET', 'LINKNET'):
        raise ValueError(f"Unsupported segment {segment!r}; expected 'UNET' or 'LINKNET'")
    if optimizer not in ('adam', 'sgd'):
        raise ValueError(f"Unsupported optimizer {optimizer!r}; expected 'adam' or 'sgd'")

    # segmentation_models_pytorch exposes the LinkNet decoder as `Linknet`.
    architecture = smp.Unet if segment == 'UNET' else smp.Linknet
    # Use the function arguments rather than the notebook globals so the call
    # site really controls the configuration.
    # classes=10 is hard-coded - presumably the number of land-cover labels; TODO confirm.
    network = architecture(encoder_name=model, in_channels=in_channels, classes=10, activation=None)

    optimizer_cls = optim.Adam if optimizer == 'adam' else optim.SGD
    optimizer_ft = optimizer_cls(network.parameters(), lr=learning_rate)

    # Halve the learning rate after 5 scheduler steps without improvement.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer_ft, patience=5, factor=0.5)
    criterion = loss
    loaders = {
        "train": loader_train,
        "val": loader_valid
    }
    return network, loaders, optimizer_ft, criterion, scheduler

In [9]:
# Build all training components from the hyperparameters declared in the config cell.
# CombinedLoss and Trainer are not defined in this notebook - presumably they come
# from the star imports of the local `losses` and `train` modules.
model, loaders, optimizer_ft, criterion, scheduler = prepare(
    model = MODEL, 
    segment = SEGMENT, 
    optimizer = OPTIMIZER, 
    nb_epochs = NB_EPOCHS, 
    learning_rate = LEARNING_RATE, 
    batch_size = BATCH_SIZE, 
    in_channels = IN_CHANNELS,
    loss = CombinedLoss())
trainer = Trainer(model, loaders, optimizer_ft, criterion, scheduler, device)
# NOTE(review): reset() semantics are defined in the local Trainer class - confirm what state it clears.
trainer.reset()

Now we run the training

In [11]:
# NOTE(review): the first argument (presumably the epoch count) is hard-coded to 3,
# while NB_EPOCHS is 40 and is what gets written into the checkpoint filename - confirm.
trainer.run(3, reduce_on_plateau=True)

# Submission time

In [11]:
# Run the trainer over the test loader; the captured output reports "File is saved as test_submission.csv".
trainer.generate_submission(loader_test, "test_submission")

  0%|          | 0/316 [00:00<?, ?it/s]

File is saved as test_submission.csv


# Saving the best model

In [None]:
# Fetch what the trainer recorded as the best model - presumably a state_dict, since
# the loading cell passes the saved file to load_state_dict; confirm in Trainer.
best_params = trainer.get_best_model()
# The checkpoint name is derived from the hyperparameter string built in the config cell.
torch.save(best_params,f"{filename}.pt")

# Loading the model

In [None]:
def loading_saved_model(model_name, model=None, map_location=None):
    """Load saved weights into a network and switch it to evaluation mode.

    Args:
        model_name: Path of the checkpoint file. Its content is passed to
            ``load_state_dict``, so it is expected to be a state_dict.
        model: Optional network instance that receives the weights. When
            omitted, a fresh network is built from the notebook-level
            ``SEGMENT``, ``MODEL`` and ``IN_CHANNELS`` settings, mirroring
            what ``prepare`` builds for training.
        map_location: Optional device the stored tensors are mapped to.
            Defaults to the notebook-level ``device``.

    Returns:
        The network holding the loaded weights, in ``eval()`` mode.

    Raises:
        ValueError: If no ``model`` is given and ``SEGMENT`` is not supported.
    """
    if model is None:
        # The notebook never defines a `unet` object (only the `UNet` class is
        # imported), so the architecture is rebuilt from the configuration.
        if SEGMENT == 'UNET':
            architecture = smp.Unet
        elif SEGMENT == 'LINKNET':
            architecture = smp.Linknet
        else:
            raise ValueError(f"Unsupported segment {SEGMENT!r}; expected 'UNET' or 'LINKNET'")
        # encoder_weights=None: pretrained encoder weights would be overwritten
        # by the checkpoint anyway, so skip fetching them.
        model = architecture(
            encoder_name=MODEL,
            encoder_weights=None,
            in_channels=IN_CHANNELS,
            classes=10,
            activation=None,
        )
    if map_location is None:
        map_location = device

    state_dict = torch.load(model_name, map_location=map_location)
    model.load_state_dict(state_dict)
    model.eval()
    return model

# Load the checkpoint written by the saving cell, which stores it as f"{filename}.pt".
model_loaded = loading_saved_model(f"{filename}.pt")