In [1]:
# Essentials
import time
import copy
from collections import OrderedDict
import random
import os
from tifffile import TiffFile
from PIL import Image, ImageOps
from pathlib import Path
from tqdm.notebook import tqdm
# Data
import numpy as np
import pandas as pd
# Plot
import matplotlib.pyplot as plt
# Torch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, Subset
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from torch.autograd import Variable
import torch.nn.functional as F
# Torchvision
from torchvision import datasets, transforms
import torchvision.transforms.functional as TF
# segmentation_models_pytorch
import segmentation_models_pytorch as smp
# Albumentations
import albumentations as A
from albumentations.pytorch import ToTensorV2
# Local 
from unet import UNet
from LCD import LandCoverData
from dataset import *
from train import *
from utils import *
from metrics import *
from losses import *
LCD = LandCoverData()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Device: ", device)

Device:  cuda:0


# ⚠️Seed everything!
It's important to seed everything for reproducibility.

In [2]:
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [3]:
seed = 2021
seed_everything(seed)

# 📜 Set all variables here
In this cell, we define all hyperparameters that we will be using for the rest of the notebook. It helps to group them in one place so we can track them better.

In [4]:
MODEL = 'resnet18'
OPTIMIZER = 'adam'
NB_EPOCHS = 20
LEARNING_RATE = 0.01
BATCH_SIZE = 8
IN_CHANNELS = 4

In [5]:
f"{MODEL}_{NB_EPOCHS}_epochs_{LEARNING_RATE}_learningrate_{BATCH_SIZE}_batchsize_seed_{seed}"

'resnet18_20_epochs_0.01_learningrate_8_batchsize_seed_2021'

# Dataset

### Define custom transforms

In [6]:
train_transform = A.Compose([
    A.ToFloat(max_value=65535.0),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Rotate(limit=5, p=0.5),
    A.ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
    A.FromFloat(max_value=65535.0),
    A.Normalize(mean=(0.5, 0.5, 0.5, 0.5), std=(1, 1, 1, 1), max_pixel_value=65535),
    ToTensorV2()
])

test_transform = A.Compose([
    A.Normalize(mean=(0.5, 0.5, 0.5, 0.5), std=(1, 1, 1, 1), max_pixel_value=65535),
    ToTensorV2()
])

### Initiate datasets
Here we perform a train/validation split in otder to evaluate our model. We then feed them to the ImageSegementationDataset so to make the datasets.

In [7]:
train_dir='dataset/train'
test_dir = 'dataset/test'
train_idx, val_idx = train_val_dataset(train_dir, val_split=0.2)
train_set = ImageSegementationDataset(train_dir, in_channels=IN_CHANNELS, path_index=train_idx, mode='train', transforms=train_transform)
val_set = ImageSegementationDataset(train_dir, in_channels=IN_CHANNELS, path_index=val_idx, mode='valid', transforms=test_transform)
test_set = ImageSegementationDataset(test_dir, in_channels=IN_CHANNELS, mode='test', transforms=test_transform)

print("Train set contains", len(train_set), "elements")
print("Validation set contains", len(val_set), "elements")
print("Test set contains", len(test_set), "elements")

Train set contains 14792 elements
Validation set contains 3699 elements
Test set contains 5043 elements


In [8]:
loader_train = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
loader_valid = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=True)
loader_test = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=True)

data_sizes = {"train": len(loader_train), "valid": len(loader_valid)}
print("There are", data_sizes['train'], "batches in the training set")
print("There are", data_sizes['valid'], "batches in the validation set")

There are 1849 batches in the training set
There are 463 batches in the validation set


# Training the Model

We start by initializing our model along with the loss and optimizers and feed that to the Trainer class.

In [11]:
model = smp.Unet(encoder_name='resnet18',in_channels=IN_CHANNELS, classes=10, encoder_weights=None, activation=None)
optimizer_ft = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
criterion = CombinedLoss()
loaders = {
    "train": loader_train,
    "val": loader_valid
}
trainer = Trainer(model, loaders, optimizer_ft, criterion, scheduler, device)
trainer.reset()

Now we run the training

In [None]:
trainer.run(NB_EPOCHS)

  0%|          | 0/1849 [00:00<?, ?it/s]

  encoded_target.scatter_(1, torch.tensor(y_true.unsqueeze(1), dtype=torch.int64), 1.)


  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 0 Training Loss: 0.9259679610500986 Training IoU: 0.3644735773917066 Training KL: 0.3375816458114035
Epoch # 0 Validation Loss: 0.9352664579325567 Validation IoU: 0.36304874948483723 Validation KL: 0.5576068113176201


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 1 Training Loss: 0.8497689200969178 Training IoU: 0.4450224161895631 Training KL: 0.1458603977352331
Epoch # 1 Validation Loss: 0.9070754482473204 Validation IoU: 0.40295520579957256 Validation KL: 0.3432704763975755


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 2 Training Loss: 0.8273856417432612 Training IoU: 0.47341209198327 Training KL: 0.10492243956506227
Epoch # 2 Validation Loss: 0.9320513846807295 Validation IoU: 0.39265912077089704 Validation KL: 0.5645786635952297


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 3 Training Loss: 0.8136267419632478 Training IoU: 0.48788185462713407 Training KL: 0.0889202898168531
Epoch # 3 Validation Loss: 0.7824994817408321 Validation IoU: 0.5110397409086509 Validation KL: 0.06965593250401794


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 4 Training Loss: 0.8052889547069502 Training IoU: 0.4949536218855067 Training KL: 0.07913937300020871
Epoch # 4 Validation Loss: 0.7992691883000648 Validation IoU: 0.4984687639072466 Validation KL: 0.08789810437522165


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 5 Training Loss: 0.7982697026545192 Training IoU: 0.5023590567130624 Training KL: 0.07485932760926217
Epoch # 5 Validation Loss: 0.8492770435743147 Validation IoU: 0.46701781559585304 Validation KL: 0.24993395734848534


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 6 Training Loss: 0.7920505349993254 Training IoU: 0.5096587922061971 Training KL: 0.06899204060905015
Epoch # 6 Validation Loss: 0.7923011383279096 Validation IoU: 0.5094118983300867 Validation KL: 0.09390284008064718


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 7 Training Loss: 0.778528617355487 Training IoU: 0.5252334546198447 Training KL: 0.05124774953010128
Epoch # 7 Validation Loss: 0.7556470936369433 Validation IoU: 0.5434184089124422 Validation KL: 0.04843279642395838


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 8 Training Loss: 0.7768822800088019 Training IoU: 0.5266228282457704 Training KL: 0.04944418251292731
Epoch # 8 Validation Loss: 0.7593844648828775 Validation IoU: 0.5379298010824711 Validation KL: 0.057994992555414925


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 9 Training Loss: 0.775409436696539 Training IoU: 0.5272626981766546 Training KL: 0.047156574126741915
Epoch # 9 Validation Loss: 0.7560534690935204 Validation IoU: 0.5437882064127108 Validation KL: 0.05496748331535609


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 10 Training Loss: 0.7748960807170012 Training IoU: 0.5292115999234446 Training KL: 0.047920716514662565
Epoch # 10 Validation Loss: 0.7626562269151083 Validation IoU: 0.5388450686133454 Validation KL: 0.06652797348191135


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 11 Training Loss: 0.7723423686460781 Training IoU: 0.5305719117989195 Training KL: 0.04717104459333831
Epoch # 11 Validation Loss: 0.7440726433922868 Validation IoU: 0.5409848900365875 Validation KL: 0.05303130976821652


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 12 Training Loss: 0.7230946579258013 Training IoU: 0.5207386863481581 Training KL: 0.05328803743353256
Epoch # 12 Validation Loss: 0.6911270324921247 Validation IoU: 0.5342462663929143 Validation KL: 0.05249543538343895


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 13 Training Loss: 0.30515530497931737 Training IoU: 0.515716693760271 Training KL: 0.055951326526585396
Epoch # 13 Validation Loss: 0.10568360086393665 Validation IoU: 0.5438843487841751 Validation KL: 0.04711580292450008


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 14 Training Loss: 0.13072247340622434 Training IoU: 0.5241175877889945 Training KL: 0.05009185234922613
Epoch # 14 Validation Loss: 0.10193434347987948 Validation IoU: 0.5444513193932817 Validation KL: 0.046253007109230214


  0%|          | 0/1849 [00:00<?, ?it/s]

  0%|          | 0/463 [00:00<?, ?it/s]

Epoch # 15 Training Loss: 0.12518841058837846 Training IoU: 0.5266236248298548 Training KL: 0.04859723081079672
Epoch # 15 Validation Loss: 0.10009943491117486 Validation IoU: 0.5458542764412179 Validation KL: 0.04764030772815441


  0%|          | 0/1849 [00:00<?, ?it/s]

# Submission time

In [24]:
def batch_distribution(y):
    class_distribution = np.apply_along_axis(np.bincount, axis=1, arr=y.flatten(1), minlength=LCD.N_CLASSES)
    # Normalize to sum to 1  
    return (class_distribution.T/class_distribution.sum(1)).T

In [26]:
sub_dict = {"sample_id":[], "no_data":[],"clouds":[],"artificial":[],"cultivated":[],"broadleaf":[],"coniferous":[],"herbaceous":[],"natural":[],"snow":[],"water":[]}
for image, path in tqdm(loader_test):
    image = image.to(device)
    with torch.no_grad():
        output = unet_pre_trained(image)
        _, preds = torch.max(output, 1)
        class_dis = batch_distribution(preds.cpu())
        sub_dict["sample_id"] += [int(p.split('.')[0]) for p in path]
        for key in LCD.CLASSES:
            sub_dict[key] += class_dis[:,LCD.CLASSES.index(key)].tolist()

  0%|          | 0/631 [00:00<?, ?it/s]

In [27]:
df_sub = pd.DataFrame.from_dict(sub_dict)
df_sub = df_sub.sort_values(by='sample_id')
df_sub.head()

Unnamed: 0,sample_id,no_data,clouds,artificial,cultivated,broadleaf,coniferous,herbaceous,natural,snow,water
3436,10087,0.0,0.0,0.007431,0.24852,0.19635,0.013412,0.528641,0.000122,0.0,0.005524
3073,10088,0.0,0.0,0.00499,0.410492,0.176392,0.002701,0.405426,0.0,0.0,0.0
497,10089,0.0,0.0,0.055969,0.259369,0.226807,0.024384,0.426941,1.5e-05,0.0,0.006516
1319,10090,0.0,0.0,0.009201,0.037643,0.517822,0.14859,0.285034,0.001709,0.0,0.0
2484,10091,0.0,0.0,0.026031,0.36319,0.1884,0.024353,0.397568,0.000229,0.0,0.000229


In [28]:
df_sub.to_csv('submission.csv')

# Saving the model

In [None]:
torch.save(model_ft.state_dict(),"unet.pt")

# Loading the model

In [None]:
def loading_saved_model(model_name):
    """Loads the saved model"""
    model = unet
    model.load_state_dict(torch.load(model_name, map_location = device))
    model.eval()
    return model

model_loaded = loading_saved_model("unet.pt")