In [5]:
import torch
import torch.nn as nn
import torchvision.models
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn.functional as F

import albumentations as A
from albumentations.pytorch import ToTensorV2

from tqdm import tqdm
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import numpy as np

import os
from time import time

AttributeError: partially initialized module 'torchvision' has no attribute 'extension' (most likely due to a circular import)

### Get the data

In [None]:
import os

import gdown

url = 'https://drive.google.com/uc?id=10f1H2T-5W-BiqabHHtlZ4ASs19TZmg8R'
output = 'data.zip'

# The archive is close to 1 GB: skip the download when the file is already on
# disk so that "Restart & Run All" does not fetch it again.
if not os.path.exists(output):
    gdown.download(url, output, quiet=False)

Downloading...
From (original): https://drive.google.com/uc?id=10f1H2T-5W-BiqabHHtlZ4ASs19TZmg8R
From (redirected): https://drive.google.com/uc?id=10f1H2T-5W-BiqabHHtlZ4ASs19TZmg8R&confirm=t&uuid=8c91a23e-b723-404e-864e-01c84f6f72f9
To: /content/data.zip
100%|██████████| 979M/979M [00:19<00:00, 50.4MB/s]


'data.zip'

In [None]:
!unzip data.zip

### Utilities (0.5 point)

Complete the dataset class so that it loads the prepared images and masks. Don't forget to use augmentations.

Some of the images have a single channel, so convert them with `gray2rgb`.

In [None]:
def gray2rgb(img):
    """Return ``img`` as a 3-channel array by replicating a single gray channel.

    Args:
        img: image array. A 2-D ``(H, W)`` array or a 3-D ``(H, W, 1)`` array
            is treated as grayscale; any other 3-D array is returned as is.

    Returns:
        An ``(H, W, 3)`` array for grayscale input, otherwise ``img`` itself.
    """
    if img.ndim != 3:
        # Plain (H, W) grayscale: stack the same plane three times along depth.
        return np.dstack([img, img, img])
    if img.shape[2] == 1:
        # Grayscale that already carries an explicit channel axis.
        return np.repeat(img, 3, axis=2)
    return img

def get_iou(gt, pred):
    """Intersection over union between a ground-truth mask and a prediction.

    Args:
        gt: boolean ground-truth mask (the evaluation cells pass ``gt == 255``).
        pred: predicted scores of the same shape; values above 0.5 count as
            foreground.

    Returns:
        ``|gt & pred| / |gt | pred|``. When both masks are empty the union is
        zero; 1.0 is returned in that case (two empty masks agree perfectly)
        instead of the NaN produced by 0 / 0, which would poison a mean.
    """
    pred = pred > 0.5
    union = (gt | pred).sum()
    if union == 0:
        return 1.0
    return (gt & pred).sum() / union

class BirdsDataset(Dataset):
    """Dataset of images and their segmentation masks (assignment template).

    ``folder`` is expected to contain an ``images`` and a ``gt`` sub-folder;
    ``images`` is split into one directory per class. The parts marked
    ``YOUR CODE HERE`` / ``...`` are intentionally left for the student.
    """

    # Extra constructor arguments may be added here, but they need default
    # values: the training script builds the dataset as ``BirdsDataset(path)``.
    def __init__(self, folder) -> None:
        images_folder = os.path.join(folder, 'images')
        gt_folder = os.path.join(folder, 'gt')

        # sorted(): os.listdir returns entries in arbitrary order; sorting
        # keeps the sample order reproducible between runs and machines.
        for class_name in sorted(os.listdir(images_folder)):
            for fname in sorted(os.listdir(os.path.join(images_folder, class_name))):
                # YOUR CODE HERE
                ...

        self.transform = A.Compose([
            # YOUR CODE HERE
            ToTensorV2()
        ])

    def __getitem__(self, index):
        """Return the transformed ``(image, mask)`` pair for sample ``index``."""
        # YOUR CODE HERE
        img = ...
        mask = ...
        # Some images have a single channel; make every image 3-channel.
        img = gray2rgb(img)
        # YOUR CODE HERE
        return transformed_img, transformed_mask

    def __len__(self):
        """Return the number of samples in the dataset."""
        # YOUR CODE HERE
        return

### Architecture (1 point)
Your task for today is to build your own Unet to solve the segmentation problem.

As an encoder, you can use models (or parts of them) from torchvision that are pre-trained on ImageNet. The decoder must be trained from scratch.
It is forbidden to use data not from the `data` folder.

I advise you to experiment with the number of blocks so that the model does not overfit the training set and achieves good quality on validation.

In [None]:
class DecoderBlock(nn.Module):
    """Decoder block of the U-Net (assignment template, not implemented yet)."""

    def __init__(self, in_channels, mid_channels, out_channels):
        # NOTE(review): presumably the channel counts of the block's input,
        # intermediate and output feature maps -- the names suggest this, but
        # nothing in the template uses them yet.
        super().__init__()
        # YOUR CODE HERE

    def forward(self,x):
        # Placeholder: until the body is written this returns None.
        # YOUR CODE HERE
        return

class Unet(nn.Module):
    """U-Net segmentation model (assignment template).

    Per the assignment text, the encoder may reuse models (or parts of them)
    from torchvision pre-trained on ImageNet, while the decoder must be
    trained from scratch. The ``...`` values are placeholders to be replaced
    by real modules; they only keep the template syntactically valid.
    """

    def __init__(self):
        super().__init__()
        # YOUR CODE HERE
        # encoder blocks
        self.encoder1 = ...
        self.encoder2 = ...
        self.encoder3 = ...
        # decoder blocks
        self.decoder1 = ...
        self.decoder2 = ...
        self.decoder3 = ...

    def forward(self, x):
        # Placeholder: until the body is written this returns None.
        # YOUR CODE HERE
        return

### Train script (0.5 point)

Complete the train and predict scripts.

In [None]:
def train_segmentation_model(data_path):
    """Train ``Unet`` on the data under ``data_path`` (assignment template).

    Args:
        data_path: directory that contains the ``train`` and ``val`` folders.

    Side effects:
        Saves the model's ``state_dict`` to ``model_<epoch>.pth`` in the
        current directory after every epoch and prints the epoch metrics.

    The parts marked ``YOUR CODE HERE`` / ``...`` are left for the student.
    """
    BATCH_SIZE = 8
    N_EPOCH = 15
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # os.path.join works whether or not data_path ends with a path separator.
    train_dataset = BirdsDataset(os.path.join(data_path, 'train'))
    val_dataset = BirdsDataset(os.path.join(data_path, 'val'))
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    model = Unet().to(DEVICE)
    optimizer = ...  # YOUR CODE HERE
    criterion = ...  # YOUR CODE HERE
    losses_train, losses_val, ious_train, ious_val = [], [], [], []

    for epoch in range(N_EPOCH):
        model.train()

        # tqdm wraps the iterable (the loader), not the loop targets.
        for inputs, masks in tqdm(train_dataloader):
            inputs = inputs.to(DEVICE)
            masks = masks.to(DEVICE)
            # YOUR CODE HERE
        losses_train.append(...)
        ious_train.append(...)

        model.eval()
        with torch.no_grad():
            for inputs, masks in tqdm(val_dataloader):
                inputs = inputs.to(DEVICE)
                masks = masks.to(DEVICE)
                # YOUR CODE HERE
        losses_val.append(...)
        ious_val.append(...)

        # One checkpoint per epoch; the last one is model_<N_EPOCH - 1>.pth.
        torch.save(model.state_dict(), f'model_{epoch}.pth')

        print(f"Epoch: {epoch}, train loss: {losses_train[-1]}, val loss: {losses_val[-1]}, train iou: {ious_train[-1]}, val iou: {ious_val[-1]}")

In [None]:
def predict(model, img_path):
    """Segment the image stored at ``img_path`` with ``model`` (template, to be completed).

    The evaluation cells compare the result with the ground-truth mask via
    ``get_iou(gt == 255, pred > 0.5)``, where ``gt`` is read from disk without
    any resizing.
    NOTE(review): this suggests the returned ``segm`` should hold per-pixel
    scores at the original image resolution -- confirm when implementing.
    """
    # Inference only: gradient tracking is switched off for the whole body.
    with torch.no_grad():
        # YOUR CODE HERE TO PREPARE IMAGE
        # GET PREDICTIONS
        # POST PROCESS
        return segm

def get_model(path):
    """Build a ``Unet`` and load the weights stored at ``path``.

    Args:
        path: file with a ``state_dict`` written by ``torch.save``.

    Returns:
        The model in evaluation mode, with its weights on the CPU; callers
        move it to the target device with ``.to(...)``.
    """
    model = Unet()
    # map_location='cpu': without it, a checkpoint saved from a CUDA model
    # cannot be loaded on a machine that has no GPU.
    state_dict = torch.load(path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()
    return model

In [None]:
# Trains on data/train, validates on data/val and writes model_<epoch>.pth after every epoch.
train_segmentation_model('data/')

You can also experiment with models and write a small report about the results. If the report is meaningful, you will receive an extra point.

### Testing (8 points)
Your model will be tested on the new data, similar to validation, so use techniques to prevent overfitting the model.

* IoU > 0.85 — 8 points
* IoU > 0.80 — 7 points
* IoU > 0.75 — 6 points
* IoU > 0.70 — 5 points
* IoU > 0.60 — 4 points
* IoU > 0.50 — 3 points
* IoU > 0.40 — 2 points
* IoU > 0.30 — 1 point

In [None]:
# 'model_14.pth' is the checkpoint written after the last of the 15 training epochs.
# Fall back to the CPU when no GPU is available, as the training script does.
model = get_model('model_14.pth').to('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
ious, times = [], []
test_dir = 'data/val/'
images_dir = os.path.join(test_dir, 'images')
gt_dir = os.path.join(test_dir, 'gt')

# Sorted listings keep the evaluation order (and so the timings) reproducible.
for class_name in tqdm(sorted(os.listdir(images_dir))):
    for img_name in sorted(os.listdir(os.path.join(images_dir, class_name))):

        # Only the predict() call is timed.
        t_start = time()
        pred = predict(model, os.path.join(images_dir, class_name, img_name))
        times.append(time() - t_start)

        # Swap only the extension: str.replace('jpg', 'png') would also
        # rewrite a 'jpg' that occurs inside the file name itself.
        gt_name = os.path.splitext(img_name)[0] + '.png'
        gt = np.asarray(Image.open(os.path.join(gt_dir, class_name, gt_name)), dtype = np.uint8)
        if gt.ndim > 2:
            # Multi-channel mask: keep the first channel only.
            gt = gt[:, :, 0]

        iou = get_iou(gt==255, pred>0.5)
        ious.append(iou)

# Mean IoU and mean prediction time in seconds.
np.mean(ious), np.mean(times)

### Compression (1 point)

Try to speed up the model in any way without losing more than 1% in IoU score,
for example with [torch2trt](https://github.com/NVIDIA-AI-IOT/torch2trt).

In [None]:
def get_fast_model():
    """Return a sped-up version of the segmentation model (template, to be completed).

    The assignment allows the faster model to lose at most 1% in IoU score.
    """
    # YOUR CODE HERE
    # As written, this returns the module-level ``model`` unchanged.
    return model

In [None]:
# Fall back to the CPU when no GPU is available, as the training script does.
fast_model = get_fast_model().to('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
ious, times = [], []
test_dir = 'data/val/'
images_dir = os.path.join(test_dir, 'images')
gt_dir = os.path.join(test_dir, 'gt')

# Sorted listings keep the evaluation order (and so the timings) reproducible.
for class_name in tqdm(sorted(os.listdir(images_dir))):
    for img_name in sorted(os.listdir(os.path.join(images_dir, class_name))):

        # Only the predict() call is timed.
        t_start = time()
        pred = predict(fast_model, os.path.join(images_dir, class_name, img_name))
        times.append(time() - t_start)

        # Swap only the extension: str.replace('jpg', 'png') would also
        # rewrite a 'jpg' that occurs inside the file name itself.
        gt_name = os.path.splitext(img_name)[0] + '.png'
        gt = np.asarray(Image.open(os.path.join(gt_dir, class_name, gt_name)), dtype = np.uint8)
        if gt.ndim > 2:
            # Multi-channel mask: keep the first channel only.
            gt = gt[:, :, 0]

        iou = get_iou(gt==255, pred>0.5)
        ious.append(iou)

# Mean IoU and mean prediction time in seconds.
np.mean(ious), np.mean(times)

**Bonus:** For the best IoU score on the test set (without compression) in your group, you will get 1.5, 1, or 0.5 extra points (for 1st, 2nd, and 3rd place respectively).