### Посмотрим на данные

In [5]:
from PIL import Image

In [6]:
import cv2
import torch
import os

In [7]:
path = '/kaggle/input/semantic-drone-dataset/dataset/semantic_drone_dataset/'

In [8]:
orig_img = Image.open(os.path.join(path,"original_images/000.jpg"))
orig_img

In [9]:
mask_img = Image.open(os.path.join(path, "label_images_semantic/000.png"))
mask_img

In [10]:
import numpy as np
mask_img_arr = np.array(mask_img)
mask_img_arr.shape

In [11]:
np.unique(mask_img_arr)

### Создадим датасет

In [12]:
import albumentations as albu

def get_training_augmentation():
    """Build the albumentations pipeline applied to training image/mask pairs.

    The pipeline flips, shifts/scales, pads and crops to 320x320, then applies
    noise, perspective, brightness/contrast/gamma, sharpen/blur and colour
    jitter. Spatial transforms are applied to the mask as well when the
    pipeline is called as ``pipeline(image=..., mask=...)``.

    Returns:
        albu.Compose: the composed training augmentation.
    """
    # Recent albumentations releases no longer ship the imgaug-backed ``IAA*``
    # classes. Prefer the native replacements and fall back to the old names
    # only when the installed version predates them.
    sharpen_cls = getattr(albu, "Sharpen", None) or albu.IAASharpen
    perspective_cls = getattr(albu, "Perspective", None) or albu.IAAPerspective

    train_transform = [
        albu.HorizontalFlip(p=0.5),

        # rotate_limit=0: only shift and scale; border_mode=0 fills with a constant
        albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0),

        # p=1 replaces the deprecated always_apply=True flag
        albu.PadIfNeeded(min_height=320, min_width=320, p=1, border_mode=0),
        albu.RandomCrop(height=320, width=320, p=1),

        # Native additive Gaussian noise instead of IAAAdditiveGaussianNoise
        albu.GaussNoise(p=0.2),
        perspective_cls(p=0.5),

        albu.OneOf(
            [
                albu.CLAHE(p=1),
                # Brightness-only jitter (what RandomBrightness used to do)
                albu.RandomBrightnessContrast(contrast_limit=0, p=1),
                albu.RandomGamma(p=1),
            ],
            p=0.9,
        ),

        albu.OneOf(
            [
                sharpen_cls(p=1),
                albu.Blur(blur_limit=3, p=1),
                albu.MotionBlur(blur_limit=3, p=1),
            ],
            p=0.9,
        ),

        albu.OneOf(
            [
                # Contrast-only jitter (what RandomContrast used to do)
                albu.RandomBrightnessContrast(brightness_limit=0, p=1),
                albu.HueSaturationValue(p=1),
            ],
            p=0.9,
        ),
    ]
    return albu.Compose(train_transform)

In [13]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os

class SegmentationDataset(Dataset):
    """Paired image / semantic-mask dataset read from two directories.

    File names in both directories are sorted and matched by position, so the
    i-th image is paired with the i-th mask.

    Args:
        imgs_dir: directory with the input images.
        masks_dir: directory with the single-channel label masks.
        augmentation: optional callable invoked as
            ``augmentation(image=img, mask=mask)`` that returns a mapping with
            ``'image'`` and ``'mask'`` entries (e.g. an albumentations Compose).
        w: output width in pixels.
        h: output height in pixels.

    Each item is ``(img, mask)``: ``img`` is a float tensor produced by
    ``self.transform`` and ``mask`` is an int64 tensor of shape ``(1, h, w)``
    holding the original class ids.
    """

    def __init__(self, imgs_dir, masks_dir, augmentation=None, w=512, h=512):
        self.imgs_dir = imgs_dir
        self.masks_dir = masks_dir
        self.w = w
        self.h = h

        self.imgs_paths = sorted(os.listdir(self.imgs_dir))
        self.masks_paths = sorted(os.listdir(self.masks_dir))
        # Pairing is positional, so a count mismatch would silently misalign
        # images and masks (or fail with an IndexError much later).
        if len(self.imgs_paths) != len(self.masks_paths):
            raise ValueError(
                f"Found {len(self.imgs_paths)} images in {self.imgs_dir!r} but "
                f"{len(self.masks_paths)} masks in {self.masks_dir!r}"
            )

        # Image pipeline: uint8 HWC -> float CHW in [0, 1] -> resize -> [-1, 1]
        self.transform = transforms.Compose([transforms.ToTensor(),
                                             transforms.Resize((h, w)),
                                             transforms.Normalize((0.5,), (0.5,))])
        self.augmentation = augmentation

    def __len__(self):
        """Return the number of image/mask pairs."""
        return len(self.imgs_paths)

    def transform_mask(self, mask):
        """Convert a 2-D array of class ids to an int64 tensor of shape (1, h, w).

        ``transforms.ToTensor`` must not be used for masks: it rescales uint8
        input to [0, 1], so a later cast to integers would turn every class id
        into 0. Nearest-neighbour resizing is used so that no new, blended
        label values are created.
        """
        # .copy() yields a contiguous array that torch.from_numpy can wrap even
        # if an augmentation returned a strided view
        labels = torch.from_numpy(mask.copy())
        labels = labels[None, None].float()  # interpolate expects (N, C, H, W)
        labels = torch.nn.functional.interpolate(labels, size=(self.h, self.w), mode="nearest")
        return labels[0].long()

    def __getitem__(self, idx):
        """Load, optionally augment, and tensorize the pair at position ``idx``.

        Raises:
            FileNotFoundError: if the image or the mask cannot be decoded.
        """
        img_path = os.path.join(self.imgs_dir, self.imgs_paths[idx])
        mask_path = os.path.join(self.masks_dir, self.masks_paths[idx])

        img = cv2.imread(img_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")

        # OpenCV decodes to BGR; the colour augmentations and the rest of the
        # pipeline work on RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.augmentation:
            sample = self.augmentation(image=img, mask=mask)
            img, mask = sample['image'], sample['mask']

        img = self.transform(img)
        mask = self.transform_mask(mask)
        return img, mask

In [14]:
# Full dataset over all image/mask pairs, with the training augmentation attached
dataset = SegmentationDataset(
    imgs_dir=os.path.join(path, 'original_images/'),
    masks_dir=os.path.join(path, "label_images_semantic/"),
    augmentation=get_training_augmentation(),
)

In [15]:
# Validation samples must not go through the random training augmentation,
# so the validation subset is backed by a second, augmentation-free dataset
# that reads the same directories.
val_base_dataset = SegmentationDataset(dataset.imgs_dir, dataset.masks_dir, augmentation=None)

# Hold out a quarter of the data (100 of the 400 images) instead of hard-coding
# lengths that only work for one dataset size.
val_size = len(dataset) // 4

# A seeded permutation keeps the split identical across kernel restarts.
split_generator = torch.Generator().manual_seed(42)
shuffled_indices = torch.randperm(len(dataset), generator=split_generator).tolist()

train_dataset = torch.utils.data.Subset(dataset, shuffled_indices[val_size:])
val_dataset = torch.utils.data.Subset(val_base_dataset, shuffled_indices[:val_size])

In [16]:
len(train_dataset)

In [17]:
img, mask = next(iter(train_dataset))

In [18]:
img.shape, img.dtype

In [19]:
mask.shape, mask.dtype

In [20]:
# TODO: experiment with the batch size
batch_size = 16

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Validation keeps a fixed order: shuffling would change which samples land in
# the (smaller) last batch and add noise to batch-averaged metrics.
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

### Создадим модель

In [21]:
# %pip (unlike !pip) installs into the environment of the running kernel
%pip install -q segmentation_models_pytorch

In [47]:
import segmentation_models_pytorch as smp

model = smp.Unet('mobilenet_v2', encoder_weights='imagenet', classes=23)

In [48]:
# TODO: Заменить голову на свою :)
# model.segmentation_head = ....
model.segmentation_head

In [49]:
import torch.nn as nn
# Replacement segmentation head: 3x3 conv -> batch norm -> ReLU -> 3x3 conv
# producing 23 class logits (same class count as passed to smp.Unet).
# The 16 input channels presumably match the decoder output — verify when
# changing the architecture.
head = nn.Sequential(
    nn.Conv2d(16, 16, kernel_size=3, padding=1, bias=False),
    nn.BatchNorm2d(16),
    nn.ReLU(inplace=True),
    nn.Conv2d(16, 23, kernel_size=3, padding=1),
)
model.segmentation_head = head

In [50]:
# TODO: experiment with weight freezing
# Turn off gradients for every encoder weight so only the rest of the model trains
for frozen_weight in model.encoder.parameters():
    frozen_weight.requires_grad_(False)

In [51]:
img_batch, mask_batch = next(iter(train_loader))

In [52]:
img_batch.shape

In [53]:
mask_batch.shape

In [54]:
output_batch = model(img_batch)

In [55]:
output_batch.shape

In [56]:
from torch.nn import CrossEntropyLoss

CrossEntropyLoss()(output_batch, mask_batch.squeeze(1)) 

### Обучение

In [31]:
# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

In [57]:
model.to(device)

In [58]:
epoch_count = 10

loss = CrossEntropyLoss()

In [59]:
# TODO: experiment with the learning rate
learning_rate = 0.01

# TODO: experiment with the choice of optimization algorithm as a whole
# Every parameter of `model` is handed to Adam here.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [60]:
from sklearn.metrics import accuracy_score

In [None]:
import copy

from tqdm import tqdm

min_val_loss = np.inf  # best (lowest) validation loss seen so far
best_model = None      # independent snapshot of the model at that epoch

train_loss_history = []
val_loss_history = []
val_acc_history = []

last_loss = 100  # validation loss of the previous epoch (kept for inspection)
patience = 2     # epochs without improvement tolerated before stopping
trigger_times = 0

for epoch_num in range(epoch_count):
    # ---- training ----
    model.train()
    train_losses = []
    for img_batch, mask_batch in tqdm(train_loader):

        img_batch = img_batch.to(device)
        # CrossEntropyLoss expects targets of shape (N, H, W): drop the channel axis
        mask_batch = mask_batch.squeeze(1).to(device)

        optimizer.zero_grad()
        output_batch = model(img_batch)
        loss_value = loss(output_batch, mask_batch)
        loss_value.backward()
        optimizer.step()
        train_losses.append(loss_value.item())

    # Report the epoch mean rather than whatever the last batch happened to give
    train_loss = np.mean(train_losses)
    print(f"Epoch {epoch_num} / {epoch_count} | train loss = {train_loss}")

    # ---- validation ----
    # eval() freezes batch-norm statistics; no_grad() skips building the autograd graph
    model.eval()
    val_loss_sum = 0.0
    val_samples = 0
    correct_pixels = 0
    total_pixels = 0
    with torch.no_grad():
        for img_batch, mask_batch in tqdm(val_loader):

            img_batch = img_batch.to(device)
            mask_batch = mask_batch.squeeze(1).to(device)

            output_batch = model(img_batch)

            # Weight by batch size so a smaller last batch is not over-represented
            batch_len = img_batch.size(0)
            val_loss_sum += loss(output_batch, mask_batch).item() * batch_len
            val_samples += batch_len

            # Pixel accuracy: predicted class is the arg-max over the class axis
            predictions = output_batch.argmax(dim=1)
            correct_pixels += (predictions == mask_batch).sum().item()
            total_pixels += mask_batch.numel()

    val_loss = val_loss_sum / val_samples
    val_accuracy = correct_pixels / total_pixels

    print(f"Epoch {epoch_num} / {epoch_count} | val Loss = {val_loss} | val accuracy = {val_accuracy}")

    # write losses and metrics to history lists
    train_loss_history.append(train_loss)
    val_loss_history.append(val_loss)
    val_acc_history.append(val_accuracy)

    # Keep the best model and stop early when validation stops improving
    if val_loss < min_val_loss:
        min_val_loss = val_loss
        # deepcopy: a plain assignment would only alias the model that keeps training
        best_model = copy.deepcopy(model)
        print('trigger times: 0')
        trigger_times = 0
    else:
        trigger_times += 1
        print('Trigger Times:', trigger_times)

        if trigger_times >= patience:
            print('Early stopping!')
            break

    last_loss = val_loss


    

In [None]:
# TODO: Нарисовать графики изменения лосса на трейне/валидации в зависимости от эпох и аналогичный график по метрикам

In [None]:
import matplotlib.pyplot as plt
# Loss and accuracy live on different scales, so each gets its own panel
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(train_loss_history, label='train loss')
loss_ax.plot(val_loss_history, label='val loss')
loss_ax.set(title='Loss per epoch', xlabel='epoch', ylabel='loss')
loss_ax.legend()

acc_ax.plot(val_acc_history, label='val accuracy')
acc_ax.set(title='Validation accuracy per epoch', xlabel='epoch', ylabel='accuracy')
acc_ax.legend()

fig.tight_layout()
plt.show()