# HW 5

## Домашнее задание

1. Необходимо подготовить датасет https://www.kaggle.com/olekslu/makeup-lips-segmentation-28k-samples для обучения модели на сегментацию губ
2. Обучить модель на выбор из [segmentation_models_pytorch](https://segmentation-modelspytorch.readthedocs.io/en/latest/index.html)


---

In [None]:
# Загрузка библиотек

import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as transforms

from PIL import Image
from torch.nn import functional as F

from sklearn.model_selection import train_test_split

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Data loading

# Locations of the annotation CSV, the image folder and the mask folder on the
# mounted Google Drive. The Drive folder name contains a plain space: inside a
# Python string it must NOT be shell-escaped as "\ ", because the backslash
# would become part of the path and the files would not be found.
csv_path = '/content/drive/My Drive/set-lipstick-original/list.csv'
img_path = '/content/drive/My Drive/set-lipstick-original/720p/'
mask_path = '/content/drive/My Drive/set-lipstick-original/mask/'

In [None]:
# Read the CSV listing referenced by csv_path.
df = pd.read_csv(csv_path)


def _digits_only(name):
    """Return only the digit characters of *name*, in their original order."""
    return ''.join(ch for ch in name if ch.isdigit())


# Reduce every file name in the image and mask folders to its numeric id.
imgs_set = {_digits_only(name) for name in os.listdir(img_path)}
masks_set = {_digits_only(name) for name in os.listdir(mask_path)}

# Ids that have an image but no mask, turned back into image file names.
not_mask = [f'image{i}.jpg' for i in imgs_set - masks_set]

# Keep only rows whose image has a mask, then renumber the rows from zero.
df = df.loc[~df['filename'].isin(not_mask)].reset_index(drop=True)

df.head()

---

In [None]:
# class CustomDatasetFromImages

class CustomDatasetFromImages(Dataset):
    """Dataset yielding (image, mask) tensor pairs read from disk with OpenCV.

    Args:
        data_info: frame with a ``filename`` column (image file names) and a
            ``mask`` column (mask file names). It is copied, so the caller's
            frame is left untouched.
        img_path: folder containing the image files.
        mask_path: folder containing the mask files.
        transform: optional callable ``transform(image, mask)`` returning the
            transformed ``(image, mask)`` pair of arrays.

    Rows whose image or mask file does not exist on disk are dropped (as are
    rows containing any other missing value, since ``dropna`` is applied to
    the whole frame).
    """

    def __init__(self, data_info: pd.DataFrame, img_path, mask_path, transform=None):
        # Work on a private copy instead of rewriting the caller's frame in place.
        self.data_info = data_info.copy()
        self.data_info['filename'] = self.data_info['filename'].apply(
            lambda f: self._existing_path(img_path, f))
        self.data_info['mask'] = self.data_info['mask'].apply(
            lambda f: self._existing_path(mask_path, f))
        self.data_info.dropna(axis=0, inplace=True)

        # Sorted full paths of everything in both folders. Not used by this
        # class itself; kept so existing code reading these attributes still works.
        self.img_path = [os.path.join(img_path, image_id) for image_id in sorted(os.listdir(img_path))]
        self.mask_path = [os.path.join(mask_path, image_id) for image_id in sorted(os.listdir(mask_path))]
        self.data_len = len(self.data_info)
        self.transform = transform

    @staticmethod
    def _existing_path(folder, name):
        """Return ``folder/name`` if that file exists, otherwise NaN."""
        full_path = os.path.join(folder, name)
        return full_path if os.path.exists(full_path) else np.nan

    @staticmethod
    def _read_rgb(path):
        """Read an image file with OpenCV and return it in RGB channel order.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file (``cv2.imread``
                returns ``None`` in that case instead of raising).
        """
        pixels = cv2.imread(str(path))
        if pixels is None:
            raise FileNotFoundError(f'Could not read image file: {path}')
        return cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB)

    def __getitem__(self, index):
        """Return the ``(image, mask)`` tensors for row ``index``."""
        row = self.data_info.iloc[index]

        # read images and masks
        image = self._read_rgb(row['filename'])
        mask = self._read_rgb(row['mask'])

        image = cv2.resize(image, (256, 256))
        # Nearest-neighbour keeps the mask's original label values; the default
        # bilinear interpolation would blend them along the region borders.
        mask = cv2.resize(mask, (256, 256), interpolation=cv2.INTER_NEAREST)
        mask = mask / 255

        image = image.astype('float32')
        mask = mask.astype('float32')

        if self.transform:
            image, mask = self.transform(image, mask)

        # Cast again because a transform may have changed the dtype.
        return transforms.ToTensor()(image.astype('float32')), transforms.ToTensor()(mask.astype('float32'))

    def __len__(self):
        """Return the number of rows kept after filtering."""
        return self.data_len

In [None]:
# Train test split

# Hold out 30% of the rows for evaluation; both parts get a fresh 0..n-1 index.
X_train, X_test = (
    part.reset_index(drop=True)
    for part in train_test_split(df, test_size=0.3)
)

# Wrap each part in a dataset reading from the shared image and mask folders.
train_data = CustomDatasetFromImages(data_info=X_train, img_path=img_path, mask_path=mask_path)
test_data = CustomDatasetFromImages(data_info=X_test, img_path=img_path, mask_path=mask_path)

In [None]:
# Data loader

# Loaders yield one sample per batch; only the training loader is shuffled.
train_data_loader = DataLoader(train_data, batch_size=1, shuffle=True)
test_data_loader = DataLoader(test_data, batch_size=1, shuffle=False)

# Preview a single training batch: print the tensor shapes, then show the
# image and its mask.
first_batch = next(iter(train_data_loader), None)
if first_batch is not None:
    img, mask = first_batch
    print(img.shape)
    print(mask.shape)

    # Move channels last for imshow; the image is cast to int32 for display.
    picture = np.array(img[0].permute(1, 2, 0), np.int32)
    plt.imshow(picture)
    plt.show()

    plt.imshow(mask[0].permute(1, 2, 0))
    plt.show()

In [None]:
# Pull one training batch and report mean, std and shape of its image tensor.
data = next(iter(train_data_loader))
batch_images = data[0]
print(batch_images.mean(), batch_images.std(), batch_images.shape)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

In [None]:
!pip install segmentation_models_pytorch > /dev/null

In [None]:
import segmentation_models_pytorch as smp
from segmentation_models_pytorch import Linknet

# Linknet with a ResNet-50 encoder initialised from the 'imagenet' weights,
# one output class and a sigmoid activation on the output.
model = Linknet(
    encoder_name='resnet50',
    encoder_weights='imagenet',
    classes=1,
    activation='sigmoid',
)
# Move the model to the selected device.
model = model.to(device)

In [None]:
# Optimizer, criterion, metric

# Import the `utils` submodule explicitly so that `smp.utils` resolves in this
# cell. Depending on the installed segmentation_models_pytorch release, a plain
# `import segmentation_models_pytorch as smp` may not load it, and the
# attribute lookups below would then fail with AttributeError.
import segmentation_models_pytorch.utils

# Adam over all model parameters.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.005)

# Dice loss as the training criterion; IoU as the single reported metric.
criterion = smp.utils.losses.DiceLoss()
metric = [smp.utils.metrics.IoU(),]

In [None]:
import segmentation_models_pytorch.utils as smp_utils

# Arguments common to the training and validation epoch runners.
_epoch_kwargs = dict(model=model, loss=criterion, metrics=metric, device=device)

# The training runner additionally receives the optimizer and verbose=True.
train_epoch = smp_utils.train.TrainEpoch(optimizer=optimizer, verbose=True, **_epoch_kwargs)

# The validation runner gets only the shared arguments.
valid_epoch = smp_utils.train.ValidEpoch(**_epoch_kwargs)

In [None]:
# Model

# Training configuration.
epochs = 3
max_score = 0                      # best validation IoU seen so far
PATH_MODEL = './best_model.pth'    # full model object
PATH_WEIGHTS = './best_weights.pth'  # state_dict only
score_break_value = 0.9            # stop once the best IoU exceeds this value

for epoch in range(1, epochs + 1):
    print(f'Current epoch - {epoch} from {epochs}')
    train_score = train_epoch.run(train_data_loader)
    valid_score = valid_epoch.run(test_data_loader)
    print('Next step.\n\n')

    # Save a checkpoint whenever the validation IoU improves on the best so far.
    current_iou = valid_score['iou_score']
    if current_iou > max_score:
        max_score = current_iou
        torch.save(model.state_dict(), PATH_WEIGHTS)
        torch.save(model, PATH_MODEL)
        print('Модель сохранена!')

    # Stop early once the best score passes the threshold.
    if max_score > score_break_value:
        break