# Production ready for Carvana_learning_local_research 

## Подключение библиотек и загрузка датасета

In [1]:
!pip install segmentation_models_pytorch
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch
from torch.nn import functional as F
import numpy as np
import glob
import segmentation_models_pytorch as smp
from sklearn.model_selection import train_test_split



In [None]:
# Run this cell only if the dataset has not been downloaded yet.
!pip install -q kaggle
# -p keeps the cell re-runnable when ~/.kaggle already exists.
!mkdir -p ~/.kaggle
!cp ~/kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
# Download into the home directory so the paths below do not depend on the notebook's working directory.
!kaggle competitions download -c carvana-image-masking-challenge -p ~
# `-d` is required: without it unzip treats the trailing path as an archive member to extract, not a destination.
!unzip ~/carvana-image-masking-challenge.zip -d ~/carvana_dataset/

!unzip ~/carvana_dataset/train.zip -d ~/carvana_dataset/train
!unzip ~/carvana_dataset/test.zip -d ~/carvana_dataset/test
!unzip ~/carvana_dataset/train_masks.zip -d ~/carvana_dataset/train_masks

!unzip ~/carvana_dataset/train_hq.zip -d ~/carvana_dataset/train_hq
!unzip ~/carvana_dataset/test_hq.zip -d ~/carvana_dataset/test_hq

!unzip ~/carvana_dataset/train_masks.csv.zip -d ~/carvana_dataset/
!unzip ~/carvana_dataset/sample_submission.csv.zip -d ~/carvana_dataset/
!unzip ~/carvana_dataset/metadata.csv.zip -d ~/carvana_dataset/

# Remove the archives once their contents have been extracted.
!rm ~/carvana-image-masking-challenge.zip
!rm ~/carvana_dataset/test.zip
!rm ~/carvana_dataset/train_masks.zip
!rm ~/carvana_dataset/train.zip
!rm ~/carvana_dataset/test_hq.zip
!rm ~/carvana_dataset/train_hq.zip
!rm ~/carvana_dataset/train_masks.csv.zip
!rm ~/carvana_dataset/sample_submission.csv.zip
!rm ~/carvana_dataset/metadata.csv.zip

## Используемые функции

In [2]:
def get_data_csv(imgs_path: str = None, masks_path: str = None) -> pd.DataFrame:
    '''Pair every image in imgs_path with its mask in masks_path.

    Files are matched on their base name (text before the first dot); a
    trailing "_mask" on the mask base name is ignored for the match. Only
    images that have a mask are kept (inner join).

    input parameters:
    string: imgs_path - path to folder with images,
    masks_path - path to folder with masks

    output parameters:
    pd.DataFrame: data - columns, in this order: "imgs_path", "file_name",
    "masks_path". Rows follow the sorted order of the image paths, so the
    result does not depend on the file system's listing order.'''

    assert imgs_path is not None and masks_path is not None, \
        'imgs_path or masks_path is equal None'

    mask_suffix = '_mask'

    def file_name(path: str) -> str:
        # Base name up to the first dot, independent of the OS path separator.
        return os.path.basename(path).split('.')[0]

    def mask_key(path: str) -> str:
        # Strip the mask suffix so the key equals the image's base name.
        name = file_name(path)
        return name[:-len(mask_suffix)] if name.endswith(mask_suffix) else name

    img_paths = sorted(glob.glob(os.path.join(imgs_path, '*')))
    mask_paths = sorted(glob.glob(os.path.join(masks_path, '*')))

    data_img = pd.DataFrame({
        'imgs_path': img_paths,
        'file_name': [file_name(path) for path in img_paths],
    })
    data_mask = pd.DataFrame({
        'masks_path': mask_paths,
        'file_name': [mask_key(path) for path in mask_paths],
    })

    data = pd.merge(data_img, data_mask, on='file_name', how='inner')

    return data

In [3]:
def get_train_test(source_df: pd.DataFrame, separate_feature: str = None, test_size: float = 0.25) -> tuple:
    '''Split source_df into a train and a validation dataframe.

    If separate_feature names a column of source_df, the split is done on
    the unique values of that column, so all rows sharing a value land in
    the same part. Otherwise the rows are split directly.

    input parameters:
    source_df: pd.DataFrame - dataframe that will be split

    separate_feature: str - optional column whose unique values are split
    instead of the individual rows

    test_size: float - share of the data (or of the unique values) that
    goes to the validation part

    output parameters:
    tuple: (data_train, data_valid) - the two resulting dataframes; they
    keep the index labels of source_df'''

    if separate_feature is not None and separate_feature in source_df.columns:
        groups = source_df[separate_feature]
        train_groups, valid_groups = train_test_split(groups.unique(), test_size=test_size, random_state=42)
        data_valid = source_df[np.isin(groups.values, valid_groups)]
        data_train = source_df[np.isin(groups.values, train_groups)]
        # Every row must end up in exactly one part, and no group may be shared.
        assert source_df.shape[0] == (data_valid.shape[0] + data_train.shape[0])
        assert np.isin(data_train[separate_feature].values, data_valid[separate_feature].values).sum() == 0
    else:
        # Same fixed seed as the grouped branch so both modes are reproducible.
        data_train, data_valid = train_test_split(source_df, test_size=test_size, random_state=42)

    return data_train, data_valid


In [4]:
def DICE(logits, targets, threshold=0.5):
    """Mean hard Dice coefficient of a batch.

    Args:
        logits: raw model outputs; the first dimension is the batch.
        targets: ground-truth masks with the same number of elements per
            sample as ``logits``.
        threshold: probability above which a pixel counts as foreground.

    Returns:
        0-dim tensor with the per-sample Dice scores averaged over the batch.
        The smoothing term keeps the score defined for empty masks and is
        added once to numerator and denominator, so the score stays in [0, 1].
    """
    smooth = 1
    num = targets.size(0)
    with torch.no_grad():
        probs = torch.sigmoid(logits)
        # Binarise in the probabilities' dtype so the product with targets is well defined.
        m1 = (probs > threshold).to(probs.dtype).reshape(num, -1)
        m2 = targets.to(probs.dtype).reshape(num, -1)
        intersection = (m1 * m2).sum(1)

        score = (2. * intersection + smooth) / (m1.sum(1) + m2.sum(1) + smooth)
        return score.sum() / num

In [5]:
# Заменить np на torch
def tensor_to_rle(tensor):
    """Run-length encode a binary mask tensor as a "start length ..." string.

    The mask is flattened in its storage (row-major) order and positions are
    1-based. The first and last pixels are treated as background so that
    every run has both a start and an end; this is done on a copy, the
    caller's tensor is left untouched.

    Args:
        tensor: mask tensor of any shape holding two distinct values.

    Returns:
        Space-separated string of alternating run starts and run lengths
        (empty string when there is no foreground or the tensor is empty).
    """
    with torch.no_grad():
        # clone(): reshape may return a view, and the edge pixels are overwritten below.
        flat = tensor.detach().reshape(-1).clone()
        if flat.numel() == 0:
            return rle_to_string([])
        flat[0] = 0
        flat[-1] = 0
        # Positions (1-based) where the value changes: run starts and run ends alternate.
        rle = torch.where(flat[1:] != flat[:-1])[0] + 2
        # Turn every run end into a run length.
        rle[1::2] = rle[1::2] - rle[:-1:2]
        rle = rle.cpu().numpy()
    rle_str = rle_to_string(rle)
    return rle_str

In [6]:
def numpy_to_rle(mask_image):
    """Run-length encode a binary mask array as a "start length ..." string.

    The array is flattened (a copy is made) and positions are 1-based.
    """
    flat = mask_image.flatten()
    # The two corner pixels are forced to background so that every run has a
    # start and an end; an accurate mask is not expected to be set there.
    flat[0] = 0
    flat[-1] = 0
    # 1-based positions where the value changes: run starts and ends alternate.
    change_points = np.where(flat[:-1] != flat[1:])[0] + 2
    run_starts = change_points[:-1:2]
    # Replace every run end by the run length.
    change_points[1::2] = change_points[1::2] - run_starts
    return rle_to_string(change_points)

In [7]:
def rle_to_string(runs):
    """Join the run values into one space-separated string."""
    return ' '.join(map(str, runs))

In [8]:
def mask_to_rle(mask_addr):
    """Read a mask image from disk and return its run-length encoding.

    Args:
        mask_addr: path of the mask image file.

    Returns:
        The string produced by ``numpy_to_rle`` for the luminance channel of
        the mask scaled to [0, 1].
    """
    # The context manager closes the file handle that Image.open keeps open.
    with Image.open(mask_addr) as mask_file:
        # Grey-scale conversion; channel 0 of "LA" is the luminance.
        mask = np.asarray(mask_file.convert('LA')).astype('float')[:, :, 0]
    mask = mask / 255.0
    mask_rle = numpy_to_rle(mask)
    return mask_rle

## Используемые классы

In [9]:
class DiceMetric(nn.Module):
    """Mean hard Dice coefficient of a batch, computed without gradients.

    Args:
        treashold: probability above which a pixel counts as foreground.
    """

    def __init__(self, treashold=0.5):
        super(DiceMetric, self).__init__()
        self.treashold = treashold

    def forward(self, logits, targets):
        """Return the batch-averaged Dice score as a 0-dim tensor.

        ``logits`` are raw model outputs and ``targets`` the ground-truth
        masks; both are flattened per sample (first dimension = batch).
        """
        with torch.no_grad():
            smooth = 1
            num = targets.size(0)
            probs = torch.sigmoid(logits)
            m1 = (probs > self.treashold).to(probs.dtype).reshape(num, -1)
            m2 = targets.to(probs.dtype).reshape(num, -1)
            intersection = (m1 * m2).sum(1)

            # The smoothing term is added once on each side, so the score
            # stays in [0, 1] and is 1 for two empty masks.
            score = (2. * intersection + smooth) / (m1.sum(1) + m2.sum(1) + smooth)
            score = score.sum() / num
            return score

In [10]:
# Попробовать softdice loss + bce (как в dlcource.ai)
class SoftDiceLoss(nn.Module):
    """Differentiable Dice loss: 1 - mean soft Dice coefficient of the batch.

    ``weight`` and ``size_average`` are accepted for call compatibility but
    are not used.
    """

    def __init__(self, weight=None, size_average=True):
        super(SoftDiceLoss, self).__init__()

    def forward(self, logits, targets):
        """Return the loss as a 0-dim tensor.

        ``logits`` are raw model outputs and ``targets`` the ground-truth
        masks; both are flattened per sample (first dimension = batch).
        """
        smooth = 1
        num = targets.size(0)
        probs = torch.sigmoid(logits)
        m1 = probs.reshape(num, -1)
        m2 = targets.reshape(num, -1)
        intersection = (m1 * m2).sum(1)

        # The smoothing term is added once on each side, so the coefficient
        # cannot exceed 1 and the loss cannot become negative.
        score = (2. * intersection + smooth) / (m1.sum(1) + m2.sum(1) + smooth)
        score = 1 - score.sum() / num
        return score

In [11]:
class CustomDatasetForTrain(Dataset):
    """Dataset of (image, mask) tensor pairs read from a table of file paths.

    Args:
        data_info: dataframe whose column 0 holds image paths and whose
            column 2 holds mask paths.
        image_size: (height, width) both the image and the mask are resized to.
    """

    def __init__(self, data_info, image_size=(512, 512)):
        # The prepared dataframe.
        self.data_info = data_info

        # Column 0: paths of the RGB images ...
        self.image_arr = self.data_info.iloc[:, 0]
        # ... column 2: paths of the segmentation masks.
        self.label_arr = self.data_info.iloc[:, 2]

        # Number of image/mask pairs.
        self.data_len = len(self.data_info.index)
        self.image_size = tuple(image_size)

    @staticmethod
    def _read_image(path):
        """Load an image file as an (H, W, 3) RGB array."""
        with Image.open(path) as img_file:
            return np.asarray(img_file.convert('RGB'))

    @staticmethod
    def _read_mask(path):
        """Load a mask file as an (H, W) grey-scale array."""
        with Image.open(path) as mask_file:
            # Channel 0 of "LA" is the luminance.
            return np.asarray(mask_file.convert('LA'))[:, :, 0]

    def __getitem__(self, index):
        """Return (image, mask): float32 tensors of shape (3, H, W) and (1, H, W)."""
        # Positional lookup: works whether or not the dataframe index was reset.
        img = np.array(self._read_image(self.image_arr.iloc[index]), dtype=np.float32)
        # Scale pixel values to [0, 1].
        img = torch.as_tensor(img) / 255
        # (H, W, 3) -> (1, H, W, 3) -> (1, 3, H, W): interpolate expects a batch dimension.
        img = img.unsqueeze(0).permute(0, 3, 1, 2)

        # Bicubic resize can overshoot the value range, hence the clamp.
        img = F.interpolate(input=img, size=self.image_size, align_corners=False, mode='bicubic').clamp(min=0, max=1)
        img = img.squeeze(0)

        mask = self._read_mask(self.label_arr.iloc[index])
        # Threshold the grey levels into a {0, 1} mask.
        mask = torch.as_tensor((mask > 100).astype(np.float32))
        # (H, W) -> (1, 1, H, W)
        mask = mask.unsqueeze(0).unsqueeze(0)
        # Nearest-neighbour resize keeps the mask binary.
        mask = F.interpolate(input=mask, size=self.image_size, mode='nearest')
        mask = mask.squeeze(0)

        return (img.float(), mask.float())

    def __len__(self):
        return self.data_len

In [12]:
class CustomDatasetForTest(Dataset):
    """Dataset of test images read from a table of file names or paths.

    Args:
        data_info: dataframe whose column 0 holds image file names or paths.
        source: optional directory joined in front of every entry of
            column 0; leave empty when the column already holds full paths.
        image_size: (height, width) every image is resized to.
    """

    def __init__(self, data_info, source='', image_size=(512, 512)):
        # The prepared dataframe.
        self.data_info = data_info

        # Column 0: names (or paths) of the RGB images.
        self.image_names = self.data_info.iloc[:, 0]

        # Number of images.
        self.data_len = len(self.data_info.index)
        self.source = source
        self.image_size = tuple(image_size)

    @staticmethod
    def _read_image(path):
        """Load an image file as an (H, W, 3) RGB array."""
        with Image.open(path) as img_file:
            return np.asarray(img_file.convert('RGB'))

    def __getitem__(self, index):
        """Return (index, image, image_name); image is a float32 (3, H, W) tensor."""
        # Positional lookup: works whether or not the dataframe index was reset.
        entry = self.image_names.iloc[index]
        img = np.array(self._read_image(os.path.join(self.source, entry)), dtype=np.float32)
        # Scale pixel values to [0, 1].
        img = torch.as_tensor(img) / 255
        # (H, W, 3) -> (1, H, W, 3) -> (1, 3, H, W): interpolate expects a batch dimension.
        img = img.unsqueeze(0).permute(0, 3, 1, 2)

        # Bicubic resize can overshoot the value range, hence the clamp.
        img = F.interpolate(input=img, size=self.image_size, align_corners=False, mode='bicubic').clamp(min=0, max=1)
        img = img.squeeze(0)

        # Only the file name is returned, without any directory part.
        image_name = os.path.basename(entry)

        return (index, img.float(), image_name)

    def __len__(self):
        return self.data_len

In [13]:
# Убрать .cuda() из класса добавить в даталоадеры и сравнить скорость обучения
# Попробовать разные типы интерполяции

class NeuralNetwork(nn.Module):
    """Thin wrapper around a segmentation model adding train and predict loops.

    Batches are moved to whatever device the wrapped model's parameters
    live on, so the same code runs on GPU and CPU.
    """

    # (height, width) that logits and labels are upsampled to before the
    # metric is computed and before predicted masks are saved.
    FULL_RESOLUTION = (1280, 1918)

    def __init__(self, model):
        super(NeuralNetwork, self).__init__()
        self.model = model

    def forward(self, x):
        x = self.model(x)
        return x

    def _device(self):
        """Return the device of the wrapped model's parameters (CPU if it has none)."""
        first_param = next(self.model.parameters(), None)
        return first_param.device if first_param is not None else torch.device('cpu')

    @staticmethod
    def tensor_to_rle(tensor):
        """Run-length encode a binary mask tensor as a "start length ..." string.

        The mask is flattened in row-major order and positions are 1-based.
        The first and last pixels are treated as background so that every
        run has a start and an end; this is done on a copy, so the caller's
        tensor is not modified.
        """
        with torch.no_grad():
            flat = tensor.detach().reshape(-1).clone()
            if flat.numel() == 0:
                return NeuralNetwork.rle_to_string([])
            flat[0] = 0
            flat[-1] = 0
            # 1-based positions where the value changes: run starts and ends alternate.
            rle = torch.where(flat[1:] != flat[:-1])[0] + 2
            # Turn every run end into a run length.
            rle[1::2] = rle[1::2] - rle[:-1:2]
            return NeuralNetwork.rle_to_string(rle.cpu().tolist())

    @staticmethod
    def rle_to_string(runs):
        """Join the run values into one space-separated string."""
        return ' '.join(str(x) for x in runs)

    def fit(self, criterion, metric, optimizer, train_data_loader, valid_data_loader=None, epochs=1,
            log_every=300):
        """Train the wrapped model and optionally validate after every epoch.

        Args:
            criterion: callable(outputs, labels) returning the loss tensor.
            metric: callable(outputs, labels) returning a tensor; called on
                logits and labels upsampled to ``FULL_RESOLUTION``.
            optimizer: optimizer over the model parameters.
            train_data_loader: iterable of (data, labels) batches.
            valid_data_loader: optional iterable of (data, labels) batches.
            epochs: number of passes over ``train_data_loader``.
            log_every: print the running loss every this many batches
                (0 or None disables the progress output).

        Returns:
            (epoch_train_losses, epoch_valid_losses, epoch_valid_metrics):
            one mean value per epoch; the two validation lists stay empty
            when no ``valid_data_loader`` is given.
        """
        # Local import keeps the class independent of the order in which
        # notebook cells were executed.
        import time

        self.optimizer = optimizer
        device = self._device()
        epoch_train_losses = []
        epoch_valid_losses = []
        epoch_valid_metrics = []
        for epoch in range(epochs):
            self.model.train()
            time1 = time.time()
            running_loss = 0.0
            train_losses = []
            for batch_idx, (data, labels) in enumerate(train_data_loader):
                data = data.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                outputs = self.model(data)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                loss_value = loss.item()
                running_loss += loss_value
                train_losses.append(loss_value)
                # Report after exactly `log_every` batches so the average is correct.
                if log_every and (batch_idx + 1) % log_every == 0:
                    print(f'Train Epoch: {epoch+1}, Loss: {running_loss/log_every}')
                    time2 = time.time()
                    print(f'Spend time for {log_every} batches: {time2-time1} sec')
                    time1 = time.time()
                    running_loss = 0.0

            train_loss = np.mean(train_losses)
            epoch_train_losses.append(train_loss)

            if valid_data_loader is None:
                print(f'Epoch {epoch+1}, train loss: {train_loss}')
                continue

            self.model.eval()
            valid_metrics = []
            valid_losses = []
            # No gradients are needed for validation.
            with torch.no_grad():
                for data, labels in valid_data_loader:
                    data = data.to(device)
                    labels = labels.to(device)
                    outputs = self.model(data)
                    loss = criterion(outputs, labels)
                    valid_losses.append(loss.item())
                    # The metric is evaluated at full resolution.
                    outputs = F.interpolate(input=outputs, size=self.FULL_RESOLUTION, mode='nearest')
                    labels = F.interpolate(input=labels, size=self.FULL_RESOLUTION, mode='nearest')

                    valid_metrics.append(metric(outputs, labels).item())

            valid_loss = np.mean(valid_losses)
            valid_metric = np.mean(valid_metrics)

            print(f'Epoch {epoch+1}, train loss: {train_loss}, valid_loss: {valid_loss}, valid_metric: {valid_metric}')
            epoch_valid_losses.append(valid_loss)
            epoch_valid_metrics.append(valid_metric)

        return epoch_train_losses, epoch_valid_losses, epoch_valid_metrics

    def predict(self, test_data_loader, predict_directory, mask_treashold=0.5, generate_rle_dataframe=True,
                output_size=None):
        """Predict masks, save each as a GIF and optionally return their RLEs.

        Args:
            test_data_loader: iterable of (index, images, image_names) batches.
            predict_directory: directory the masks are written to, one
                ``<image name without extension>.gif`` per image; it is
                created if missing.
            mask_treashold: probability above which a pixel is foreground.
            generate_rle_dataframe: when true, collect the run-length
                encoding of every mask.
            output_size: (height, width) of the saved masks; defaults to
                ``FULL_RESOLUTION``.

        Returns:
            DataFrame with columns "img_name" and "img_rle" when
            ``generate_rle_dataframe`` is true, otherwise None.
        """
        self.model.eval()
        device = self._device()
        if output_size is None:
            output_size = self.FULL_RESOLUTION
        if predict_directory:
            os.makedirs(predict_directory, exist_ok=True)

        img_names = []
        img_rles = []

        with torch.no_grad():
            for index, img, img_name in test_data_loader:
                pred_mask_logit = self.model(img.to(device))
                pred_mask_logit = F.interpolate(input=pred_mask_logit, size=output_size, mode='nearest')
                pred_masks = (torch.sigmoid(pred_mask_logit) > mask_treashold).to(torch.uint8)

                # Handle every image of the batch, not only the first one.
                for name, pred_mask in zip(img_name, pred_masks):
                    # pred_mask is (1, H, W); the saved picture is its single channel scaled to 0/255.
                    pixels = (pred_mask[0] * 255).cpu().numpy()
                    # splitext only strips the file extension; dots in the directory path are preserved.
                    stem = os.path.splitext(name)[0]
                    Image.fromarray(pixels).save(os.path.join(predict_directory, stem + '.gif'))
                    if generate_rle_dataframe:
                        img_names.append(name)
                        img_rles.append(self.tensor_to_rle(pred_mask))

        if generate_rle_dataframe:
            rle_dataframe = pd.DataFrame(list(zip(img_names, img_rles)), columns=['img_name', 'img_rle'])
            return rle_dataframe

## Обучение модели

In [14]:
#Импортируем библиотеку time для расчета, сколько времени у нас уходит на одну эпоху
import time
from torch.autograd import Variable

In [15]:
# Dataset root: the same ~/carvana_dataset location the download cell extracts into.
dataset_path = os.path.expanduser('~/carvana_dataset')
imgs_path  = dataset_path + '/train/train'
masks_path = dataset_path + '/train_masks/train_masks'

data = get_data_csv(imgs_path=imgs_path, masks_path=masks_path)

# Grouping key for the split: the part of the file name before the first "_",
# so that different photos of the same car never end up in both parts.
data["car"] = data["file_name"].apply(lambda x: x.split('_')[0])

train_df, valid_df = get_train_test(data, separate_feature='car', test_size=0.25)
# Re-number the rows from 0 (the datasets look rows up by position); a new
# frame is assigned instead of mutating the filtered slices in place.
train_df = train_df.reset_index(drop=True)
valid_df = valid_df.reset_index(drop=True)

train_data = CustomDatasetForTrain(train_df)
valid_data = CustomDatasetForTrain(valid_df)

train_data_loader = DataLoader(train_data,batch_size=1,shuffle=True)
valid_data_loader = DataLoader(valid_data,batch_size=1,shuffle=False)

In [16]:
# Use the first GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# U-Net with an ImageNet-pretrained MobileNetV2 encoder and a single output channel.
model = smp.Unet(encoder_name='mobilenet_v2', encoder_weights='imagenet', classes=1)
model = model.to(device)
my_model = NeuralNetwork(model=model)

In [17]:
# Training hyper-parameters.
num_epochs = 1
learning_rate = 0.001

# Soft Dice is optimised; hard Dice at a 0.5 cut-off is reported on validation.
metric = DiceMetric(treashold=0.5)
criterion = SoftDiceLoss()
optimizer = torch.optim.Adam(params=my_model.parameters(), lr=learning_rate)

In [18]:
# Train and validate; the per-epoch (train losses, valid losses, valid metrics) are shown as the cell output.
my_model.fit(
    criterion=criterion,
    metric=metric,
    optimizer=optimizer,
    train_data_loader=train_data_loader,
    valid_data_loader=valid_data_loader,
    epochs=num_epochs,
)

Train Epoch: 1, Loss: 0.12431617200374603
Spend time for 300 images: 36.063334465026855 sec
Train Epoch: 1, Loss: 0.018702537814776102
Spend time for 300 images: 34.58461332321167 sec
Train Epoch: 1, Loss: 0.023418235778808593
Spend time for 300 images: 34.36200189590454 sec
Train Epoch: 1, Loss: 0.014062984784444173
Spend time for 300 images: 34.34321665763855 sec
Train Epoch: 1, Loss: 0.010355209310849508
Spend time for 300 images: 34.243250131607056 sec
Train Epoch: 1, Loss: 0.00928508977095286
Spend time for 300 images: 35.17607402801514 sec
Train Epoch: 1, Loss: 0.013880410989125569
Spend time for 300 images: 34.73040556907654 sec
Train Epoch: 1, Loss: 0.008560932079950968
Spend time for 300 images: 33.805450201034546 sec
Train Epoch: 1, Loss: 0.007805132269859314
Spend time for 300 images: 34.26298260688782 sec
Train Epoch: 1, Loss: 0.00740453581015269
Spend time for 300 images: 34.14823007583618 sec
Train Epoch: 1, Loss: 0.007062990069389343
Spend time for 300 images: 33.9447875

([0.0201815306639471], [0.006782039115205407], [0.9932809326332063])

## Предсказание модели

In [19]:
# Directories under the dataset root (~/carvana_dataset, as used by the download cell).
# The trailing separators are kept because later cells append file names and
# glob patterns to these strings.
predict_directory = os.path.expanduser('~/carvana_dataset/test/predict_small/')
test_dataset = os.path.expanduser('~/carvana_dataset/test/test/')

In [20]:
# One row per file found in the test directory; the single column holds the file path.
test_dataframe = pd.DataFrame({'img_addr': glob.glob(test_dataset + "/*")})

In [21]:
# Probability cut-off passed to predict(): pixels whose sigmoid output exceeds it become foreground.
mask_treashold = 0.5

In [22]:
class CustomDatasetForTest(Dataset):
    """Dataset of test images read from a table of file paths.

    Args:
        data_info: dataframe whose column 0 holds the image file paths.
        image_size: (height, width) every image is resized to.
    """

    def __init__(self, data_info, image_size=(512, 512)):
        # The prepared dataframe.
        self.data_info = data_info

        # Column 0: paths of the RGB images.
        self.image_addresses = self.data_info.iloc[:, 0]

        # Number of images.
        self.data_len = len(self.data_info.index)
        self.image_size = tuple(image_size)

    @staticmethod
    def _read_image(path):
        """Load an image file as an (H, W, 3) RGB array."""
        with Image.open(path) as img_file:
            return np.asarray(img_file.convert('RGB'))

    def __getitem__(self, index):
        """Return (index, image, image_name); image is a float32 (3, H, W) tensor."""
        # Positional lookup: works whether or not the dataframe index was reset.
        image_address = self.image_addresses.iloc[index]
        img = np.array(self._read_image(image_address), dtype=np.float32)
        # Scale pixel values to [0, 1].
        img = torch.as_tensor(img) / 255
        # (H, W, 3) -> (1, H, W, 3) -> (1, 3, H, W): interpolate expects a batch dimension.
        img = img.unsqueeze(0).permute(0, 3, 1, 2)

        # Bicubic resize can overshoot the value range, hence the clamp.
        img = F.interpolate(input=img, size=self.image_size, align_corners=False, mode='bicubic').clamp(min=0, max=1)
        img = img.squeeze(0)

        # File name without its directory part.
        image_name = os.path.basename(image_address)

        return (index, img.float(), image_name)

    def __len__(self):
        return self.data_len

In [23]:
# Wrap the table of test images in a dataset and read it one image per batch, in table order.
test_data = CustomDatasetForTest(data_info=test_dataframe)
test_data_loader = DataLoader(dataset=test_data, shuffle=False, batch_size=1)

In [24]:
# Predict a mask for every test image and collect the run-length encodings.
rle_dataframe = my_model.predict(
    test_data_loader=test_data_loader,
    predict_directory=predict_directory,
    generate_rle_dataframe=True,
    mask_treashold=mask_treashold,
)

In [25]:
# Persist the predicted encodings; index=True also writes the row index as the first CSV column.
rle_dataframe.to_csv('rle_dataframe.csv', index=True)

In [31]:
# Preview the first rows of the predicted RLE table.
rle_dataframe.head()

Unnamed: 0,img_name,img_rle
0,13857e9947b2_11.jpg,988558 71 990476 71 992394 71 994297 139 99445...
1,3ffa310d71ce_12.jpg,863881 116 865799 116 867717 116 869593 214 87...
2,da5e65183070_02.jpg,729759 56 731677 56 733595 56 735468 191 73738...
3,bc202073bf8c_14.jpg,518450 179 520368 179 522286 179 524151 11 524...
4,10e03166b5dc_13.jpg,757960 22 759878 22 761796 22 763699 41 765617...


In [29]:
# Submission template from the dataset root (~/carvana_dataset, as used by the download cell).
sample_submission = pd.read_csv(os.path.expanduser('~/carvana_dataset/sample_submission.csv'))

In [32]:
# Preview the submission template as loaded.
sample_submission.head()

Unnamed: 0,img,rle_mask
0,0004d4463b50_01.jpg,1 1
1,0004d4463b50_02.jpg,1 1
2,0004d4463b50_03.jpg,1 1
3,0004d4463b50_04.jpg,1 1
4,0004d4463b50_05.jpg,1 1


In [33]:
# Attach the predicted RLE to every template row, matching on the image file name.
# A left join keeps the template's rows and their order.
sample_submission = pd.merge(
    sample_submission,
    rle_dataframe,
    how='left',
    left_on='img',
    right_on='img_name',
)

In [35]:
# Remove the template's placeholder masks and the duplicated name column, in place.
sample_submission.drop(['rle_mask', 'img_name'], axis=1, inplace=True)

In [37]:
# Give the predicted encodings the column name the submission format uses, in place.
sample_submission.rename({'img_rle': 'rle_mask'}, axis='columns', inplace=True)

In [39]:
# Preview the submission after the predicted masks were merged in.
sample_submission.head()

Unnamed: 0,img,rle_mask
0,0004d4463b50_01.jpg,610847 11 612765 11 614601 164 616519 164 6184...
1,0004d4463b50_02.jpg,610851 7 612769 7 614597 172 616515 172 618433...
2,0004d4463b50_03.jpg,614657 116 614829 8 616575 116 616747 8 618493...
3,0004d4463b50_04.jpg,610847 15 612765 15 614668 101 614904 11 61658...
4,0004d4463b50_05.jpg,614679 94 614799 19 616597 94 616717 19 618515...


In [41]:
# Write the submission file; index=False leaves the row index out of the CSV.
sample_submission.to_csv('submission_01_10.csv', index=False)