# Package and Test Data

### Because late submitting in this kaggle competition also required close internet. I have to download and upload the required file to kaggle when I submit and see the result. Hence, we can just run the cell below with internet instead of the second cell.

In [None]:
!pip install segmentation_models_pytorch -q
import segmentation_models_pytorch as smp

In [1]:
!mkdir -p /tmp/pip/cache/
!cp ../input/segmentationmodels/segmentation_models_pytorch-0.3.0-py3-none-any.whl /tmp/pip/cache/
!cp ../input/segmentationmodels/timm-0.4.12-py3-none-any.whl /tmp/pip/cache/
!cp ../input/segmentationmodels/efficientnet_pytorch-0.7.1.xyz /tmp/pip/cache/efficientnet_pytorch-0.7.1.tar.gz
!cp ../input/segmentationmodels/pretrainedmodels-0.7.4.xyz /tmp/pip/cache/pretrainedmodels-0.7.4.tar.gz
!pip install --no-index --find-links /tmp/pip/cache/ segmentation-models-pytorch
import segmentation_models_pytorch as smp

Looking in links: /tmp/pip/cache/
Processing /tmp/pip/cache/segmentation_models_pytorch-0.3.0-py3-none-any.whl
Processing /tmp/pip/cache/timm-0.4.12-py3-none-any.whl
Processing /tmp/pip/cache/pretrainedmodels-0.7.4.tar.gz
  Preparing metadata (setup.py) ... [?25l- done
[?25hProcessing /tmp/pip/cache/efficientnet_pytorch-0.7.1.tar.gz
  Preparing metadata (setup.py) ... [?25l- done
Building wheels for collected packages: efficientnet-pytorch, pretrainedmodels
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16446 sha256=69f30f965ac0c8460e63733507d61546c9996d0352cfa9b5c84df0af05d15814
  Stored in directory: /root/.cache/pip/wheels/02/50/84/82c97bfff69785108e55a6236d7ba11c83990eed25d4eab240
  Building wheel for pretrainedmodels (setup.py) ... [?25l- \ done
[?25h  Created wheel for pretrainedmodels: filename=pretrainedmodels-0.7.4-py3-

In [2]:
import os
import matplotlib.pyplot as plt
import torch
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
from torch import nn, optim
from PIL import Image
import math
import torch.nn.functional as F
from torch.nn import Parameter
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
file_dir  = "../input/severstal-steel-defect-detection/test_images"

In [3]:
test = [name for name in (os.listdir(file_dir))]

# Data Channels

In [4]:
class TestDataset(Dataset):
    def __init__(self, file_names, file_dir, transform, label_flip = None):
        self.file_names = file_names
        self.file_dir = file_dir
        self.transform = transform
    def __getitem__(self, index):
      
        image = cv2.imread(os.path.join(self.file_dir,self.file_names[index]))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        image = self.transform(image=image)["image"]
        

        return image,self.file_names[index]

    
    def __len__(self):
        return len(self.file_names)

In [5]:
classification_transform = A.Compose([A.Normalize(mean=[0.485, 0.456, 0.406],
                                        std=[0.229, 0.224, 0.225]),
                           ToTensorV2()])
detection_transform = A.Compose([A.Normalize(mean=[0.485, 0.456, 0.406],
                                        std=[0.229, 0.224, 0.225]),
                           ToTensorV2()])

# Load Models

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


### I use three different models to classify the defect, get mean of the probablities of these models for each pixel instead of getting the probalities of one model. It is called ensemble.

In [7]:
#classifier_model1 = torch.load("../input/steel-classifier-model-fold2/classifier_fold2.h5").to(device)
#classifier_model1 = torch.load("../input/steel-classifier-model-fold4/classifier_fold4.h5").to(device)
classifier_model1 = torch.load("../input/steel-classifier-model-fold7/classifier_fold7.h5").to(device)
classifier_model2 = torch.load("../input/steel-classifier-model-fold1-efficientnet/classifier_fold1_efficientnet_b3.h5").to(device)
#classifier_model2 = torch.load("../input/steel-classifier-model-fold4-resnext50/classifier_fold4_resnext50.h5").to(device)
classifier_model3 = torch.load("../input/steel-classifier-model-fold2-efficientnet/classifier_fold2_efficientnet_b3.h5").to(device)
#classifier_model4 = torch.load("../input/steel-classifier-model-fold3-efficientnet/classifier_fold3_efficientnet_b3.h5").to(device)
classifier_models = [classifier_model1,classifier_model2,classifier_model3]

### Function "tensor_to_str" get the result from models, determine whether this kind of defect exists by threshold 0.55 for each class of defect, and create four tuples for storing information with form (filename, defect class, whether this kind of defect exists in this image) for each images.

In [8]:
def tensor_to_str(inputs, filename):
    inputs = inputs.view(-1)
    result = []
    for i in range(1,5):
        if inputs[i] > 0.55:
            result.append((filename, i, True))
        else:
            result.append((filename, i, False))
    return result

# Classification

### Function "classified_df" uses given models and data for classification. It firstly ensemble three models get the result with test time augmentation (TTA) which is horizontal flip to get the result. And then, it uses function "tensor_to_str" to change results to tuples, and store them in an array. Finally, change the array of tuples to a Pandas Dataframe.

In [9]:
def classified_df(models, data_loader):
    result = []
    with torch.no_grad():  
          for image,file_name in tqdm(data_loader):
                inputs = torch.zeros(1, 5).to(device)
                h_image = torch.flip(image, dims=[3]).to(device)
                for model in models:
                    model.eval()
                    image = image.to(device)
                    out = torch.sigmoid(model(image))
                    inputs = inputs + out
                    out = torch.sigmoid(model(h_image))
                    inputs = inputs + out
                        
                #inputs =  inputs/len(models)
                inputs =  inputs/(2*len(models))
                pred = tensor_to_str(inputs, file_name[0])
                result += pred
                
    return pd.DataFrame(result, columns = ["ImageId", "ClassId", "ClassExist"])

In [10]:
classification_data = TestDataset(test, file_dir, transform =classification_transform)

In [11]:
classification_data_loader = DataLoader(classification_data, batch_size = 1, shuffle=False)

In [12]:
df = classified_df(classifier_models, classification_data_loader)

100%|██████████| 5506/5506 [10:50<00:00,  8.46it/s]


In [13]:
df

Unnamed: 0,ImageId,ClassId,ClassExist
0,6d2a4005f.jpg,1,False
1,6d2a4005f.jpg,2,False
2,6d2a4005f.jpg,3,False
3,6d2a4005f.jpg,4,True
4,a2491cf5f.jpg,1,False
...,...,...,...
22019,b3912fc50.jpg,4,False
22020,e4612d7c4.jpg,1,False
22021,e4612d7c4.jpg,2,False
22022,e4612d7c4.jpg,3,True


# Detection

### Function "drop_small_area" determines whether defect exists for each pixel by threshold. Then, it uses algorithm for cv2 to find each components in the mask numpy array, and change the defect with area smaller than parameter "min_size" to no defect. This operation can reduce the effect of noise in prediction. Notice I check if sum of pixels of defect less than "min_size" because I want to skip the algorithm when it is uncessary to do.

In [16]:
def drop_small_area(inputs, threshold, min_size):
    predictions = np.zeros((256, 1600), np.float32)
    inputs = inputs.cpu().detach().numpy()
    if inputs.sum() < min_size:
        return predictions 
    mask = cv2.threshold(inputs, threshold, 1, cv2.THRESH_BINARY)[1]
    num_component, component = cv2.connectedComponents(mask.astype(np.uint8))
    for c in range(1, num_component):
        p = (component == c)
        if p.sum() > min_size:
            predictions[p] = 1
    return predictions

### Function "mask_to_pred" is inverse of function "rle2mask" which declared in notebook "class3-detection-fold1.ipynb". Notice I check if sum of pixels which is defect equal to zero because I want to skip searching 1s in numpy array, which is a O(n) algorithm costs much time, where n is the total pixels 256*1600 in image.

In [17]:
def mask_to_pred(inputs):
    inputs = np.rot90(np.flipud(inputs), k=3)
    inputs = inputs.reshape(-1,1)
    start_count = False
    result = []
    count = 0
    if inputs.sum() == 0:
        return ""
    
    for i in range(len(inputs)):
        if inputs[i] == 1:
            if not start_count:
                start_count = True
                result.append(str(i))
            count += 1
        elif inputs[i] == 0 and start_count:
            result.append(str(count))
            count = 0
            start_count = False
    
    return " ".join(result)            

### Function "detection_df" uses given models and data for classification. It firstly ensemble three models get the result with test time augmentation (TTA) which is horizontal flip to get the result. And then, it uses function "drop_small_area" to reduce the effect of noise. Finally, use function "mask_to_pred" to change result to an appropriate form for competition, and store them in an array.

In [18]:
def detection_df(models, data_loader, threshold, min_size):
    encoded_pixels = []
    with torch.no_grad():  
          for image,file_name in tqdm(data_loader):
                inputs = torch.zeros(256, 1600).to(device)
                h_image = torch.flip(image, dims=[3]).to(device)
                for model in models:
                    model.eval()
                    image = image.to(device)
                    pred = torch.sigmoid(model(image))
                    inputs = inputs + pred
                    pred = torch.flip(torch.sigmoid(model(h_image)), dims=[3])
                    inputs = inputs + pred
    
                inputs =  inputs/(2*len(models))
                inputs = inputs.permute(2,3,0,1)
                inputs=drop_small_area(inputs, threshold, min_size)              
                encoded_pixels.append(mask_to_pred(inputs))
                
    return encoded_pixels

In [19]:
detection_class4_model1 = torch.load("../input/steel-detection-model-fold5-class4/class4_detector_fold5.h5").to(device)
detection_class4_model2 = torch.load("../input/steel-detection-model-fold1-class4-fpn/class4_detector_fold1_FPN.h5").to(device)
detection_class4_model3 = torch.load("../input/steel-detection-model-fold2-class4/class4_detector_fold2.h5").to(device)
detection_class4_models = [detection_class4_model1,detection_class4_model2,detection_class4_model3]
#detection_class4_models = [detection_class4_model1,detection_class4_model2]

In [20]:
#detection_class3_model1 = torch.load("../input/steel-detection-model-fold1-class3/class3_detector_fold1.h5").to(device)
detection_class3_model1 = torch.load("../input/steel-detection-model-fold1-class3-fpn/class3_detector_fold1_FPN.h5").to(device)
detection_class3_model2 = torch.load("../input/steel-detection-model-fold2-class3/class3_detector_fold2.h5").to(device)
detection_class3_model3 = torch.load("../input/steel-detection-model-fold3-class3/class3_detector_fold3.h5").to(device)
detection_class3_models = [detection_class3_model1,detection_class3_model2,detection_class3_model3]
#detection_class3_models = [detection_class3_model1,detection_class3_model2]

In [21]:
df_class4 = df[(df["ClassId"]==4) & df["ClassExist"]]
df_class3 = df[(df["ClassId"]==3) & df["ClassExist"]]

In [22]:
df_class4

Unnamed: 0,ImageId,ClassId,ClassExist
3,6d2a4005f.jpg,4,True
59,b1e55236c.jpg,4,True
83,0dca5a37d.jpg,4,True
167,c530161a6.jpg,4,True
183,1e446e941.jpg,4,True
...,...,...,...
21399,3cfda2739.jpg,4,True
21427,75779fab1.jpg,4,True
21671,be16b3bd7.jpg,4,True
21739,f39068d43.jpg,4,True


In [23]:
df_class3

Unnamed: 0,ImageId,ClassId,ClassExist
10,0854a9de6.jpg,3,True
22,f4d56263d.jpg,3,True
26,7b75c6da8.jpg,3,True
30,9f51a0553.jpg,3,True
34,732f9ff8b.jpg,3,True
...,...,...,...
21994,6ec0efcf3.jpg,3,True
21998,af71eb041.jpg,3,True
22006,c7f574347.jpg,3,True
22018,b3912fc50.jpg,3,True


In [24]:
test_class4 = df_class4["ImageId"].tolist()
test_class3 = df_class3["ImageId"].tolist()

In [25]:
detection_class4_data = TestDataset(test_class4, file_dir, transform = detection_transform)
detection_class3_data = TestDataset(test_class3, file_dir, transform = detection_transform)

In [26]:
detection_class4_data_loader = DataLoader(detection_class4_data, batch_size = 1, shuffle=False)
detection_class3_data_loader = DataLoader(detection_class3_data, batch_size = 1, shuffle=False)

In [27]:
df["EncodedPixels"] = ""

In [28]:
df

Unnamed: 0,ImageId,ClassId,ClassExist,EncodedPixels
0,6d2a4005f.jpg,1,False,
1,6d2a4005f.jpg,2,False,
2,6d2a4005f.jpg,3,False,
3,6d2a4005f.jpg,4,True,
4,a2491cf5f.jpg,1,False,
...,...,...,...,...
22019,b3912fc50.jpg,4,False,
22020,e4612d7c4.jpg,1,False,
22021,e4612d7c4.jpg,2,False,
22022,e4612d7c4.jpg,3,True,


In [29]:
encoded_class4_pixels = detection_df(detection_class4_models, detection_class4_data_loader,0.6,2000)

100%|██████████| 500/500 [07:46<00:00,  1.07it/s]


In [30]:
df.loc[(df["ClassId"]==4) & (df["ClassExist"]),"EncodedPixels"] = encoded_class4_pixels

In [31]:
df

Unnamed: 0,ImageId,ClassId,ClassExist,EncodedPixels
0,6d2a4005f.jpg,1,False,
1,6d2a4005f.jpg,2,False,
2,6d2a4005f.jpg,3,False,
3,6d2a4005f.jpg,4,True,122633 9 122887 15 123142 18 123397 20 123653 ...
4,a2491cf5f.jpg,1,False,
...,...,...,...,...
22019,b3912fc50.jpg,4,False,
22020,e4612d7c4.jpg,1,False,
22021,e4612d7c4.jpg,2,False,
22022,e4612d7c4.jpg,3,True,


In [32]:
encoded_class3_pixels = detection_df(detection_class3_models, detection_class3_data_loader,0.6,1500)

100%|██████████| 3046/3046 [33:29<00:00,  1.52it/s]


In [33]:
df.loc[(df["ClassId"]==3) & (df["ClassExist"]),"EncodedPixels"] = encoded_class3_pixels

In [34]:
df

Unnamed: 0,ImageId,ClassId,ClassExist,EncodedPixels
0,6d2a4005f.jpg,1,False,
1,6d2a4005f.jpg,2,False,
2,6d2a4005f.jpg,3,False,
3,6d2a4005f.jpg,4,True,122633 9 122887 15 123142 18 123397 20 123653 ...
4,a2491cf5f.jpg,1,False,
...,...,...,...,...
22019,b3912fc50.jpg,4,False,
22020,e4612d7c4.jpg,1,False,
22021,e4612d7c4.jpg,2,False,
22022,e4612d7c4.jpg,3,True,291588 7 291735 87 291824 2 291831 5 291843 15...


In [35]:
df["ImageId"] = df["ImageId"] + "_" + df["ClassId"].astype('str')
df.loc[df["EncodedPixels"]=="","EncodedPixels"] = ""
df = df.rename({'ImageId': 'ImageId_ClassId'}, axis=1)
df = df.drop(["ClassId", "ClassExist"], axis=1)
df

Unnamed: 0,ImageId_ClassId,EncodedPixels
0,6d2a4005f.jpg_1,
1,6d2a4005f.jpg_2,
2,6d2a4005f.jpg_3,
3,6d2a4005f.jpg_4,122633 9 122887 15 123142 18 123397 20 123653 ...
4,a2491cf5f.jpg_1,
...,...,...
22019,b3912fc50.jpg_4,
22020,e4612d7c4.jpg_1,
22021,e4612d7c4.jpg_2,
22022,e4612d7c4.jpg_3,291588 7 291735 87 291824 2 291831 5 291843 15...


In [36]:
df.to_csv("submission.csv", index = False)