**Imports & Setup:**
---------------------

In [2]:
import sys
import cv2
import pandas as pd
import gc
import math
import random
import os
from PIL import Image
import time
from timeit import default_timer as timer
from tqdm import tqdm_notebook as tqdm
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit
import random
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

In [3]:
import torch
import torchvision
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torch.utils.data import *
from torchvision import models
from torch import nn
from torch.optim.lr_scheduler import MultiStepLR
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.autograd import Variable
from torch.nn.modules.distance import PairwiseDistance
import torch.utils.model_zoo as model_zoo
import torch.backends.cudnn as cudnn
from torch.utils.tensorboard import SummaryWriter
from torch.nn.parameter import Parameter
from efficientnet_pytorch import EfficientNet

In [4]:
# Show which accelerator this kernel is using. Guarded so the cell also runs
# on CPU-only machines, where querying CUDA device 0 would raise.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'cpu'

'GeForce RTX 2080 Ti'

In [5]:
# Competition image directories, relative to the notebook's working directory
train_path = '../input/severstal-steel-defect-detection/train_images'
test_path = '../input/severstal-steel-defect-detection/test_images'
# Crop directories are currently disabled:
#train_path_crops = '../output/class_train'
#test_path_crops = '../output/class_test'

# Use the first CUDA device when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [4]:
def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    all CUDA devices), exports ``PYTHONHASHSEED`` and configures cuDNN for
    deterministic behaviour.

    Args:
        seed (int): Seed value shared by all generators.
    """
    random.seed(seed)
    # Read by Python at interpreter start-up, so this only affects processes
    # launched after this point, not the hashing of the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (ignored without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner can pick different kernels from run to run, which
    # would undermine the deterministic flag above; keep it switched off.
    torch.backends.cudnn.benchmark = False

In [5]:
# Fix all RNG seeds once, before any data splitting or model code runs
seed_everything(seed=42)

------------------

**Methods:**
-------------

In [3]:
def mask2rle(img):
    """Run-length encode a binary mask.

    The mask is read column by column (Fortran order). Each run of foreground
    pixels contributes a ``start length`` pair, where ``start`` is the 1-based
    position of the run's first pixel in that flattened order.

    Args:
        img: numpy array, 1 - mask, 0 - background.

    Returns:
        str: Space-separated ``start length`` pairs; an empty string when the
        mask contains no foreground pixels.
    """
    flat = img.flatten(order='F')
    # Zero sentinels on both ends so runs touching a border still produce an
    # opening and a closing transition.
    padded = np.concatenate(([0], flat, [0]))
    # 1-based positions at which the pixel value changes
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Every second edge closes a run: convert its position into a run length
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

def rle2mask(rle, input_shape, label):
    """Decode a run-length encoded string into a 2-D mask.

    This is the inverse of ``mask2rle``: run starts are 1-based positions in
    the column-major (Fortran order) flattening of the mask.

    Args:
        rle (str): Space-separated ``start length`` pairs.
        input_shape: Sequence whose first two entries are the shape of the
            returned mask.
        label: Value written into the pixels covered by a run (stored as
            ``uint8``).

    Returns:
        np.ndarray: ``uint8`` array of shape ``input_shape[:2]`` holding
        ``label`` on mask pixels and 0 elsewhere.

    Raises:
        IndexError: If ``rle`` holds an odd number of values, i.e. a start
            without a matching length.
    """
    rows, cols = input_shape[:2]

    mask = np.zeros(rows * cols, dtype=np.uint8)

    values = np.asarray([int(x) for x in rle.split()], dtype=np.int64)
    # RLE positions are 1-based; shift to 0-based indices before slicing
    starts = values[0::2] - 1
    lengths = values[1::2]
    if len(starts) != len(lengths):
        raise IndexError('RLE string must contain start/length pairs')

    for start, length in zip(starts, lengths):
        mask[start:start + length] = label

    # The flat buffer is column-major: rebuild it as (cols, rows) and transpose
    return mask.reshape(cols, rows).T

def build_masks(rles, input_shape):
    """Decode a sequence of RLE entries into binary masks.

    Args:
        rles: Iterable of entries. A ``str`` entry is decoded with
            ``rle2mask``; any other value yields an all-zero mask
            (presumably NaN marks a missing annotation - confirm against
            the caller).
        input_shape: Sequence whose first two entries give the shape of every
            returned mask.

    Returns:
        list of np.ndarray: One ``uint8`` mask per entry of ``rles``, with 1
        on mask pixels and 0 elsewhere.
    """
    empty_shape = tuple(input_shape[:2])
    masks = []

    for rle in rles:
        if isinstance(rle, str):
            masks.append(rle2mask(rle, input_shape, 1).astype(np.uint8))
        else:
            # Size the empty mask from `input_shape` so it always matches the
            # shape of the decoded masks.
            masks.append(np.zeros(empty_shape, dtype=np.uint8))

    return masks

def build_rles(masks):
    """Run-length encode the channels of a 3-D mask stack.

    Channel 0 is not encoded (presumably it is the background channel -
    confirm against the caller); channels 1 onwards are each passed to
    ``mask2rle``.

    Args:
        masks: Array with exactly three dimensions, channels on the last axis.

    Returns:
        list of str: One RLE string per encoded channel, in channel order.
    """
    _, _, n_channels = masks.shape

    encoded = []
    for channel in range(1, n_channels):
        encoded.append(mask2rle(masks[:, :, channel]))

    return encoded

In [None]:
# https://github.com/ufoym/imbalanced-dataset-sampler/blob/master/sampler.py

# train_loader = torch.utils.data.DataLoader(
#     train_dataset, 
#     sampler=ImbalancedDatasetSampler(train_dataset),
#     batch_size=args.batch_size, 
#     **kwargs
# )


class ImbalancedDatasetSampler(torch.utils.data.sampler.Sampler):
    """Samples elements randomly from a given list of indices for imbalanced dataset

    Every candidate index is weighted by ``1 / (number of candidates that
    share its label)`` and indices are drawn with replacement according to
    those weights.

    Arguments:
        dataset: object providing ``len()`` (used when ``indices`` is omitted)
            and the labels read by ``_get_label``
        indices (list, optional): a list of indices
        num_samples (int, optional): number of samples to draw
    """

    def __init__(self, dataset, indices=None, num_samples=None):
        # if indices is not provided,
        # all elements in the dataset will be considered
        self.indices = list(range(len(dataset))) if indices is None else indices

        # if num_samples is not provided,
        # draw `len(indices)` samples in each iteration
        self.num_samples = len(self.indices) if num_samples is None else num_samples

        # fetch each label exactly once; it is needed both for the class
        # counts and for the per-sample weights
        labels = [self._get_label(dataset, idx) for idx in self.indices]

        # distribution of classes in the dataset
        label_to_count = {}
        for label in labels:
            label_to_count[label] = label_to_count.get(label, 0) + 1

        # weight for each sample
        weights = [1.0 / label_to_count[label] for label in labels]
        self.weights = torch.tensor(weights, dtype=torch.double)

    def _get_label(self, dataset, idx):
        """Return the ``class_single`` value of ``dataset.df`` for ``idx``.

        NOTE(review): ``Series[idx]`` is resolved against the frame's index,
        not by position, when that index holds integers - this assumes
        ``dataset.df`` has a default 0..n-1 index; confirm.
        """
        return dataset.df['class_single'][idx]

    def __iter__(self):
        # Draw positions into `self.indices`; converting to a list yields
        # plain ints instead of 0-d tensors.
        drawn = torch.multinomial(
            self.weights, self.num_samples, replacement=True).tolist()
        return (self.indices[i] for i in drawn)

    def __len__(self):
        return self.num_samples
        