In [2]:
import os
import sys
import random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
# import seaborn as sns
# sns.set_style("white")

%matplotlib inline

import cv2
from sklearn.model_selection import train_test_split
from tqdm import tqdm_notebook #, tnrange

#from itertools import chain
# from skimage.io import imread, imshow #, concatenate_images
# from skimage.transform import resize
# from skimage.morphology import label

import torch
import torchvision
from torch import nn
from torch.nn import functional as F
from torch.utils import data
from torchvision import models
import torch.utils.model_zoo as model_zoo

import time
t_start = time.time()

print("Program started at:", t_start)
print(os.listdir())

Program started at: 1539044371.7804842
['sample_submission.csv', 'ff kNN.ipynb', 'EDA and LSTM tf.ipynb', 'test.zip', 'unet_resnet_pytorch.ipynb', 'train', 'depths.csv', 'train.zip', '.ipynb_checkpoints', 'unet_resnet_v0.model', 'resnet_unet_v0.ipynb', 'train.csv', 'test', 'EDA and LSTM.ipynb', 'registryupload_1.csv', 'unet_resnet_v0.csv', 'kNN_rmse.csv', 'kNN_results.csv', 'registryupload_2.csv']


In [3]:
# Resnet34 encoder from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}

# A same convolution conv3x3 layer
def conv3x3(inplanes, planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

def upconv2x2(in_channels, out_channels, mode='transpose'):
    if mode == 'transpose':
        return nn.ConvTranspose2d(
            in_channels,
            out_channels,
            kernel_size=2,
            stride=2)
    else:
        # out_channels is always going to be the same
        # as in_channels
        return nn.Sequential(
            nn.Upsample(mode='bilinear', scale_factor=2),
            conv1x1(in_channels, out_channels))
    
def conv1x1(in_channels, out_channels, groups=1):
    return nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=1,
        groups=groups,
        stride=1)

# One residual block (for encoding layers)
# Downsamples at the beginning if necessary
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.downsample = downsample
        self.inplanes = inplanes
        self.bn1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) # WHY ARE WE USING 3x3 WINDOW? OH WELL
        self.conv1 = conv3x3(inplanes, planes, stride)    
        self.bn2 = nn.BatchNorm2d(planes)  
        self.conv2 = conv3x3(planes, planes)              
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.bn1(out)
        out = self.relu(out)
        if self.inplanes == 64: # This is probably not the best way to do this... fix in the future
            out = self.maxpool(out)
            out = self.conv2(out)
        else:
            out = self.conv1(x)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv2(out)
                
        out += residual
        
        return out

# Decoder block containing 2D transposed Convolution upsampling the features
class DecoderBlock(nn.Module):
    def __init__(self, inplanes, outplanes, stride=1):
        super().__init__()

        self.block = nn.Sequential(
            conv3x3(inplanes, outplanes, stride),
            nn.ReLU(inplace=True),
            conv3x3(outplanes, outplanes, stride),
            nn.ReLU(inplace=True),
            upconv2x2(outplanes, outplanes)
        )

    def forward(self, x):
        return self.block(x)    

# Need this separate class to load weights into? We'll see
# class ResNet(nn.Module):
        
# WIP
class UResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000):
        super(UResNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                               bias=False) 
        self.inplanes = 64
        
        # Encoding layers
        self.layer1 = self._make_encoding_layer(block, 64, layers[0])
        self.layer2 = self._make_encoding_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_encoding_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_encoding_layer(block, 512, layers[3], stride=2)
        
        # self.avgpool = nn.AvgPool2d(7, stride=1)  # I don't think we need this in the U-net bottleneck layer
        # self.fc = nn.Linear(512 * block.expansion, num_classes) # I don't think we need this in the U-net bottleneck layer
        
        # Decoding layers with cat
        self.layer4i = upconv2x2(512, 512)
        self.layer4e = DecoderBlock(256 * 3, 256)
        self.layer3e = DecoderBlock(128 * 3, 128)
        self.layer2e = DecoderBlock(64 * 3, 64)
        
        # Decoding layers without cat
        self.dec1 = DecoderBlock(64, 32)
        self.dec2 = DecoderBlock(32, 16)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_encoding_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion) # Is this batch norm necessary?,
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)
    
       
    def forward(self, x):
        # Pre-res layers
        x = self.conv1(x) # 128x128x1 -> 64x64x64
                
        # Resnet layers
        conv2 = self.layer1(x) # 64x64x64 -> 32x32x64
        conv3 = self.layer2(conv2) # output: 16x16x128
        conv4 = self.layer3(conv3) # output: 8x8x256
        conv5 = self.layer4(conv4) # output: 4x4x512
        conv4i = self.layer4i(conv5) # output: 8x8x512
        conv4e = self.layer4e(torch.cat([conv4i, conv4], 1)) # output: 16x16x256
        conv3e = self.layer3e(torch.cat([conv4e, conv3], 1)) # output: 32x32x128
        conv2e = self.layer2e(torch.cat([conv3e, conv2], 1)) # output: 64x64x64
        dec1 = self.dec1(conv2e) # output: 128x128x32
        
        y = conv3x3(32, 16, 1)
        y = nn.ReLU(inplace=True)
        y = conv3x3(16, 16, 1)
        y = nn.ReLU(inplace=True)
        y = conv3x3(16, 1, 1)
        y = F.sigmoid(y)

        # Don't think we need these for U-net
#         x = self.avgpool(x)
#         x = x.view(x.size(0), -1)
#         x = self.fc(x)
        
        # CENTER BLOCK
        
        return y
    


def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = UResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
    model.train()
    return model.to(device)
    
print("Network architecture created.")

Network architecture created.


In [16]:
class TGSSaltDataset(data.Dataset):

    def __init__(self,
                 root_path,
                 file_list,
                 is_test=False,
                 divide=False,
                 image_size=(128, 128)):

        self.root_path = root_path
        self.file_list = file_list
        self.is_test = is_test

        self.divide = divide
        self.image_size = image_size

        self.orig_image_size = (101, 101)
        self.padding_pixels = None
        
        """
        root_path: folder specifying files location
        file_list: list of images IDs
        is_test: whether train or test data is used (contains masks or not)
        
        divide: whether to divide by 255
        image_size: output image size, should be divisible by 32
        
        orig_image_size: original images size
        padding_pixels: placeholder for list of padding dimensions
        """

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, index):
        if index not in range(0, len(self.file_list)):
            return self.__getitem__(np.random.randint(0, self.__len__()))

        file_id = self.file_list[index]

        # Get image path
        image_folder = os.path.join(self.root_path, 'images')
        image_path = os.path.join(image_folder, file_id + '.png')
    
        # Get mask path
        mask_folder = os.path.join(self.root_path, 'masks')
        mask_path = os.path.join(mask_folder, file_id + '.png')

        # Load image
        image = self.__load_image(image_path)
        if not self.is_test:
            # Load mask for training or evaluation
            mask = self.__load_image(mask_path, mask=True)
            if self.divide:
                image = image / 255.
                mask = mask / 255.
            # Transform into torch float Tensors of shape (CxHxW).
            image = torch.from_numpy(
                image).float().permute([2, 0, 1])
            mask = torch.from_numpy(
                np.expand_dims(mask, axis=-1)).float().permute([2, 0, 1])
            return image, mask

        if self.is_test:
            if self.divide:
                image = image / 255.
            image = torch.from_numpy(image).float().permute([2, 0, 1])
            return (image,)

    def set_padding(self):

        """
        Compute padding borders for images based on original and specified image size.
        """
        
        pad_floor = np.floor(
            (np.asarray(self.image_size) - np.asarray(self.orig_image_size)) / 2)
        pad_ceil = np.ceil((np.asarray(self.image_size) -
                            np.asarray(self.orig_image_size)) / 2)

        self.padding_pixels = np.asarray(
            (pad_floor[0], pad_ceil[0], pad_floor[1], pad_ceil[1])).astype(np.int32)

        return

    def __pad_image(self, img):
        
        """
        Pad images according to border set in set_padding.
        Original image is centered.
        """

        y_min_pad, y_max_pad, x_min_pad, x_max_pad = self.padding_pixels[
            0], self.padding_pixels[1], self.padding_pixels[2], self.padding_pixels[3]

        img = cv2.copyMakeBorder(img, y_min_pad, y_max_pad,
                                 x_min_pad, x_max_pad,
                                 cv2.BORDER_REPLICATE)

        assert img.shape[:2] == self.image_size, '\
        Image after padding must have the same shape as input image.'

        return img

    def __load_image(self, path, mask=False):
        
        """
        Helper function for loading image.
        If mask is loaded, it is loaded in grayscale (, 0) parameter.
        """

        if mask:
            img = cv2.imread(str(path), 0)
        else:
            img = cv2.imread(str(path), 0)

        height, width = img.shape[0], img.shape[1]

        img = self.__pad_image(img)

        return img

    def return_padding_borders(self):
        """
        Return padding borders to easily crop the images.
        """
        return self.padding_pixels

# Load initial data into dataframes

In [17]:
train_df = pd.read_csv('train.csv', index_col = 'id')
depths_df = pd.read_csv('depths.csv', index_col='id')
train_df = train_df.join(depths_df)
test_df = depths_df[~depths_df.index.isin(train_df.index)] # All depths not in train dataset are in 

print ('# of training images:', len(os.listdir('train/images')))
print ('# of training masks:', len(os.listdir('train/masks')))
print ('# of test images:', len(os.listdir('test/images')))

train_df.head()


# of training images: 4000
# of training masks: 4000
# of test images: 18000


Unnamed: 0_level_0,rle_mask,z
id,Unnamed: 1_level_1,Unnamed: 2_level_1
575d24d81d,,843
a266a2a9df,5051 5151,794
75efad62c1,9 93 109 94 210 94 310 95 411 95 511 96 612 96...,468
34e51dba6a,48 54 149 54 251 53 353 52 455 51 557 50 659 4...,727
4875705fb0,1111 1 1212 1 1313 1 1414 1 1514 2 1615 2 1716...,797


# Parameters

In [18]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data_src = ''

quick_try = False
grayscale = True

orig_image_size = (101, 101)
image_size = (128, 128)

# Load images using opencv

In [19]:
X_train = []
y_train = []

print('Loading training set.')
for i in tqdm_notebook(train_df.index):
    img_src = 'train/images/{}.png'.format(i)
    mask_src = 'train/masks/{}.png'.format(i)
    if grayscale:
        img_temp = cv2.imread(img_src, 0)
    else:
        img_temp = cv2.imread(img_src)
    mask_temp = cv2.imread(mask_src, 0)
    if orig_image_size != image_size:
        img_temp = cv2.resize(img_temp, image_size)
        mask_temp = cv2.resize(mask_temp, image_size)
    X_train.append(img_temp)
    y_train.append(mask_temp)
    # print(img_temp.shape)
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)

if grayscale:
    X_train = np.expand_dims(X_train, -1)
y_train = np.expand_dims(y_train, -1)

Loading training set.


HBox(children=(IntProgress(value=0, max=4000), HTML(value='')))




In [20]:
print('Compute mask coverage for each observation.')

def cov_to_class(val):
    for i in range(0, 11):
        if val * 10 <= i:
            return i

# Percent of area covered by mask.
train_df['coverage'] = np.mean(y_train / 255., axis=(1, 2))
train_df['coverage_class'] = train_df.coverage.map(
    cov_to_class)

Compute mask coverage for each observation.


__Parameters for data loading:__

In [21]:
train_path = 'train'
test_path = 'test'

train_ids = train_df.index.values
test_ids = test_df.index.values

from sklearn.model_selection import train_test_split

tr_ids, valid_ids, tr_coverage, valid_coverage = train_test_split(
    train_ids,
    train_df.coverage.values,
    test_size=0.2, stratify=train_df.coverage_class, random_state= 1234)

__Define Data Loading__:

In [22]:
# Training dataset:
dataset_train = TGSSaltDataset(train_path, tr_ids, divide=True)
dataset_train.set_padding()
y_min_pad, y_max_pad, x_min_pad, x_max_pad = dataset_train.return_padding_borders()
        
# Validation dataset:
dataset_val = TGSSaltDataset(train_path, valid_ids, divide=True)
dataset_val.set_padding()

# Test dataset:
dataset_test = TGSSaltDataset(test_path, test_ids, is_test=True, divide=True)
dataset_test.set_padding()


# Data loaders:
# Use multiple workers to optimize data loading speed.
# Pin memory for quicker GPU processing.
train_loader = data.DataLoader(
    dataset_train,
    batch_size=32,
    shuffle=True,
    num_workers=4,
    pin_memory=True)

# Do not shuffle for validation and test.
valid_loader = data.DataLoader(
    dataset_val,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    pin_memory=True)

test_loader = data.DataLoader(
    dataset_test,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    pin_memory=True)

# Training:

In [23]:
# Get defined UNet model.
model = resnet34()
# Set Binary Crossentropy as loss function.
loss_fn = torch.nn.BCELoss()

# Set optimizer.
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# Train for n epochs
n = 2
for e in range(n):

    # Training:
    train_loss = []
    for image, mask in tqdm_notebook(train_loader):

        # Put image on chosen device
        image = image.type(torch.float).to(device)
        # Predict with model:
        y_pred = model(image)
        # Compute loss between true and predicted values
        loss = loss_fn(y_pred, mask.to(device))

        # Set model gradients to zero.
        optimizer.zero_grad()
        # Backpropagate the loss.
        loss.backward()

        # Perform single optimization step - parameter update
        optimizer.step()
        
        # Append training loss
        train_loss.append(loss.item())

    # Validation:
    val_loss = []
    val_iou = []
    for image, mask in valid_loader:
        
        image = image.to(device)
        y_pred = model(image)
        
        loss = loss_fn(y_pred, mask.to(device))
        val_loss.append(loss.item())

    print("Epoch: %d, Train: %.3f, Val: %.3f" %
          (e, np.mean(train_loss), np.mean(val_loss)))

HBox(children=(IntProgress(value=0), HTML(value='')))

RuntimeError: Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 106, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/ubuntu/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 106, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "<ipython-input-16-dd5bc7c6ab95>", line 59, in __getitem__
    image).float().permute([2, 0, 1])
RuntimeError: number of dims don't match in permute
