In [1]:
!pip install tqdm

Collecting tqdm
  Downloading tqdm-4.19.5-py2.py3-none-any.whl (51kB)
[K    100% |████████████████████████████████| 61kB 4.7MB/s 
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.19.5


In [0]:
!pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl && pip install torchvision

In [0]:
!pip install h5py

In [0]:
!wget https://www.dropbox.com/s/0k2qz2oqp9e149h/stage1_train.zip && unzip stage1_train.zip -d stage1_train/

In [0]:
!wget https://www.dropbox.com/s/ej948r7040sgnof/stage1_test.zip && unzip stage1_test.zip -d stage1_test/

In [0]:
import os
import sys
import numpy as np
import pandas as pd
from tqdm import tqdm
from itertools import chain
from skimage.io import imread, imshow, imread_collection, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
import PIL
import pickle

from torch import nn, optim
from torch.nn import functional as F
import torch
from torchvision import models
from torch.autograd import Variable

import matplotlib.pyplot as plt
%matplotlib inline

In [0]:
# Spatial size every image and mask is resized to before being stored.
IMG_WIDTH = 256
IMG_HEIGHT = 256
# Number of leading channels kept from each image; any further channels are sliced off.
IMG_CHANNELS = 3
# Dataset roots. The trailing slash matters: sample paths are built by string concatenation.
TRAIN_PATH = 'stage1_train/stage1_train/'
TEST_PATH = 'stage1_test/stage1_test/'

In [0]:
# next(os.walk(p))[1] is the list of immediate sub-directory names of p. Each name is
# used as a sample id; the image is later read from <id>/images/<id>.png.
train_ids = next(os.walk(TRAIN_PATH))[1]
test_ids = next(os.walk(TEST_PATH))[1]

In [0]:
len(train_ids), len(test_ids)

# Creating Train Set

In [0]:
# Build the in-memory training set: one {'mask', 'img'} dict of float tensors per sample id.
data = []
target_shape = (IMG_HEIGHT, IMG_WIDTH)

for i, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
  path = TRAIN_PATH + id_

  # Keep only the first IMG_CHANNELS channels, then resize to the common shape.
  img = imread(path + '/images/' + id_ + '.png')[:, :, :IMG_CHANNELS]
  img = resize(img, target_shape, mode='constant', preserve_range=True)

  # Every file under masks/ is resized and merged into a single mask by element-wise maximum.
  mask = np.zeros((IMG_HEIGHT, IMG_WIDTH, 1))
  for mask_file in next(os.walk(TRAIN_PATH + id_ + '/masks/'))[2]:
    mask_ = imread(path + '/masks/' + mask_file)
    mask_ = resize(mask_, target_shape, mode='constant', preserve_range=True)
    # Add a trailing channel axis so the shapes line up with `mask`.
    mask = np.maximum(mask, mask_[..., np.newaxis])

  item = {
      'mask': torch.from_numpy(mask).float(),
      'img': torch.from_numpy(img).float(),
  }
  data.append(item)

In [0]:
# Cache the preprocessed training set on disk. The context manager guarantees the
# file is flushed and closed before any later cell reads it back.
with open('data.p', 'wb') as cache_file:
  pickle.dump(data, cache_file)

In [0]:
# Reload the cached training set; the context manager closes the file handle.
# NOTE: pickle.load can execute arbitrary code, so only load a cache file that this
# notebook wrote itself.
with open('data.p', 'rb') as cache_file:
  data = pickle.load(cache_file)

In [0]:
len(data)

# Creating Test Set

In [0]:
# Build the test set and remember each image's original [height, width] so the
# predictions can be resized back later.
test_data = []
sizes_test = []

for i, id_ in tqdm(enumerate(test_ids), total=len(test_ids)):
  test_path = TEST_PATH + id_

  # Keep only the first IMG_CHANNELS channels.
  im = imread(test_path + '/images/' + id_ + '.png')[:, :, :IMG_CHANNELS]
  # Record the native size before the image is resized.
  sizes_test.append(list(im.shape[:2]))
  im = resize(im, (IMG_HEIGHT, IMG_WIDTH), mode='constant', preserve_range=True)

  test_item = {'img': torch.from_numpy(im).float()}
  test_data.append(test_item)

In [0]:
len(test_data)

# PyTorch Data Loading

In [0]:
import PIL
from torchvision import transforms

class Dataset():
  """Indexable collection of (image, mask) training pairs.

  Args:
    data: sequence of dicts holding tensors under the keys 'img' and 'mask'.
    source_transform: callable applied to the image as a numpy array.
    target_transform: callable applied to the mask as a uint8 numpy array.
  """

  def __init__(self, data, source_transform, target_transform):
    self.d = data
    self.src_transform = source_transform
    self.targ_transform = target_transform

  def __len__(self):
    """Number of stored samples."""
    return len(self.d)

  def __getitem__(self, index):
    """Return the transformed (image, mask) pair stored at `index`."""
    sample = self.d[index]
    image = self.src_transform(sample['img'].numpy())
    # The mask is cast to uint8 before it is handed to its transform.
    target = self.targ_transform(sample['mask'].byte().numpy())
    return image, target
  

# Image pipeline: convert the HWC numpy image to a CHW tensor, then map every channel
# with (x - 0.5) / 0.5.
# NOTE(review): this relies on ToTensor rescaling the float 0-255 array to [0, 1].
# Presumably true for the torchvision release contemporary with torch 0.3; newer
# releases only rescale uint8 input -- confirm against the installed version.
source_transform = transforms.Compose([
    #transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.5,0.5,0.5],std = [0.5,0.5,0.5])
])

# Mask pipeline: only the numpy -> tensor conversion, no normalisation.
target_transform = transforms.Compose([
    #transforms.ToPILImage(),
    transforms.ToTensor(),
])

In [0]:
# Wrap the preprocessed training items together with the image and mask transforms.
dataset = Dataset(data, source_transform, target_transform)

In [0]:
# Training loader: mini-batches of 16 samples, reshuffled on every pass.
dataloader = torch.utils.data.DataLoader(dataset, batch_size = 16, shuffle=True)

In [0]:
# Sanity check: pull a single batch from the loader, print it, and stop.
for x in dataloader:
  print (x)
  break

# Model

In [0]:
class ConvBlock(nn.Module):
  """Two 3x3 convolutions (padding 1), each followed by a ReLU.

  The first convolution maps in_ch -> out_ch channels, the second out_ch -> out_ch.
  Kernel 3 with padding 1 leaves the spatial size unchanged.
  """

  def __init__(self, in_ch, out_ch):
    super(ConvBlock, self).__init__()
    self.conv1 = nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1)
    self.conv2 = nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1)

  def forward(self, x):
    # conv -> ReLU, applied for conv1 and then conv2.
    for conv in (self.conv1, self.conv2):
      x = F.relu(conv(x))
    return x
  
  
class input_conv(nn.Module):
  """Entry stage: a single ConvBlock mapping in_ch input channels to out_ch channels."""

  def __init__(self, in_ch, out_ch):
    super(input_conv, self).__init__()
    self.inp_conv = ConvBlock(in_ch, out_ch)

  def forward(self, x):
    return self.inp_conv(x)
  

class up(nn.Module):
  """Decoder stage: 2x nearest-neighbour upsampling, skip concatenation, ConvBlock.

  Args:
    in_ch: channel count of the concatenated tensor fed to the ConvBlock, i.e. the
      skip channels plus the upsampled channels.
    out_ch: channel count produced by the ConvBlock.
  """

  def __init__(self, in_ch, out_ch):
    super(up, self).__init__()
    self.up_conv = nn.Upsample(scale_factor=2, mode='nearest')
    self.conv = ConvBlock(in_ch, out_ch)

  def forward(self, x1, x2):
    """Upsample x1, concatenate it after the skip tensor x2, then convolve."""
    upsampled = self.up_conv(x1)
    # Skip features first, upsampled features second, joined along the channel axis.
    merged = torch.cat([x2, upsampled], dim=1)
    return self.conv(merged)
    
    
class down(nn.Module):
  """Encoder stage: 2x2 max-pooling followed by a ConvBlock (in_ch -> out_ch)."""

  def __init__(self, in_ch, out_ch):
    super(down, self).__init__()
    self.pool = nn.MaxPool2d(2)
    self.conv = ConvBlock(in_ch, out_ch)

  def forward(self, x):
    # Pool first, then convolve.
    return self.conv(self.pool(x))
  
class Dilations(nn.Module):
  """One 3x3 dilated convolution followed by a ReLU.

  The padding is set equal to the dilation rate `dil`, which keeps the spatial size
  unchanged for a 3x3 kernel.
  """

  def __init__(self, in_ch, out_ch, dil):
    super(Dilations, self).__init__()
    self.conv_dil = nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=dil, dilation=dil)

  def forward(self, x):
    return F.relu(self.conv_dil(x))
  
class last_conv(nn.Module):
  """Output head: a 1x1 convolution from in_ch to out_ch channels, with no activation."""

  def __init__(self, in_ch, out_ch):
    super(last_conv, self).__init__()
    self.conv1 = nn.Conv2d(in_ch, out_ch, kernel_size=1)

  def forward(self, x):
    return self.conv1(x)

In [0]:
class Unet(nn.Module):
  """U-Net style encoder/decoder with a stack of dilated convolutions in the middle.

  Encoder: an input block plus four pooling stages (32, 64, 128, 256, 256 channels).
  Bottleneck: six dilated convolutions with rates 1, 2, 4, 8, 16, 32, ending at 256 channels.
  Decoder: four upsampling stages, each concatenating the encoder output of matching
  resolution, followed by a 1x1 convolution and a sigmoid.

  Args:
    no_channels: number of channels of the input image.
    no_classes: number of output channels.
  """

  def __init__(self, no_channels, no_classes):
    super(Unet, self).__init__()
    # Encoder
    self.inp = input_conv(no_channels, 32)
    self.down1 = down(32, 64)
    self.down2 = down(64, 128)
    self.down3 = down(128, 256)
    self.down4 = down(256, 256)
    # Dilated bottleneck
    self.dil1 = Dilations(256, 512, 1)
    self.dil2 = Dilations(512, 512, 2)
    self.dil3 = Dilations(512, 512, 4)
    self.dil4 = Dilations(512, 512, 8)
    self.dil5 = Dilations(512, 512, 16)
    self.dil6 = Dilations(512, 256, 32)
    # Decoder; each in_ch is the sum of the skip channels and the upsampled channels.
    self.up1 = up(512, 128)
    self.up2 = up(256, 64)
    self.up3 = up(128, 32)
    self.up4 = up(64, 32)
    self.out = last_conv(32, no_classes)

  def forward(self, x):
    """Return per-pixel sigmoid outputs for the input batch x."""
    # Encoder: keep the output of every level for the skip connections.
    skip1 = self.inp(x)
    skip2 = self.down1(skip1)
    skip3 = self.down2(skip2)
    skip4 = self.down3(skip3)
    x = self.down4(skip4)

    # Bottleneck: dilated convolutions applied one after another.
    for dilated in (self.dil1, self.dil2, self.dil3, self.dil4, self.dil5, self.dil6):
      x = dilated(x)

    # Decoder: pair each stage with the encoder output of the same resolution.
    decoder = (
        (self.up1, skip4),
        (self.up2, skip3),
        (self.up3, skip2),
        (self.up4, skip1),
    )
    for stage, skip in decoder:
      x = stage(x, skip)

    return nn.functional.sigmoid(self.out(x))
    

In [0]:
def Unet_nuclei():
  """Build a Unet with 3 input channels and 1 output channel and move it to the GPU."""
  return Unet(3, 1).cuda()

model_pytorch = Unet_nuclei()

In [0]:
model_pytorch

# Train

In [0]:
# Adam over all model parameters, with an initial learning rate of 1e-3.
optimizer = optim.Adam(model_pytorch.parameters(), lr=1e-3)

In [0]:
# Step schedule: multiply the learning rate by 0.1 every 20 calls to scheduler.step().
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

In [0]:
# Train for 60 epochs with binary cross-entropy on the sigmoid outputs of the model.
# The loss module is stateless, so it is built once instead of once per batch.
criterion = nn.BCELoss()

# Make sure the model is in training mode even if this cell is re-run after .eval().
model_pytorch.train()

for epoch in range(60):
    # enumerate() supplies the batch counter used for logging. Without it `i` was a
    # stale global left over from the data-loading loops, so the "% 5" test and the
    # printed batch number had nothing to do with the current batch.
    for i, (x, y) in enumerate(tqdm(dataloader)):
        inputs = Variable(x).cuda()
        labels = Variable(y).cuda()

        # forward + backward + optimize
        optimizer.zero_grad()
        outputs = model_pytorch(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        # Apply the update computed from the gradients.
        optimizer.step()

        if (i+1) % 5 == 0:
            # loss.data[0] is how torch 0.3 exposes the scalar loss value.
            print('[%d, %5d] loss: %.4f' % (epoch, i+1, loss.data[0]))

    # Advance the learning-rate schedule once per epoch, after that epoch's optimizer
    # updates. The scheduler was created above but never stepped, so the learning rate
    # previously stayed at its initial value for the whole run.
    scheduler.step()

# Loading Test Dataset

In [0]:
class TestDataset():
  """Indexable collection of test images (no masks).

  Args:
    test_data: sequence of dicts holding an image tensor under the key 'img'.
    source_transform: callable applied to the image as a numpy array.
  """

  def __init__(self, test_data, source_transform):
    self.t = test_data
    self.src_transform = source_transform

  def __len__(self):
    """Number of stored test images."""
    return len(self.t)

  def __getitem__(self, index):
    """Return the transformed image stored at `index`."""
    raw_image = self.t[index]['img'].numpy()
    return self.src_transform(raw_image)

In [0]:
# Test images go through the same image transform as the training images.
test_dataset = TestDataset(test_data, source_transform)
# No shuffling is requested here, so batches follow the order of test_data; the later
# cells index sizes_test and test_ids by position and depend on that order.
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=16)

# Testing

In [0]:
# Put the model into evaluation mode before running inference.
model_pytorch = model_pytorch.eval()

In [0]:
# Run the trained model over the test set, one batch at a time, and keep every output.
predictions = []
for test_x in tqdm(test_dataloader):
  # volatile=True is the torch 0.3 inference flag: no autograd graph is recorded, so
  # the stored outputs no longer keep every intermediate activation alive on the GPU.
  # (torch >= 0.4 ignores the flag with a warning; use torch.no_grad() there.)
  inp = Variable(test_x, volatile=True).cuda()

  out = model_pytorch(inp)
  # Outputs stay Variables because the following cells read them through `.data`.
  predictions.append(out)

In [0]:
predictions[0][1]

In [0]:
inp[0]

In [0]:
# Show channel 0 of the first prediction in batch index 4.
# NOTE(review): the index 4 is hard-coded; `inp` shown in the next cell is the last
# batch, which is batch 4 only if the test loader yielded exactly five batches -- confirm.
imshow(predictions[4][0][0].data.cpu().numpy())

In [0]:
# Show the first image of the last processed batch: CHW -> HWC, then x * 0.5 + 0.5
# to undo the Normalize(mean=0.5, std=0.5) step of source_transform.
imshow(inp[0].data.cpu().permute(1,2,0).numpy()*0.5 + 0.5)

In [0]:
# Resize every predicted map back to the original size of its test image.
# `count` is the running position in sizes_test across all batches.
preds_test_upsampled = []
count = 0
for batch in predictions:
  for sample in batch:
    # Channel 0 of this sample, moved to the CPU as a numpy array.
    prob_map = sample[0].data.cpu().numpy()
    orig_height, orig_width = sizes_test[count][0], sizes_test[count][1]
    preds_test_upsampled.append(
        resize(prob_map, (orig_height, orig_width), mode='constant', preserve_range=True))
    count += 1

len(preds_test_upsampled)

In [0]:
def rle_encoding(x):
    """Run-length encode the entries of `x` that equal 1.

    The array is transposed and flattened, so positions run down the columns first.
    Returns a flat list [start, length, start, length, ...] where each start is the
    1-based position of the first element of a run of consecutive ones.
    """
    flat = x.T.flatten()
    positions = np.where(flat == 1)[0]

    runs = []
    last = -2  # sentinel: the first position can never be adjacent to it
    for pos in positions:
        if pos - last > 1:
            # Gap before this position: open a new run of length one.
            runs.append(pos + 1)
            runs.append(1)
        else:
            # Adjacent to the previous position: lengthen the current run.
            runs[-1] += 1
        last = pos
    return runs

def prob_to_rles(x, cutoff=0.5):
    """Yield one run-length encoding per labelled region of `x > cutoff`.

    The thresholded array is passed to `label`, and every label value from 1 up to
    the maximum is encoded separately with `rle_encoding`.
    """
    labelled = label(x > cutoff)
    region_ids = range(1, labelled.max() + 1)
    yield from (rle_encoding(labelled == region_id) for region_id in region_ids)

In [0]:
# Collect the encodings of all test images into two parallel lists: `rles` holds one
# entry per encoded region and `new_test_ids` repeats the image id once per region.
new_test_ids = []
rles = []
for n, id_ in enumerate(test_ids):
    image_rles = list(prob_to_rles(preds_test_upsampled[n]))
    rles += image_rles
    new_test_ids += [id_] * len(image_rles)

In [0]:
# Assemble the submission table: one row per encoded region, with the image id and the
# run-length list rendered as a space-separated string, then write it without the index.
sub = pd.DataFrame()
sub['ImageId'] = new_test_ids
sub['EncodedPixels'] = pd.Series(rles).apply(lambda x: ' '.join(str(y) for y in x))
sub.to_csv('submission.csv', index=False)

In [0]:
sub.head()

In [0]:
# Colab-only helper module; this import is not available outside Google Colab.
from google.colab import files

# Send the generated submission file to the browser as a download.
files.download('submission.csv')