In [87]:
import numpy as np
import pandas as pd
import glob
import random
import matplotlib.pyplot as plt
from skimage.transform import resize
from tqdm.notebook import tqdm
from pydicom import dcmread
import pydicom
import csv
import os

In [88]:
from tensorflow import keras



In [89]:
import torch
from torch.utils import data
import torch.optim as optim
from torchvision.models import resnet50
import torch

import torch.nn as nn
import torch.nn.functional as F

In [90]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [91]:
device

device(type='cpu')

In [92]:
# empty dictionary
pneumonia_locations = {}
# load table
with open(os.path.join('./rsna-pneumonia-detection-challenge/stage_2_train_labels.csv'), mode='r') as infile:
    # open reader
    reader = csv.reader(infile)
    # skip header
    next(reader, None)
    # loop through rows
    for rows in reader:
        # retrieve information
        filename = rows[0]
        location = rows[1:5]
        pneumonia = rows[5]
        # if row contains pneumonia add label to dictionary
        # which contains a list of pneumonia locations per filename
        if pneumonia == '1':
            # convert string to float to int
            location = [int(float(i)) for i in location]
            # save pneumonia location in dictionary
            if filename in pneumonia_locations:
                pneumonia_locations[filename].append(location)
            else:
                pneumonia_locations[filename] = [location]

In [93]:
pneumonia_locations

{'00436515-870c-4b36-a041-de91049b9ab4': [[264, 152, 213, 379],
  [562, 152, 256, 453]],
 '00704310-78a8-4b38-8475-49f4573b2dbb': [[323, 577, 160, 104],
  [695, 575, 162, 137]],
 '00aecb01-a116-45a2-956c-08d2fa55433f': [[288, 322, 94, 135],
  [547, 299, 119, 165]],
 '00c0b293-48e7-4e16-ac76-9269ba535a62': [[306, 544, 168, 244],
  [650, 511, 206, 284]],
 '00f08de1-517e-4652-a04f-d1dc9ee48593': [[181, 184, 206, 506],
  [571, 275, 230, 476]],
 '0100515c-5204-4f31-98e0-f35e4b00004a': [[703, 416, 84, 77]],
 '010ccb9f-6d46-4380-af11-84f87397a1b8': [[652, 437, 161, 293],
  [301, 405, 141, 279]],
 '012a5620-d082-4bb8-9b3b-e72d8938000c': [[133, 613, 275, 275],
  [678, 427, 224, 340]],
 '0174c4bb-28f5-41e3-a13f-a396badc18bd': [[155, 182, 273, 501],
  [599, 220, 227, 508]],
 '019d950b-dd38-4cf3-a686-527a75728be6': [[229, 318, 250, 301],
  [604, 216, 196, 328]],
 '01a6eaa6-222f-4ea8-9874-bbd89dc1a1ce': [[141, 306, 225, 327],
  [609, 285, 236, 355]],
 '01a7353d-25bb-4ff8-916b-f50dd541dccf': [[214, 

In [94]:
# load and shuffle filenames
folder = './rsna-pneumonia-detection-challenge/stage_2_train_images'
filenames = os.listdir(folder)
random.shuffle(filenames)
# split into train and validation filenames
n_valid_samples = 2560
train_filenames = filenames[n_valid_samples:]
valid_filenames = filenames[:n_valid_samples]
print('n train samples', len(train_filenames))
print('n valid samples', len(valid_filenames))
n_train_samples = len(filenames) - n_valid_samples

n train samples 24124
n valid samples 2560


In [95]:
class Dataset(data.Dataset):
    def __init__(self, device, folder, filenames, pneumonia_locations=None, batch_size=32, image_size=320, shuffle=True, augment=False, predict=False):
        self.device = device
        self.folder = folder
        self.filenames = filenames
        self.pneumonia_locations = pneumonia_locations
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.augment = augment
        self.predict = predict
        self.on_epoch_end()

    def reshape(self, imgs, msks):
        # reshape for pytorch
        imgs = imgs.permute(0, 3, 1, 2)
        msks = msks.permute(0, 3, 1, 2)
        return imgs, msks

    def __load__(self, filename):
        # load dicom file as numpy array
        img = pydicom.dcmread(os.path.join(self.folder, filename)).pixel_array
        # create empty mask
        msk = np.zeros(img.shape)
        # get filename without extension
        filename = filename.split('.')[0]
        # if image contains pneumonia
        if filename in pneumonia_locations:
            # loop through pneumonia
            for location in pneumonia_locations[filename]:
                # add 1's at the location of the pneumonia
                x, y, w, h = location
                msk[y:y+h, x:x+w] = 1
        # if augment then horizontal flip half the time
        if self.augment and random.random() > 0.5:
            img = np.fliplr(img)
            msk = np.fliplr(msk)
        # resize both image and mask
        img = resize(img, (self.image_size, self.image_size), mode='reflect')
        msk = resize(msk, (self.image_size, self.image_size), mode='reflect') > 0.5
        # add trailing channel dimension
        img = np.expand_dims(img, -1)
        msk = np.expand_dims(msk, -1)
        return img, msk
    
    def __loadpredict__(self, filename):
        # load dicom file as numpy array
        img = pydicom.dcmread(os.path.join(self.folder, filename)).pixel_array
        # resize image
        img = resize(img, (self.image_size, self.image_size), mode='reflect')
        # add trailing channel dimension
        img = np.expand_dims(img, -1)
        return img
        
    def __getitem__(self, index):
        # select batch
        filenames = self.filenames[index*self.batch_size:(index+1)*self.batch_size]
        # predict mode: return images and filenames
        if self.predict:
            # load files
            imgs = [self.__loadpredict__(filename) for filename in filenames]
            # create numpy batch
            imgs = np.array(imgs)
            return imgs, filenames
        # train mode: return images and masks
        else:
            # load files
            items = [self.__load__(filename) for filename in filenames]
            # unzip images and masks
            imgs, msks = zip(*items)
            # create numpy batch
            imgs = torch.tensor(imgs).to(device).type(torch.FloatTensor)
            msks = torch.tensor(msks).to(device).type(torch.FloatTensor)
            imgs, msks = self.reshape(imgs, msks)
            return imgs, msks
        
    def on_epoch_end(self):
        if self.shuffle:
            random.shuffle(self.filenames)
        
    def __len__(self):
        if self.predict:
            # return everything
            return int(np.ceil(len(self.filenames) / self.batch_size))
        else:
            # return full batches only
            return int(len(self.filenames) / self.batch_size)

In [96]:
BATCH_SIZE = 8
IMAGE_SIZE = 320


In [97]:
folder = './rsna-pneumonia-detection-challenge/stage_2_train_images'
train_gen = Dataset(device, folder, train_filenames, pneumonia_locations, batch_size=BATCH_SIZE, image_size=IMAGE_SIZE, shuffle=True, augment=True, predict=False)
valid_gen = Dataset(device, folder, valid_filenames, pneumonia_locations, batch_size=BATCH_SIZE, image_size=IMAGE_SIZE, shuffle=False, predict=False)

In [98]:
#Print some Data
for i in range(len(train_gen)):
    imgs, msks = train_gen[i]
    

    print(i, imgs.shape,msks.shape)

    if i == 3:
        break

0 torch.Size([8, 1, 320, 320]) torch.Size([8, 1, 320, 320])
1 torch.Size([8, 1, 320, 320]) torch.Size([8, 1, 320, 320])
2 torch.Size([8, 1, 320, 320]) torch.Size([8, 1, 320, 320])
3 torch.Size([8, 1, 320, 320]) torch.Size([8, 1, 320, 320])


In [None]:
class conv_block(nn.Module):
    """
    Define the [convolution - batch normalization - activation] block 
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True,
                 bn_momentum=0.9, alpha_leaky=0.03):
        super(conv_block, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size,
                              stride=stride, padding=padding, bias=bias)
        self.bn = nn.BatchNorm2d(out_channels, eps=1e-05, momentum=bn_momentum)
        self.activ = nn.LeakyReLU(negative_slope=alpha_leaky)

    def forward(self, x):
        return self.activ(self.bn(self.conv(x)))
    

class conv_t_block(nn.Module):
    """
    Define the [convolution_transpose - batch normalization - activation] block 
    """

    def __init__(self, in_channels, out_channels, output_size=None, kernel_size=3, bias=True,
                 bn_momentum=0.9, alpha_leaky=0.03):
        super(conv_t_block, self).__init__()
        self.conv_t = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=2, padding=1, 
                                         bias=bias)
        self.bn = nn.BatchNorm2d(out_channels, eps=1e-05, momentum=bn_momentum)
        self.activ = nn.LeakyReLU(negative_slope=alpha_leaky)

    def forward(self, x, output_size):
        return self.activ(self.bn(self.conv_t(x, output_size=output_size)))   

In [99]:
class Net(nn.Module):
    '''
    Custom ResNet module
    '''

    def __init__(self):
        super(Net, self).__init__()
        
        self.down_1 = nn.Sequential(conv_block(in_channels=1, out_channels=64), conv_block(in_channels=64, out_channels=64))
        self.down_2 = nn.Sequential(conv_block(in_channels=64, out_channels=128), conv_block(in_channels=128, out_channels=128))
        self.down_3 = nn.Sequential(conv_block(in_channels=128, out_channels=256), conv_block(in_channels=256, out_channels=256))
        self.down_4 = nn.Sequential(conv_block(in_channels=256, out_channels=512), conv_block(in_channels=512, out_channels=512))
        self.down_5 = nn.Sequential(conv_block(in_channels=512, out_channels=512), conv_block(in_channels=512, out_channels=512))

        self.middle = nn.Sequential(conv_block(in_channels=512, out_channels=512), conv_block(in_channels=512, out_channels=512))
        self.middle_t = conv_t_block(in_channels=512, out_channels=256)

        self.up_5 = nn.Sequential(conv_block(in_channels=768, out_channels=512), conv_block(in_channels=512, out_channels=512))
        self.up_5_t = conv_t_block(in_channels=512, out_channels=256)
        self.up_4 = nn.Sequential(conv_block(in_channels=768, out_channels=512), conv_block(in_channels=512, out_channels=512))
        self.up_4_t = conv_t_block(in_channels=512, out_channels=128)
        self.up_3 = nn.Sequential(conv_block(in_channels=384, out_channels=256), conv_block(in_channels=256, out_channels=256))
        self.up_3_t = conv_t_block(in_channels=256, out_channels=64)
        self.up_2 = nn.Sequential(conv_block(in_channels=192, out_channels=128), conv_block(in_channels=128, out_channels=128))
        self.up_2_t = conv_t_block(in_channels=128, out_channels=32)
        self.up_1 = nn.Sequential(conv_block(in_channels=96, out_channels=64), conv_block(in_channels=64, out_channels=1))
        
    def forward(self, x):
        down1 = self.down_1(x)
        out = F.max_pool2d(down1, kernel_size=2, stride=2)

        down2 = self.down_2(out)
        out = F.max_pool2d(down2, kernel_size=2, stride=2)

        down3 = self.down_3(out)
        out = F.max_pool2d(down3, kernel_size=2, stride=2)

        down4 = self.down_4(out)
        out = F.max_pool2d(down4, kernel_size=2, stride=2)

        down5 = self.down_5(out)
        out = F.max_pool2d(down5, kernel_size=2, stride=2)

        out = self.middle(out)
        out = self.middle_t(out, output_size=down5.size())

        out = torch.cat([down5, out], 1)
        out = self.up_5(out)
        out = self.up_5_t(out, output_size=down4.size())

        out = torch.cat([down4, out], 1)
        out = self.up_4(out)
        out = self.up_4_t(out, output_size=down3.size())
        
        out = torch.cat([down3, out], 1)
        out = self.up_3(out)
        out = self.up_3_t(out, output_size=down2.size())
        
        out = torch.cat([down2, out], 1)
        out = self.up_2(out)
        out = self.up_2_t(out, output_size=down1.size())
        
        out = torch.cat([down1, out], 1)
        out = self.up_1(out)
        print("outshape ",out.shape)
        return out

In [100]:
def iou_loss(y_true, y_pred):
    '''
    Intersection-Over-Union Loss
    '''
    y_true = torch.reshape(y_true, [-1])
    y_pred = torch.reshape(y_pred, [-1])
    intersection = (y_true * y_pred).sum()
    score = (intersection + 1.) / (y_true.sum() + y_pred.sum() - intersection + 1.)
    return 1 - score

In [101]:
def iou_bce_loss(y_true, y_pred):
    '''
    Main loss function using:
        Binary Cross Entropy +
        Intersection-Over-Union Loss
    '''
    return 0.5 * F.binary_cross_entropy_with_logits(y_true, y_pred) + 0.5 * iou_loss(y_true, y_pred)

In [106]:
def mean_iou(y_true, y_pred, device):
    '''
    Mean-Intersection-Over-Union
    '''
    y_pred = torch.round(y_pred)
    intersect = (y_true * y_pred).sum(axis=[1, 2, 3])
    union = y_true.sum(axis=[1, 2, 3]) + y_pred.sum(axis=[1, 2, 3])
    smooth = torch.ones(intersect.shape).to(device)
    return ((intersect + smooth) / (union - intersect + smooth)).sum()

In [107]:
net = Net().to(device)

In [108]:
optimizer = optim.Adam(net.parameters(), lr=0.0001)

In [109]:
for batch_ndx, sample in enumerate(train_gen):
    x, y = sample

    optimizer.zero_grad()

    out = net(x)
    break;
    loss = iou_bce_loss(out, y)
    loss.backward()

    optimizer.step()

    iou = mean_iou(out, y, device)

    print("Batch_ndx:{0:5d}, Loss:{1:2.4f}, Mean-Intersection-Over-Union:{2:2.4f}"
            .format(batch_ndx, loss, iou))

outshape  torch.Size([8, 1, 320, 320])


In [72]:
folder = './rsna-pneumonia-detection-challenge/stage_2_train_images'+"/00436515-870c-4b36-a041-de91049b9ab4.dcm"

In [73]:
folder

'./rsna-pneumonia-detection-challenge/stage_2_train_images/00436515-870c-4b36-a041-de91049b9ab4.dcm'

In [74]:
img = pydicom.dcmread(folder).pixel_array
msk = np.zeros(img.shape)
#x, y, w, h = location
#msk[y:y+h, x:x+w] = 1

In [76]:
msk.shape

(1024, 1024)

In [61]:
l = pneumonia_locations["00436515-870c-4b36-a041-de91049b9ab4"]

In [62]:
l

[[264, 152, 213, 379], [562, 152, 256, 453]]

In [None]:
for location in :

In [77]:
for location in pneumonia_locations["00436515-870c-4b36-a041-de91049b9ab4"]:
    # add 1's at the location of the pneumonia
    x, y, w, h = location
    msk[y:y+h, x:x+w] = 1

In [84]:
msk[y:y+h, x:x+w].shape

(453, 256)

In [53]:
pneumonia_locations

{'00436515-870c-4b36-a041-de91049b9ab4': [[264, 152, 213, 379],
  [562, 152, 256, 453]],
 '00704310-78a8-4b38-8475-49f4573b2dbb': [[323, 577, 160, 104],
  [695, 575, 162, 137]],
 '00aecb01-a116-45a2-956c-08d2fa55433f': [[288, 322, 94, 135],
  [547, 299, 119, 165]],
 '00c0b293-48e7-4e16-ac76-9269ba535a62': [[306, 544, 168, 244],
  [650, 511, 206, 284]],
 '00f08de1-517e-4652-a04f-d1dc9ee48593': [[181, 184, 206, 506],
  [571, 275, 230, 476]],
 '0100515c-5204-4f31-98e0-f35e4b00004a': [[703, 416, 84, 77]],
 '010ccb9f-6d46-4380-af11-84f87397a1b8': [[652, 437, 161, 293],
  [301, 405, 141, 279]],
 '012a5620-d082-4bb8-9b3b-e72d8938000c': [[133, 613, 275, 275],
  [678, 427, 224, 340]],
 '0174c4bb-28f5-41e3-a13f-a396badc18bd': [[155, 182, 273, 501],
  [599, 220, 227, 508]],
 '019d950b-dd38-4cf3-a686-527a75728be6': [[229, 318, 250, 301],
  [604, 216, 196, 328]],
 '01a6eaa6-222f-4ea8-9874-bbd89dc1a1ce': [[141, 306, 225, 327],
  [609, 285, 236, 355]],
 '01a7353d-25bb-4ff8-916b-f50dd541dccf': [[214, 

In [85]:
h

453

In [86]:
w

256