# Histopathologic Cancer Detection with PyTorch

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, random_split, DataLoader

import torchvision.transforms as transforms
from torchvision import utils

from torchsummary import summary

import os
from PIL import Image, ImageDraw
import copy

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
toPIL = transforms.Compose([
            transforms.ToPILImage()
        ])

In [4]:
class CovidDatasetTrain(Dataset):
    def __init__(self):
        train_dir = "xray_dataset_covid19/train/"
        
        train_normal_dir = train_dir + "NORMAL/"
        train_pneumonia_dir = train_dir + "PNEUMONIA/"
        
        train_normal_fnames = os.listdir(train_normal_dir)
        train_pneumonia_fnames = os.listdir(train_pneumonia_dir)
        
        self.train_dataset = [[train_normal_dir + image, 0] for image in train_normal_fnames]
        self.train_dataset = self.train_dataset + [[train_pneumonia_dir + image, 1] for image in train_pneumonia_fnames]
        
        self.transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize(1024),
            transforms.CenterCrop(1024),
            transforms.ToTensor()
        ])
        
    def __len__(self):
        return(len(self.train_dataset))
    
    def __getitem__(self, idx):
        data = self.train_dataset[idx]
        image = Image.open(data[0])
        image = self.transform(image)
        return(image, data[1], idx)

In [5]:
class CovidDatasetTest(Dataset):
    def __init__(self):
        test_dir = "xray_dataset_covid19/test/"
        
        test_normal_dir = test_dir + "NORMAL/"
        test_pneumonia_dir = test_dir + "PNEUMONIA/"
        
        test_normal_fnames = os.listdir(test_normal_dir)
        test_pneumonia_fnames = os.listdir(test_pneumonia_dir)
        
        self.test_dataset = [[test_normal_dir + image, 0] for image in test_normal_fnames]
        self.test_dataset = self.test_dataset + [[test_pneumonia_dir + image, 1] for image in test_pneumonia_fnames]
        
        self.transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize(1024),
            transforms.CenterCrop(1024),
            transforms.ToTensor()
        ])
        
    def __len__(self):
        return(len(self.test_dataset))
    
    def __getitem__(self, idx):
        data = self.test_dataset[idx]
        image = Image.open(data[0])
        image = self.transform(image)
        return(image, data[1], idx)

In [6]:
train_df = CovidDatasetTrain()
covid_trainloader = DataLoader(train_df, batch_size=2, shuffle=True)

In [7]:
test_df = CovidDatasetTest()
covid_testloader = DataLoader(test_df, batch_size=2, shuffle=True)

In [8]:
for batch_idx, (data, label, _) in enumerate(covid_trainloader):
    print(data.size())
    print(label)
    break 
print(len(train_df))

torch.Size([2, 1, 1024, 1024])
tensor([0, 1])
148


In [9]:
for batch_idx, (data, label, _) in enumerate(covid_testloader):
    print(data.size())
    print(label)
    break
print(len(test_df))

torch.Size([2, 1, 1024, 1024])
tensor([1, 1])
40


In [10]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=2)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=2)
        
        self.num_flatten = 128 * 3 * 3
        
        self.fc1 = nn.Linear(self.num_flatten, 100)
        self.fc2 = nn.Linear(100, 1)
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2, 2)
    
        x = F.relu(self.conv4(x))
        x = F.max_pool2d(x, 2, 2)
        
        x = x.view(-1, self.num_flatten)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, 0.3)
        x = self.fc2(x)
        
        return torch.sigmoid(x)

In [11]:
classifier = CNN()
classifier

CNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2))
  (fc1): Linear(in_features=1152, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=1, bias=True)
)

In [12]:
classifier.to(device)

CNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2))
  (fc1): Linear(in_features=1152, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=1, bias=True)
)

In [13]:
summary(classifier, input_size=(1, 1024, 1024))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 511, 511]             160
            Conv2d-2         [-1, 32, 127, 127]           4,640
            Conv2d-3           [-1, 64, 31, 31]          18,496
            Conv2d-4            [-1, 128, 7, 7]          73,856
            Linear-5                  [-1, 100]         115,300
            Linear-6                    [-1, 1]             101
Total params: 212,553
Trainable params: 212,553
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 4.00
Forward/backward pass size (MB): 36.33
Params size (MB): 0.81
Estimated Total Size (MB): 41.14
----------------------------------------------------------------


In [14]:
def accuracy(labels, out):
    labels = torch.round(labels).squeeze(1)
    outs = out.squeeze(1)
    return(torch.sum(out==labels)/2.0*float(len(labels)))

In [15]:
loss_function = nn.BCELoss()

In [16]:
optimizer = torch.optim.SGD(classifier.parameters(), lr=0.001)

In [17]:
def train(model, n_epoches, loss_hist):    
    train_loss = 0
    test_loss = 0
    for epoch in range(1, n_epoches+1):
        model.train()
    
        epoch_train_loss = 0
        epoch_test_loss = 0
        for batch_idx, (data, label, _) in enumerate(covid_trainloader):
            
            data = data.to(device)
            label = label.type(torch.FloatTensor).unsqueeze(1).to(device)
        
            optimizer.zero_grad()
        
            preds = model(data)
            
            loss = loss_function(preds, label)
            loss.backward()
            optimizer.step()
            
            epoch_train_loss += loss.item()
        with torch.no_grad():
            model.eval()
                
                
            for batch_idx, (data, label, _) in enumerate(covid_testloader):
                data = data.to(device)
                label = label.type(torch.FloatTensor).unsqueeze(1).to(device)
                    
                pred = model(data)
                
                print(accuracy(pred, label))
                
                val_loss = loss_function(pred, label)
                epoch_test_loss += val_loss.item()
                
        train_loss += epoch_train_loss / len(covid_trainloader.dataset)
        test_loss += epoch_test_loss / len(covid_testloader.dataset)
        
        loss_hist["train loss"].append(train_loss)
        loss_hist["test loss"].append(test_loss)
        
        print("-------------------------------------------------")
        print('Epoch: {} Train mean loss: {:.8f}'.format(epoch, epoch_train_loss / len(covid_trainloader.dataset)))
        print('       {} Test  mean loss: {:.8f}'.format(epoch, epoch_test_loss / len(covid_testloader.dataset)))
        print("-------------------------------------------------")
    return loss_hist

In [18]:
loss_hist = {}
loss_hist["train loss"] = []
loss_hist["test loss"] = []

loss_hist = train(classifier, 10, loss_hist)
loss_hist

tensor(2., device='cuda:0')
tensor(2., device='cuda:0')
tensor(2., device='cuda:0')
tensor(0., device='cuda:0')
tensor(4., device='cuda:0')
tensor(2., device='cuda:0')
tensor(2., device='cuda:0')
tensor(0., device='cuda:0')
tensor(0., device='cuda:0')
tensor(2., device='cuda:0')
tensor(0., device='cuda:0')
tensor(2., device='cuda:0')
tensor(2., device='cuda:0')
tensor(4., device='cuda:0')
tensor(4., device='cuda:0')
tensor(0., device='cuda:0')
tensor(4., device='cuda:0')
tensor(4., device='cuda:0')
tensor(2., device='cuda:0')
tensor(2., device='cuda:0')
-------------------------------------------------
Epoch: 1 Train mean loss: 0.34747938
       1 Test  mean loss: 0.34708789
-------------------------------------------------
tensor(0., device='cuda:0')
tensor(0., device='cuda:0')
tensor(4., device='cuda:0')
tensor(2., device='cuda:0')
tensor(2., device='cuda:0')
tensor(4., device='cuda:0')
tensor(2., device='cuda:0')
tensor(2., device='cuda:0')
tensor(4., device='cuda:0')
tensor(2., dev

{'train loss': [0.34747938167404485,
  0.6947022680495236,
  1.0422129429675437,
  1.3891340047121048,
  1.7360204502537444,
  2.0830485498582996,
  2.4301149063819163,
  2.7770699355247856,
  3.124024987220764,
  3.4706133813471407],
 'test loss': [0.3470878854393959,
  0.6941897436976432,
  1.0407382503151892,
  1.388062787055969,
  1.7347552895545957,
  2.0817221000790593,
  2.428814698755741,
  2.775997300446033,
  3.1236026957631107,
  3.470506872236728]}