# Histopathologic Cancer Detection with PyTorch

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, random_split, DataLoader

import torchvision.transforms as transforms
from torchvision import utils

from torchsummary import summary

import os
from PIL import Image, ImageDraw
import copy

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
toPIL = transforms.Compose([
            transforms.ToPILImage()
        ])

In [4]:
class CovidDatasetTrain(Dataset):
    def __init__(self):
        train_dir = "xray_dataset_covid19/train/"
        
        train_normal_dir = train_dir + "NORMAL/"
        train_pneumonia_dir = train_dir + "PNEUMONIA/"
        
        train_normal_fnames = os.listdir(train_normal_dir)
        train_pneumonia_fnames = os.listdir(train_pneumonia_dir)
        
        self.train_dataset = [[train_normal_dir + image, 0] for image in train_normal_fnames]
        self.train_dataset = self.train_dataset + [[train_pneumonia_dir + image, 1] for image in train_pneumonia_fnames]
        
        self.transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize(1024),
            transforms.CenterCrop(1024),
            transforms.ToTensor()
        ])
        
    def __len__(self):
        return(len(self.train_dataset))
    
    def __getitem__(self, idx):
        data = self.train_dataset[idx]
        image = Image.open(data[0])
        image = self.transform(image)
        return(image, (data[1]), idx)

In [5]:
class CovidDatasetTest(Dataset):
    def __init__(self):
        test_dir = "xray_dataset_covid19/test/"
        
        test_normal_dir = test_dir + "NORMAL/"
        test_pneumonia_dir = test_dir + "PNEUMONIA/"
        
        test_normal_fnames = os.listdir(test_normal_dir)
        test_pneumonia_fnames = os.listdir(test_pneumonia_dir)
        
        self.test_dataset = [[test_normal_dir + image, 0] for image in test_normal_fnames]
        self.test_dataset = self.test_dataset + [[test_pneumonia_dir + image, 1] for image in test_pneumonia_fnames]
        
        self.transform = transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize(1024),
            transforms.CenterCrop(1024),
            transforms.ToTensor()
        ])
        
    def __len__(self):
        return(len(self.test_dataset))
    
    def __getitem__(self, idx):
        data = self.test_dataset[idx]
        image = Image.open(data[0])
        image = self.transform(image)
        return(image, (data[1]), idx)

In [6]:
train_df = CovidDatasetTrain()
covid_trainloader = DataLoader(train_df, batch_size=10, shuffle=True)

In [7]:
test_df = CovidDatasetTest()
covid_testloader = DataLoader(test_df, batch_size=10, shuffle=True)

In [8]:
for batch_idx, (data, label, _) in enumerate(covid_trainloader):
    print(data.size())
    print(label)
    break 
print(len(train_df))

torch.Size([10, 1, 1024, 1024])
tensor([1, 1, 1, 1, 0, 1, 1, 1, 1, 1])
148


In [9]:
for batch_idx, (data, label, _) in enumerate(covid_testloader):
    print(data.size())
    print(label)
    break
print(len(test_df))

torch.Size([10, 1, 1024, 1024])
tensor([0, 1, 0, 1, 0, 1, 0, 0, 0, 0])
40


In [10]:
def accuracy(labels, out):
    return(torch.sum(out==labels)/float(len(labels)))

In [11]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3)
        
        self.num_flatten = 128 * 62 * 62
        
        self.fc1 = nn.Linear(self.num_flatten, 1000)
        self.fc2 = nn.Linear(1000, 2)
    
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2, 2)
    
        x = F.relu(self.conv4(x))
        x = F.max_pool2d(x, 2, 2)
        
        x = x.view(-1, self.num_flatten)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, 0.3)
        x = self.fc2(x)
        
        return F.softmax(x, dim=1)

In [12]:
model = CNN()
model

CNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=492032, out_features=1000, bias=True)
  (fc2): Linear(in_features=1000, out_features=2, bias=True)
)

In [13]:
model.to(device)

CNN(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=492032, out_features=1000, bias=True)
  (fc2): Linear(in_features=1000, out_features=2, bias=True)
)

In [14]:
summary(model, input_size=(1, 1024, 1024))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1       [-1, 16, 1022, 1022]             160
            Conv2d-2         [-1, 32, 509, 509]           4,640
            Conv2d-3         [-1, 64, 252, 252]          18,496
            Conv2d-4        [-1, 128, 124, 124]          73,856
            Linear-5                 [-1, 1000]     492,033,000
            Linear-6                    [-1, 2]           2,002
Total params: 492,132,154
Trainable params: 492,132,154
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 4.00
Forward/backward pass size (MB): 236.78
Params size (MB): 1877.34
Estimated Total Size (MB): 2118.12
----------------------------------------------------------------


In [15]:
loss_function = nn.CrossEntropyLoss()

In [16]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [17]:
def train(model, n_epoches, loss_hist):    
    for epoch in range(1, n_epoches+1):
        model.train()
    
        train_loss = 0
        for batch_idx, (data, label, _) in enumerate(covid_trainloader):
            data = data.to(device)
            label = label.to(device)
        
            optimizer.zero_grad()
        
            preds = model(data)
            pred, _ = torch.max(preds, dim=1)
            print(pred, label)
            
            loss = loss_function(pred, label)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
        
            model.eval()
        
            with torch.no_grad():
                test_loss = 0
                for batch_idx, (data, label, _) in enumerate(covid_testloader):
                    data = data.to(device)
                    
                    pred = model(data)
                    
                    loss = loss_function(pred, label)
                    test_loss += loss.item()
                    
            if batch_idx%25==0:
                print("Batch no. finished in Epoch: ", batch_idx)
        
        loss_hist["train loss"].append(train_loss)
        loss_hist["test loss"].append(test_loss)
        
        print("-------------------------------------------------")
        print('Epoch: {} Train mean loss: {:.8f}'.format(epoch, train_loss / len(covid_trainloader.dataset)))
        print('       {} Test  mean loss: {:.8f}'.format(epoch, test_loss / len(covid_testloader.dataset)))
        print("-------------------------------------------------")
    return loss_hist

In [18]:
loss_hist = {}
loss_hist["train loss"] = []
loss_hist["test loss"] = []

loss_hist = train(model, 10, loss_hist)
loss_hist

tensor([0.5025, 0.5001, 0.5024, 0.5008, 0.5016, 0.5021, 0.5034, 0.5031, 0.5017,
        0.5039], device='cuda:0', grad_fn=<MaxBackward0>) tensor([0, 0, 1, 0, 1, 0, 1, 1, 1, 0], device='cuda:0')


IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)