In [None]:
#CODE TAKEN DIRECTLY FROM
#https://github.com/vicely07/Pneumonet-A-Pytorch-Chest-Xray-Pneumonia-Detection/blob/main/Pytorch_Xray_Pneumonia_Detection_project.ipynb
#ALL CREDITS GO DIRECTLY TO THIS UPLOADER

In [2]:
###import packages for project

import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import copy
import time
import PIL
import scipy.ndimage as nd
import os
import sys

The more data, the better the model will learn. Hence, apply some data augmentation to generate different variations of the original data to increase the sample size for training, validation and testing process. This augmentation can be performed by defining a set of transforming functions in the torchvision module. The detailed codes are as following:

In [3]:
## data augmentation with torchvision.transforms

transformers = {'train_transforms' : transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
]),
'test_transforms' : transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
]),
'valid_transforms' : transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])
])}

In [4]:
trans = ['train_transforms','valid_transforms','test_transforms']

After defining the transformers, now we can use torchvision.datasets.ImageFolder module we load images from our dataset directory and apply the predefined transformers on them as following:

In [5]:
path = os.path.join(sys.path[0], "image_data_w_generative_data/")
#path = "/content/drive/My Drive/FB-Ai-Hackathon/pneumonia-pytorch-localization/Data/"
categories = ['train','val','test']
dset = {x : torchvision.datasets.ImageFolder(path+x, transform=transformers[y]) for x,y in zip(categories, trans)}

After refreshing your memory on the basics, we can start with this project using the COVID chest X-ray data. First, we need to initialize our model class by calling the nn.Module, which create a graph-like structure of our network. In particularly, as we mentioned earlier, the pretrained model of Resnet152 was used in our training process. This transfer learning give us a big advantage in retraining on Hence, we need to define our ResNet-152 in the init of nn.Module for transfer learning. Then after define the init function, we need to create a forward function as part of the requirement for Pytorch.

In [6]:
dataset_sizes = {x : len(dset[x]) for x in categories}

In [7]:
for x in categories:
  print('{}: {}'.format(x,dataset_sizes[x]))

train: 5973
val: 16
test: 624


In [8]:
num_threads = 0
dataloaders =  {x : torch.utils.data.DataLoader(dset[x], batch_size=16, shuffle=True, num_workers=num_threads)
               for x in categories} 

After refreshing your memory on the basics, we can start with this project using the COVID-19 & Pneumonia chest X-ray data. First, we need to initialize our model class by calling the nn.Module, which create a graph-like structure of our network. In particularly, as we mentioned earlier, the pretrained model of Resnet152 was used in our training process. This transfer learning give us a big advantage in retraining on Hence, we need to define our ResNet-152 in the init of nn.Module for transfer learning. Then after define the init function, we need to create a forward function as part of the requirement for Pytorch.

In [9]:
##Build model
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model = torchvision.models.resnet152(pretrained=True)
        self.classifier = nn.Sequential(
            nn.Linear(self.model.fc.in_features,2),
            nn.LogSoftmax(dim=1)
        )
        for params in self.model.parameters():
            params.requires_grad = False
        self.model.fc = self.classifier
        
    def forward(self, x):
        return self.model(x)
    
    def fit(self, dataloaders, num_epochs):
        loss_arr = []
        epoch_acc_arr = []
        train_on_gpu = torch.cuda.is_available()
        optimizer = optim.Adam(self.model.fc.parameters())
        scheduler = optim.lr_scheduler.StepLR(optimizer, 4)
        criterion = nn.NLLLoss()
        since = time.time()
        
        best_model_wts = copy.deepcopy(self.model.state_dict())
        best_acc =0.0
        if train_on_gpu:
            self.model = self.model.cuda()
        for epoch in range(1, num_epochs+1):
            print("epoch {}/{}".format(epoch, num_epochs))
            print("-" * 10)
            
            for phase in ['train','test']:
                if phase == 'train':
                    scheduler.step()
                    self.model.train()
                else:
                    self.model.eval()
                
                running_loss = 0.0
                running_corrects = 0.0
                
                for inputs, labels in dataloaders[phase]:
                    if train_on_gpu:
                        inputs = inputs.cuda()
                        labels = labels.cuda()
                    optimizer.zero_grad()
                    
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = self.model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)
                        
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()
                    
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)
                
                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]
                print("{} loss:  {:.4f}  acc: {:.4f}".format(phase, epoch_loss, epoch_acc))
                loss_arr.append(epoch_loss)
                epoch_acc_arr.append(epoch_acc)
                
                if phase == 'test' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(self.model.state_dict())
                # print(loss_arr)
                # print(epoch_acc_arr)
            

        
        time_elapsed = time.time() - since
        print('time completed: {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 600))
        print("best val acc: {:.4f}".format(best_acc))
        
        self.model.load_state_dict(best_model_wts)
        return self.model, loss_arr, epoch_acc_arr

In [11]:
# Calling the model and fit on training data:
model = Model()
model_ft, loss_arr, epoch_acc_arr = model.fit(dataloaders,10)




epoch 1/10
----------




train loss:  0.3225  acc: 0.8557
test loss:  0.4614  acc: 0.7740
epoch 2/10
----------
train loss:  0.2746  acc: 0.8867
test loss:  0.4851  acc: 0.7532
epoch 3/10
----------
train loss:  0.2461  acc: 0.8969
test loss:  0.4394  acc: 0.7901
epoch 4/10
----------
train loss:  0.2146  acc: 0.9096
test loss:  0.4567  acc: 0.7821
epoch 5/10
----------
train loss:  0.1993  acc: 0.9181
test loss:  0.5166  acc: 0.7804
epoch 6/10
----------
train loss:  0.2063  acc: 0.9144
test loss:  0.4839  acc: 0.7901
epoch 7/10
----------
train loss:  0.2131  acc: 0.9094
test loss:  0.4673  acc: 0.7853
epoch 8/10
----------
train loss:  0.2034  acc: 0.9173
test loss:  0.5129  acc: 0.7869
epoch 9/10
----------
train loss:  0.2040  acc: 0.9188
test loss:  0.4946  acc: 0.7853
epoch 10/10
----------
train loss:  0.2000  acc: 0.9203
test loss:  0.5358  acc: 0.7708
time completed: 10m 11s
best val acc: 0.7901


In [13]:

torch.save(model.state_dict(), 'classifier_model_dicts/classifier_model_20_percent_generative')

np.save("training_losses_time_series/Classifier_20_percent_generative_loss_arr", loss_arr, allow_pickle= True)
empt = []
for element in epoch_acc_arr:
    empt.append(float(element))
np.save("training_losses_time_series/Classifier_20_percent_generative_acc_arr", empt, allow_pickle= True)


When we want to load this trained weights back to the model for prediction on new data, we just need to follow these lines of code:

In [104]:
# Loading the saved model for prediction
state_dict = torch.load("classifier_model_dicts/classifier_model_20_percent_generative")
model.load_state_dict(state_dict, strict=False)
model_ft = model.model
model_ft = model_ft.eval()

In [None]:
loader = transforms.Compose([transforms.Resize((224,224)), transforms.ToTensor(),
                            transforms.Normalize([0.5,0.5,0.5],[0.5,0.5,0.5])])
def image_loader(image_name):
    image = PIL.Image.open(image_name).convert("RGB")
    image = loader(image).float()
    image = image.unsqueeze(0)
    return image

After training on the data, we can now test the performance of our model using the accuracy metrics. Let's see what is the accuracy of our model on the training set:

In [15]:

def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    model.to("cuda")

    with torch.no_grad():
        for x, y in loader:
            x = x.to("cuda")
            y = y.to("cuda")
            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
        print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}') 
    #model.train()

#check_accuracy(dataloaders['train'], model)

In [16]:
state_dict = torch.load("classifier_model_dicts/classifier_model_base")
model.load_state_dict(state_dict, strict=False)
model_ft = model.model
model_ft = model_ft.eval()

check_accuracy(dataloaders['test'], model)

Got 500 / 624 with accuracy 80.13


In [17]:
state_dict = torch.load("classifier_model_dicts/classifier_model_10_percent_generative")
model.load_state_dict(state_dict, strict=False)
model_ft = model.model
model_ft = model_ft.eval()

check_accuracy(dataloaders['test'], model)

Got 507 / 624 with accuracy 81.25


In [18]:
state_dict = torch.load("classifier_model_dicts/classifier_model_20_percent_generative")
model.load_state_dict(state_dict, strict=False)
model_ft = model.model
model_ft = model_ft.eval()

check_accuracy(dataloaders['test'], model)

Got 493 / 624 with accuracy 79.01


In [20]:
state_dict = torch.load("classifier_model_dicts/classifier_model_all_generative")
model.load_state_dict(state_dict, strict=False)
model_ft = model.model
model_ft = model_ft.eval()

check_accuracy(dataloaders['test'], model)

Got 410 / 624 with accuracy 65.71
