# COVID19 Action-Radiology-CXR
## Gursharan Ahir
## 16EC10021

In [0]:
%matplotlib inline
import numpy as np
import pandas as pd
import os 
import math
import shutil 
import torchvision.transforms.functional as tf
from numpy import argmax
from skimage.io import imread
from skimage.transform import resize
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn. preprocessing import LabelEncoder
from sklearn. preprocessing import OneHotEncoder
import matplotlib.pyplot as plt

# Data Processing

In [9]:
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [0]:
current_dir="./drive/My Drive/Datasetsnew/Covid19action-radiology-CXR_v1.1"

In [0]:
mode=['Test','Train']

for i in mode:
    df=pd.read_csv(str(current_dir)+"/"+str(i)+"_Combined.csv")
    Covid_19=df[(df['COVID-19']==1)]
    Other=df[(df['COVID-19']==0)]
    vars()["Covid_filename_"+str(i)]=Covid_19['Image Name'].to_numpy()
    vars()["Others_filename_"+str(i)]=Other['Image Name'].to_numpy()
    

In [12]:
df

Unnamed: 0,Image Name,Data Source,Partition,Non-Pneumonia,Other Pneumonia,COVID-19
0,CASE58_XRAY_1.jpeg,Source-2,1,0,0,1
1,case2_xray_1.jpeg,Source-2,1,0,0,1
2,case2_xray_2.jpeg,Source-2,1,0,0,1
3,case8_xray_1.jpeg,Source-2,1,0,0,1
4,case8_xray_2.jpeg,Source-2,1,0,0,1
...,...,...,...,...,...,...
228933,aspiration-pneumonia-5-day27.jpg,Source-5,4,0,1,0
228934,pneumocystis-jirovecii-pneumonia-3-1.jpg,Source-5,4,0,1,0
228935,pneumocystis-jirovecii-pneumonia-3-2.jpg,Source-5,4,0,1,0
228936,pneumocystis-jirovecii-pneumonia-3-3.jpg,Source-5,4,0,1,0


In [0]:
# Organizing the data for easy data loading
ty=['Covid','Others']
ph=['Train','Test']

for name in ph:
        
    for i in ty:
        dirname=current_dir+'/COVID/preprocessed/'+name+'/'+str(i)
        os.makedirs(dirname)
        
        for j in range(1,8):
            if j==3 or j==4:
                continue
            
            path=str(current_dir)+'/images/Source'+str(j)
            
            for k in range(len(vars()[str(i)+"_filename_"+str(name)])):        
                if os.path.exists(path+'/'+str(vars()[str(i)+"_filename_"+str(name)].item(k))):                   
                    shutil.copy(path+'/'+str(vars()[str(i)+"_filename_"+str(name)].item(k)), dirname)        
    

# Training

In [0]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
import time

In [36]:
set_gpu= torch.cuda.is_available()
if set_gpu:
    print('cuda is available')

cuda is available


In [0]:
data_dir = current_dir+"/COVID/preprocessed"
model_name = "densenet"
num_classes = 2  
batch_size = 8      
num_epochs = 50    

# feature_extract: False - To finetune whole model, True- To finetune the end reshaped layers
feature_extract = False

In [0]:
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated(device=None)
torch.cuda.reset_max_memory_cached(device=None)

In [0]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False    


In [0]:
# Training and validating the model

def model_train(model, dataloaders, criterion, optimizer, num_epochs, is_inception=False):
    start = time.time()
    print("Epoch    Train_acc   Train_loss    Val_acc    Val_loss")
        
    tr_acc=[]
    vl_acc=[]
    tr_loss=[]
    vl_loss=[]

    best_acc = 0.0


    for epoch in range(num_epochs):
        print('-' * 50)
       
        # Each epoch has a training and validation phase
        for phase in ['Train', 'Test']:
            if phase == 'Train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
 
                if(set_gpu):
                    inputs=inputs.cuda()
                    labels=labels.cuda()

                # zero the parameter gradients
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'Train'):
                    
                    # Feed Forward Data
                    outputs = model(inputs)
                    
                    # CE Loss calculated
                    loss = criterion(outputs, labels)

                    # The class with max probability taken as the predicted label
                    _, preds = torch.max(outputs, 1)
             
                    if phase == 'Train':
                        loss.backward()
                        optimizer.step()

                # Running loss and correct predictions calculated
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)
            
            #Loss and Accuracy for each epoch (average of all)
            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            
            if phase =='Train':
                train_acc=epoch_acc
                train_loss=epoch_loss
                tr_acc.append(epoch_acc*100.0)
                tr_loss.append(epoch_loss)
                
                
            if phase == 'Test':
                val_acc=epoch_acc
                val_loss=epoch_loss
                vl_acc.append(epoch_acc*100.0)
                vl_loss.append(epoch_loss)
                if(epoch_acc>best_acc):
                    best_acc=epoch_acc
                print("  {}        {:.3f}       {:.3f}      {:.3f}      {:.3f}".format(epoch+1,train_acc*100.0,train_loss,val_acc*100.0,val_loss))


    elapsed = time.time() - start
    print('Training complete in {:.0f}m {:.0f}s'.format(elapsed // 60, elapsed % 60))
    print('Best Validation Accuracy:{:.4f} '.format(best_acc*100.0))
 
    return model, tr_acc, tr_loss, vl_acc,vl_loss


In [0]:
# The model architecture could be chosen from the pytorch models

def initialize(model_name, num_classes, feature_extract, use_pretrained=True):
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        """ Resnet18
        """
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        """ Alexnet
        """
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "vgg":
        """ VGG11_bn
        """
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        """ Squeezenet
        """
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        """ Densenet
        """
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft, input_size

# Initialize the model for this run
model_ft, input_size = initialize(model_name, num_classes, feature_extract, use_pretrained=False)

In [42]:
# Data augmentation and normalization of images

data_transforms = {
    'Train': transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'Test': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

print("Initializing Datasets and Dataloaders...")

# Create train and val datasets
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['Train', 'Test']}

# Create train and val dataloaders
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in ['Train', 'Test']}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Initializing Datasets and Dataloaders...


In [0]:
# Send the model to GPU
if(set_gpu):
  model_ft = model_ft.cuda()

params_to_update = model_ft.parameters()

if feature_extract:
    params_to_update = []
    for name,param in model_ft.named_parameters():
        if param.requires_grad == True:
            params_to_update.append(param)

# Optimzer set as Adam
optimizer_ft = optim.Adam(params_to_update, lr=0.001)

In [0]:
# Set the loss function as CE loss
criterion = nn.CrossEntropyLoss()

# Train and evaluate
def select_model(dataloaders_dict, criterion, num_epochs, l_rate):
    print("Learning Rate=",l_rate)
    optimizer_ft = optim.Adam(model_ft.classifier.parameters(), lr=l_rate)
    model, tr_acc, tr_loss, vl_acc, vl_loss  = model_train(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))
   
    return model, tr_acc, tr_loss, vl_acc,vl_loss

In [54]:
# Learning Rate
lr=0.006

tr_acc=[]
vl_acc=[]

# Commence Training and Validation
model_ft, tr_acc, tr_loss, vl_acc, vl_loss = select_model(dataloaders_dict, criterion, num_epochs,lr)
time.sleep(60)


Learning Rate= 0.006
Epoch    Train_acc   Train_loss    Val_acc    Val_loss
--------------------------------------------------
  1        72.131       0.106      60.000      0.155
--------------------------------------------------
  2        66.667       0.086      74.286      0.100
--------------------------------------------------
  3        72.678       0.075      65.714      0.101
--------------------------------------------------
  4        75.410       0.068      62.857      0.107
--------------------------------------------------
  5        72.678       0.074      71.429      0.078
--------------------------------------------------
  6        69.399       0.083      71.429      0.087
--------------------------------------------------
  7        68.306       0.090      74.286      0.086
--------------------------------------------------
  8        74.317       0.077      60.000      0.111
--------------------------------------------------
  9        68.306       0.091      68.571

In [56]:
# Model Space
total_params=0
for param_tensor in model_ft.state_dict():
    size=model_ft.state_dict()[param_tensor].shape
    pars=1
    for i in range(len(size)):
      pars*=size[i]
    total_params+=pars

print("Total Model Parameters:",total_params)

Total Model Parameters: 7039675
