## 1 . Import The Required Pakages

In [27]:
#Import the required packages  
import torch
import torchvision 
import torchvision.transforms as transforms #Image processing
from torch.utils.data import DataLoader 
from torchvision.datasets import ImageFolder
from torch.optim import Adam
from torch.autograd import Variable 
import os  
import glob
import pathlib 
import torch.nn as nn 
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

## 2 .Chest X-Ray Images (Pneumonia) Dataset

It is a validated dataset from Mendeley Data containing 5863 X-Ray images labeled with two categories: Normal/Pneumonia.The dataset is organized into 3 folders (train, test, val) and contains subfolders for each image category (Pneumonia/Normal). There are 5,863 X-Ray images (JPEG) and 2 categories (Pneumonia/Normal)<a href="https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia"> Chest X-Ray Images</a> .

In [3]:
#Checking for device 
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## 2. Data Preprocessing:


Inorder to preprocess the data we create a transform function which is a compisation  of many transforms function provided by pytorch.<br>
<b>Transform Function</b><br>
<ul><li>Input :an PIL image.</li>
<li>Output : Transformed version.</li></ul>

<ol>
  <p><b>Transforms used:</b></p>
  <li><code>transforms.Resize()</code>Resize the image because we need all images to be in the same size.</li>
  <li><code>transforms.ToTensor()</code>Convert all the image to PyTorch tensors.</li>
  <li><code>transforms.Normalize</code>Normalize the images using the mean and std of the dataset.</li>
</ol>
We combine these transforms to pipeline with  <code>Transforms.Compose</code>, where it clubs all the transforms provided to it, and run them into sequence. 

In [4]:

transform = transforms.Compose(
    #1.Resize the image to have the same size                     
    [transforms.Resize((150,150)),
    #2.Transform the images from numpy array to Tensor
     transforms.ToTensor(),#change the pixel range for each color chanel from 0-255 to 0-1
    #3.Normalize the images using the mean and std of the dataset
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

## 2.Dataloading
Since the train and testing data are disributed in different folders per lable, we used <b>ImageFolder</b> class to load the data.<br>
<ol>
    <p><b>ImageFolder Parameters</b></p>
    <li>Root (string) – Root directory path.</li>
        <li>Transform Function.</li>

</ol><br>
After loading the dataset, we have to pass it to the <code>Dataloader</code> class for parallelizing the data loading process with the support of automatic batching.We pass the following constructor :
<ul>
   <li>Dataset _ train_dataset to train_dataloader,test_dataloader to test_dataloader. </li>
<li>Batch size – Refers to the number of samples in each batch.</li>
<li>Shuffle – Whether you want the data to be reshuffled or not.</li>
<li>collate_fn </li>
</ul> 


In [5]:
##It causes a problem
def collate_fn( batch):
        new_batch = []
        for idx in range(len(batch)):
            sample = batch[idx]
            new_batch.append(sample)

        # scalar output
        sample_batch = np.array(new_batch)
        sample_batch = torch.FloatTensor(sample_batch)
        sample_batch.squeeze_(2)
        print(sample_batch)
        return sample_batch

In [6]:
#path to the train and test data directory 
train_data_path = "chest_xray/train"
test_data_path = "chest_xray/test"
#DataLoader for training and testing data: we feed the data in the form of the dataloader
#Test and train datasets 
train_dataset = ImageFolder(train_data_path,transform= transform)
test_dataset = ImageFolder(test_data_path,transform= transform)

train_dataloader = DataLoader(train_dataset
                  , batch_size=255, shuffle=True)
test_dataloader = DataLoader(test_dataset
                  , batch_size=255, shuffle=True)

In [50]:
#calculating the size of training and testing images
train_count=len(glob.glob(train_data_path+'/**/*.jpeg'))
test_count=len(glob.glob(test_data_path+'/**/*.jpeg'))
print("Train dataset size : ",train_count)
print("Test dataset size : ",test_count)

Train dataset size :  5216
Test dataset size :  624


In [8]:
#print the classes 
test_dataset.class_to_idx

{'NORMAL': 0, 'PNEUMONIA': 1}

In [9]:
#The classes 
root = pathlib.Path(train_data_path)
classes = sorted([j.name.split('/')[-1] for j in root.iterdir()])
#there is two classes 'NORMAL', 'PNEUMONIA'.
classes 

['NORMAL', 'PNEUMONIA']

In [10]:
from PIL import Image
def is_grey_scale(img_path):
    img = Image.open(img_path).convert('RGB')
    w, h = img.size
    for i in range(w):
        for j in range(h):
            r, g, b = img.getpixel((i,j))
            if r != g != b: 
                return False
    return True

print(is_grey_scale(test_data_path+"/NORMAL/IM-0013-0001.jpeg"))

True


## 3.Define a Convolutional Neural Network
Our model consist of:<br>
<ol>
    <li>Three Convolutional layers : create a feature map to predict the class probabilities for each feature by applying a filter that scans the whole image, few pixels at a time.<br>
    <ul><li>The first argument to it is the number of input channel: in our case it is a 3 input channel.</li>
        <li>The second argument is the number of output channels</li>
        <li>The kernel_size argument is the size of the convolutional filter.</li>
        <li>Padding argument: we calculate using this formula "((w-f+2P)/s) +1",where (w:number of the input, f:filter size,p is the padding and s is the stride </li>
        <br></ul>
    </li>
    <li>Batch normalization  functions: as reported in <a href="https://arxiv.org/pdf/1502.03167.pdf ">Batch Normalization: Accelerating Deep Network Training byReducing Internal Covariate Shift</a> , "Merely adding Batch Normalization to a state-of-the-art image classification model yields a substantial speedup in training."</li>
        
    
   <li>ReLU functions : The rectified linear activation function or ReLU for short is a piecewise linear function that will output the input directly if it is positive, otherwise, it will output zero.We use it here becuse it overcomes the vanishing gradient problem, allowing models to learn faster and perform better, which is caused when using the sigmoid and hyperbolic tangent activation functions.</li>
    <li> Max pooling operation,it has two arguments: 
    <ul><li> Pooling size, which is 2 x 2 and hence the argument is 2. </li>
        <li>Stride: is the number of pixels shifts over the input matrix.</li>
        </ul></li>
    
  <li>two fully connected layers are created.</li>
</ol>


## ForwardFunction 
After defining the layers.The next step is to define how the data flows through these layers when performing the forward pass through the network, by defining the <b>forward</b> function,it has <b>x</b>: which is the data that is to be passed through the model (i.e. a batch of data).  This output is then fed into the following layer and so on. Note, after self.layer2, we apply a reshaping function to out, which flattens the data dimensions from 75 x 75 x 64 into 360000 x 1. Next, the dropout is applied followed by the two fully connected layers, with the final output being returned from the function.


In [15]:
#Define the CNN model

class CNN(nn.Module): 
    def __init__(self,num_classes = 2): #['NORMAL', 'PNEUMONIA']
        super(CNN,self).__init__()
        
        #Output size after convolution filter
        #((w-f+2P)/s) +1
    
     
        #Input shape= (256,3,150,150) -(batch size, number of chanels,hight,wiedth)
        self.conv1=nn.Conv2d
        (in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1) 
        self.bn1=nn.BatchNorm2d(num_features=12)                                 
        self.relu1=nn.ReLU()                                                    
        self.pool=nn.MaxPool2d(kernel_size=2,stride = 2)                        

        #Reduce the image size be factor 2
        #Shape= (256,12,75,75)
        
        
        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        self.relu2=nn.ReLU()
        
        
        
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        self.bn3=nn.BatchNorm2d(num_features=32)
        self.relu3=nn.ReLU()
        #Shape= (256,32,75,75)
        
        
        self.fc1=nn.Linear(in_features=75 * 75 * 32,out_features=84)
        self.fc3 = nn.Linear(84, 2)

        
        
        
        #Feed forwad function
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
            
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
            
            
            #Above output will be in matrix form, with shape (256,32,75,75)
            
        output=output.view(-1,32*75*75)
            
            
        output=self.fc1(output)
            
        return output

## 5. Define an Optimizer, a Loss Function and the Hyperparameters 

First, we define the hyperparameters of the training.
<ol>
    <li><b>The learning rate:</b> reflects how much the model is updated per batch.If it is too small, the training proceeds slowly.If it's too large, the weights will be adjusted too much and miss the true minimum loss, or even become unstable.</li>
    <li><b>Batch size:</b> The batch size is a hyperparameter that defines the number of samples to work through before updating the internal model parameters.</li>
    <li>
      <b>Epoch size</b>: The number of epochs is a hyperparameter that defines the number times that the learning algorithm will work through the entire training dataset.</li>
    

In [16]:
#hyperparameters 

batch_size = 255           #feed the batch size according to the cpu or gpu memory
lr = 0.0001                # Define a learning rate.
num_epochs = 10            # Maximum training epochs

Before we train the model, we have to first create an instance of our CNN class, and define our loss function and optimizer.<br>
We chose the Cross-entropy loss as a loss function, since we deal with a classification problem. And as optimizer Adam optimizer function. <br>
<b>Cross-entropy loss function</b>: measures the performance of a classification model whose output is a probability value between 0 and 1. Cross-entropy loss increases as the predicted probability diverges from the actual label. <br>
<b>Adam optimizer function:</b> we pass the model parameters and the learning rate.

In [17]:
#create CNN instance
model=CNN().to(device)
# Loss and optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)



## Training the Model

In [18]:
#Model training and saving best model
loss_list = []
acc_list = []
n_total_step =len(train_dataloader)
for epoch in range(num_epochs):    # loop over the dataset multiple times(num_epoch times)

    
    #Evaluation and training on training dataset
    model.train()
   
    
    for i, (images,labels) in enumerate(train_dataloader): #loop over the train_loader to get the different batches from the dataset
       
    #push the images,labels to the device to get the gpu support if it is available
        images = images.to(device)
        labels = labels.to(device)
            
        
        
    #forward pass and create the loss  
        
        outputs=model(images)         #the model  predict the output 
        loss=loss_function(outputs,labels)  
        
    #Backward pass and optimize
        optimizer.zero_grad()         #Empty the gradients 
        loss.backward()
        optimizer.step()
        
    # Track the accuracy
        total = labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        correct = (predicted == labels).sum().item()
        acc_list.append(correct / total)
        
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, i + 1, n_total_step, loss.item(),
                          (correct / total) * 100))
print("Finished Training")

       

Epoch [1/10], Step [1/21], Loss: 4.7874, Accuracy: 0.00%
Epoch [1/10], Step [2/21], Loss: 0.7041, Accuracy: 51.37%
Epoch [1/10], Step [3/21], Loss: 1.0373, Accuracy: 76.08%
Epoch [1/10], Step [4/21], Loss: 0.8068, Accuracy: 74.51%
Epoch [1/10], Step [5/21], Loss: 0.3660, Accuracy: 85.49%
Epoch [1/10], Step [6/21], Loss: 0.4615, Accuracy: 80.78%
Epoch [1/10], Step [7/21], Loss: 0.2203, Accuracy: 92.55%
Epoch [1/10], Step [8/21], Loss: 0.1597, Accuracy: 92.16%
Epoch [1/10], Step [9/21], Loss: 0.2793, Accuracy: 89.80%
Epoch [1/10], Step [10/21], Loss: 0.2262, Accuracy: 90.59%
Epoch [1/10], Step [11/21], Loss: 0.2640, Accuracy: 90.20%
Epoch [1/10], Step [12/21], Loss: 0.1688, Accuracy: 92.55%
Epoch [1/10], Step [13/21], Loss: 0.1635, Accuracy: 93.33%
Epoch [1/10], Step [14/21], Loss: 0.3535, Accuracy: 87.06%
Epoch [1/10], Step [15/21], Loss: 0.1740, Accuracy: 92.55%
Epoch [1/10], Step [16/21], Loss: 0.2353, Accuracy: 90.20%
Epoch [1/10], Step [17/21], Loss: 0.1216, Accuracy: 94.51%
Epoch [

Epoch [7/10], Step [15/21], Loss: 0.0170, Accuracy: 100.00%
Epoch [7/10], Step [16/21], Loss: 0.0203, Accuracy: 99.61%
Epoch [7/10], Step [17/21], Loss: 0.0160, Accuracy: 100.00%
Epoch [7/10], Step [18/21], Loss: 0.0141, Accuracy: 100.00%
Epoch [7/10], Step [19/21], Loss: 0.0152, Accuracy: 99.61%
Epoch [7/10], Step [20/21], Loss: 0.0125, Accuracy: 99.61%
Epoch [7/10], Step [21/21], Loss: 0.0127, Accuracy: 100.00%
Epoch [8/10], Step [1/21], Loss: 0.0122, Accuracy: 100.00%
Epoch [8/10], Step [2/21], Loss: 0.0064, Accuracy: 100.00%
Epoch [8/10], Step [3/21], Loss: 0.0106, Accuracy: 100.00%
Epoch [8/10], Step [4/21], Loss: 0.0108, Accuracy: 100.00%
Epoch [8/10], Step [5/21], Loss: 0.0121, Accuracy: 99.61%
Epoch [8/10], Step [6/21], Loss: 0.0109, Accuracy: 100.00%
Epoch [8/10], Step [7/21], Loss: 0.0099, Accuracy: 100.00%
Epoch [8/10], Step [8/21], Loss: 0.0128, Accuracy: 100.00%
Epoch [8/10], Step [9/21], Loss: 0.0183, Accuracy: 99.61%
Epoch [8/10], Step [10/21], Loss: 0.0143, Accuracy: 10

In [51]:
##Save the model
PATH = './best_checkpoint.model'
torch.save(model.state_dict(), PATH)

## 6.Evaluation 

We use three evaluation metrices :

   <ol>
    <li>Accuracy:</li> Accuracy is the quintessential classification metric. It is pretty easy to understand. And easily suited for binary as well as a multiclass classification problem.
        <li>Precision-Recall is a useful measure of success of prediction when the classes are very imbalanced. In information retrieval, precision is a measure of result relevancy, while recall is a measure of how many truly relevant results are returned.</li><ul>
    <li>Precision (P) is defined as the number of true positives (TP) over the number of true positives(TP) plus the number of false positives (FP): $$\frac{TP}{TP+FP}$$. </li>
    <li>Recall (R) is defined as the number of true positives (TP) over the number of true positives(TP) plus the number of false negatives (FN): $$\frac{TP}{TP+FN}$$.</li>
    </ul>
          
   </ol>

In [29]:

# Test the model

# In test phase, we don't need to compute gradients (for memory efficiency)
model.eval()
with torch.no_grad():
    y_true = [] #use it to calculate the Precision and Recall
    y_predicated =[] #use it to calculate the Precision and Recall
    correct = 0
    total = 0
    for images, labels in test_dataloader:
        labels = labels.to(device)
        outputs = model(images)
       

        _, predicted = torch.max(outputs.data, 1)
        
        y_true.append(labels)
        y_predicated.append(predicted)
        
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    # accuracy: (tp + tn) / (p + n)
    
    print('Accuracy of the network on the 624 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
#torch.save(model.state_dict(), 'best_checkpoint.model')


Accuracy of the network on the 624 test images: 74.19871794871794 %


In [47]:
##flatten the array of tensors in order to calculate preccision and recall scores
y_true_flatten =[]
y_predicated_flatten =[]
for i in y_true:
    for j in i:
        y_true_flatten.append(j)
for i in y_predicated:
    for j in i:
        y_predicated_flatten.append(j)

In [48]:
##### precision tp / (tp + fp)
precision = precision_score(y_true_flatten, y_predicated_flatten)
print('Precision: %f' % precision)

Precision: 0.709324


In [49]:
# recall: tp / (tp + fn)
recall = recall_score(y_true_flatten, y_predicated_flatten)
print('Recall: %f' % recall)

Recall: 0.994872
