# Second Checkpoint
### Predicting Pneumonia from X-Ray image

Jimena Salinas Valdespino, Santiago Segovia Baquero, Stephania Tello Zamudio, Ivanna Rodríguez Lobo

In [22]:
import os
import pandas as pd

import torch
import torch.nn as nn # basic building block for neural neteorks
import torch.nn.functional as F # import convolution functions like Relu
import torch.optim as optim # optimzer

from torch.utils import data
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from torchvision.io import read_image

from sklearn.metrics import accuracy_score

## Creating a Pytorch Dataset


In [2]:
class CustomImageDataset(Dataset):
    def __init__(self, csv_file, img_dir_path, resize=False, transform=None):
        """
        Inputs:
            - csv_file (str): file path to the csv file
            - img_dir_path: directory path to the images
            - transform: Compose (a PyTorch Class) that strings together several
              transform functions (e.g. data augmentation steps)
        """
        self.img_labels = pd.read_csv(csv_file, skiprows=1, header=None)
        self.img_dir = img_dir_path
        self.transform = transform
        self.resize = resize
        self.dimensions = self.get_dimensions()

    def __len__(self):
        """
        Returns: (int) length of your dataset
        """
        return len(self.img_labels)

    def __getitem__(self, idx):
        """
        Loads and returns your sample (the image and the label) at the
        specified index

        Parameter: idx (int): index of interest

        Returns: image, label
        """
        img_path =  os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        
        # read the image
        image = read_image(img_path)

        # get the label
        label = self.img_labels.iloc[idx, 1]

        # apply transformations to image
        if self.transform:
            image = self.transform(image)

        return image, label
    
    def get_dimensions(self):
        """
        This method creates a dictionary with the unique combinations of heightxwidth
        for each image in the dataset.

        returns a dictionary with dimensions as keys and the number of images
            with that dimension as values
        """
        dimensions = {}
        for index in range(len(self.img_labels)):
            image = self[index][0]
            if self.resize:
                image = self.resize_image(image)
            _, height, width = image.shape
            dimensions[(height,width)] = dimensions.get((height,width),0) + 1

        return dimensions
    
    def resize_image(self,image):
        """
        If the resize parameter==True, then all the images are
        converted to a 150x150 size.

        returns the resized image
        """
        transform = T.Resize((150,150))
        
        return transform(image)

Once we implemented the Dataset class, we create one object per Dataset:

In [3]:
train_data = CustomImageDataset(csv_file = '../data/data_train.csv',
                                img_dir_path = '../data')

val_data = CustomImageDataset(csv_file = '../data/data_val.csv',
                              img_dir_path = '../data')

test_data = CustomImageDataset(csv_file = '../data/data_test.csv',
                               img_dir_path = '../data')

The labels for each one of the datasets are the following:

In [4]:
# Train dataset
train_data.img_labels.iloc[:,1].value_counts()

1    3875
0    1341
Name: 1, dtype: int64

In [5]:
# Validation dataset
val_data.img_labels.iloc[:,1].value_counts()

0    8
1    8
Name: 1, dtype: int64

In [6]:
# Test dataset
test_data.img_labels.iloc[:,1].value_counts()

1    390
0    234
Name: 1, dtype: int64

Inside our CustomImageDataset class we defined a method to compute a count of each height x width combination in our data set. The cells below display our results:

In [7]:
train_data.dimensions
len(train_data.dimensions)

4366

There are 4,366 unique height x width combinations in our training data set.

In [8]:
test_data.dimensions
len(test_data.dimensions)

598

There are 598 unique height x width combinations in our testing data set.

In [9]:
val_data.dimensions
len(val_data.dimensions)

16

There are 16 unique height x width combinations in our validation data set.

Given that there are different sizes, and some very large images, we want to standarize the size of all images. We do this by passing a `resize` boolean parameter to our `CustomImageDataset` class:

In [10]:
train_data = CustomImageDataset(csv_file = '../data/data_train.csv',
                                img_dir_path = '../data',
                                resize=True)

val_data = CustomImageDataset(csv_file = '../data/data_val.csv',
                              img_dir_path = '../data',
                              resize=True)

test_data = CustomImageDataset(csv_file = '../data/data_test.csv',
                               img_dir_path = '../data',
                               resize=True)



As a check, we can call on the `dictionary` attribute for each one of our datasets:

In [11]:
train_data.dimensions

{(150, 150): 5216}

In [12]:
val_data.dimensions

{(150, 150): 16}

In [13]:
test_data.dimensions

{(150, 150): 624}

## Image Augmentation

In order to avoid overfitting, we need to do image augmentation for our training
dataset. We do this below. We decided to augment the images following some 
examples of people who worked with this dataset in Kaggle.

The transformations we used are the following: rotate the image by 30 degrees, zoom into the image by 20%, flip the image horizontally, increase the image's sharpness, and change the color depth of the image.

In [14]:
train_transform = T.Compose([
    T.RandomRotation(30),
    T.RandomResizedCrop(size=(150, 150), scale=(0.8, 1.2)),
    T.RandomHorizontalFlip(),
    T.RandomAdjustSharpness(sharpness_factor=2),
    T.RandomPosterize(bits=4),
    T.Grayscale(1)
])

We then apply the above transformations to our training dataset below.

In [15]:
# Apply the transforms to the training dataset
train_data = CustomImageDataset(csv_file = '../data/data_train.csv',
                                img_dir_path = '../data',
                                resize=True,
                                transform=train_transform)



## Creating our DataLoader

Below, we create our DataLoader. The purpose of doing this is to load our data in
batches to fit and test our model.

In [16]:
train_dataloader = DataLoader(train_data, 
                              batch_size=64, 
                              shuffle=True)

val_dataloader = DataLoader(val_data, 
                            batch_size=64, 
                            shuffle=True)

test_dataloader = DataLoader(test_data, 
                             batch_size=64, 
                             shuffle=True)

# Model

In [17]:
class CustomNeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        
        # inspire by Turing award winning LeCun, Bengio and Hinton's paper from 1998
        # https://ieeexplore.ieee.org/document/726791 (cited more than 25,000 times!!!!!!!!!)
        # code from https://blog.paperspace.com/writing-lenet5-from-scratch-in-python/ 
        self.LeNet = nn.Sequential(     
            # convolutional layers            
            nn.Sequential(                                            # FIRST LAYER: (INPUT LAYER)
              nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0),    # CONVOLUTION 
              nn.BatchNorm2d(6),
              nn.ReLU(),
              nn.MaxPool2d(kernel_size = 2, stride = 2)),             # POOLING
            nn.Sequential(                                            # SECOND LAYER: HIDDEN LAYER 1
              nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),   # CONVOLUTION 
              nn.BatchNorm2d(16),
              nn.ReLU(),
              nn.MaxPool2d(kernel_size = 2, stride = 2)),             # POOLING
            # fully connected layers
            nn.Flatten(),
            nn.Linear(18496, 64),                                   # THIRD LAYER: LINEAR YEAR, HIDDEN LAYER 2
            nn.ReLU(),                                                # HIDDEN LAYER's ACTIVATION FUNCION
            nn.Linear(64, 64),                                       # FOURTH LAYER: LINEAR YEAR, HIDDEN LAYER 3
            nn.ReLU(),                                                # HIDDEN LAYER's ACTIVATION FUNCION
            # output layer
            nn.Linear(64, 2)                                          # OUTPUT LAYER
        )

    def forward(self, x):
        out = self.LeNet(x)
        return out

# Model's performance

In [18]:
model = CustomNeuralNetwork()

To assess the prediction capacity of our model we compare the model's loss an classification power. The first one is defined as the accuracy of the model in terms of the predicted probabilities, while the second refers to how good the model classifies the actual labels (*e.g.*, normal lungs or lungs with pneumonia).

The loss functions that we will use are:

1. Binary cross-entropy:
2. Cross-entropy:

On the other hand, the accuracy measures we'll use are:

1. Accuracy:
2. Recall:
3. Precision:
4. F1:

**Falta dar una pequenia explicacion de estas medidas**

In [19]:
# Define a loss function and optimizer:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [39]:
# Train and validate the network
EPOCHS = 1

train_losses = []
train_accuracies = []
val_losses = []
val_accuarcies = []

for _ in range(EPOCHS):  # loop over the dataset multiple times
    # TRAIN
    # Make sure gradient tracking is on, and do a pass over the data
    model.train()
    running_loss = 0.0
    accuracies = []
    for i, data in enumerate(train_dataloader):
      if i == 0:
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        
        inputs = inputs.float()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
      
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # keep track of the loss
        running_loss += loss.item()

        # ALSO CALCULATE YOUR ACCURACY METRIC
        outputs = outputs.detach().numpy()
        labels = labels.detach().numpy()

        accuracy = accuracy_score(labels, predicted)
        accuracies.append(accuracy)
      
    avg_train_loss = running_loss / (i + 1)     # i + 1 gives us the total number of batches in train dataloader
    # CALCULATE AVERAGE ACCURACY METRIC
    avg_train_acc = sum(accuracies)/len(accuracies)
    train_losses.append(avg_train_loss)
    train_accuracies.append(avg_train_acc)

    #VALIDATE
    # in the validation part, we don't want to keep track of the gradients 
    # model.eval()            
    
    # implement a similar loop!
    # but you can leave out loss.backward()



In [40]:
train_losses



[0.007517002704666882]

In [41]:
train_accuracies

[0.6875]

In [38]:
import numpy as np

list0 =[1,2,3,4]
print(sum(list0)/len(list0))

2.5
