<a href="https://colab.research.google.com/github/ayandalab/Deep-Learning-Pneumonia-Classification/blob/main/Pneumonia_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [131]:
from google_drive_downloader import GoogleDriveDownloader as gdd


In [132]:
gdd.download_file_from_google_drive(file_id='1d_93d9oFNRBK9Vg6BRxs9wvRbKtNTylY',
                                    dest_path='content/pneumonia_dataset.zip',
                                    unzip=True)


In [133]:
import pandas as pd                                     # Data analysis and manipultion tool
import numpy as np                                      # Fundamental package for linear algebra and multidimensional arrays
import tensorflow as tf                                 # Deep Learning Tool
import os                                               # OS module in Python provides a way of using operating system dependent functionality
import cv2                                              # Library for image processing
from sklearn.model_selection import train_test_split    # For splitting the data into train and validation set
from sklearn.metrics import accuracy_score
from keras.layers.normalization import BatchNormalization

In [134]:
data=[]
img_size = 100
def create_data():
    for item in ['normal','pneumonia']:
        path='/content/content/pneumonia_dataset/train/' + item+"/"
            
        for img in os.listdir(path):         # os.listdir gets you all the list of name of files located in the given path
            try:
                img_array=cv2.imread(os.path.join(path,img),cv2.IMREAD_GRAYSCALE)    # converts the image to pixels and gray scales the images
                new_img_array=cv2.resize(img_array,(img_size,img_size))
                if item == 'normal':
                    data.append([new_img_array,0])
                else:
                   data.append([new_img_array, 1]) # appending the list of image pixels and respective target value in data
            except Exception as e:
                    pass    # try and except is exception handling case in python, saves you from getting errors
                
            
create_data()

In [135]:
len(data)

2425

In [136]:
data[2]

[array([[165,  34,   6, ..., 148, 147, 149],
        [164,  19,   4, ..., 159, 160, 161],
        [168,  13,   4, ..., 169, 172, 171],
        ...,
        [ 81, 105, 126, ..., 202, 202, 203],
        [ 73, 104, 124, ..., 202, 204, 203],
        [ 71, 100, 123, ..., 200, 202, 203]], dtype=uint8), 0]

In [137]:
np.random.shuffle(data)

In [138]:
x = []
y = []
for image in data:
  x.append(image[0])
  y.append(image[1])

# converting x & y to numpy array as they are list
x = np.array(x)
y = np.array(y)

In [139]:
np.unique(y, return_counts=True)

(array([0, 1]), array([1280, 1145]))

In [140]:
x =  x.reshape(-1, 100, 100, 1)

In [141]:
# split the data
X_train, X_val, y_train, y_val = train_test_split(x,y,test_size=0.3, random_state = 42)

In [142]:
X_train.shape

(1697, 100, 100, 1)

In [143]:
X_train =  X_train.reshape(-1, 100, 100, 1)

In [144]:
X_train.shape

(1697, 100, 100, 1)

In [145]:
X_train[0]

array([[[12],
        [ 2],
        [ 2],
        ...,
        [ 2],
        [ 2],
        [ 2]],

       [[11],
        [ 2],
        [ 2],
        ...,
        [ 2],
        [ 2],
        [ 2]],

       [[10],
        [ 2],
        [ 2],
        ...,
        [ 2],
        [ 2],
        [ 2]],

       ...,

       [[ 3],
        [ 3],
        [ 3],
        ...,
        [ 3],
        [ 3],
        [ 3]],

       [[ 4],
        [ 3],
        [ 3],
        ...,
        [ 3],
        [ 3],
        [ 4]],

       [[ 3],
        [ 3],
        [ 3],
        ...,
        [ 3],
        [ 3],
        [ 3]]], dtype=uint8)

In [146]:
cnn = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(100, 100, 1)),
    tf.keras.layers.MaxPooling2D((2, 2)),
    
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    
    # tf.keras.layers.Flatten(input_shape=(100, 100, 1)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [147]:
cnn.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [148]:
cnn.fit(X_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f6144526b10>

In [149]:
cnn.evaluate(X_val, y_val)



[1.652341604232788, 0.5686812996864319]

In [150]:
# Loading the order of the image's name that has been provided
test_image_order = pd.read_csv("/content/content/pneumonia_dataset/test.csv")
test_image_order.head()

Unnamed: 0,filename
0,CXR_test_519.png
1,CXR_test_578.png
2,CXR_test_359.png
3,CXR_test_573.png
4,CXR_test_471.png


In [151]:
file_paths = [[fname, '/content/content/pneumonia_dataset/test/' + fname] for fname in test_image_order['filename']]

In [152]:
# Confirm if number of images is same as number of labels given
if len(test_image_order) == len(file_paths):
    print('Number of image names i.e. ', len(test_image_order), 'matches the number of file paths i.e. ', len(file_paths))
else:
    print('Number of image names does not match the number of filepaths')

Number of image names i.e.  606 matches the number of file paths i.e.  606


In [153]:
test_images = pd.DataFrame(file_paths, columns=['filename', 'filepaths'])
test_images.head()

Unnamed: 0,filename,filepaths
0,CXR_test_519.png,/content/content/pneumonia_dataset/test/CXR_te...
1,CXR_test_578.png,/content/content/pneumonia_dataset/test/CXR_te...
2,CXR_test_359.png,/content/content/pneumonia_dataset/test/CXR_te...
3,CXR_test_573.png,/content/content/pneumonia_dataset/test/CXR_te...
4,CXR_test_471.png,/content/content/pneumonia_dataset/test/CXR_te...


In [154]:
test_pixel_data = []     # initialize an empty numpy array
for i in range(len(test_images)):
  
  img_array = cv2.imread(test_images['filepaths'][i], cv2.IMREAD_GRAYSCALE)   # converting the image to gray scale
  new_img_array=cv2.resize(img_array,(img_size,img_size))
  test_pixel_data.append(new_img_array)

In [155]:
test_pixel_data = np.asarray(test_pixel_data)

In [156]:
test_pixel_data =  test_pixel_data.reshape(-1, 100, 100, 1)

In [157]:
test_pixel_data

array([[[[  3],
         [  3],
         [  3],
         ...,
         [  5],
         [  6],
         [  7]],

        [[  3],
         [  2],
         [  2],
         ...,
         [  5],
         [  6],
         [  6]],

        [[  4],
         [  2],
         [  2],
         ...,
         [  5],
         [  5],
         [  6]],

        ...,

        [[  7],
         [  5],
         [  3],
         ...,
         [100],
         [ 85],
         [ 64]],

        [[  7],
         [  4],
         [  3],
         ...,
         [106],
         [ 97],
         [ 71]],

        [[  5],
         [  3],
         [  3],
         ...,
         [103],
         [ 94],
         [ 72]]],


       [[[  0],
         [  0],
         [  0],
         ...,
         [  0],
         [  0],
         [  0]],

        [[  0],
         [  0],
         [  0],
         ...,
         [  0],
         [  0],
         [  0]],

        [[  0],
         [  0],
         [  0],
         ...,
         [  0],
         [

In [158]:
pred = cnn.predict(test_pixel_data)

In [159]:
pred

array([[5.97843885e-01],
       [3.88270064e-06],
       [5.84970117e-02],
       [6.53674245e-01],
       [4.92527395e-01],
       [9.95425820e-01],
       [7.55041838e-03],
       [9.50671434e-01],
       [3.78732085e-02],
       [1.61457211e-01],
       [9.98285651e-01],
       [9.99371529e-01],
       [3.55069041e-02],
       [6.01342738e-01],
       [1.69028640e-02],
       [2.84613729e-01],
       [7.85380363e-01],
       [7.54721701e-01],
       [1.00000000e+00],
       [4.63273227e-02],
       [4.59846973e-01],
       [2.49990374e-01],
       [1.11755550e-01],
       [9.93370891e-01],
       [9.99927163e-01],
       [8.46076012e-01],
       [9.96984720e-01],
       [9.26548004e-01],
       [4.88867551e-01],
       [9.14298594e-01],
       [3.40253115e-04],
       [9.93570447e-01],
       [5.72428107e-03],
       [2.81611085e-03],
       [9.99985576e-01],
       [3.56034935e-01],
       [4.62953985e-01],
       [6.27622426e-01],
       [9.99990582e-01],
       [8.87243927e-01],


In [160]:
predictions = []
for item in pred:
  if item <= 0.5:
    predictions.append('normal')
  else:
    predictions.append('pneumonia')

In [162]:
predictions

['pneumonia',
 'normal',
 'normal',
 'pneumonia',
 'normal',
 'pneumonia',
 'normal',
 'pneumonia',
 'normal',
 'normal',
 'pneumonia',
 'pneumonia',
 'normal',
 'pneumonia',
 'normal',
 'normal',
 'pneumonia',
 'pneumonia',
 'pneumonia',
 'normal',
 'normal',
 'normal',
 'normal',
 'pneumonia',
 'pneumonia',
 'pneumonia',
 'pneumonia',
 'pneumonia',
 'normal',
 'pneumonia',
 'normal',
 'pneumonia',
 'normal',
 'normal',
 'pneumonia',
 'normal',
 'normal',
 'pneumonia',
 'pneumonia',
 'pneumonia',
 'normal',
 'normal',
 'pneumonia',
 'normal',
 'pneumonia',
 'pneumonia',
 'normal',
 'pneumonia',
 'pneumonia',
 'normal',
 'normal',
 'pneumonia',
 'normal',
 'pneumonia',
 'pneumonia',
 'pneumonia',
 'normal',
 'normal',
 'pneumonia',
 'pneumonia',
 'pneumonia',
 'normal',
 'pneumonia',
 'pneumonia',
 'normal',
 'pneumonia',
 'normal',
 'pneumonia',
 'pneumonia',
 'pneumonia',
 'normal',
 'pneumonia',
 'normal',
 'normal',
 'normal',
 'pneumonia',
 'pneumonia',
 'pneumonia',
 'normal',
 '

In [163]:
res = pd.DataFrame({'filename': test_images['filename'], 'label': predictions})  # prediction is nothing but the final predictions of your model on input features of your new unseen test data
res.to_csv("submission.csv", index = False) 

# To download the csv file locally
from google.colab import files        
files.download('submission.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>