In [1]:
!pip install kaggle



In [2]:
# Mount Google Drive at /content/drive; a later cell copies the Kaggle
# credentials file from there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
! mkdir ~/.kaggle

In [4]:
# Copy the Kaggle API credentials file from the mounted Drive into ~/.kaggle.
!cp /content/drive/MyDrive/kaggle_creds/kaggle.json ~/.kaggle/kaggle.json

In [5]:
# Restrict the credentials file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [6]:
# Download the chest X-ray pneumonia dataset archive with the Kaggle CLI
# (the archive lands in the current working directory).
!kaggle datasets download -d paultimothymooney/chest-xray-pneumonia

Downloading chest-xray-pneumonia.zip to /content
100% 2.28G/2.29G [00:16<00:00, 182MB/s]
100% 2.29G/2.29G [00:17<00:00, 145MB/s]


In [8]:
import os
import zipfile

# Where the downloaded archive lives and where its contents should be unpacked.
zip_file_path = '/content/chest-xray-pneumonia.zip'
extract_path = '/content/'

# Unpack every member of the archive; the context manager closes the file afterwards.
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extract_path)

# Display the contents of the extraction directory.
os.listdir(extract_path)

['.config', 'chest_xray', 'chest-xray-pneumonia.zip', 'drive', 'sample_data']

Importing Libraries

In [9]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv2D, BatchNormalization, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.metrics import categorical_crossentropy
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
import random
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import cv2

Dataset from
https://www.kaggle.com/datasets/paultimothymooney/chest-xray-pneumonia

Loading train samples

In [10]:
# File extensions (without the leading dot) that count as images.
image_ext = ['jpeg', 'jpg', 'png', 'bmp']

normal_train_samples = []
pneumonia_train_samples = []

for i in image_ext:
    # Match a real ".<ext>" suffix (the bare "*<ext>" pattern also matched names
    # that merely end in those letters) and sort each batch: glob returns files
    # in filesystem order, which would make the seeded shuffle irreproducible.
    normal_train_samples.extend(sorted(glob(f'/content/chest_xray/train/NORMAL/*.{i}')))
    pneumonia_train_samples.extend(sorted(glob(f'/content/chest_xray/train/PNEUMONIA/*.{i}')))

len(normal_train_samples), len(pneumonia_train_samples)

(1341, 3875)

Creating train inputs and labels

In [11]:
# Seed NumPy's global RNG before shuffling.
np.random.seed(123)

# Image paths: all NORMAL files followed by all PNEUMONIA files.
train_samples = normal_train_samples + pneumonia_train_samples

# Integer class labels aligned with the paths above: 0 = NORMAL, 1 = PNEUMONIA.
train_labels = [0] * len(normal_train_samples) + [1] * len(pneumonia_train_samples)

# Shuffle paths and labels together so each path keeps its own label.
train_samples, train_labels = shuffle(train_samples, train_labels)

len(train_samples), len(train_labels)

(5216, 5216)

Loading test samples

In [12]:
normal_test_samples = []
pneumonia_test_samples = []

for i in image_ext:
    # Match a real ".<ext>" suffix and sort each batch so the file order does
    # not depend on the filesystem's listing order.
    normal_test_samples.extend(sorted(glob(f'/content/chest_xray/test/NORMAL/*.{i}')))
    pneumonia_test_samples.extend(sorted(glob(f'/content/chest_xray/test/PNEUMONIA/*.{i}')))

len(normal_test_samples), len(pneumonia_test_samples)

(234, 390)

Creating test inputs and labels

In [13]:
# Image paths: all NORMAL files followed by all PNEUMONIA files.
test_samples = normal_test_samples + pneumonia_test_samples

# Integer class labels aligned with the paths above: 0 = NORMAL, 1 = PNEUMONIA.
test_labels = [0] * len(normal_test_samples) + [1] * len(pneumonia_test_samples)

# Shuffle paths and labels together so each path keeps its own label.
test_samples, test_labels = shuffle(test_samples, test_labels)

len(test_samples), len(test_labels)

(624, 624)

Loading validation samples

In [14]:
normal_val_samples = []
pneumonia_val_samples = []

for i in image_ext:
    # Match a real ".<ext>" suffix and sort each batch so the file order does
    # not depend on the filesystem's listing order.
    normal_val_samples.extend(sorted(glob(f'/content/chest_xray/val/NORMAL/*.{i}')))
    pneumonia_val_samples.extend(sorted(glob(f'/content/chest_xray/val/PNEUMONIA/*.{i}')))

len(normal_val_samples), len(pneumonia_val_samples)

(8, 8)

Creating validation inputs and labels

In [15]:
# Image paths: all NORMAL files followed by all PNEUMONIA files.
val_samples = normal_val_samples + pneumonia_val_samples

# Integer class labels aligned with the paths above: 0 = NORMAL, 1 = PNEUMONIA.
val_labels = [0] * len(normal_val_samples) + [1] * len(pneumonia_val_samples)

# Shuffle paths and labels together so each path keeps its own label.
val_samples, val_labels = shuffle(val_samples, val_labels)

len(val_samples), len(val_labels)

(16, 16)

Reading & Resizing Data

In [16]:
# Side length, in pixels, that every image is resized to (images become dim x dim).
dim = 500


def image_resize(samples):
    """Read each image file and resize it to ``dim`` x ``dim`` pixels.

    Args:
        samples: Iterable of image file paths.

    Returns:
        A list with one resized image per input path, in the same order.

    Raises:
        ValueError: If a file cannot be read as an image.
    """
    image_samples = []
    for img in samples:
        image = cv2.imread(img)
        # cv2.imread signals a missing or undecodable file by returning None
        # instead of raising; fail here with the offending path rather than
        # letting cv2.resize die with an opaque assertion error.
        if image is None:
            raise ValueError(f'Could not read image file: {img!r}')
        image_samples.append(cv2.resize(image, (dim, dim)))
    return image_samples

In [17]:
# Load and resize every training image, then stack the images and the labels
# into NumPy arrays for Keras.
train_data = np.array(image_resize(train_samples))
train_labels = np.array(train_labels)

In [18]:
# Sanity check: image array shape and label array shape should share the first dimension.
train_data.shape, train_labels.shape

((5216, 500, 500, 3), (5216,))

In [19]:
# Load and resize every test image, then stack the images and the labels
# into NumPy arrays.
test_data = np.array(image_resize(test_samples))
test_labels = np.array(test_labels)

In [20]:
# Sanity check: image array shape and label array shape should share the first dimension.
test_data.shape, test_labels.shape

((624, 500, 500, 3), (624,))

In [21]:
# Load and resize every validation image, then stack the images and the labels
# into NumPy arrays.
val_data = np.array(image_resize(val_samples))
val_labels = np.array(val_labels)

In [22]:
# Sanity check: image array shape and label array shape should share the first dimension.
val_data.shape, val_labels.shape

((16, 500, 500, 3), (16,))

Model building

In [23]:
# Small CNN: two conv + max-pool stages followed by a 2-way classifier head.
# NOTE(review): no rescaling layer is visible, so the network appears to receive
# pixel values exactly as returned by cv2 — confirm whether normalisation is intended.
model = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(dim, dim, 3)),
    MaxPooling2D(pool_size=(2, 2), strides=2),

    Conv2D(filters=16, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPooling2D(),

    Flatten(),
    # Two mutually exclusive classes trained with sparse categorical
    # cross-entropy: the output must be a probability distribution over the
    # classes, i.e. softmax. Independent per-unit sigmoids do not sum to 1.
    Dense(units=2, activation='softmax')
])

In [24]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 500, 500, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 250, 250, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 250, 250, 16)      4624      
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 125, 125, 16)      0         
 g2D)                                                            
                                                                 
 flatten (Flatten)           (None, 250000)            0         
                                                                 
 dense (Dense)               (None, 2)                 5

In [25]:
# Configure training: Adam with an explicit learning rate, sparse categorical
# cross-entropy for the integer class labels, and accuracy as the reported metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=Adam(learning_rate=0.01),
)

Model Training

In [None]:
# Train for 5 epochs on shuffled mini-batches of 100 images.
# NOTE(review): the per-epoch validation metrics are computed on the test split
# (test_data / test_labels), not on val_data — confirm this is intended.
model.fit(
    train_data,
    train_labels,
    epochs=5,
    batch_size=100,
    shuffle=True,
    validation_data=(test_data, test_labels),
    verbose=1,
)

Save Model

In [None]:
# Persist the trained model to model.h5 in the current working directory.
model.save('model.h5')
print('Model Saved!')

Load Saved Model

In [None]:
# Reload the model from disk and print its architecture to confirm the round trip.
loaded_model = load_model('model.h5')
loaded_model.summary()

Model Evaluation

In [None]:
# Score the reloaded model on the test split (reports the loss and the compiled metrics).
loaded_model.evaluate(test_data, test_labels)

Prediction

In [None]:
# Predict on the validation images.
# NOTE(review): this uses the in-memory `model`, not the `loaded_model` that was
# evaluated above — confirm which one is intended.
predictions = model.predict(val_data)

In [None]:
# `predictions` was computed from val_data, so compare it with val_labels
# (`sample_labels` is never defined and raised NameError). argmax collapses the
# two per-class scores into one predicted class id per image, giving the same
# shape as the label array.
np.argmax(predictions, axis=1), val_labels

Confusion Matrix

In [None]:
# confusion_matrix needs true labels and predicted labels; the original call
# passed the image array as y_true and the true labels as y_pred.
# Take the predicted class as the index of the highest-scoring output unit.
test_predictions = np.argmax(loaded_model.predict(test_data), axis=1)
confusion_matrix(y_true=test_labels, y_pred=test_predictions)