Dataset Basic info:

- Train images (count after augmentation in parentheses):

        - Normal Images: 1349 (2338)
        - Virus images: 1345 (2341)
        - Bacteria images: 2538 (same)
        - Total: 5232 (7217)

- Test images:

        - Normal Images: 234 (same)
        - Virus images: 148 (246)
        - Bacteria images: 242 (same)
        - Total: 624 (722)


In [1]:
# libraries
import numpy as np
import pandas as pd
import cv2
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers



# Chest X-Ray Pneumonia Prediction

In [6]:
# Loading data
# Per-image index of the training set; the preview below shows the
# path / shape / rows / columns / label columns used by later cells.
train_csv = '../data/chest_pneumonia/training_data.csv'
train_df = pd.read_csv(train_csv)
train_df.head()

Unnamed: 0,path,shape,rows,columns,label
0,data/chest_pneumonia/train/normal/train_h_aug_...,"(291, 291)",291,291,Healthy
1,data/chest_pneumonia/train/normal/NORMAL2-IM-0...,"(2234, 2359)",2234,2359,Healthy
2,data/chest_pneumonia/train/normal/train_h_aug_...,"(291, 291)",291,291,Healthy
3,data/chest_pneumonia/train/normal/train_h_aug_...,"(291, 291)",291,291,Healthy
4,data/chest_pneumonia/train/normal/train_h_aug_...,"(291, 291)",291,291,Healthy


In [7]:
# Per-image index of the test set (same columns as the training index).
test_csv = '../data/chest_pneumonia/test_data.csv'
test_df = pd.read_csv(test_csv)
test_df.tail()

Unnamed: 0,path,shape,rows,columns,label
717,data/chest_pneumonia/test/pneumonia/person120_...,"(688, 1024)",688,1024,Bacterial
718,data/chest_pneumonia/test/pneumonia/person171_...,"(672, 1088)",672,1088,Bacterial
719,data/chest_pneumonia/test/pneumonia/person109_...,"(808, 1256)",808,1256,Bacterial
720,data/chest_pneumonia/test/pneumonia/person83_b...,"(648, 912)",648,912,Bacterial
721,data/chest_pneumonia/test/pneumonia/person112_...,"(640, 952)",640,952,Bacterial


## Resizing images

In [8]:
# Largest image side (in pixels) recorded in either index; the resize cells
# below derive their square target size from it (biggest_dim // 10).
max_rows_train, max_cols_train = (max(train_df[col]) for col in ('rows', 'columns'))
max_rows_test, max_cols_test = (max(test_df[col]) for col in ('rows', 'columns'))
biggest_dim = max(
    max_rows_train,
    max_cols_train,
    max_rows_test,
    max_cols_test,
)

In [7]:
# Training images
# Load every training image as grayscale and store it as a square of side
# biggest_dim // 10 in one uint8 array.
target_side = biggest_dim // 10
train_imgs = np.zeros([len(train_df), target_side, target_side], dtype='uint8')
# Iterate positionally so the loop does not depend on the DataFrame's index labels.
for i, img_path in enumerate(train_df['path']):
    img = cv2.imread(img_path, 0)  # flag 0 -> single-channel grayscale
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise FileNotFoundError('Could not read image: {}'.format(img_path))
    if img.shape != (target_side, target_side):
        img = cv2.resize(img, dsize=(target_side, target_side), interpolation=cv2.INTER_CUBIC)
    # Always store the pixels: images that already have the target size
    # (e.g. the 291x291 augmented ones) must not be left as all-zero rows.
    train_imgs[i] = img

# Reshaping for keras input: append a trailing channel axis of size 1
train_imgs = train_imgs.reshape(train_imgs.shape + (1,))

In [8]:
train_imgs.shape

(7217, 291, 291, 1)

In [9]:
# Test images
# Load every test image as grayscale and store it as a square of side
# biggest_dim // 10 in one uint8 array.
target_side = biggest_dim // 10
test_imgs = np.zeros([len(test_df), target_side, target_side], dtype='uint8')
# Iterate positionally so the loop does not depend on the DataFrame's index labels.
for i, img_path in enumerate(test_df['path']):
    img = cv2.imread(img_path, 0)  # flag 0 -> single-channel grayscale
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise FileNotFoundError('Could not read image: {}'.format(img_path))
    if img.shape != (target_side, target_side):
        img = cv2.resize(img, dsize=(target_side, target_side), interpolation=cv2.INTER_CUBIC)
    # Always store the pixels: images that already have the target size
    # must not be left as all-zero rows.
    test_imgs[i] = img

# Reshaping for keras input: append a trailing channel axis of size 1
test_imgs = test_imgs.reshape(test_imgs.shape + (1,))

In [10]:
test_imgs.shape

(722, 291, 291, 1)

In [11]:
# Labels
# Encode the string class names as integer ids. The encoder is fitted on the
# training labels only and then reused for the test labels, so both splits
# share one label -> id mapping; a test label that never occurs in training
# raises ValueError instead of being silently assigned a different id.
le = LabelEncoder()
train_labels = np.array(train_df['label'])
test_labels = np.array(test_df['label'])
train_labels = le.fit_transform(train_labels)
test_labels = le.transform(test_labels)

# Creating a Model and training

In [18]:
# Building a model
# Small CNN: one 3x3 convolution -> 2x2 max-pooling -> dropout -> two dense layers.
# NOTE(review): the output layer has 10 units, but the dataset notes at the top of
# this notebook list three classes (normal / virus / bacteria) -- confirm whether
# the unit count should match the number of classes. The Conv2D layer is also
# declared without an activation.
model = tf.keras.models.Sequential([
    layers.Conv2D(filters=8, kernel_size=(3, 3), input_shape=(291, 291, 1), padding='same'),
    layers.MaxPooling2D(pool_size=(2, 2), padding="same"),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 291, 291, 8)       80        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 146, 146, 8)       0         
_________________________________________________________________
dropout (Dropout)            (None, 146, 146, 8)       0         
_________________________________________________________________
flatten (Flatten)            (None, 170528)            0         
_________________________________________________________________
dense (Dense)                (None, 32)                5456928   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 5,457,338
Trainable params: 5,457,338
Non-trainable params: 0
______________________________________________

In [19]:
# Compiling the model
# Adam optimizer, accuracy metric, and the sparse categorical cross-entropy loss,
# which takes integer class ids (the form produced by the LabelEncoder cell).
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [20]:
# Train for 10 epochs on the training arrays; the returned History object is the cell output.
model.fit(x=train_imgs, y=train_labels, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f7f0c275160>

In [29]:
# Saving the model
model_path = '../models/model_pneu.h5'
# Create the target directory first so a finished training run is not lost
# to a failed save when '../models' does not exist yet.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)