Dataset Basic info:

- Train images (and after agumentation):

        - Normal Images: 1349 (2338)
        - Virus images: 1345 (2341)
        - Bacteria images: 2538 (same)
        - Total: 5232

- Test images:

        - Normal Images: 234 (same)
        - Virus images: 148 (246)
        - Bacteria images: 242 (same)
        - Total: 624


In [1]:
# libraries
import numpy as np
import pandas as pd
import cv2
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf



# Chest X-Ray Pneumonia Prediction

In [2]:
# Loading data
train_df = pd.read_csv('../data/chest_pneumonia/training_data.csv')
train_df.head()

Unnamed: 0,path,shape,rows,columns,label
0,../data/chest_pneumonia/train/normal/train_h_a...,"(291, 291)",291,291,Healthy
1,../data/chest_pneumonia/train/normal/NORMAL2-I...,"(2234, 2359)",2234,2359,Healthy
2,../data/chest_pneumonia/train/normal/train_h_a...,"(291, 291)",291,291,Healthy
3,../data/chest_pneumonia/train/normal/train_h_a...,"(291, 291)",291,291,Healthy
4,../data/chest_pneumonia/train/normal/train_h_a...,"(291, 291)",291,291,Healthy


In [3]:
test_df = pd.read_csv('../data/chest_pneumonia/test_data.csv')
test_df.tail()

Unnamed: 0,path,shape,rows,columns,label
718,../data/chest_pneumonia/test/pneumonia/person1...,"(688, 1024)",688,1024,Bacterial
719,../data/chest_pneumonia/test/pneumonia/person1...,"(672, 1088)",672,1088,Bacterial
720,../data/chest_pneumonia/test/pneumonia/person1...,"(808, 1256)",808,1256,Bacterial
721,../data/chest_pneumonia/test/pneumonia/person8...,"(648, 912)",648,912,Bacterial
722,../data/chest_pneumonia/test/pneumonia/person1...,"(640, 952)",640,952,Bacterial


## Resizing images

In [4]:
max_rows_train = max(train_df['rows'])
max_cols_train = max(train_df['columns'])
max_rows_test = max(test_df['rows'])
max_cols_test = max(test_df['columns'])
biggest_dim = max(max_rows_train, max_cols_train, max_rows_test, max_cols_test)

In [5]:
# Training images

train_imgs = np.zeros([len(train_df), biggest_dim // 10, biggest_dim // 10], dtype = 'uint8')
for i in range(len(train_df)):
    img = cv2.imread(train_df['path'][i], 0)
    if img.shape != (biggest_dim // 10, biggest_dim // 10):
        train_imgs[i] = cv2.resize(img, dsize=(biggest_dim //10, biggest_dim // 10), interpolation= cv2.INTER_CUBIC)
     
# Reshaping for keras input
train_imgs = train_imgs.reshape(train_imgs.shape[0], train_imgs.shape[1], train_imgs.shape[2], 1)

In [6]:
train_imgs.shape

(7217, 291, 291, 1)

In [7]:
# Test images
test_imgs = np.zeros([len(test_df), biggest_dim // 10, biggest_dim // 10], dtype = 'uint8')
for i in range(len(test_df)):
    img = cv2.imread(test_df['path'][i], 0)
    if img.shape != (biggest_dim //10, biggest_dim // 10):
        test_imgs[i] = cv2.resize(img, dsize=(biggest_dim //10, biggest_dim // 10), interpolation= cv2.INTER_CUBIC)
     
# Reshaping for keras input
test_imgs = test_imgs.reshape(test_imgs.shape[0], test_imgs.shape[1], test_imgs.shape[2], 1)

In [8]:
test_imgs.shape

(723, 291, 291, 1)

In [4]:
# Labels
le = LabelEncoder()
train_labels = np.array(train_df['label'])
test_labels = np.array(test_df['label'])
train_labels = le.fit_transform(train_labels)
test_labels = le.fit_transform (test_labels)

# Loading Trained Model and Predicting

In [10]:
model = tf.keras.models.load_model('../models/model_pneu.h5')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 291, 291, 8)       80        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 146, 146, 8)       0         
_________________________________________________________________
dropout (Dropout)            (None, 146, 146, 8)       0         
_________________________________________________________________
flatten (Flatten)            (None, 170528)            0         
_________________________________________________________________
dense (Dense)                (None, 32)                5456928   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                330       
Total params: 5,457,338
Trainable params: 5,457,338
Non-trainable params: 0
______________________________________________

In [11]:
predictions = np.argmax(model.predict(test_imgs), axis=-1)

In [12]:
predictions_strings = le.inverse_transform(predictions)

In [13]:
model.evaluate(test_imgs, test_labels)



[22.635337829589844, 0.770401120185852]

## Saving predictions

In [14]:
predictions_df = pd.DataFrame({'image': test_df['path'], 'predicted_label': predictions_strings, 'true_label': test_df['label']})

In [15]:
# Exporting predictions to html to open in web server
predictions_df.to_html('api/static/predictions_df.html')

In [16]:
# Exporting predictions to json to open in web server
predictions_df.to_json('../reports/predictions.json')

In [17]:
shapes = []
for i in range(len(train_df)):
    shapes.append(train_imgs[i, :, :, 0].shape)

In [18]:
clean_data_train_df = pd.DataFrame({'path': train_df['path'], 'image shape': shapes, 'label': train_df['label']})

In [19]:
shapes = []
for i in range(len(test_df)):
    shapes.append(test_imgs[i, :, :, 0].shape)

In [20]:
clean_data_test_df = pd.DataFrame({'path': test_df['path'], 'image shape': shapes, 'label': test_df['label']})

In [21]:
clean_data_test_df

Unnamed: 0,path,image shape,label
0,../data/chest_pneumonia/test/normal/IM-0031-00...,"(291, 291)",Healthy
1,../data/chest_pneumonia/test/normal/IM-0025-00...,"(291, 291)",Healthy
2,../data/chest_pneumonia/test/normal/NORMAL2-IM...,"(291, 291)",Healthy
3,../data/chest_pneumonia/test/normal/NORMAL2-IM...,"(291, 291)",Healthy
4,../data/chest_pneumonia/test/normal/NORMAL2-IM...,"(291, 291)",Healthy
...,...,...,...
718,../data/chest_pneumonia/test/pneumonia/person1...,"(291, 291)",Bacterial
719,../data/chest_pneumonia/test/pneumonia/person1...,"(291, 291)",Bacterial
720,../data/chest_pneumonia/test/pneumonia/person1...,"(291, 291)",Bacterial
721,../data/chest_pneumonia/test/pneumonia/person8...,"(291, 291)",Bacterial


In [22]:
clean_data_df = clean_data_train_df.append(clean_data_test_df, ignore_index=True)

In [23]:
clean_data_df

Unnamed: 0,path,image shape,label
0,../data/chest_pneumonia/train/normal/train_h_a...,"(291, 291)",Healthy
1,../data/chest_pneumonia/train/normal/NORMAL2-I...,"(291, 291)",Healthy
2,../data/chest_pneumonia/train/normal/train_h_a...,"(291, 291)",Healthy
3,../data/chest_pneumonia/train/normal/train_h_a...,"(291, 291)",Healthy
4,../data/chest_pneumonia/train/normal/train_h_a...,"(291, 291)",Healthy
...,...,...,...
7935,../data/chest_pneumonia/test/pneumonia/person1...,"(291, 291)",Bacterial
7936,../data/chest_pneumonia/test/pneumonia/person1...,"(291, 291)",Bacterial
7937,../data/chest_pneumonia/test/pneumonia/person1...,"(291, 291)",Bacterial
7938,../data/chest_pneumonia/test/pneumonia/person8...,"(291, 291)",Bacterial


In [24]:
# Exporting clean data to json file for the server
clean_data_df.to_json('../reports/clean_data.json')