# X-ray Pneumonia Detection

## Imports

In [21]:
from keras.datasets import mnist
from keras.utils import to_categorical
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import cv2 as cv
import keras
from keras import layers
from keras import models

## Load Data

In [38]:
def append_images_location_to_df(location: str, df: pd.DataFrame):
    """Return a copy of ``df`` extended with one row per image found under ``location``.

    The directory tree rooted at ``location`` is walked recursively. Every file
    whose extension is png, jpg or jpeg (compared case-insensitively)
    contributes a row with:

    * ``image_name`` - the file path, built from the walked directory and file name
    * ``type`` - 0 when the file's immediate parent folder is named "NORMAL",
      1 otherwise

    The name of each visited folder is printed as a progress indicator.

    Args:
        location: Root directory to scan.
        df: Existing frame to extend; it is not modified.

    Returns:
        A new DataFrame holding the rows of ``df`` followed by the new rows.
    """
    image_extensions = {".png", ".jpg", ".jpeg"}
    records = []
    for root, _dirs, files in os.walk(location):
        # basename/normpath works with either path separator and tolerates a
        # trailing one, unlike splitting on a literal backslash.
        current_folder = os.path.basename(os.path.normpath(root))
        print(current_folder)
        label = 0 if current_folder == "NORMAL" else 1
        # Sorted so the row order does not depend on the filesystem's listing order.
        for file in sorted(files):
            # splitext copes with names that contain several dots or none at all.
            extension = os.path.splitext(file)[1].lower()
            if extension not in image_extensions:
                continue
            records.append({'image_name': os.path.join(root, file), 'type': label})

    if not records:
        return df.copy()

    # Build all new rows at once instead of growing the frame row by row.
    new_rows = pd.DataFrame(records)
    if len(df) == 0:
        # Nothing to concatenate with: keep the caller's column order and let the
        # new columns keep their natural dtypes (integer labels, string paths).
        columns = list(df.columns) + [c for c in new_rows.columns if c not in df.columns]
        return new_rows.reindex(columns=columns)
    return pd.concat([df, new_rows], ignore_index=True)


In [39]:
# Build the training index: one row per image file, holding its path
# ('image_name') and its class label ('type': 0 = NORMAL, 1 = PNEUMONIA).
# The pixel data itself is loaded in a later cell.

df = pd.DataFrame(columns=['type', 'image_name'])
# os.path.join picks the platform's separator; the trailing "" keeps the
# trailing separator that the base path has always carried.
image_dir_base = os.path.join(".", "chest_xray", "train", "")

# Each class lives in its own sub-folder; the folder name determines the label.
for class_folder in ("NORMAL", "PNEUMONIA"):
    current_dir = os.path.join(image_dir_base, class_folder)
    df = append_images_location_to_df(current_dir, df)
df

NORMAL
PNEUMONIA


Unnamed: 0,type,image_name
0,0,.\chest_xray\train\NORMAL\IM-0115-0001.jpeg
1,0,.\chest_xray\train\NORMAL\IM-0117-0001.jpeg
2,0,.\chest_xray\train\NORMAL\IM-0119-0001.jpeg
3,0,.\chest_xray\train\NORMAL\IM-0122-0001.jpeg
4,0,.\chest_xray\train\NORMAL\IM-0125-0001.jpeg
...,...,...
5211,1,.\chest_xray\train\PNEUMONIA\person99_virus_18...
5212,1,.\chest_xray\train\PNEUMONIA\person9_bacteria_...
5213,1,.\chest_xray\train\PNEUMONIA\person9_bacteria_...
5214,1,.\chest_xray\train\PNEUMONIA\person9_bacteria_...


In [41]:
# Load every image referenced in the dataframe and store its pixel data in a
# new 'image' column.
def route_to_numpy_array(image_name, size=(256, 256)):
    """Load an image file as a normalized grayscale array.

    Args:
        image_name: Path of the image file to read.
        size: Output size handed to cv.resize; defaults to (256, 256).

    Returns:
        The resized grayscale image as a float32 array with every pixel
        value divided by 255.

    Raises:
        OSError: If OpenCV could not read the file.
    """
    image = cv.imread(image_name)
    # cv.imread reports a missing or undecodable file by returning None instead
    # of raising, which would otherwise surface as an opaque cvtColor error.
    if image is None:
        raise OSError(f"Could not read image: {image_name}")
    gray_image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    resized_image = cv.resize(gray_image, size)
    # divide by 255 to normalize the data
    return resized_image.astype('float32') / 255

df["image"] = df['image_name'].apply(route_to_numpy_array)
# Save only the index columns: writing the array column to CSV would store
# truncated string representations that cannot be turned back into images.
df.drop(columns=["image"]).to_csv("train.csv", index=False)


AttributeError: 'DataFrame' object has no attribute 'read'

In [42]:
# Preview the first rows to confirm the 'image' column now holds pixel arrays.
df.head(n=5)

Unnamed: 0,type,image_name,image
0,0,.\chest_xray\train\NORMAL\IM-0115-0001.jpeg,"[[0.09019608, 0.07058824, 0.07058824, 0.078431..."
1,0,.\chest_xray\train\NORMAL\IM-0117-0001.jpeg,"[[0.05490196, 0.09411765, 0.09411765, 0.117647..."
2,0,.\chest_xray\train\NORMAL\IM-0119-0001.jpeg,"[[0.0, 0.0, 0.0, 0.0, 0.06666667, 0.08235294, ..."
3,0,.\chest_xray\train\NORMAL\IM-0122-0001.jpeg,"[[0.15686275, 0.1764706, 0.18039216, 0.1921568..."
4,0,.\chest_xray\train\NORMAL\IM-0125-0001.jpeg,"[[0.3019608, 0.2901961, 0.2509804, 0.23529412,..."


In [54]:
# Combine the per-image arrays into a single array with one entry per image,
# then append a trailing channel axis so the result has the 4-D layout
# (samples, height, width, channels) declared by the model's input_shape.
# The sample count comes from the data instead of a hard-coded 5216, so this
# cell keeps working when the number of images changes.
image_stack = np.stack(df["image"].values)
train_images = np.expand_dims(image_stack, axis=-1)

In [30]:
# Binary classifier: two convolution + max-pooling stages followed by a single
# sigmoid unit on the flattened feature maps. Input is one 256x256
# single-channel image.
model = models.Sequential([
    layers.Conv2D(32, (5, 5), activation='relu', input_shape=(256, 256, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (5, 5), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(1, activation='sigmoid'),
])

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 252, 252, 32)      832       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 126, 126, 32)      0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 122, 122, 64)      51264     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 61, 61, 64)        0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 238144)            0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 238145    
Total params: 290,241
Trainable params: 290,241
Non-trainable params: 0
________________________________________________

In [55]:
batch_size = 100
epochs = 2

# Pass the labels as a plain float32 array: the 'type' column may carry object
# dtype, which the tensor conversion inside fit() can reject.
train_labels = df["type"].to_numpy(dtype="float32")
assert len(train_labels) == len(train_images), "images and labels are out of sync"

model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
history = model.fit(train_images, train_labels,
          batch_size=batch_size,
          epochs=epochs,
          # TODO: no validation split is used yet; once one exists, pass
          # validation_data=(val_images, val_labels) here.
          verbose=1
          )


Epoch 1/2
Epoch 2/2

KeyboardInterrupt: 