# **Chest X-ray Image classification using Transfer Learning Algorithm**
This project leverages the pre-trained Inception and Xception models. The data is taken from Kaggle and consists of 4,000 chest X-ray images: 2,000 normal and 2,000 COVID. The task is binary classification, with binary cross-entropy as the loss function, SGD as the optimizer, the learning rate and momentum as the tuned hyperparameters, and accuracy as the metric. The best model, Xception, achieved 90% accuracy with a learning rate of 0.0003 and a momentum of 0.9.

In [None]:
import glob
import numpy as np
import os
import shutil
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(42)

In [None]:
import tensorflow as tf
# Ask TensorFlow for the GPU device name; the value is shown as the cell output.
tf.test.gpu_device_name()

In [None]:
from tensorflow.python.client import device_lib
# List the local devices TensorFlow reports; shown as the cell output.
device_lib.list_local_devices()

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used by the
# following cells live under this mount point.
# NOTE: never paste the authorization code requested by the mount prompt into
# the notebook source -- a previously committed code was removed from here.
drive.mount('/content/drive')

In [None]:
IMG_WIDTH = 299
IMG_HEIGHT = 299
IMG_DIM = (IMG_WIDTH, IMG_HEIGHT)

# Root folder of the dataset on the mounted Drive.
DATA_ROOT = '/content/drive/My Drive/Penelitian/DATA1'

# File-name endings that select the validation subset from the Train folder.
VALIDATION_SUFFIXES = ('11.png', '12.png', '52.png', '42.png')


def _load_images(paths):
    """Load every image in *paths*, resized to IMG_DIM, into one array.

    Args:
        paths: list of image file paths.

    Returns:
        A NumPy array built from one ``img_to_array`` result per path, in
        the same order as *paths*.

    Raises:
        FileNotFoundError: if *paths* is empty (for example when Drive is not
            mounted or the glob pattern matched nothing), so the failure is
            reported here instead of in a later cell.
    """
    if not paths:
        raise FileNotFoundError('No image files found; check the dataset path.')
    keras_image = tf.keras.preprocessing.image
    return np.array([
        keras_image.img_to_array(keras_image.load_img(path, target_size=IMG_DIM))
        for path in paths
    ])


# NOTE(review): the training images are read from the "Validation" folder and
# the validation images from a file-name-selected subset of the "Train"
# folder -- confirm this split is intentional.
# glob is called without recursive=True, so '**' matches exactly one folder
# level: <split>/<class folder>/<file>.
train_files = glob.glob(DATA_ROOT + '/Validation/**/*')
train_imgs = _load_images(train_files)

validation_files = []
for suffix in VALIDATION_SUFFIXES:
    validation_files += glob.glob(DATA_ROOT + '/Train/**/*' + suffix)
print(len(validation_files))

validation_imgs = _load_images(validation_files)


In [None]:
def _class_prefix(path):
    """Return the first three characters of the folder that contains *path*."""
    return os.path.basename(os.path.dirname(path))[:3]


# Each file sits in a per-class folder, so the label is taken from the parent
# folder name. This replaces the fixed character offsets (fn[52:55] and
# fn[47:50]), which only pointed at the class folder for one exact Drive
# path. The three-character truncation is kept so label values are unchanged.
train_labels = [_class_prefix(fn) for fn in train_files]
validation_labels = [_class_prefix(fn) for fn in validation_files]

# Sample counts, used later to size the training/evaluation steps.
number_of_trainsamples = train_imgs.shape[0]
number_of_testsamples = validation_imgs.shape[0]
print('Train dataset shape:', train_imgs.shape,
      '\tValidation dataset shape:', validation_imgs.shape)

In [None]:
# Print the number of validation files, then the number of training files.
for file_list in (validation_files, train_files):
    print(len(file_list))

In [None]:
# Display the validation label list as this cell's output.
validation_labels

In [None]:
# float32 copies of both image arrays with every value divided by 255.
# NOTE(review): the *_scaled arrays are not referenced by any later visible
# cell (the generators are fed the unscaled arrays) -- confirm they are needed.
train_imgs_scaled = np.divide(train_imgs, 255, dtype='float32')
validation_imgs_scaled = np.divide(validation_imgs, 255, dtype='float32')

# Show the shape of the first training image, then render it as the cell output.
sample_img = train_imgs[0]
print(sample_img.shape)
tf.keras.preprocessing.image.array_to_img(sample_img)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the training labels only, then map both label lists to
# integer class ids with that same fitted encoder.
le = LabelEncoder().fit(train_labels)
train_labels_enc, validation_labels_enc = (
    le.transform(labels) for labels in (train_labels, validation_labels)
)

In [None]:
# Spot-check: a slice of the raw training labels and a slice of the encoded ones.
raw_label_slice = train_labels[1000:1200]
encoded_label_slice = train_labels_enc[3500:4000]
print(raw_label_slice, encoded_label_slice)

In [None]:
# # from keras.utils import to_categorical
# from keras import utils

# One-hot versions of the integer-encoded labels (each array is converted
# independently).
train_labels_enc2, validation_labels_enc2 = (
    tf.keras.utils.to_categorical(encoded)
    for encoded in (train_labels_enc, validation_labels_enc)
)

In [None]:
# Factor applied to every pixel value by both generators.
# NOTE(review): 299 is the image side length (IMG_WIDTH/IMG_HEIGHT); pixel
# rescaling is normally 1./255 -- confirm which was intended. Any change must
# be made in every generator in this notebook together, and weights trained
# with the old factor would no longer match.
rescale_factor = 1./299

# Training generator: rescale plus random shifts and horizontal flips.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=rescale_factor,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation generator: rescale only, no augmentation.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=rescale_factor,
)

In [None]:
import matplotlib.pyplot as plt
# Draw five augmented variants of one training image and plot them side by side.
img_id = 100
n_previews = 5
cat_generator = train_datagen.flow(
    train_imgs[img_id:img_id+1],
    train_labels[img_id:img_id+1],
    batch_size=1,
)
# Each draw is an (images, labels) pair holding a single sample.
cat = [next(cat_generator) for _ in range(n_previews)]
fig, ax = plt.subplots(1, n_previews, figsize=(16, 6))
print('Labels:', [labels[0] for _, labels in cat])
l = [axis.imshow(images[0]) for axis, (images, _) in zip(ax, cat)]

In [None]:
# Batch size shared by the training and validation iterators.
batch = 10

# Training batches come from the augmenting generator.
train_generator = train_datagen.flow(
    x=train_imgs,
    y=train_labels_enc,
    batch_size=batch,
)

# Validation batches are not shuffled, so predictions stay aligned with
# validation_labels_enc.
val_generator = val_datagen.flow(
    x=validation_imgs,
    y=validation_labels_enc,
    batch_size=batch,
    shuffle=False,
)

In [None]:

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model

# NOTE(review): despite the `xception*` names, the backbone built here is
# InceptionV3 with ImageNet weights and no classification head.
xception = tf.keras.applications.InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
)

# Flatten the output of the backbone's last layer into a feature vector.
flattened = tf.keras.layers.Flatten()(xception.layers[-1].output)
xceptionmodel = Model(inputs=xception.input, outputs=flattened)

# Print every layer and mark it trainable (the whole network is fine-tuned).
for layer in xceptionmodel.layers:
    print(layer)
    layer.trainable = True
xception.summary()


In [None]:
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Flatten, Dense, Dropout,InputLayer
from tensorflow.keras.models import Sequential
from tensorflow.keras import optimizers
# Binary classifier: the flattened backbone features feed one sigmoid unit.
model = Sequential()
model.add(xceptionmodel)
model.add(Dense(1, activation='sigmoid'))

# `learning_rate` replaces the deprecated `lr` keyword of the Keras optimizers.
sgd = optimizers.SGD(learning_rate=0.0007, momentum=0.9)
model.compile(loss='BinaryCrossentropy', optimizer=sgd, metrics=["accuracy"])
model.summary()

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import tensorflow as tf
# Train for 80 epochs. ModelCheckpoint and EarlyStopping are imported in this
# cell but no callbacks are passed to the training call.
# `Model.fit` accepts the iterators directly and replaces the deprecated
# `fit_generator`. Step counts come from len(<iterator>), which is an integer
# number of batches per pass, instead of the float `samples / batch`.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=80,
    validation_data=val_generator,
    validation_steps=len(val_generator),
)


In [None]:
# Persist only the weights of the trained model to Drive.
weights_path = '/content/drive/My Drive/Penelitian/inception86.h5'
model.save_weights(weights_path)

In [None]:
import pandas as pd
# Store the per-epoch training metrics as a CSV on Drive.
hist_df = pd.DataFrame(history.history)
hist_csv_file = '/content/drive/My Drive/Penelitian/inception86.csv'
# Pass the path so pandas opens the file itself; a text handle opened
# without newline='' can produce doubled line endings on some platforms.
hist_df.to_csv(hist_csv_file)

In [None]:

# Plot training and validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title("model accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
# One legend entry per plotted line. The loss curves are not drawn, so their
# labels are not listed.
plt.legend(["Accuracy", "Validation Accuracy"])
plt.show()

In [None]:
# Predict on the (unshuffled) validation iterator from its first batch.
val_generator.reset()
# `Model.predict` replaces the deprecated `predict_generator`.
# len(val_generator) is the integer number of batches in one pass, so every
# sample is predicted exactly once even when the sample count is not a
# multiple of the batch size.
Y_pred = model.predict(val_generator, steps=len(val_generator))
# Threshold the sigmoid outputs at 0.5 to get class ids 0/1.
y_pred = np.where(Y_pred > 0.5, 1, 0)
print(y_pred)

In [None]:
print(validation_labels_enc)
from sklearn.metrics import classification_report, confusion_matrix
print('Confusion Matrix')

# Rows are the true classes, columns the predicted classes.
confmat = confusion_matrix(validation_labels_enc, y_pred)
print(confmat)

# Save the matrix to Drive. The path is passed to to_csv so pandas opens the
# file itself; a text handle opened without newline='' can produce doubled
# line endings on some platforms.
confusionmat_df = pd.DataFrame(confmat)
confusionmat_csv_file = '/content/drive/My Drive/Penelitian/confusionmat_inception86.csv'
confusionmat_df.to_csv(confusionmat_csv_file)
print('Classification Report')

In [None]:
# Name the report rows after the classes the LabelEncoder actually learned
# (in encoder order, i.e. class id 0 first) instead of the unrelated
# 'bagus'/'reject' names.
target_name = [str(class_name) for class_name in le.classes_]
print(classification_report(validation_labels_enc, y_pred,
                            target_names=target_name))

In [None]:
# Restore the weights previously written to Drive into the current model.
weights_path = '/content/drive/My Drive/Penelitian/inception86.h5'
model.load_weights(weights_path)

In [None]:
# Second evaluation set: files under Train/<class folder>/ whose names end in "9.png".
validation_files = glob.glob('/content/drive/My Drive/Penelitian/DATA1/Train/**/*9.png')
print(len(validation_files))

# Load each file resized to IMG_DIM and stack the results into one array.
keras_image = tf.keras.preprocessing.image
validation_imgs = []
for path in validation_files:
    picture = keras_image.load_img(path, target_size=IMG_DIM)
    validation_imgs.append(keras_image.img_to_array(picture))
validation_imgs = np.array(validation_imgs)


In [None]:
# Label = first three characters of the folder containing each file. Taking
# it from the parent folder replaces the fixed offset fn[47:50], which only
# pointed at the class folder for one exact Drive path.
validation_labels = [os.path.basename(os.path.dirname(fn))[:3] for fn in validation_files]

In [None]:
from sklearn.preprocessing import LabelEncoder
# Fit a fresh encoder on the new validation labels and encode them in one step.
le = LabelEncoder()
validation_labels_enc = le.fit_transform(validation_labels)



In [None]:
# Spot-check the same slice of the raw and the encoded validation labels.
preview = slice(100, 150)
print(validation_labels[preview], validation_labels_enc[preview])

In [None]:
number_of_testsamples = validation_imgs.shape[0]
# Reuse the validation ImageDataGenerator (`val_datagen`) and the batch size
# (`batch`) defined in the earlier cells instead of repeating their literal
# values, so this set is always preprocessed exactly like the validation data
# used during training. shuffle=False keeps predictions aligned with
# validation_labels_enc.
val_generator = val_datagen.flow(validation_imgs, validation_labels_enc,
                                 batch_size=batch, shuffle=False)

In [None]:
# Predict on the (unshuffled) iterator from its first batch.
val_generator.reset()
# `Model.predict` replaces the deprecated `predict_generator`.
# len(val_generator) is the integer number of batches in one pass, so it
# stays correct whatever batch size the iterator was built with and when the
# sample count is not a multiple of it.
Y_pred = model.predict(val_generator, steps=len(val_generator))
# Threshold the sigmoid outputs at 0.5 to get class ids 0/1.
y_pred = np.where(Y_pred > 0.5, 1, 0)
print(y_pred)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# Confusion matrix for the second evaluation set (true labels vs. thresholded
# predictions).
print('Confusion Matrix')
confmat = confusion_matrix(y_true=validation_labels_enc, y_pred=y_pred)
print(confmat)

In [None]:
# Display names for class ids 0 and 1. They were defined but never handed to
# the report, so rows were labelled with bare integers.
target_name = ['Cov', 'Normal']
print(classification_report(validation_labels_enc, y_pred,
                            target_names=target_name))