In [3]:
import os
import cv2
import platform
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
from IPython.display import clear_output
from pathlib import Path
from keras.preprocessing.image import ImageDataGenerator
from matplotlib.image import imread
from keras import backend as K
from keras.applications.densenet import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D, MaxPooling2D
from keras.models import Model
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
from keras.preprocessing import image
import tensorflow as tf
from sklearn.metrics import classification_report
from google.colab import drive

## Get Data

In [5]:
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
data_directory = '/content/drive/MyDrive/dataset_project/'
os.chdir(data_directory)
clear_output()
train_dir= Path('./train')
test_dir = Path('./test')
val_dir  = Path('./val')



In [7]:
train_files= list(train_dir.glob(r'*/*.jpeg'))
test_files= list(test_dir.glob(r'*/*.jpeg'))
val_files = list(val_dir.glob(r'*/*.jpeg'))

print('List of train files ', train_files)
print('List of test files', test_files)
print('List of val files', val_files)


List of train files  [PosixPath('train/PNEUMONIA/person564_bacteria_2342.jpeg'), PosixPath('train/PNEUMONIA/person557_virus_1097.jpeg'), PosixPath('train/PNEUMONIA/person545_bacteria_2289.jpeg'), PosixPath('train/PNEUMONIA/person582_bacteria_2405.jpeg'), PosixPath('train/PNEUMONIA/person553_bacteria_2316.jpeg'), PosixPath('train/PNEUMONIA/person584_virus_1128.jpeg'), PosixPath('train/PNEUMONIA/person579_bacteria_2381.jpeg'), PosixPath('train/PNEUMONIA/person578_virus_1122.jpeg'), PosixPath('train/PNEUMONIA/person55_bacteria_262.jpeg'), PosixPath('train/PNEUMONIA/person587_bacteria_2421.jpeg'), PosixPath('train/PNEUMONIA/person561_bacteria_2331.jpeg'), PosixPath('train/PNEUMONIA/person567_virus_1107.jpeg'), PosixPath('train/PNEUMONIA/person544_virus_1078.jpeg'), PosixPath('train/PNEUMONIA/person529_bacteria_2228.jpeg'), PosixPath('train/PNEUMONIA/person551_bacteria_2310.jpeg'), PosixPath('train/PNEUMONIA/person577_virus_1121.jpeg'), PosixPath('train/PNEUMONIA/person562_bacteria_2332.jpe

In [8]:
labels_train= list(map(lambda x: os.path.split(os.path.split(x)[0])[1], train_files))
labels_train= pd.Series(labels_train, name= 'Label')
images_train= pd.Series(train_files, name= 'Filepath').astype(str)
labels_test = list(map(lambda x: os.path.split(os.path.split(x)[0])[1], test_files))
labels_test= pd.Series(labels_test, name= 'Label')
images_test = pd.Series(test_files, name= 'Filepath').astype(str)
labels_val= list(map(lambda x: os.path.split(os.path.split(x)[0])[1], val_files))
labels_val = pd.Series(labels_val, name= 'Label')
images_val = pd.Series(val_files, name= 'Filepath').astype(str)

In [9]:
train_data= pd.concat([images_train, labels_train], axis=1)
test_data = pd.concat([images_test, labels_test], axis=1)
val_data= pd.concat([images_val, labels_val], axis=1)
print(train_data)
print(len(train_data))
print(len(val_data))
print(len(test_data))

                                          Filepath      Label
0     train/PNEUMONIA/person564_bacteria_2342.jpeg  PNEUMONIA
1        train/PNEUMONIA/person557_virus_1097.jpeg  PNEUMONIA
2     train/PNEUMONIA/person545_bacteria_2289.jpeg  PNEUMONIA
3     train/PNEUMONIA/person582_bacteria_2405.jpeg  PNEUMONIA
4     train/PNEUMONIA/person553_bacteria_2316.jpeg  PNEUMONIA
...                                            ...        ...
5211           train/NORMAL/IM-0525-0001-0001.jpeg     NORMAL
5212           train/NORMAL/IM-0511-0001-0002.jpeg     NORMAL
5213                train/NORMAL/IM-0520-0001.jpeg     NORMAL
5214                train/NORMAL/IM-0516-0001.jpeg     NORMAL
5215           train/NORMAL/IM-0509-0001-0001.jpeg     NORMAL

[5216 rows x 2 columns]
5216
16
624


# Image Processing

In [32]:
def image_pre_proces(train_data, test_data,valid_data ,x_col, label_col):

   #To suit input
    w = 300
    h = 300
    b_size = 32
    sample_size = 100
    #normalizing the image
    train_normalize = ImageDataGenerator(samplewise_center= True,samplewise_std_normalization= True)
    #flow from directory with specified batch size and target image size
    train_images = train_normalize.flow_from_dataframe(dataframe= train_data,x_col= x_col,y_col= label_col,
                                                  class_mode= 'binary',batch_size= b_size,shuffle=True,target_size=(w,h))
    #print(train_images.next()[0])  
    train_images_sample = ImageDataGenerator().flow_from_dataframe(dataframe=train_data, x_col="Filepath", y_col="Label", 
        class_mode="binary", batch_size=sample_size, shuffle=True, target_size=(w,h))
    #get data sample
    data_sample= train_images_sample.next()[0]
    
    #use the sample to fit mean & std to the test set generator
    test_valid_gen = ImageDataGenerator(featurewise_center=True,featurewise_std_normalization= True)
    test_valid_gen.fit(data_sample)
    
    #get the test generator & valid generators
    test_images= test_valid_gen.flow_from_dataframe(dataframe= test_data,x_col=x_col,y_col= label_col,
                                                    class_mode='binary',batch_size= b_size,target_size=(w,h),shuffle=False)
    valid_images= test_valid_gen.flow_from_dataframe(dataframe= valid_data,x_col=x_col,y_col= label_col,
                                                    class_mode='binary',batch_size= b_size,target_size=(w,h),shuffle=False)
    return train_images,test_images,valid_images





In [33]:
train_images,test_images, valid_images = image_pre_proces(train_data,test_data,val_data, 'Filepath', 'Label')

Found 5216 validated image filenames belonging to 2 classes.
Found 5216 validated image filenames belonging to 2 classes.
Found 624 validated image filenames belonging to 2 classes.
Found 16 validated image filenames belonging to 2 classes.


# Model Establish

class imbalance

In [58]:
list_labels= list(train_data['Label'].value_counts().index)
def get_class_weights(labels):
    negative_weight = np.sum(labels)/len(labels)
    #Since total is 1
    positive_weight = 1-negative_weight
    return positive_weight, negative_weight

positive_weights, negative_weights= get_class_weights(train_images.labels)
print("positive and negative weights are ",get_class_weights(train_images.labels))

positive and negative weights are  (0.2570935582822086, 0.7429064417177914)


Training

In [59]:
#Since we have not have 10000 we used already used weight data for DenseNEt121
base_model = DenseNet121(
    include_top=False,
    weights='./DenseNET121_Weight/densenet.hdf5',
    )

x= base_model.output
#Pooling
x= GlobalAveragePooling2D()(x)
#output layer
predictions= Dense(units= 1, activation ="sigmoid")(x)
model= Model(inputs= base_model.input, outputs=predictions )

def get_weighted_loss_binary(pos_weights, neg_weights, epsilon=1e-7):
    def weighted_loss(y_true, y_pred):
        loss = 0.0
        loss +=(-1*pos_weights*y_true* K.log(y_pred+epsilon)+ \
            -1*neg_weights*(1-y_true)* K.log(1-y_pred+epsilon))

        return loss
    
    return weighted_loss
model.compile(optimizer= 'adam', loss=get_weighted_loss_binary(positive_weights, negative_weights),metrics= ['accuracy'])

In [61]:
history = model.fit(train_images,
                    epochs= 5,
                    validation_data=valid_images,
                    steps_per_epoch=100,
                    validation_steps=2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


Results

In [54]:
Result_validate = model.evaluate(valid_images)
Result_test = model.evaluate(test_images)
Result_train = model.evaluate(train_images)

print("Valid accuracy :", Result_validate[1]*100,"%")
print("Test accuracy :", Result_test[1]*100,"%")
print("Train accuracy :", Result_train[1]*100,"%")

print("Valid loss :", Result_validate[0])
print("Test loss :", Result_test[0])
print("Train loss :", Result_train[0])


Valid accuracy : 87.5 %
Test accuracy : 74.67948794364929 %
Train accuracy : 93.90337467193604 %
Valid loss : 0.8656495213508606
Test loss : 0.9983369708061218
Train loss : 0.09384859353303909


In [57]:
y_true= test_images.classes
proba_predictions= model.predict(test_images)
y_predictions= (proba_predictions > 0.5).astype('int32')
print(classification_report(y_true,y_predictions))

              precision    recall  f1-score   support

           0       0.92      0.35      0.51       234
           1       0.72      0.98      0.83       390

    accuracy                           0.75       624
   macro avg       0.82      0.67      0.67       624
weighted avg       0.79      0.75      0.71       624

