In [1]:
from keras.models import Sequential
#Import from keras_preprocessing not from keras.preprocessing
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization, Rescaling
from keras.layers import Conv2D, MaxPooling2D, Conv3D
from keras.callbacks import ModelCheckpoint
from keras import regularizers, optimizers, Input
import pandas as pd
import numpy as np
from keras.optimizers import RMSprop, Adam
import ast
import random
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import MultiLabelBinarizer
import os
import cv2
import re
from keras.applications import ResNet50
from keras.applications import VGG19
from tensorflow.keras.models import Model
from keras.saving import load_model

2023-10-10 11:22:55.275284: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# Load the annotation table and keep only the image file name and its labels.
traindf = pd.read_csv('ODIR-5K_Training_Preprocess.csv')
# .copy() so the column assignment below works on an independent frame
# (avoids pandas' SettingWithCopyWarning on a slice).
traindf = traindf[['ID', 'labels']].copy()
# The 'labels' column holds the text of a Python list; parse it into a real list.
traindf['labels'] = traindf['labels'].apply(ast.literal_eval)
# One indicator column per label code:
#   explode  -> one row per (image, label), original index repeated
#   dummies  -> one 0/1 column per distinct label
#   groupby  -> collapse back to one row per image
# `groupby(level=0).sum()` replaces the deprecated `.sum(level=0)`.
traindfdum = (
    pd.get_dummies(traindf['labels'].explode())
    .groupby(level=0)
    .sum()
    .astype(int)
)
traindf = pd.concat([traindf, traindfdum], axis=1)

  traindfdum = pd.get_dummies(traindf['labels'].apply(pd.Series).stack()).sum(level=0)


In [10]:
# Collect the indicator columns A, C, D, G, H, M, N and O (in that order)
# into a single column holding one list per row.
traindf['coded labels'] = traindf.apply(
    lambda row: [row[code] for code in ('A', 'C', 'D', 'G', 'H', 'M', 'N', 'O')],
    axis=1,
)

In [11]:
# Build the 'Name' column: the ID without its file extension, followed by
# "_" and the text form of the coded label list.
id_stems = traindf['ID'].map(lambda filename: os.path.splitext(filename)[0])
label_text = traindf['coded labels'].astype(str)
traindf['Name'] = id_stems.str.cat(label_text, sep="_")
traindf

Unnamed: 0,ID,labels,A,C,D,G,H,M,N,O,coded labels,Name
0,0_left.jpg,[C],0,1,0,0,0,0,0,0,"[0, 1, 0, 0, 0, 0, 0, 0]","0_left_[0, 1, 0, 0, 0, 0, 0, 0]"
1,0_right.jpg,[N],0,0,0,0,0,0,1,0,"[0, 0, 0, 0, 0, 0, 1, 0]","0_right_[0, 0, 0, 0, 0, 0, 1, 0]"
2,1_left.jpg,[N],0,0,0,0,0,0,1,0,"[0, 0, 0, 0, 0, 0, 1, 0]","1_left_[0, 0, 0, 0, 0, 0, 1, 0]"
3,1_right.jpg,[N],0,0,0,0,0,0,1,0,"[0, 0, 0, 0, 0, 0, 1, 0]","1_right_[0, 0, 0, 0, 0, 0, 1, 0]"
4,2_left.jpg,"[D, O]",0,0,1,0,0,0,0,1,"[0, 0, 1, 0, 0, 0, 0, 1]","2_left_[0, 0, 1, 0, 0, 0, 0, 1]"
...,...,...,...,...,...,...,...,...,...,...,...,...
6995,4689_right.jpg,[N],0,0,0,0,0,0,1,0,"[0, 0, 0, 0, 0, 0, 1, 0]","4689_right_[0, 0, 0, 0, 0, 0, 1, 0]"
6996,4690_left.jpg,[D],0,0,1,0,0,0,0,0,"[0, 0, 1, 0, 0, 0, 0, 0]","4690_left_[0, 0, 1, 0, 0, 0, 0, 0]"
6997,4690_right.jpg,[D],0,0,1,0,0,0,0,0,"[0, 0, 1, 0, 0, 0, 0, 0]","4690_right_[0, 0, 1, 0, 0, 0, 0, 0]"
6998,4784_left.jpg,"[A, H]",1,0,0,0,1,0,0,0,"[1, 0, 0, 0, 1, 0, 0, 0]","4784_left_[1, 0, 0, 0, 1, 0, 0, 0]"


In [12]:
# Rename every image from its original ID to '<Name>.jpg' so the label vector
# is carried in the file name.
directory = './ODIR-5K_Training_Dataset/'
# zip over the two columns pairs values positionally, so this does not depend
# on the frame having a default 0..n-1 index.
for original_name, new_stem in zip(traindf['ID'], traindf['Name']):
    source_path = os.path.join(directory, original_name)
    target_path = os.path.join(directory, new_stem + '.jpg')
    # Re-running the cell must not fail: skip files that were already renamed
    # by a previous run (target present, source gone).
    if os.path.exists(target_path) and not os.path.exists(source_path):
        continue
    os.rename(source_path, target_path)
    

In [None]:
train_dir = './ODIR-5K_Training_Dataset/'
test_dir = './ODIR-5K_Testing_Images/'
# os.listdir returns entries in arbitrary order; sort them so the path lists
# (and therefore the seeded train/validation split built from them) are the
# same on every run and every machine.
train_img = [os.path.join(train_dir, i) for i in sorted(os.listdir(train_dir))]
test_img = [os.path.join(test_dir, i) for i in sorted(os.listdir(test_dir))]


In [None]:
def preprocess_image(image_list, img_size=224):
    """Load, resize and scale images, decoding each label from its file name.

    Every file name is expected to contain its label vector as a bracketed,
    comma-separated list of integers, e.g.
    ``0_left_[0, 1, 0, 0, 0, 0, 0, 0].jpg``.

    Parameters
    ----------
    image_list : iterable of str
        Paths of the image files to load.
    img_size : int, default 224
        Side length, in pixels, of the square images returned. This used to be
        read from a global ``img_size`` that this notebook never defines; the
        resulting NameError was swallowed by a bare ``except`` and every image
        was silently skipped.

    Returns
    -------
    X : list of numpy.ndarray
        float32 images of shape ``(img_size, img_size, channels)``, pixel
        values divided by 255, in the channel order produced by ``cv2.imread``.
    y : list of list of int
        Label vector parsed from each file name; ``y[i]`` belongs to ``X[i]``.

    Notes
    -----
    Files without a parsable label, or that OpenCV cannot read, are skipped and
    reported in a single warning, so ``X`` and ``y`` always stay aligned.
    """
    import warnings

    X = []  # images
    y = []  # label vectors parsed from the file names
    pattern = r'\[([\d,\s]+)\]'
    skipped = []

    for image in tqdm(image_list):
        # Parse the label first (cheap) and only from the file name, so
        # brackets in a directory name can never be mistaken for the label.
        match = re.search(pattern, os.path.basename(image))
        if match is None:
            skipped.append(image)
            continue
        try:
            label = [int(i) for i in match.group(1).split(',')]
        except ValueError:
            # The pattern also admits malformed lists such as "[1,,0]".
            skipped.append(image)
            continue

        # cv2.imread signals an unreadable file by returning None, not raising.
        img = cv2.imread(image)
        if img is None:
            skipped.append(image)
            continue

        img = cv2.resize(img, (img_size, img_size), interpolation=cv2.INTER_CUBIC)
        # Scale pixel values by 1/255.
        X.append(img.astype(np.float32) / 255.)
        y.append(label)

    if skipped:
        warnings.warn(
            f"preprocess_image skipped {len(skipped)} file(s) with no parsable "
            f"label or unreadable image data, e.g. {skipped[0]!r}"
        )

    return X, y
    

In [None]:
# Load every training image together with the label vector parsed from its name.
X_train, y_train = preprocess_image(train_img)

# Fail fast with a clear message: an empty or misaligned result would otherwise
# only surface later as an obscure indexing/shape error.
if len(X_train) == 0:
    raise RuntimeError("preprocess_image returned no images; check train_img and the image size setting")
if len(X_train) != len(y_train):
    raise RuntimeError(f"images and labels are misaligned: {len(X_train)} images vs {len(y_train)} labels")

In [None]:
# Preview the first four training images side by side.
fig, axes = plt.subplots(1, 4, figsize=(20, 5))
for ax, sample in zip(axes, X_train[:4]):
    # The images were loaded with cv2.imread, which stores channels as BGR,
    # while imshow interprets the last axis as RGB; reverse the channel axis
    # so colours are displayed correctly.
    ax.imshow(sample[..., ::-1])
plt.show()

In [None]:
# Hold out a 500/3500 fraction of the samples for validation; the seed is
# fixed so the shuffle is repeatable for a given input order.
validation_fraction = 500 / 3500
X_train, X_validation, y_train, y_validation = train_test_split(
    X_train,
    y_train,
    test_size=validation_fraction,
    random_state=42,
    shuffle=True,
)

In [None]:
# Convert the four splits to float32 NumPy arrays in one pass.
# The source tuple is evaluated before any name is rebound.
X_train, y_train, X_val, y_val = (
    np.asarray(split, dtype=np.float32)
    for split in (X_train, y_train, X_validation, y_validation)
)

In [None]:
# Random augmentation applied on the fly to the training images.
train_generator = ImageDataGenerator(rotation_range=5,
                                     horizontal_flip=True,
                                     width_shift_range=0.5,
                                     height_shift_range=0.5,
                                     shear_range=0.5,
                                     zoom_range=0.5,
                                     fill_mode='nearest'
                                     )
# No train_generator.fit(X_train) call: fit() only computes data statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which is enabled here, so it would just copy the whole training set in memory.

In [None]:
# Shape of a single training image, taken from the first sample.
IMG_SHAPE = np.shape(X_train[0])

In [None]:
# ImageNet-pretrained VGG19 convolutional base, frozen so only the new
# classification head is trained.
base_model = VGG19(include_top=False, weights='imagenet', input_shape=IMG_SHAPE)
base_model.trainable = False

model = Sequential()
# No Rescaling(1./255) layer here: preprocess_image already divides the pixel
# values by 255, so rescaling again would shrink the inputs to [0, 1/255].
model.add(Input(shape=IMG_SHAPE))
model.add(base_model)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(.2))
# One independent sigmoid unit per label code.
model.add(Dense(8, activation='sigmoid'))

# Sanity check
print(f"\nModel input shape:\n{model.input_shape}\nModel output shape:\n"\
f"{model.output_shape}\n\n\nModel summary:")
model.summary()

In [None]:
# Checkpointing: the full model (not only its weights) is written to
# checkpoint_filepath whenever validation accuracy reaches a new maximum.
checkpoint_filepath = './Checkpoint'
checkpoint_options = dict(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)
model_checkpoint_callback = ModelCheckpoint(**checkpoint_options)

In [None]:
# Hyperparameters
batch_size = 2**6
initial_epochs = 5  # 15
learn_rate = 0.001
adam = Adam(learning_rate=learn_rate)

# The output layer has 8 independent sigmoid units and a sample may carry
# several labels at once, so each unit is scored with binary cross-entropy.
# Categorical cross-entropy assumes exactly one class per sample.
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

# validation_steps is intentionally not set: validation_data is a pair of
# in-memory arrays, which Keras evaluates in full each epoch. Asking for
# len(X_val) batches would request far more batches than the data contains.
history = model.fit(train_generator.flow(X_train, y_train, batch_size=batch_size),
                    epochs=initial_epochs,
                    validation_data=(X_val, y_val),
                    steps_per_epoch=20,  # fixed number of augmented batches per epoch
                    callbacks=[model_checkpoint_callback],
                    verbose=1)


In [None]:
# Pull the per-epoch training curves out of the Keras History object.
acc, val_acc, loss, val_loss = (
    history.history[key]
    for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)

# One panel per metric: (training curve, validation curve, labels, y-label,
# legend position, title).
panels = [
    (acc, val_acc, 'Training Accuracy', 'Validation Accuracy',
     'Accuracy', 'lower right', 'Training and Validation Accuracy'),
    (loss, val_loss, 'Training Loss', 'Validation Loss',
     'Loss', 'upper right', 'Training and Validation Loss'),
]

fig, axes = plt.subplots(2, 1, figsize=(8, 8))
for ax, (train_curve, val_curve, train_label, val_label,
         y_label, legend_loc, title) in zip(axes, panels):
    ax.plot(train_curve, label=train_label)
    ax.plot(val_curve, label=val_label)
    ax.legend(loc=legend_loc)
    ax.set_ylabel(y_label)
    ax.set_title(title)
# Only the bottom panel carries the x-axis label.
axes[-1].set_xlabel('epoch')
plt.show()

# _ , accuracy = model.evaluate(x=X_test,y=y_test,batch_size= batch_size,verbose=1)
# print(f'Model accuracy on test set: {round(accuracy,3)*100}%')