In [1]:

from tensorflow.keras.layers import Input, Lambda,Activation, Dense, Flatten,Dropout,Conv2D,MaxPooling2D,BatchNormalization, Rescaling, AveragePooling2D, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.preprocessing import image, image_dataset_from_directory
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import DenseNet121, ResNet50
from keras.utils import np_utils
from keras.models import load_model
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import cv2
import gc
import matplotlib.pyplot as plt
import time
from numpy import asarray
from sklearn.metrics import accuracy_score

In [2]:
def len_file(t):
    """Return the number of lines in the text file at path ``t``.

    Args:
        t: Path of the file to count; opened in text mode for reading.

    Returns:
        int: The line count (0 for an empty file). Blank lines are counted
        like any other line.
    """
    with open(t, 'r') as f:
        # Iterate lazily instead of materialising every line with readlines();
        # the label files here hold hundreds of thousands of entries.
        return sum(1 for _ in f)

Each line in the label files has the following format:

filename class xmin ymin xmax ymax

In [3]:
def load_labels(label_file):
    """Load image filenames and integer class ids from a label file.

    Every non-blank line must hold six whitespace-separated fields,
    ``filename class xmin ymin xmax ymax``. Only the first two are kept;
    the four bounding-box fields are ignored.

    Args:
        label_file: Path of the label file to read.

    Returns:
        pandas.DataFrame with columns ``'src'`` (filename) and ``'class'``
        (class id as int), one row per label line in file order.

    Raises:
        ValueError: If a non-blank line does not have exactly six fields,
            or if its class field is not an integer.
    """
    fnames, classes = [], []
    with open(label_file, 'r') as f:
        for lineno, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline at end of file) carry
                # no label; skip them instead of failing on the unpack below.
                continue
            if len(fields) != 6:
                raise ValueError(
                    '%s:%d: expected 6 fields, got %d'
                    % (label_file, lineno, len(fields))
                )
            fnames.append(fields[0])
            classes.append(int(fields[1]))
    return pd.DataFrame({'src': fnames, 'class': classes})

In [4]:
# Hyper-parameters shared by the generator and training cells below.
batchsize = 8
nb_epoch = 3
targetsize = (256, 256)  # passed as target_size to every flow_from_dataframe call

# Training and validation images are only rescaled by 1/255; no augmentation
# options are configured on either generator.
train_datagen = ImageDataGenerator(rescale=1.0 / 255.0)
validation_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [5]:
# Read the training label list and preview its first rows.
train_df = load_labels('/kaggle/input/covidxct/train_COVIDx_CT-3A.txt')
print(train_df.head())

# Stream shuffled RGB batches from disk. class_mode="raw" hands the integer
# class column through unchanged instead of one-hot encoding it.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory='/kaggle/input/covidxct/3A_images',
    x_col="src",
    y_col="class",
    class_mode="raw",
    color_mode='rgb',
    target_size=targetsize,
    batch_size=batchsize,
    shuffle=True,
    seed=42,
)

                    src  class
0  NCP_96_1328_0032.png      2
1  NCP_96_1328_0035.png      2
2  NCP_96_1328_0036.png      2
3  NCP_96_1328_0037.png      2
4  NCP_96_1328_0038.png      2
Found 357518 validated image filenames.


In [6]:
# Validation split: same image directory and generator settings as the
# training cell, driven by the validation label list.
val_df = load_labels('/kaggle/input/covidxct/val_COVIDx_CT-3A.txt')
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory='/kaggle/input/covidxct/3A_images',
    x_col="src",
    y_col="class",
    class_mode="raw",
    color_mode='rgb',
    target_size=targetsize,
    batch_size=batchsize,
    shuffle=True,
    seed=42,
)

Found 33725 validated image filenames.


In [7]:
def build_model():
    """Build and compile the DenseNet121-based 3-class classifier.

    Architecture, in order: a 256x256x3 input, a 3x3 'same'-padded convolution
    producing 3 channels, the ImageNet-initialised DenseNet121 backbone without
    its top, global average pooling, then BatchNorm -> Dropout(0.4) ->
    Dense(256, relu) -> BatchNorm -> Dropout(0.4), and a 3-way softmax layer
    named 'root'.

    The model is compiled with Adam and sparse categorical cross-entropy
    (integer class labels), tracks accuracy, and prints its summary.

    Returns:
        The compiled Keras ``Model``.
    """
    #backbone = ResNet50(weights='imagenet', include_top=False)
    backbone = DenseNet121(weights='imagenet', include_top=False)
    image_in = Input(shape=(256,256,3))

    # Learnable 3-channel convolution placed in front of the pretrained backbone.
    features = Conv2D(3, (3, 3), padding='same')(image_in)
    features = backbone(features)

    # Classification head.
    features = GlobalAveragePooling2D()(features)
    features = BatchNormalization()(features)
    features = Dropout(0.4)(features)
    features = Dense(256, activation='relu')(features)
    features = BatchNormalization()(features)
    features = Dropout(0.4)(features)

    # Single softmax output over the three classes.
    probabilities = Dense(3,activation = 'softmax', name='root')(features)

    classifier = Model(image_in, probabilities)

    adam = Adam(learning_rate=0.005, beta_1=0.9, beta_2=0.999, epsilon=0.1)
    classifier.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=adam,
        metrics=['accuracy'],
    )
    classifier.summary()

    return classifier

In [8]:
# Resume from a previously saved checkpoint. Swap in one of the commented
# alternatives to train from the ImageNet-initialised architecture or to load
# a different checkpoint.
#model = build_model()
model = load_model('/kaggle/input/k/anu10m/covidxct/densenetmodel.hdf5')
#model = load_model('/kaggle/working/resnetmodel-03.hdf5')
model.summary()

# Multiply the learning rate by 0.70 after 5 epochs without val_accuracy
# improvement. The lower bound parameter is `min_lr`; the previous
# `min_learning_rate` keyword is not a ReduceLROnPlateau parameter, so the
# intended 1e-4 floor was never in effect.
annealer = ReduceLROnPlateau(monitor='val_accuracy', factor=0.70, patience=5, verbose=1, min_lr=1e-4)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 256, 256, 3)]     0         
_________________________________________________________________
conv2d (Conv2D)              (None, 256, 256, 3)       84        
_________________________________________________________________
densenet121 (Functional)     (None, None, None, 1024)  7037504   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1024)              0         
_________________________________________________________________
batch_normalization (BatchNo (None, 1024)              4096      
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 256)               262400

In [9]:
# Save the model whenever validation loss reaches a new minimum, overwriting
# the same file each time. Insert '-{epoch:02d}' before the extension to keep
# one file per epoch instead.
# `save_freq='epoch'` replaces the deprecated `period=1` argument; both mean
# "check at the end of every epoch".
checkpoint = tf.keras.callbacks.ModelCheckpoint('densenetmodel.hdf5',
                                                monitor='val_loss',
                                                verbose=1,
                                                save_best_only=True,
                                                mode='min',
                                                save_freq='epoch')

In [10]:
# Derive the number of batches per epoch from the label-file line counts.
train_file = '/kaggle/input/covidxct/train_COVIDx_CT-3A.txt'
val_file = '/kaggle/input/covidxct/val_COVIDx_CT-3A.txt'
train_len = len_file(train_file)
val_len = len_file(val_file)

# np.ceil returns a float; Keras documents steps_per_epoch / validation_steps
# as integers, so cast after rounding up to include the final partial batch.
step_size_train = int(np.ceil(train_len / batchsize))
step_size_validate = int(np.ceil(val_len / batchsize))

In [11]:
# Train for nb_epoch epochs, validating after each one. The annealer lowers
# the learning rate on plateaus and the checkpoint callback saves the best model.
model.fit(
    train_generator,
    steps_per_epoch=step_size_train,
    epochs=nb_epoch,
    validation_data=validation_generator,
    validation_steps=step_size_validate,
    callbacks=[annealer, checkpoint],
    verbose=1,
)

Epoch 1/3

Epoch 00001: val_loss improved from inf to 0.46385, saving model to densenetmodel.hdf5
Epoch 2/3

Epoch 00002: val_loss improved from 0.46385 to 0.28495, saving model to densenetmodel.hdf5
Epoch 3/3

Epoch 00003: val_loss did not improve from 0.28495


<keras.callbacks.History at 0x7fa6ce1756d0>

In [12]:
# Test images get the same 1/255 rescaling as the training and validation generators.
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [13]:
# Test split: images only (y_col=None, class_mode=None) and shuffle=False, so
# the prediction order follows the row order of test_df.
test_df = load_labels('/kaggle/input/covidxct/test_COVIDx_CT-3A.txt')
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='/kaggle/input/covidxct/3A_images',
    x_col="src",
    y_col=None,
    class_mode=None,
    color_mode='rgb',
    target_size=targetsize,
    batch_size=batchsize,
    shuffle=False,
    seed=42,
)

Found 33781 validated image filenames.


In [14]:
# Number of prediction batches, derived from the test label-file line count.
# Alternative based on the generator itself:
#step_size_test=int(np.ceil(test_generator.n/test_generator.batch_size))
test_len = len_file('/kaggle/input/covidxct/test_COVIDx_CT-3A.txt')
# np.ceil returns a float; Keras documents `steps` as an integer, so cast
# after rounding up to include the final partial batch.
step_size_test = int(np.ceil(test_len / batchsize))

In [15]:
#model.evaluate_generator(test_generator, steps = step_size_test, verbose  = 1)

In [16]:
# Run inference over the test generator; each row of `pred` holds the model's
# softmax output for one image.
pred = model.predict(x=test_generator, steps=step_size_test, verbose=1)



In [17]:
# Predicted class = index of the largest score in each row of `pred`.
predicted_class_indices = pred.argmax(axis=1)

In [18]:
# Ground-truth class ids in test_df row order, compared against the predictions.
y = test_df['class'].tolist()
accuracy_score(y_true=y, y_pred=predicted_class_indices)

0.9426304727509547

In [19]:
# Per-class precision, recall and F1 on the test set.
print(classification_report(y_true=y, y_pred=predicted_class_indices))

              precision    recall  f1-score   support

           0       0.99      0.96      0.97     17922
           1       0.98      0.88      0.93      7965
           2       0.83      0.97      0.90      7894

    accuracy                           0.94     33781
   macro avg       0.93      0.94      0.93     33781
weighted avg       0.95      0.94      0.94     33781

