In [1]:

from tensorflow.keras.layers import Input, Lambda,Activation, Dense, Flatten,Dropout,Conv2D,MaxPooling2D,BatchNormalization, Rescaling, AveragePooling2D, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.preprocessing import image, image_dataset_from_directory
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import DenseNet121, ResNet50
from keras.utils import np_utils
from keras.models import load_model
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import svm
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import cv2
import gc
import matplotlib.pyplot as plt
import time
from numpy import asarray
from numpy import dstack
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
def len_file(t):
    """Return the number of lines in the text file at path ``t``."""
    with open(t, 'r') as handle:
        # Iterate the handle directly so lines are counted one at a time.
        line_total = sum(1 for _ in handle)
    return line_total

Each line in the label files has the following format:

filename class xmin ymin xmax ymax

In [3]:
def load_labels(label_file):
    """Load image filenames and integer class labels from a label file.

    Every non-blank line must hold six whitespace-separated fields,
    ``filename class xmin ymin xmax ymax``. The four bounding-box fields are
    read but discarded. Blank (or whitespace-only) lines are skipped.

    Args:
        label_file: Path of the label text file.

    Returns:
        A ``pandas.DataFrame`` with columns ``src`` (filename as written in
        the file) and ``class`` (label converted with ``int``), one row per
        non-blank line, in file order.

    Raises:
        ValueError: If a non-blank line does not have exactly six fields, or
            its class field cannot be converted to ``int``.
    """
    fnames, classes = [], []
    with open(label_file, 'r') as f:
        for lineno, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                # Stray blank lines carry no label; skipping them avoids an
                # opaque unpacking error.
                continue
            if len(fields) != 6:
                raise ValueError(
                    "%s:%d: expected 6 fields "
                    "(filename class xmin ymin xmax ymax), got %d"
                    % (label_file, lineno, len(fields))
                )
            fnames.append(fields[0])
            classes.append(int(fields[1]))
    return pd.DataFrame({'src': fnames, 'class': classes})

In [4]:
# Settings shared by the data generators defined in the cells below.
batchsize = 8              # images per generator batch
nb_epoch = 1               # not referenced in the visible cells
targetsize = (256, 256)    # size passed as target_size to the generators

# Validation images are only rescaled from [0, 255] to [0, 1]; no augmentation.
val_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [5]:
# Validation labels, plus an unshuffled image stream over the same rows.
val_df = load_labels('/kaggle/input/covidxct/val_COVIDx_CT-3A.txt')
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory='/kaggle/input/covidxct/3A_images',
    x_col="src",
    y_col="class",
    class_mode="raw",
    color_mode='rgb',
    target_size=targetsize,
    batch_size=batchsize,
    # shuffle is off so predictions can later be paired with val_df['class'].
    shuffle=False,
    seed=42,
)

Found 33725 validated image filenames.


In [6]:
val_file = '/kaggle/input/covidxct/val_COVIDx_CT-3A.txt'
val_len = len_file(val_file)
# Batches needed to cover every labelled validation line once. np.ceil
# returns a float, while Keras documents `steps` as an integer, so cast.
step_size_val = int(np.ceil(val_len / batchsize))

In [7]:
# Load the three pre-trained ensemble members from their saved HDF5 files.
model1 = load_model('/kaggle/input/ensemble-test/densenet-v54/densenetmodel.hdf5')
model2 = load_model('/kaggle/input/ensemble-test/resnet-v45/resnetmodel.hdf5')
model3 = load_model('/kaggle/input/ensemble-test/densenet-2/densenet-2.hdf5')

# Give each member a distinct name; stacked_dataset prints `_name` while it
# runs each model.
model1._name = "model1"
model2._name = "model2"
model3._name = "model3"

In [8]:
# Ensemble members, in the order their predictions are stacked.
models = [model1, model2, model3]

In [9]:
def stacked_dataset(members, inputX, step_size_test):
    """Build the meta-learner feature matrix from the members' predictions.

    Each model in ``members`` predicts on ``inputX``. The per-model outputs
    are stacked along a new last axis and flattened to one row per sample.

    Args:
        members: Non-empty iterable of models exposing a ``_name`` attribute
            and ``predict(inputX, steps=..., verbose=...)``.
        inputX: Input forwarded unchanged to every model's ``predict``.
        step_size_test: Forwarded as the ``steps`` argument of ``predict``.

    Returns:
        A 2-D array with one row per prediction row. Assuming each model
        returns a ``(rows, probabilities)`` array, the result has
        ``probabilities * len(members)`` columns, grouped by class: all
        members' scores for the first class, then the second, and so on.

    Raises:
        ValueError: If ``members`` is empty.
    """
    member_preds = []
    for model in members:
        # Progress marker: one line per member as its predictions start.
        print(model._name)
        member_preds.append(
            model.predict(inputX, steps=step_size_test, verbose=0)
        )
    if not member_preds:
        raise ValueError("stacked_dataset requires at least one member model")
    # Stack once into [rows, probabilities, members]. Doing it in one call
    # also handles a single member, which the incremental form left 2-D.
    stackX = dstack(member_preds)
    # Flatten to [rows, probabilities x members].
    return stackX.reshape((stackX.shape[0], stackX.shape[1] * stackX.shape[2]))

In [10]:
def fit_stacked_model(members, inputX, step_size, inputy):
    """Fit the SVM meta-learner on the members' stacked predictions.

    Args:
        members: Models handed to ``stacked_dataset``.
        inputX: Input forwarded to ``stacked_dataset``.
        step_size: Step count forwarded to ``stacked_dataset``.
        inputy: Target labels, one per row of the stacked predictions.

    Returns:
        The fitted ``svm.SVC`` (polynomial kernel).
    """
    meta_features = stacked_dataset(members, inputX, step_size)
    # scikit-learn's fit() returns the estimator itself, so the fitted SVC
    # can be returned directly.
    return svm.SVC(kernel='poly').fit(meta_features, inputy)

In [11]:
def stacked_prediction(members, model, inputX, step_size):
    """Predict with the fitted meta-learner on the members' stacked outputs.

    Args:
        members: Models handed to ``stacked_dataset``.
        model: Fitted meta-learner exposing ``predict``.
        inputX: Input forwarded to ``stacked_dataset``.
        step_size: Step count forwarded to ``stacked_dataset``.

    Returns:
        Whatever ``model.predict`` returns for the stacked feature matrix.
    """
    meta_features = stacked_dataset(members, inputX, step_size)
    return model.predict(meta_features)

In [12]:
# Test images get the same [0, 1] rescaling as validation, nothing else.
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [13]:
# Test labels are kept in test_df. The generator yields images only
# (class_mode=None) and is unshuffled, so predictions keep the order of test_df.
test_df = load_labels('/kaggle/input/covidxct/test_COVIDx_CT-3A.txt')
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='/kaggle/input/covidxct/3A_images',
    x_col="src",
    y_col=None,
    class_mode=None,
    color_mode='rgb',
    target_size=targetsize,
    batch_size=batchsize,
    shuffle=False,
    seed=42,
)

Found 33781 validated image filenames.


In [14]:
# The step count is derived from the label file's line count. An equivalent
# source would be test_generator.n / test_generator.batch_size.
test_len = len_file('/kaggle/input/covidxct/test_COVIDx_CT-3A.txt')
# np.ceil returns a float, while Keras documents `steps` as an integer, so cast.
step_size_test = int(np.ceil(test_len / batchsize))

In [15]:
# Train the meta-learner on validation-set predictions.
# NOTE(review): assumes val_df['class'] lines up row-for-row with the
# generator's output (no filenames dropped during validation) — confirm.
model_ensemble = fit_stacked_model(
    members=models,
    inputX=val_generator,
    step_size=step_size_val,
    inputy=val_df['class'],
)


model1
model2
model3


In [16]:
# Ensemble predictions for the held-out test images.
pred = stacked_prediction(
    members=models,
    model=model_ensemble,
    inputX=test_generator,
    step_size=step_size_test,
)

model1
model2
model3


In [17]:
# Ground-truth test labels, in the same order as the unshuffled generator.
y = list(test_df['class'])
accuracy_score(y_true=y, y_pred=pred)

0.9644770729108079

In [18]:
# Per-class precision / recall / F1 for the ensemble on the test set.
print(classification_report(y_true=y, y_pred=pred))

              precision    recall  f1-score   support

           0       0.98      0.98      0.98     17922
           1       0.98      0.93      0.96      7965
           2       0.91      0.95      0.93      7894

    accuracy                           0.96     33781
   macro avg       0.96      0.96      0.96     33781
weighted avg       0.97      0.96      0.96     33781



In [19]:
# Rows are true classes, columns are predicted classes.
confusion_matrix(y_true=y, y_pred=pred)

array([[17621,    28,   273],
       [  104,  7425,   436],
       [  233,   126,  7535]])