In [1]:
# import system libs
import os
import time
import itertools

# import data handling tools
import pandas as pd
import numpy as np
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import precision_score, f1_score, recall_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
import plotly.express as px

# import Deep learning Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Input, Average, Dense, Concatenate
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Ignore Warnings
import warnings
warnings.filterwarnings("ignore")

print ('modules loaded')

modules loaded


In [2]:
# Generate data paths with labels
def define_paths(data_dir):
    filepaths = []
    labels = []

    folders = os.listdir(data_dir)
    for folder in folders:
        folder_path = os.path.join(data_dir, folder)
        files = os.listdir(folder_path)
        for file in files:
            fpath = os.path.join(folder_path, file)
            filepaths.append(fpath)
            labels.append(folder)

    return filepaths, labels


# Concatenate data paths with labels into one dataframe ( to later be fitted into the model )
def define_df(files, classes):
    Fseries = pd.Series(files, name= 'filepaths')
    Lseries = pd.Series(classes, name='labels')
    return pd.concat([Fseries, Lseries], axis= 1)

# Split dataframe to train, valid, and test
def split_data(data_dir):
    # train dataframe
    files, classes = define_paths(data_dir)
    df = define_df(files, classes)
    strat = df['labels']
    train_df, dummy_df = train_test_split(df,  train_size= 0.8, shuffle= True, random_state= 123, stratify= strat)

    # valid and test dataframe
    strat = dummy_df['labels']
    valid_df, test_df = train_test_split(dummy_df,  train_size= 0.5, shuffle= True, random_state= 123, stratify= strat)

    return train_df, valid_df, test_df

In [3]:
def create_gens (train_df, valid_df, test_df, batch_size):
    '''
    This function takes train, validation, and test dataframe and fit them into image data generator, because model takes data from image data generator.
    Image data generator converts images into tensors. '''


    # define model parameters
    img_size = (224, 224)
    channels = 3 # either BGR or Grayscale
    color = 'rgb'
    img_shape = (img_size[0], img_size[1], channels)

    datagen = ImageDataGenerator(preprocessing_function= None)

    train_gen = datagen.flow_from_dataframe( train_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                        color_mode= color, shuffle= True, batch_size= batch_size)

    valid_gen = datagen.flow_from_dataframe( valid_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                        color_mode= color, shuffle= True, batch_size= batch_size)

    test_gen = datagen.flow_from_dataframe( test_df, x_col= 'filepaths', y_col= 'labels', target_size= img_size, class_mode= 'categorical',
                                        color_mode= color, shuffle= False, batch_size= batch_size)

    return train_gen, valid_gen, test_gen

In [4]:
data_dir = './Pill Dataset'

try:
    # Get splitted data
    train_df, valid_df, test_df = split_data(data_dir)

    # Get Generators
    batch_size = 40
    train_gen, valid_gen, test_gen = create_gens(train_df, valid_df, test_df, batch_size)

except:
    print('Invalid Input')

Found 16000 validated image filenames belonging to 10 classes.
Found 2000 validated image filenames belonging to 10 classes.
Found 2000 validated image filenames belonging to 10 classes.


In [5]:
# Load the saved base models
base_model1 = load_model("ResNet50_93.15.h5")
base_model2 = load_model("InceptionV3_96.15.h5")
base_model3 = load_model("MobileNet.h5")

In [6]:
base_model1 = Model(inputs=base_model1.inputs,
                outputs=base_model1.outputs,
                name='ResNet50')

base_model2 = Model(inputs=base_model2.inputs,
                outputs=base_model2.outputs,
                name='InceptionV3')

base_model3 = Model(inputs=base_model3.inputs,
                outputs=base_model3.outputs,
                name='MobileNet')

In [8]:
# Create AdaBoost classifiers with Decision Trees as weak learners
base_models = [base_model1, base_model2, base_model3]
adaboost_classifiers = []

# Record start time
start_time = time.time()

for base_model in base_models:
    base_model_name = base_model.name
    base_model_features = base_model.predict(train_gen, verbose=1)
    adaboost_classifier = AdaBoostClassifier(
        base_estimator=DecisionTreeClassifier(max_depth=2),
        n_estimators=50,  # You can adjust the number of estimators
        learning_rate=1.0,
        random_state=42
    )
    adaboost_classifier.fit(base_model_features, train_gen.classes)
    adaboost_classifiers.append((base_model_name, adaboost_classifier))
    
# Record end time
end_time = time.time()

# Calculate training duration
training_duration = end_time - start_time
print(f"Training duration: {training_duration} seconds")

Training duration: 2192.993932723999 seconds


In [9]:
# Evaluate the AdaBoost ensemble on the validation set
validation_features = [base_model.predict(test_gen, verbose=1) for base_model in base_models]

for model_name, adaboost_classifier in adaboost_classifiers:
    accuracy = adaboost_classifier.score(validation_features[base_models.index(model_name)], test_gen.classes)
    print(f"Accuracy of {model_name}: {accuracy}")



ValueError: 'ResNet50' is not in list

In [12]:
print(base_models.name)

AttributeError: 'list' object has no attribute 'name'

In [None]:
# Initialize arrays to store individual model predictions
individual_predictions = []

# Get predictions from each model in the ensemble
for model_name, adaboost_classifier in adaboost_classifiers:
    predictions = adaboost_classifier.predict(validation_features[base_models.index(model_name)])
    individual_predictions.append(predictions)

# Combine predictions using majority voting
ensemble_predictions = np.sum(individual_predictions, axis=0)
final_predictions = np.argmax(ensemble_predictions, axis=1)

# You can evaluate the accuracy of the ensemble's predictions
correct_predictions = (final_predictions == valid_generator.classes)
accuracy = np.mean(correct_predictions)
print(f"Ensemble Accuracy: {accuracy}")

In [None]:
def plot_training(hist):
    '''
    This function take training model and plot history of accuracy and losses with the best epoch in both of them.
    '''

    # Define needed variables
    tr_acc = hist.history['accuracy']
    tr_loss = hist.history['loss']
    val_acc = hist.history['val_accuracy']
    val_loss = hist.history['val_loss']
    index_loss = np.argmin(val_loss)
    val_lowest = val_loss[index_loss]
    index_acc = np.argmax(val_acc)
    acc_highest = val_acc[index_acc]
    Epochs = [i+1 for i in range(len(tr_acc))]
    loss_label = f'best epoch= {str(index_loss + 1)}'
    acc_label = f'best epoch= {str(index_acc + 1)}'

    # Plot training history
    plt.figure(figsize= (20, 8))
    plt.style.use('fivethirtyeight')

    plt.subplot(1, 2, 1)
    plt.plot(Epochs, tr_loss, 'r', label= 'Training loss')
    plt.plot(Epochs, val_loss, 'g', label= 'Validation loss')
    plt.scatter(index_loss + 1, val_lowest, s= 150, c= 'blue', label= loss_label)
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(Epochs, tr_acc, 'r', label= 'Training Accuracy')
    plt.plot(Epochs, val_acc, 'g', label= 'Validation Accuracy')
    plt.scatter(index_acc + 1 , acc_highest, s= 150, c= 'blue', label= acc_label)
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout
    plt.show()
    
plot_training(history)

In [None]:
train_eval = ensemble_model.evaluate(train_gen, verbose= 1)
valid_eval = ensemble_model.evaluate(valid_gen, verbose= 1)
test_eval = ensemble_model.evaluate(test_gen, verbose= 1)

print("Train Loss: ", train_eval[0])
print("Train Accuracy: ", train_eval[1])
print('-' * 20)
print("Validation Loss: ", valid_eval[0])
print("Validation Accuracy: ", valid_eval[1])
print('-' * 20)
print("Test Loss: ", test_eval[0])
print("Test Accuracy: ", test_eval[1])

In [None]:
preds = ensemble_model.predict_generator(test_gen)
y_pred_class = np.argmax(preds, axis=1)
print(y_pred_class)

In [None]:
print(test_gen.classes)

In [None]:
# Calculate precision
precision = precision_score(test_gen.classes, y_pred_class, average = 'weighted')
print(f"Precision Score = {precision}")

# Calculate recall
recall = recall_score(test_gen.classes, y_pred_class, average = 'weighted')
print(f"Recall Score = {recall}")

# Calculate F1-score
f1 = f1_score(test_gen.classes, y_pred_class, average = 'weighted')
print(f"F1 Score = {f1}")

In [None]:
class_dict = test_gen.class_indices
classes = list(class_dict.keys())

# Confusion matrix
conf_matrix = confusion_matrix(test_gen.classes, y_pred_class)

# Plot the confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,
            xticklabels=test_gen.class_indices.keys(),
            yticklabels=test_gen.class_indices.keys())
plt.tight_layout()
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix - Stacking Ensemble')
plt.show()

# Classification report
print(classification_report(test_gen.classes, y_pred_class, target_names= classes))

In [None]:
#ensemble_model.save("Ensemble_Model.h5")