## Load Data

In [1]:
import os
import requests
import zipfile
import numpy as np

dataset_url = 'https://github.com/ishaqmarashy/DATALFS/raw/main/JMuBEN.zip'
dataset_dir = './JMuBEN'

# create the dataset directory; a no-op when it already exists
os.makedirs(dataset_dir, exist_ok=True)

# the archive is stored inside the dataset directory as ./JMuBEN/JMuBEN.zip
zip_file_path = os.path.join(dataset_dir, 'JMuBEN.zip')

# the rest of the notebook reads images from ./JMuBEN/train and ./JMuBEN/test
train_dir = os.path.join(dataset_dir, 'train')
test_dir = os.path.join(dataset_dir, 'test')

# download the archive only when it is not on disk yet
if not os.path.exists(zip_file_path):
    # (connect timeout, read timeout) so a dead connection cannot hang the cell forever
    response = requests.get(dataset_url, timeout=(10, 120))
    # fail loudly on an HTTP error instead of saving an error page as JMuBEN.zip
    response.raise_for_status()

    # write under a temporary name and rename afterwards, so an interrupted write
    # never leaves a truncated JMuBEN.zip that later runs would treat as complete
    partial_zip_path = zip_file_path + '.part'
    with open(partial_zip_path, 'wb') as zip_file:
        zip_file.write(response.content)
    os.replace(partial_zip_path, zip_file_path)

# extract whenever the split directories are missing; this also recovers from a
# previous run that downloaded the archive but was interrupted before extraction
if not (os.path.isdir(train_dir) and os.path.isdir(test_dir)):
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(dataset_dir)

In [2]:
def load_images_and_labels(directory):
    """Collect image file paths and integer class labels from a split directory.

    ``directory`` is expected to contain one sub-directory per class. Files in
    the sub-directory named ``Miner`` are labelled 0; files in any other
    sub-directory are labelled 1.

    Entries are visited in sorted order so the result is the same on every run
    and platform (``os.listdir`` order is arbitrary). Stray files directly under
    ``directory`` and nested directories inside a class directory are skipped.

    Args:
        directory: path of the split directory (for example the train folder).

    Returns:
        tuple ``(images, labels)`` of two equally long lists: full file paths
        and their integer labels.
    """
    images = []
    labels = []

    for class_name in sorted(os.listdir(directory)):
        class_dir = os.path.join(directory, class_name)
        # a stray file next to the class folders would make os.listdir(class_dir) raise
        if not os.path.isdir(class_dir):
            continue

        # the label is derived purely from the sub-directory name
        label = 0 if class_name == 'Miner' else 1

        for filename in sorted(os.listdir(class_dir)):
            file_path = os.path.join(class_dir, filename)
            # only regular files can be images; skip nested directories
            if not os.path.isfile(file_path):
                continue
            images.append(file_path)
            labels.append(label)

    return images, labels

# gather (file paths, labels) for the train split first, then for the test split
(train_images_dir, train_labels), (test_images_dir, test_labels) = (
    load_images_and_labels(split_dir) for split_dir in (train_dir, test_dir)
)

In [3]:
# report how many image paths and labels were collected for each split
print(
    f"Train images:{len(train_images_dir)}  Labels:{len(train_labels)}",
    f"Test images:{len(test_images_dir)}  Labels:{len(test_labels)}",
    sep="\n",
)

Train images:24000  Labels:24000
Test images:6000  Labels:6000


## Normalize and Preprocess Data

In [4]:
import cv2
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

def load_grayscale_images(image_paths):
    """Read every path in ``image_paths`` as a grayscale image.

    Args:
        image_paths: iterable of image file paths.

    Returns:
        list holding one ``cv2.imread(path, cv2.IMREAD_GRAYSCALE)`` result per
        path, in input order.

    Raises:
        OSError: if ``cv2.imread`` returns ``None`` for a path. OpenCV reports a
            missing or undecodable file that way instead of raising, and the
            ``None`` would otherwise only fail later, inside ``cv2.resize``,
            with an error that does not name the offending file.
    """
    loaded_images = []
    for image_path in image_paths:
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise OSError(f"cv2.imread could not read image: {image_path!r}")
        loaded_images.append(img)
    return loaded_images

def resize_images(images_to_resize, size=(48, 48)):
    """Resize every image with ``cv2.resize``.

    Args:
        images_to_resize: iterable of images accepted by ``cv2.resize``.
        size: target size passed unchanged as the second argument of
            ``cv2.resize``. Defaults to ``(48, 48)``, the value that was
            previously hard-coded, so existing single-argument callers
            behave as before.

    Returns:
        list of resized images, in input order.
    """
    return [cv2.resize(img, size) for img in images_to_resize]

def normalize_image(images_to_normalize):
    """Divide every image by 255.0 and return the results as a new list.

    Args:
        images_to_normalize: iterable of images (anything supporting ``/ 255.0``).

    Returns:
        list with one scaled image per input, in input order.
    """
    return [pixels / 255.0 for pixels in images_to_normalize]

# Chain the three preprocessing functions (read as grayscale -> resize -> scale
# by 1/255) as FunctionTransformer steps; each step is named after its function.
image_pipeline = Pipeline(steps=[
    (step_func.__name__, FunctionTransformer(step_func))
    for step_func in (load_grayscale_images, resize_images, normalize_image)
])



In [5]:
# Run each split's file paths through the preprocessing pipeline, stack the
# results into one array, and append a trailing axis of length 1.
x_train_reshaped = np.array(image_pipeline.transform(train_images_dir))[..., np.newaxis]
x_test_reshaped = np.array(image_pipeline.transform(test_images_dir))[..., np.newaxis]

# labels as arrays, in the same order as the images
y_train = np.array(train_labels)
y_test = np.array(test_labels)

# per-sample shape (everything after the sample axis), used as the model input shape
input_shape = x_train_reshaped.shape[1:]

In [6]:
# number of samples in the train and test arrays, one per line
print(len(x_train_reshaped), len(x_test_reshaped), sep="\n")

24000
6000


## Create Models

https://www.tensorflow.org/tutorials/images/cnn

https://colab.research.google.com/drive/1uWZQ-lzAk5308YVjMW5XaZcS_3zmGSgN?usp=sharing

In [7]:
import tensorflow as tf
from tensorflow.keras import  layers, models
import matplotlib.pyplot as plt

def create_cnn_ex1(input_shape=input_shape, num_classes=10, dense_units=64, learning_rate=0.001, loss='sparse_categorical_crossentropy',
               kernel=(3, 3), strides=(2, 2), filters=32, metrics=['accuracy'],):
    """Build and compile a Sequential CNN: Conv2D -> MaxPooling2D -> Flatten -> Dense -> Dense.

    Args:
        input_shape: shape given to the first Conv2D layer. The default is the
            notebook-level ``input_shape`` as it was when this cell ran.
        num_classes: number of units in the softmax output layer.
        dense_units: number of units in the hidden ReLU dense layer.
        learning_rate: learning rate for the Adam optimizer.
        loss: loss passed to ``compile``.
        kernel: kernel size of the Conv2D layer.
        strides: strides of the Conv2D layer.
        filters: number of Conv2D filters.
        metrics: metrics passed to ``compile``.

    Returns:
        the compiled model.
    """
    network = models.Sequential([
        # convolutional input layer
        layers.Conv2D(filters, kernel, activation='relu', input_shape=input_shape, strides=strides),
        # 2x2 max pooling
        layers.MaxPooling2D((2, 2)),
        # flatten the feature maps for the dense layers
        layers.Flatten(),
        # fully connected hidden layer
        layers.Dense(dense_units, activation='relu'),
        # fully connected softmax output layer
        layers.Dense(num_classes, activation='softmax'),
    ])
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=loss,
        metrics=metrics,
    )
    return network


In [8]:
import tensorflow as tf
from tensorflow.keras import  layers, models
import matplotlib.pyplot as plt

def create_cnn_ex2(input_shape=input_shape, num_classes=10, dense_units=64, learning_rate=0.001, loss='sparse_categorical_crossentropy',
               kernel=(3, 3), strides=(2, 2), filters=32, metrics=['accuracy'],):
    """Build and compile a Sequential CNN: Conv2D -> MaxPooling2D -> Flatten -> Dense -> Dense.

    NOTE(review): the layer stack and defaults are identical to
    ``create_cnn_ex1``; confirm whether a different architecture was intended.

    Args:
        input_shape: shape given to the first Conv2D layer. The default is the
            notebook-level ``input_shape`` as it was when this cell ran.
        num_classes: number of units in the softmax output layer.
        dense_units: number of units in the hidden ReLU dense layer.
        learning_rate: learning rate for the Adam optimizer.
        loss: loss passed to ``compile``.
        kernel: kernel size of the Conv2D layer.
        strides: strides of the Conv2D layer.
        filters: number of Conv2D filters.
        metrics: metrics passed to ``compile``.

    Returns:
        the compiled model.
    """
    network = models.Sequential([
        # convolutional input layer
        layers.Conv2D(filters, kernel, activation='relu', input_shape=input_shape, strides=strides),
        # 2x2 max pooling
        layers.MaxPooling2D((2, 2)),
        # flatten the feature maps for the dense layers
        layers.Flatten(),
        # fully connected hidden layer
        layers.Dense(dense_units, activation='relu'),
        # fully connected softmax output layer
        layers.Dense(num_classes, activation='softmax'),
    ])
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=loss,
        metrics=metrics,
    )
    return network


In [9]:
import tensorflow as tf
from tensorflow.keras import  layers, models
import matplotlib.pyplot as plt

def create_cnn_ex3(input_shape=input_shape, num_classes=10, dense_units=64,
                   learning_rate=0.001, loss='sparse_categorical_crossentropy',
                    kernel=(3, 3), strides=(2, 2), filters=32, metrics=['accuracy'],):
    """Build and compile a Sequential CNN with three Conv2D layers and two poolings.

    Layer order: Conv2D -> Conv2D -> MaxPooling2D -> Conv2D -> MaxPooling2D ->
    Flatten -> Dense -> Dense. All Conv2D layers share ``filters``, ``kernel``
    and ``strides``.

    Args:
        input_shape: shape given to the first Conv2D layer. The default is the
            notebook-level ``input_shape`` as it was when this cell ran.
        num_classes: number of units in the softmax output layer.
        dense_units: number of units in the hidden ReLU dense layer.
        learning_rate: learning rate for the Adam optimizer.
        loss: loss passed to ``compile``.
        kernel: kernel size of every Conv2D layer.
        strides: strides of every Conv2D layer.
        filters: number of filters in every Conv2D layer.
        metrics: metrics passed to ``compile``.

    Returns:
        the compiled model.
    """
    network = models.Sequential([
        # convolutional input layer
        layers.Conv2D(filters, kernel, activation='relu', input_shape=input_shape, strides=strides),
        # second convolution followed by 2x2 max pooling
        layers.Conv2D(filters, kernel, activation='relu',  strides=strides),
        layers.MaxPooling2D((2, 2)),
        # third convolution followed by 2x2 max pooling
        layers.Conv2D(filters, kernel, activation='relu',  strides=strides),
        layers.MaxPooling2D((2, 2)),
        # flatten the feature maps for the dense layers
        layers.Flatten(),
        # fully connected hidden layer
        layers.Dense(dense_units, activation='relu'),
        # fully connected softmax output layer
        layers.Dense(num_classes, activation='softmax'),
    ])
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=loss,
        metrics=metrics,
    )
    return network


In [10]:
import tensorflow as tf
from tensorflow.keras import  layers, models
import matplotlib.pyplot as plt

def create_cnn_ex4(input_shape=input_shape, num_classes=10, dense_units=64,
                   learning_rate=0.001, loss='sparse_categorical_crossentropy',
                    kernel=(3, 3), strides=(2, 2), filters=32, metrics=['accuracy'],):
    """Build and compile a Sequential CNN with three Conv2D layers and two poolings.

    Layer order: Conv2D -> Conv2D -> MaxPooling2D -> Conv2D -> MaxPooling2D ->
    Flatten -> Dense -> Dense. All Conv2D layers share ``filters``, ``kernel``
    and ``strides``.

    NOTE(review): the layer stack and defaults are identical to
    ``create_cnn_ex3``; confirm whether a different architecture was intended.

    Args:
        input_shape: shape given to the first Conv2D layer. The default is the
            notebook-level ``input_shape`` as it was when this cell ran.
        num_classes: number of units in the softmax output layer.
        dense_units: number of units in the hidden ReLU dense layer.
        learning_rate: learning rate for the Adam optimizer.
        loss: loss passed to ``compile``.
        kernel: kernel size of every Conv2D layer.
        strides: strides of every Conv2D layer.
        filters: number of filters in every Conv2D layer.
        metrics: metrics passed to ``compile``.

    Returns:
        the compiled model.
    """
    network = models.Sequential([
        # convolutional input layer
        layers.Conv2D(filters, kernel, activation='relu', input_shape=input_shape, strides=strides),
        # second convolution followed by 2x2 max pooling
        layers.Conv2D(filters, kernel, activation='relu',  strides=strides),
        layers.MaxPooling2D((2, 2)),
        # third convolution followed by 2x2 max pooling
        layers.Conv2D(filters, kernel, activation='relu',  strides=strides),
        layers.MaxPooling2D((2, 2)),
        # flatten the feature maps for the dense layers
        layers.Flatten(),
        # fully connected hidden layer
        layers.Dense(dense_units, activation='relu'),
        # fully connected softmax output layer
        layers.Dense(num_classes, activation='softmax'),
    ])
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=loss,
        metrics=metrics,
    )
    return network


In [11]:
import tensorflow as tf
from tensorflow.keras import  layers, models
import matplotlib.pyplot as plt

def create_cnn_ex5(input_shape=input_shape, num_classes=10, dense_units=64,
                   learning_rate=0.001, loss='sparse_categorical_crossentropy',
                    kernel=(3, 3), strides=(2, 2), filters=32, metrics=['accuracy'],):
    """Build and compile a Sequential CNN with three Conv2D layers and two poolings.

    Layer order: Conv2D -> Conv2D -> MaxPooling2D -> Conv2D -> MaxPooling2D ->
    Flatten -> Dense -> Dense. All Conv2D layers share ``filters``, ``kernel``
    and ``strides``.

    NOTE(review): the layer stack and defaults are identical to
    ``create_cnn_ex3``; confirm whether a different architecture was intended.

    Args:
        input_shape: shape given to the first Conv2D layer. The default is the
            notebook-level ``input_shape`` as it was when this cell ran.
        num_classes: number of units in the softmax output layer.
        dense_units: number of units in the hidden ReLU dense layer.
        learning_rate: learning rate for the Adam optimizer.
        loss: loss passed to ``compile``.
        kernel: kernel size of every Conv2D layer.
        strides: strides of every Conv2D layer.
        filters: number of filters in every Conv2D layer.
        metrics: metrics passed to ``compile``.

    Returns:
        the compiled model.
    """
    network = models.Sequential([
        # convolutional input layer
        layers.Conv2D(filters, kernel, activation='relu', input_shape=input_shape, strides=strides),
        # second convolution followed by 2x2 max pooling
        layers.Conv2D(filters, kernel, activation='relu',  strides=strides),
        layers.MaxPooling2D((2, 2)),
        # third convolution followed by 2x2 max pooling
        layers.Conv2D(filters, kernel, activation='relu',  strides=strides),
        layers.MaxPooling2D((2, 2)),
        # flatten the feature maps for the dense layers
        layers.Flatten(),
        # fully connected hidden layer
        layers.Dense(dense_units, activation='relu'),
        # fully connected softmax output layer
        layers.Dense(num_classes, activation='softmax'),
    ])
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=loss,
        metrics=metrics,
    )
    return network


In [12]:
import tensorflow as tf
from tensorflow.keras import  layers, models
import matplotlib.pyplot as plt

def create_cnn_ex6(input_shape=input_shape, num_classes=10, dense_units=64,
                   learning_rate=0.001, loss='sparse_categorical_crossentropy',
                    kernel=(3, 3), strides=(2, 2), filters=32, metrics=['accuracy'],):
    """Build and compile a Sequential CNN with three Conv2D layers and two poolings.

    Layer order: Conv2D -> Conv2D -> MaxPooling2D -> Conv2D -> MaxPooling2D ->
    Flatten -> Dense -> Dense. All Conv2D layers share ``filters``, ``kernel``
    and ``strides``.

    NOTE(review): the layer stack and defaults are identical to
    ``create_cnn_ex3``; confirm whether a different architecture was intended.

    Args:
        input_shape: shape given to the first Conv2D layer. The default is the
            notebook-level ``input_shape`` as it was when this cell ran.
        num_classes: number of units in the softmax output layer.
        dense_units: number of units in the hidden ReLU dense layer.
        learning_rate: learning rate for the Adam optimizer.
        loss: loss passed to ``compile``.
        kernel: kernel size of every Conv2D layer.
        strides: strides of every Conv2D layer.
        filters: number of filters in every Conv2D layer.
        metrics: metrics passed to ``compile``.

    Returns:
        the compiled model.
    """
    network = models.Sequential([
        # convolutional input layer
        layers.Conv2D(filters, kernel, activation='relu', input_shape=input_shape, strides=strides),
        # second convolution followed by 2x2 max pooling
        layers.Conv2D(filters, kernel, activation='relu',  strides=strides),
        layers.MaxPooling2D((2, 2)),
        # third convolution followed by 2x2 max pooling
        layers.Conv2D(filters, kernel, activation='relu',  strides=strides),
        layers.MaxPooling2D((2, 2)),
        # flatten the feature maps for the dense layers
        layers.Flatten(),
        # fully connected hidden layer
        layers.Dense(dense_units, activation='relu'),
        # fully connected softmax output layer
        layers.Dense(num_classes, activation='softmax'),
    ])
    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=loss,
        metrics=metrics,
    )
    return network


## Train and Evaluate Models

In [13]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, recall_score, accuracy_score,roc_auc_score
from sklearn.model_selection import cross_val_score,cross_val_predict
from sklearn.metrics import roc_curve, auc

def calculate_metrics_and_mean(X_train, y_train, y_pred, model):
    """Assemble per-class evaluation metrics for ``model`` and append a 'mean' row.

    Args:
        X_train: features passed to ``model.predict_proba`` and ``cross_val_score``.
        y_train: true labels for ``X_train`` (compared element-wise against each
            class label, so it is expected to be a NumPy array).
        y_pred: predicted labels for ``X_train``.
        model: estimator providing ``predict_proba``; it is also handed to
            ``cross_val_score`` with ``cv=10``.

    Returns:
        pandas.DataFrame with one row per class followed by a row labelled
        ``'mean'`` holding the column-wise means. Columns are the per-class
        columns of ``classification_report`` without ``support``, then ``TPR``,
        ``FPR``, ``specificity``, ``sensitivity``, ``AUC``, ``accuracy``,
        ``CV mean`` and ``CV std``. The last three are single values stored on
        the first row only (NaN on the other class rows).
    """
    import warnings

    unique_labels = np.unique(y_train)

    # per-class precision / recall / f1; the last three report rows are dropped
    # (presumably sklearn's aggregate rows: accuracy, macro avg, weighted avg)
    report = classification_report(y_train, y_pred, output_dict=True, zero_division=0)
    report_df = pd.DataFrame(report).T.iloc[:-3].drop(columns='support')

    # one-vs-rest counts derived from the confusion matrix (rows: true, cols: predicted)
    cm = confusion_matrix(y_train, y_pred)
    tp = np.diagonal(cm)
    fn = np.sum(cm, axis=1) - tp
    fp = np.sum(cm, axis=0) - tp
    tn = np.sum(cm) - (tp + fn + fp)
    tp_rate = tp / (tp + fn)
    fp_rate = fp / (fp + tn)
    tp_fp_rate_df = pd.DataFrame({'TPR': tp_rate, 'FPR': fp_rate}, index=range(len(tp_rate)))

    # Best-effort metrics: a failure yields NaN for that column. Catch Exception
    # (not a bare except) so KeyboardInterrupt/SystemExit still propagate, and
    # warn so the NaN column is not silent.
    try:
        specificity = []
        for i in range(len(unique_labels)):
            true_negative = np.sum(cm) - np.sum(cm[i, :]) - np.sum(cm[:, i]) + cm[i, i]
            total_negative = np.sum(cm) - np.sum(cm[i, :])
            specificity.append(true_negative / total_negative)
        specificity_df = pd.DataFrame({'specificity': specificity}, index=unique_labels)
    except Exception as exc:
        warnings.warn(f"specificity could not be computed ({exc!r}); filling with NaN")
        specificity_df = pd.DataFrame({'specificity': np.nan}, index=unique_labels)
    try:
        sensitivity = recall_score(y_train, y_pred, average=None)
        sensitivity_df = pd.DataFrame({'sensitivity': sensitivity}, index=unique_labels)
    except Exception as exc:
        warnings.warn(f"sensitivity could not be computed ({exc!r}); filling with NaN")
        sensitivity_df = pd.DataFrame({'sensitivity': np.nan}, index=unique_labels)

    # align the per-class frames by position (0..k-1) before joining them side by side
    result_df = pd.concat([report_df.reset_index(drop=True),
                           tp_fp_rate_df.reset_index(drop=True),
                           specificity_df.reset_index(drop=True),
                           sensitivity_df.reset_index(drop=True)], axis=1)

    # one-vs-rest ROC AUC per class
    # NOTE(review): assumes probabilities[:, i] belongs to unique_labels[i] - confirm
    probabilities = model.predict_proba(X_train)
    aucs = []
    for i, class_label in enumerate(unique_labels):
        fpr_i, tpr_i, _ = roc_curve((y_train == class_label).astype(int), probabilities[:, i])
        aucs.append(auc(fpr_i, tpr_i))
    # positional index, like the frames above; indexing by label value would
    # misalign the AUC column whenever the labels are not exactly 0..k-1
    auc_df = pd.DataFrame({'AUC': aucs})

    accuracy = accuracy_score(y_train, y_pred)
    accuracy_df = pd.DataFrame({'accuracy': [accuracy]})

    # 10-fold cross-validation of `model` on the same data
    cross_val_scores = cross_val_score(model, X_train, y_train, cv=10)
    cross_val_df = pd.DataFrame({'CV mean': [np.mean(cross_val_scores)],
                                 'CV std': [np.std(cross_val_scores)]})

    result_df = pd.concat([result_df, auc_df, accuracy_df, cross_val_df], axis=1)

    # append the column-wise mean as a final row labelled 'mean'
    mean_values = pd.DataFrame(result_df.mean()).transpose()
    mean_values.index = ['mean']
    result_df = pd.concat([result_df, mean_values])

    return result_df

In [14]:
def plotHistory(history):
    """Plot the 'accuracy' and 'val_accuracy' curves stored in ``history.history``.

    Args:
        history: object whose ``history`` attribute maps 'accuracy' and
            'val_accuracy' to per-epoch value sequences.

    The curves are drawn on the current pyplot figure with the y-axis fixed to
    [0, 1]; nothing is returned.
    """
    curves = history.history
    for curve_name in ('accuracy', 'val_accuracy'):
        plt.plot(curves[curve_name], label=curve_name)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.ylim([0, 1])
    plt.legend(loc='lower right')

### Architecture 1

In [15]:
from keras.wrappers.scikit_learn import KerasClassifier

# Hyperparameters for the architecture 1 run.
# NOTE(review): `epochs` and `batch_size` are assigned here but never passed to
# fit() below, which hard-codes epochs=2 and does not set a batch size.
num_classes=2
epochs=3
batch_size=64
kernel=(3, 3)
strides=(2, 2)
filters=32
learning_rate=0.001
# wrap create_cnn_ex1 in KerasClassifier, forwarding the hyperparameters above
model = KerasClassifier(build_fn=create_cnn_ex1, input_shape=input_shape,
                                            num_classes=num_classes,learning_rate=learning_rate,
                                            kernel=kernel, strides=strides, filters=filters)
model.fit(x_train_reshaped,y_train,epochs=2)
# NOTE(review): predictions and metrics use the training arrays themselves;
# x_test_reshaped / y_test are not evaluated in this cell.
y_pred=model.predict(x_train_reshaped)
df1=calculate_metrics_and_mean(x_train_reshaped, y_train, y_pred, model)

  model = KerasClassifier(build_fn=create_cnn_ex1, input_shape=input_shape,


Epoch 1/2
Epoch 2/2


In [16]:
# metrics table returned by calculate_metrics_and_mean for the create_cnn_ex1 model
df1

Unnamed: 0,precision,recall,f1-score,TPR,FPR,specificity,sensitivity,AUC,accuracy,CV mean,CV std
0,1.0,0.998333,0.999166,0.998333,0.0,1.0,0.998333,1.0,0.999167,0.981917,0.027986
1,0.998336,1.0,0.999167,1.0,0.001667,0.998333,1.0,1.0,,,
mean,0.999168,0.999167,0.999167,0.999167,0.000833,0.999167,0.999167,1.0,0.999167,0.981917,0.027986


### Architecture 2

In [17]:
from keras.wrappers.scikit_learn import KerasClassifier

# Hyperparameters for the architecture 2 run.
# NOTE(review): `epochs` and `batch_size` are assigned here but never passed to
# fit() below, which hard-codes epochs=2 and does not set a batch size.
num_classes=2
epochs=3
batch_size=64
kernel=(3, 3)
strides=(2, 2)
filters=32
learning_rate=0.001
# wrap create_cnn_ex2 in KerasClassifier, forwarding the hyperparameters above
model =  KerasClassifier(build_fn=create_cnn_ex2, input_shape=input_shape,
                                            num_classes=num_classes,learning_rate=learning_rate,
                                            kernel=kernel, strides=strides, filters=filters)
model.fit(x_train_reshaped,y_train,epochs=2)
# NOTE(review): predictions and metrics use the training arrays themselves;
# x_test_reshaped / y_test are not evaluated in this cell.
y_pred=model.predict(x_train_reshaped)
df2=calculate_metrics_and_mean(x_train_reshaped, y_train, y_pred, model)

  model =  KerasClassifier(build_fn=create_cnn_ex2, input_shape=input_shape,


Epoch 1/2
Epoch 2/2


In [18]:
# metrics table returned by calculate_metrics_and_mean for the create_cnn_ex2 model
df2

Unnamed: 0,precision,recall,f1-score,TPR,FPR,specificity,sensitivity,AUC,accuracy,CV mean,CV std
0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.978083,0.03959
1,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,,,
mean,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.978083,0.03959


### Architecture 3

In [19]:
from keras.wrappers.scikit_learn import KerasClassifier

# Hyperparameters for the architecture 3 run.
# NOTE(review): `epochs` and `batch_size` are assigned here but never passed to
# fit() below, which hard-codes epochs=2 and does not set a batch size.
num_classes=2
epochs=3
batch_size=64
kernel=(3, 3)
strides=(2, 2)
filters=32
learning_rate=0.001
# wrap create_cnn_ex3 in KerasClassifier, forwarding the hyperparameters above
model =  KerasClassifier(build_fn=create_cnn_ex3, input_shape=input_shape,
                                            num_classes=num_classes,learning_rate=learning_rate,
                                            kernel=kernel, strides=strides, filters=filters)
model.fit(x_train_reshaped,y_train,epochs=2)
# NOTE(review): predictions and metrics use the training arrays themselves;
# x_test_reshaped / y_test are not evaluated in this cell.
y_pred=model.predict(x_train_reshaped)
df3=calculate_metrics_and_mean(x_train_reshaped, y_train, y_pred, model)

  model =  KerasClassifier(build_fn=create_cnn_ex3, input_shape=input_shape,


Epoch 1/2
Epoch 2/2


In [20]:
# metrics table returned by calculate_metrics_and_mean for the create_cnn_ex3 model
df3

Unnamed: 0,precision,recall,f1-score,TPR,FPR,specificity,sensitivity,AUC,accuracy,CV mean,CV std
0,1.0,0.999417,0.999708,0.999417,0.0,1.0,0.999417,1.0,0.999708,0.983917,0.025804
1,0.999417,1.0,0.999708,1.0,0.000583,0.999417,1.0,1.0,,,
mean,0.999709,0.999708,0.999708,0.999708,0.000292,0.999708,0.999708,1.0,0.999708,0.983917,0.025804


### Architecture 4

In [21]:
from keras.wrappers.scikit_learn import KerasClassifier

# Hyperparameters for the architecture 4 run.
# NOTE(review): `epochs` and `batch_size` are assigned here but never passed to
# fit() below, which hard-codes epochs=2 and does not set a batch size.
num_classes=2
epochs=3
batch_size=64
kernel=(3, 3)
strides=(2, 2)
filters=32
learning_rate=0.001
# wrap create_cnn_ex4 in KerasClassifier, forwarding the hyperparameters above
model =  KerasClassifier(build_fn=create_cnn_ex4, input_shape=input_shape,
                                            num_classes=num_classes,learning_rate=learning_rate,
                                            kernel=kernel, strides=strides, filters=filters)
model.fit(x_train_reshaped,y_train,epochs=2)
# NOTE(review): predictions and metrics use the training arrays themselves;
# x_test_reshaped / y_test are not evaluated in this cell.
y_pred=model.predict(x_train_reshaped)
df4=calculate_metrics_and_mean(x_train_reshaped, y_train, y_pred, model)

  model =  KerasClassifier(build_fn=create_cnn_ex4, input_shape=input_shape,


Epoch 1/2
Epoch 2/2


In [None]:
# metrics table returned by calculate_metrics_and_mean for the create_cnn_ex4 model
df4

### Architecture 5

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier

# Hyperparameters for the architecture 5 run.
# NOTE(review): `epochs` and `batch_size` are assigned here but never passed to
# fit() below, which hard-codes epochs=2 and does not set a batch size.
num_classes=2
epochs=3
batch_size=64
kernel=(3, 3)
strides=(2, 2)
filters=32
learning_rate=0.001
# wrap create_cnn_ex5 in KerasClassifier, forwarding the hyperparameters above
model =  KerasClassifier(build_fn=create_cnn_ex5, input_shape=input_shape,
                                            num_classes=num_classes,learning_rate=learning_rate,
                                            kernel=kernel, strides=strides, filters=filters)
model.fit(x_train_reshaped,y_train,epochs=2)
# NOTE(review): predictions and metrics use the training arrays themselves;
# x_test_reshaped / y_test are not evaluated in this cell.
y_pred=model.predict(x_train_reshaped)
df5=calculate_metrics_and_mean(x_train_reshaped, y_train, y_pred, model)

In [None]:
# metrics table returned by calculate_metrics_and_mean for the create_cnn_ex5 model
df5

### Architecture 6

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier

# Hyperparameters for the architecture 6 run.
# NOTE(review): `epochs` and `batch_size` are assigned here but never passed to
# fit() below, which hard-codes epochs=2 and does not set a batch size.
num_classes=2
epochs=3
batch_size=64
kernel=(3, 3)
strides=(2, 2)
filters=32
learning_rate=0.001
# wrap create_cnn_ex6 in KerasClassifier, forwarding the hyperparameters above
model =  KerasClassifier(build_fn=create_cnn_ex6, input_shape=input_shape,
                                            num_classes=num_classes,learning_rate=learning_rate,
                                            kernel=kernel, strides=strides, filters=filters)
model.fit(x_train_reshaped,y_train,epochs=2)
# NOTE(review): predictions and metrics use the training arrays themselves;
# x_test_reshaped / y_test are not evaluated in this cell.
y_pred=model.predict(x_train_reshaped)
df6=calculate_metrics_and_mean(x_train_reshaped, y_train, y_pred, model)

In [None]:
# metrics table returned by calculate_metrics_and_mean for the create_cnn_ex6 model
df6

## Results

In [None]:
# One row per architecture: take the last row of each metrics table (the row
# labelled 'mean') and stack the six rows into a single frame.
dfs = [df1, df2, df3, df4, df5, df6]
last_rows = list(map(lambda metrics_table: metrics_table.iloc[-1], dfs))
result_df = (
    pd.concat(last_rows, axis=1)
    .transpose()
    .reset_index(drop=True)
)
result_df

In [None]:
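# NOTE: disabled hyperparameter grid-search sketch. It calls
# `cross_validate_and_evaluate`, which is not defined in the cells above, so it
# cannot be run as-is.
# from sklearn.model_selection import ParameterGrid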

# param_grid = {
#     # 'batch_size': [32, 64, 128],
#     'epochs': [1],
#     'learning_rate': [0.01,0.001],
#     'kernel': [(3, 3), (5, 5)],
#     'strides': [(1, 1), (2, 2)],
#     # 'filters': [32, 64, 128],
#     'dense_units': [64, 128],
# }
# i=0
# best_accuracy = 0
# best_params = {}
# for params in ParameterGrid(param_grid):
#     print(i)
#     model = KerasClassifier(build_fn=create_cnn_ex2, **params)
#     results_dict, history = cross_validate_and_evaluate(x_train_reshaped, y_train,
#                                                   x_test_reshaped, y_test,model, 
#                                                   f'M3 CVsearch {i}',epochs,batch_size)
#     if results_dict['Test_accuracy'] > best_accuracy:
#         best_accuracy = results_dict['Test_accuracy'] 
#         best_params = params
#     i+=1