### ECG & PCG analysis

Library imports


In [1]:
from sklearn.metrics import roc_curve
import numpy as np
import os
from keras.models import Sequential, Model
import pandas as pd
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, balanced_accuracy_score, precision_score, recall_score, roc_auc_score, confusion_matrix as cm, make_scorer
from sklearn.model_selection import StratifiedKFold, GridSearchCV
import itertools
from sklearn.model_selection import GroupShuffleSplit
import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.utils import class_weight
from keras.optimizers import Adam, SGD
from sklearn.utils import class_weight
from keras.callbacks import EarlyStopping
from sklearn.model_selection import StratifiedGroupKFold, GridSearchCV
from itertools import product
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from datetime import datetime
import pickle
from sklearn.utils import compute_class_weight


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.9.1 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [2]:
# Root folder of the dataset. The cells below read `train/dataset.csv` and
# `test/dataset.csv` from it and load the image files from its `train/` and
# `test/` sub-folders.
main_folder = 'physionet2017_ecg_scalograms'

### ECG Model



In [3]:
# Load the train/test index files, keeping only the CSV columns at positions 1 and 2
# (position 0 is skipped -- presumably a saved row index; TODO confirm).
# The following cells access these frames through the 'filename' and 'label' columns.
train_df_p = pd.read_csv(f"./{main_folder}/train/dataset.csv",usecols=range(1,3))
test_df_p = pd.read_csv(f"./{main_folder}/test/dataset.csv",usecols=range(1,3))


In [4]:
# Cast the labels to strings so they match the classes=["0","1"] argument passed to
# flow_from_dataframe(class_mode='binary') further down in the notebook.
train_df_p['label']=train_df_p['label'].astype(str)
test_df_p['label']=test_df_p['label'].astype(str)


In [5]:
# Derive a group id from the part of the filename before the first underscore
# (e.g. 'A06729_11.tiff' -> 'A06729'). The training groups are later passed as
# `groups` to StratifiedGroupKFold.split so that images sharing a prefix are kept
# on the same side of every train/validation split.
train_df_p['group'] = train_df_p['filename'].apply(lambda x: x.split('_')[0])
test_df_p['group'] = test_df_p['filename'].apply(lambda x: x.split('_')[0])

In [6]:
# Preview the training index (filename, label and derived group columns).
train_df_p

Unnamed: 0,filename,label,group
0,A06729_11.tiff,0,A06729
1,A06047_4.tiff,0,A06047
2,A01326_149.tiff,1,A01326
3,A02351_69.tiff,0,A02351
4,A03493_22.tiff,1,A03493
...,...,...,...
37340,A05868_37340.tiff,0,A05868
37341,A05868_37341.tiff,0,A05868
37342,A05868_37342.tiff,0,A05868
37343,A05868_37344.tiff,0,A05868


In [7]:
# Image generators that run VGG16's `preprocess_input` on every image; no other
# (augmentation) option is set, so the three objects are configured identically.
# NOTE(review): in the cells shown here only `train_datagen_p` is actually used,
# including for the validation and test generators.
train_datagen_p = ImageDataGenerator(preprocessing_function = preprocess_input)
val_datagen_p = ImageDataGenerator(preprocessing_function= preprocess_input)

test_datagen_p = ImageDataGenerator(preprocessing_function=preprocess_input)


In [8]:
# Stand-alone ImageNet-pretrained VGG16 without its classification head, for 224x224 RGB input.
# NOTE(review): the create_model* functions below each build their own VGG16 instance,
# so in the cells shown this global is only inspected, never trained.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

In [9]:
# Show the symbolic input tensor of the VGG16 base to confirm the expected input shape.
base_model.input

<KerasTensor: shape=(None, 224, 224, 3) dtype=float32 (created by layer 'input_1')>

In [10]:
# Define your model creation function
def create_model(optimizer='adam', learning_rate=0.001, dropout = 0.5, neurons = 128):
    """Build and compile a binary classifier on top of a frozen VGG16 base.

    Every layer of the ImageNet-pretrained VGG16 base is frozen; only the new
    head (Flatten -> Dense(neurons, relu) -> Dropout -> Dense(1, sigmoid)) is
    trainable.

    Parameters
    ----------
    optimizer : str
        Either 'adam' or 'sgd'.
    learning_rate : float
        Learning rate handed to the chosen optimizer.
    dropout : float
        Dropout rate applied after the hidden dense layer.
    neurons : int
        Width of the hidden dense layer.

    Returns
    -------
    Model
        Compiled with binary cross-entropy loss and the 'Accuracy' metric.

    Raises
    ------
    ValueError
        If `optimizer` is not one of the supported names.
    """
    # Validate first: previously an unknown name left `opt` unbound and the call
    # only failed at compile time, after the whole VGG16 base had been built.
    if optimizer not in ('adam', 'sgd'):
        raise ValueError(f"Unsupported optimizer {optimizer!r}; expected 'adam' or 'sgd'.")
    optimizer_cls = Adam if optimizer == 'adam' else SGD
    opt = optimizer_cls(learning_rate=learning_rate)

    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Freeze the whole convolutional base.
    for layer in base_model.layers:
        layer.trainable = False

    x = base_model.output
    x = Flatten()(x)  # Flatten the convolutional feature map (this is not global average pooling)
    x = Dense(neurons, activation='relu')(x)  # Fully connected hidden layer
    x = Dropout(dropout)(x)
    predictions = Dense(1, activation='sigmoid')(x)  # Single sigmoid unit for binary classification

    model = Model(inputs=base_model.input, outputs=predictions)

    # The metric name 'Accuracy' is kept verbatim: later cells read
    # history.history['Accuracy'] and history.history['val_Accuracy'].
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['Accuracy'])

    return model


In [11]:
# Build the frozen-base model with its default hyper-parameters so it can be inspected below.
model = create_model()


In [12]:
# Print the layer stack, output shapes and parameter counts of the model built above.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [13]:
# Define your model creation function
def create_model_finetune(optimizer='adam', learning_rate=0.001, dropout = 0.5, neurons = 128, num_layers_fine_tune = 8):
    """Build and compile a binary classifier that fine-tunes the top of VGG16.

    The last `num_layers_fine_tune` layers of the ImageNet-pretrained VGG16 base
    stay trainable, every earlier layer is frozen, and a new head
    (Flatten -> Dense(neurons, relu) -> Dropout -> Dense(1, sigmoid)) is added.

    Parameters
    ----------
    optimizer : str
        Either 'adam' or 'sgd'.
    learning_rate : float
        Learning rate handed to the chosen optimizer.
    dropout : float
        Dropout rate applied after the hidden dense layer.
    neurons : int
        Width of the hidden dense layer.
    num_layers_fine_tune : int, default 8
        Number of trailing base-model layers left trainable. Values larger than
        the number of base layers leave the whole base trainable.

    Returns
    -------
    Model
        Compiled with binary cross-entropy loss and the 'Accuracy' metric.

    Raises
    ------
    ValueError
        If `optimizer` is not one of the supported names.
    """
    # Validate first: previously an unknown name left `opt` unbound and the call
    # only failed at compile time, after the whole VGG16 base had been built.
    if optimizer not in ('adam', 'sgd'):
        raise ValueError(f"Unsupported optimizer {optimizer!r}; expected 'adam' or 'sgd'.")
    optimizer_cls = Adam if optimizer == 'adam' else SGD
    opt = optimizer_cls(learning_rate=learning_rate)

    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    base_model.trainable = True
    # Freeze everything except the trailing `num_layers_fine_tune` layers; the max()
    # keeps the slice bound from going negative when more layers are requested
    # than the base contains.
    num_frozen = max(0, len(base_model.layers) - num_layers_fine_tune)
    for model_layer in base_model.layers[:num_frozen]:
        model_layer.trainable = False

    x = base_model.output
    x = Flatten()(x)  # Flatten the convolutional feature map (this is not global average pooling)
    x = Dense(neurons, activation='relu')(x)  # Fully connected hidden layer
    x = Dropout(dropout)(x)
    predictions = Dense(1, activation='sigmoid')(x)  # Single sigmoid unit for binary classification

    model = Model(inputs=base_model.input, outputs=predictions)

    # The metric name 'Accuracy' is kept verbatim: later cells read
    # history.history['Accuracy'] and history.history['val_Accuracy'].
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['Accuracy'])
    return model


In [14]:
# Rebind `model` to the fine-tuning variant (default hyper-parameters); this replaces
# the frozen-base model created earlier under the same name.
model = create_model_finetune()

In [15]:
# Print the layer stack, output shapes and parameter counts of the fine-tuning model.
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [16]:
class ScoreCallback(tf.keras.callbacks.Callback):
    """Record macro-F1 and accuracy on two generators at the end of every epoch.

    After each epoch the model predicts on the validation generator and then on
    the training generator; predictions are rounded to hard labels and compared
    with each generator's `.classes`. Results are appended to `val_f1_scores`,
    `train_f1_scores`, `val_accuracies` and `train_accuracies`.

    NOTE(review): comparing against `.classes` assumes the generators yield
    samples in that same order (i.e. were created with shuffle=False) -- confirm
    at the call site.
    """

    def __init__(self, validation_data, train_data):
        super().__init__()
        self.validation_data = validation_data
        self.train_data = train_data
        # One entry per finished epoch.
        self.val_f1_scores, self.train_f1_scores = [], []
        self.val_accuracies, self.train_accuracies = [], []

    def on_epoch_end(self, epoch, logs=None):
        val_labels = self.validation_data.classes
        train_labels = self.train_data.classes

        # Validation is predicted before training, each rounded to 0/1.
        val_hard = np.round(self.model.predict(self.validation_data))
        train_hard = np.round(self.model.predict(self.train_data))

        self.val_f1_scores.append(f1_score(val_labels, val_hard, average='macro'))
        self.train_f1_scores.append(f1_score(train_labels, train_hard, average='macro'))

        self.val_accuracies.append(accuracy_score(val_labels, val_hard))
        self.train_accuracies.append(accuracy_score(train_labels, train_hard))

In [24]:
# Hyper-parameter grid for the cross-validated search. Every entry is a list of
# candidate values; the search cell takes the Cartesian product of these lists.
param_grid = dict(
    batch_size=[32],
    epochs=[30],
    optimizer=['adam'],
    learning_rate=[1e-6],
    dropout=[0.5],
    neurons=[128],
    class_weights=[True],
)

# Generic results accumulator.
results = []

# A make_scorer-based macro-F1 scorer was tried here and is left disabled:
#f1_macro_scorer = make_scorer(f1_score, average='macro')

# 5-fold splitter that stratifies on the label and keeps each group in a single fold.
sgkf = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=42)


In [25]:
def calculate_scores(generator, model, threshold=0.5):
    """Score a binary classifier on all samples of a data generator.

    Parameters
    ----------
    generator
        Batch generator accepted by `model.predict` that exposes the true
        labels as `.classes`. It must not shuffle, otherwise the predictions
        would not line up with `.classes`.
    model
        Object with a `predict(generator)` method returning one score per sample.
    threshold : float, default 0.5
        Scores strictly greater than this value are counted as class 1.

    Returns
    -------
    tuple
        `(f1, accuracy, roc_auc, precision, recall, predictions, y_true)` where
        `f1` is the binary (positive-class) F1 score, `roc_auc` is computed from
        the raw scores, and `predictions` is the unthresholded output of
        `model.predict`.

    Raises
    ------
    ValueError
        If the generator reports `shuffle=True`.
    """
    # `.classes` is in dataset order, so a shuffling generator would silently
    # produce meaningless metrics; fail loudly instead.
    if getattr(generator, 'shuffle', False):
        raise ValueError(
            "calculate_scores needs a generator created with shuffle=False so that "
            "predictions are aligned with generator.classes."
        )

    y_true = generator.classes
    predictions = model.predict(generator)
    predicted_classes = (predictions > threshold).astype(int)

    f1 = f1_score(y_true, predicted_classes, average='binary')
    acc = accuracy_score(y_true, predicted_classes)
    auc = roc_auc_score(y_true, predictions)
    precision = precision_score(y_true, predicted_classes)
    recall = recall_score(y_true, predicted_classes)
    return f1, acc, auc, precision, recall, predictions, y_true
    

In [26]:

# Grouped, stratified cross-validation over every combination in `param_grid`.
# Results are accumulated across ALL combinations: the CSV file name is fixed, so
# resetting this list per combination (as before) made each combination overwrite
# the previous one on disk.
detailed_results = []

for params in product(*param_grid.values()):
    param_dict = dict(zip(param_grid.keys(), params))
    use_class_weights = param_dict['class_weights']
    batch_size = param_dict['batch_size']
    n_epochs = param_dict['epochs']
    # Keep only the keyword arguments understood by create_model_finetune.
    param_dict_filtered = {x:param_dict[x] for x in param_dict.keys() if x not in ('batch_size', 'epochs', 'class_weights')}

    # One record per fold for the current parameter combination.
    epoch_info = []

    for fold_idx, (train_indices, test_indices) in enumerate(sgkf.split(train_df_p['filename'], train_df_p['label'], groups=train_df_p['group'])):

        print(param_dict)
        print (f'-----------------FOLD {fold_idx}-----------------')

        # A fresh model per fold so no weights leak between folds.
        model = create_model_finetune(**param_dict_filtered)

        # shuffle=False keeps the sample order aligned with `.classes`, which
        # calculate_scores relies on.
        # NOTE(review): the same unshuffled generator is also used for training.
        train_gen = train_datagen_p.flow_from_dataframe(dataframe=train_df_p.iloc[train_indices],
                                             directory=f"./{main_folder}/train",
                                             target_size=(224, 224),
                                             x_col = 'filename',
                                             y_col = 'label',
                                             class_mode = 'binary',
                                             classes = ["0","1"], shuffle=False, batch_size=batch_size,
                                             color_mode = 'rgb' )

        # Held-out fold: built from the validation datagen (configured identically
        # to the training one) so the two pipelines stay separate.
        test_gen = val_datagen_p.flow_from_dataframe(dataframe=train_df_p.iloc[test_indices],
                                             directory=f"./{main_folder}/train",
                                             target_size=(224, 224),
                                             x_col = 'filename',
                                             y_col = 'label',
                                             class_mode = 'binary',
                                             shuffle = False,
                                             classes = ["0","1"], batch_size=batch_size,
                                             color_mode = 'rgb')

        train_group = train_df_p.iloc[train_indices]['group']
        val_group = train_df_p.iloc[test_indices]['group']

        # The generators already produce batches, so `batch_size` is not passed to
        # fit(). The fit arguments are built once instead of duplicating the call
        # in both branches.
        fit_kwargs = dict(epochs=n_epochs, validation_data=test_gen)
        if use_class_weights:
            y_train = train_gen.classes
            classes_present = np.unique(y_train)
            balanced_weights = compute_class_weight('balanced',
                                                    classes=classes_present,
                                                    y=y_train)
            # Key the weights by the actual class id rather than by position.
            class_weights = {int(c): w for c, w in zip(classes_present, balanced_weights)}
            fit_kwargs['class_weight'] = class_weights

        history = model.fit(train_gen, **fit_kwargs)

        # Final metrics of the trained model on both parts of the fold.
        train_f1, train_acc, train_auc, train_precision, train_recall, prediction_train, y_true_train = calculate_scores(train_gen, model)

        test_f1, test_acc, test_auc, test_precision, test_recall, prediction_test, y_true_val = calculate_scores(test_gen, model)

        print ( train_f1, train_acc, train_auc, train_precision,train_recall)

        print (test_f1, test_acc, test_auc, test_precision, test_recall)

        # Per-epoch curves recorded by Keras.
        epoch_losses = history.history['loss']
        epoch_accuracy = history.history['Accuracy']
        epoch_val_accuracy = history.history['val_Accuracy']
        epoch_val_losses = history.history['val_loss']

        epoch_info.append({
            'params': param_dict,
            'fold_number': fold_idx + 1,
            'loss': epoch_losses,
            'acc_train': epoch_accuracy,
            'acc_val': epoch_val_accuracy,
            'epoch_val_losses':epoch_val_losses,
            'train_f1': train_f1,
            'train_acc': train_acc,
            'test_f1': test_f1,
            'test_acc': test_acc,
            'train_auc':train_auc,
            'test_auc':test_auc,
            'train_precision':train_precision,
            'train_recall':train_recall,
            'test_precision':test_precision,
            'test_recall':test_recall,
            'y_true_train':y_true_train,
            'y_true_val':y_true_val,
            'prediction_train': prediction_train,
            'prediction_test': prediction_test,
            'train_group':train_group,
            'test_group':val_group

        })

    detailed_results.extend(epoch_info)

    # Checkpoint after every combination: the frame holds all folds of all
    # combinations finished so far.
    detailed_results_df = pd.DataFrame(detailed_results)

    print(detailed_results_df)
    now = datetime.now()
    # dd_mm_YYYY__HH_MM_SS
    dt_string = now.strftime("%d_%m_%Y__%H_%M_%S")
    print("date and time =", dt_string)

    detailed_results_df.to_csv(main_folder+'_vgg.csv')
    with open (main_folder+dt_string+'_vgg.pkl', 'wb') as f:
        pickle.dump(detailed_results_df, f)


{'batch_size': 32, 'epochs': 30, 'optimizer': 'adam', 'learning_rate': 1e-06, 'dropout': 0.5, 'neurons': 128, 'class_weights': True}
-----------------FOLD 0-----------------
Found 29929 validated image filenames belonging to 2 classes.
Found 7416 validated image filenames belonging to 2 classes.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
0.8423299786520281 0.8790804904941696 0.943265511688457 0.8671510584858271 0.8188903007200339
0.7485976542580318 0.8005663430420712 0.8595184836355075 0.767514813523876 0.7305905773059058
{'batch_size': 32, 'epochs': 30, 'optimizer': 'adam', 'learning_rate': 1e-06, 'dropout': 0.5, 'neurons': 128, 'class_weights': True}
-----------------FOLD 1--------

Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
0.8532537103714762 0.8880627009646302 0.9527783804841458 0.8955664116508434 0.8147589098532495
0.7314596554850408 0.8022432901588997 0.844092464015076 0.7695536054940862 0.6969592259847961
{'batch_size': 32, 'epochs': 30, 'optimizer': 'adam', 'learning_rate': 1e-06, 'dropout': 0.5, 'neurons': 128, 'class_weights': True}
-----------------FOLD 2-----------------
Found 29858 validated image filenames belonging to 2 classes.
Found 7487 validated image filenames belonging to 2 classes.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
0.8363367454170396 0.8591667224864358 0.947

Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
0.8518408676863111 0.8828750376796061 0.948858211945675 0.8513719512195121 0.8523103009749894
0.7151555043607042 0.7688301282051282 0.8308556192040735 0.7117589256469047 0.718584656084656
{'batch_size': 32, 'epochs': 30, 'optimizer': 'adam', 'learning_rate': 1e-06, 'dropout': 0.5, 'neurons': 128, 'class_weights': True}
-----------------FOLD 4-----------------
Found 29880 validated image filenames belonging to 2 classes.
Found 7465 validated image filenames belonging to 2 classes.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/

Epoch 30/30
0.8481726111845002 0.8846050870147256 0.9483435632820985 0.8860980771000092 0.813360358077865
0.749605747327843 0.8085733422638982 0.8512949805145616 0.783803591058996 0.7182672934855607
                                              params  fold_number  \
0  {'batch_size': 32, 'epochs': 30, 'optimizer': ...            1   
1  {'batch_size': 32, 'epochs': 30, 'optimizer': ...            2   
2  {'batch_size': 32, 'epochs': 30, 'optimizer': ...            3   
3  {'batch_size': 32, 'epochs': 30, 'optimizer': ...            4   
4  {'batch_size': 32, 'epochs': 30, 'optimizer': ...            5   

                                                loss  \
0  [1.9781194925308228, 0.7500041723251343, 0.643...   
1  [1.8622928857803345, 0.7634438872337341, 0.643...   
2  [1.915986180305481, 0.7504754662513733, 0.6399...   
3  [1.7686793804168701, 0.7519109845161438, 0.640...   
4  [1.8694206476211548, 0.7227396368980408, 0.638...   

                                           acc_tr

### Training on the full dataset

In [21]:
# Hyper-parameters for the final fit on train + test (class weighting disabled).
param_dict = {
    'batch_size': 32,
    'epochs': 30,
    'optimizer': 'adam',
    'learning_rate': 1e-06,
    'dropout': 0.5,
    'neurons': 128,
    'class_weights': False,
}
param_str = str(param_dict)
batch_size, n_epochs, use_class_weights = (
    param_dict[key] for key in ('batch_size', 'epochs', 'class_weights')
)
# Keyword arguments forwarded to the model factory (training-loop settings removed).
param_dict_filtered = {
    key: value
    for key, value in param_dict.items()
    if key not in ('batch_size', 'epochs', 'class_weights')
}

detailed_results = []
test_info = []
model = create_model_finetune(**param_dict_filtered)


def _ecg_flow(frame, split):
    """Return an unshuffled binary-label image generator over `frame` for the given sub-folder."""
    return train_datagen_p.flow_from_dataframe(
        dataframe=frame,
        directory=f"./{main_folder}/{split}",
        target_size=(224, 224),
        x_col='filename',
        y_col='label',
        class_mode='binary',
        classes=["0", "1"],
        shuffle=False,
        batch_size=batch_size,
        color_mode='rgb',
    )


# Both generators are created from the same datagen, training set first.
train_gen = _ecg_flow(train_df_p, 'train')
test_gen = _ecg_flow(test_df_p, 'test')


class CombinedGen():
    """Present several batch generators as one endless stream of batches.

    Each wrapped generator must support `len()` (number of batches per pass) and
    `next()`. Within every run of `len(self)` consecutive batches, generator `g`
    contributes exactly `len(g)` batches, in a randomly interleaved order. The
    previous strict round-robin drew equally from every generator, so with
    generators of different lengths the shorter one was repeated while the longer
    one was never fully visited within an epoch of `len(self)` steps.

    Parameters
    ----------
    *gens
        The generators to combine.
    seed : int or None, keyword-only
        Seed for the interleaving order; None draws a fresh random order each run.
    """

    def __init__(self, *gens, seed=None):
        self.gens = gens
        self.seed = seed

    def generate(self):
        """Yield batches forever, one full pass over every generator per epoch."""
        # One schedule entry per batch: index i appears len(gens[i]) times.
        schedule = [idx for idx, gen in enumerate(self.gens) for _ in range(len(gen))]
        if not schedule:
            # Nothing to draw from; end the stream instead of spinning forever.
            return
        rng = np.random.default_rng(self.seed)
        while True:
            # A fresh interleaving for every epoch.
            for idx in rng.permutation(schedule):
                yield next(self.gens[idx])

    def __len__(self):
        """Total number of batches in one pass over all generators."""
        return sum(len(g) for g in self.gens)

# Fit the final model on the training and test images together and save it.
full_data_generator=CombinedGen(train_gen, test_gen)

# The generator already yields batches, so `batch_size` is not passed to fit().
# steps_per_epoch equals len(train_gen) + len(test_gen).
fit_kwargs = dict(epochs=n_epochs, steps_per_epoch=len(full_data_generator))

if use_class_weights:
    # Balance against every label the model is fitted on. Previously only the
    # training labels were used although the fit also consumes the test images.
    y_train = np.concatenate([train_gen.classes, test_gen.classes])
    classes_present = np.unique(y_train)
    balanced_weights = compute_class_weight('balanced',
                                            classes=classes_present,
                                            y=y_train)
    # Key the weights by the actual class id rather than by position.
    class_weights = {int(c): w for c, w in zip(classes_present, balanced_weights)}
    fit_kwargs['class_weight'] = class_weights

history = model.fit(full_data_generator.generate(), **fit_kwargs)

# No evaluation here: every labelled image has been used for fitting.

model.save('ecg_only_finetuned.h5')  # writes the HDF5 file 'ecg_only_finetuned.h5'



Found 37345 validated image filenames belonging to 2 classes.
Found 15957 validated image filenames belonging to 2 classes.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


### Training on the training set, Testing on the test set 

In [20]:

# Train on the full training set and evaluate once on the held-out test set.
detailed_results=[]
param_dict = {'batch_size': 32, 'epochs': 30, 'optimizer': 'adam', 'learning_rate': 1e-06, 'dropout': 0.5, 'neurons': 128, 'class_weights': False}
param_str = str(param_dict)
batch_size = param_dict['batch_size']
n_epochs = param_dict['epochs']
use_class_weights = param_dict['class_weights']
# Keep only the keyword arguments understood by create_model_finetune.
param_dict_filtered = {x:param_dict[x] for x in param_dict.keys() if x not in ('batch_size', 'epochs', 'class_weights')}


test_info = []
model = create_model_finetune(**param_dict_filtered)
epoch_info = []

# shuffle=False keeps the sample order aligned with `.classes`, which
# calculate_scores relies on.
train_gen = train_datagen_p.flow_from_dataframe(dataframe=train_df_p,
                                     directory=f"./{main_folder}/train",
                                     target_size=(224, 224),
                                     x_col = 'filename',
                                     y_col = 'label',
                                     class_mode = 'binary',
                                     classes = ["0","1"], shuffle=False, batch_size=batch_size,
                                     color_mode = 'rgb' )

# Test images go through the dedicated test datagen (configured identically to the
# training one).
test_gen = test_datagen_p.flow_from_dataframe(dataframe=test_df_p,
                                 directory=f"./{main_folder}/test",
                                 target_size=(224, 224),
                                 x_col = 'filename',
                                 y_col = 'label',
                                 class_mode = 'binary',
                                 shuffle = False,
                                 classes = ["0","1"], batch_size=batch_size,
                                 color_mode = 'rgb')

# Groups of the samples actually used here. These names were previously not
# assigned in this cell, so the record below silently picked up the groups of the
# last cross-validation fold (or raised NameError on a fresh kernel).
train_group = train_df_p['group']
val_group = test_df_p['group']

# The generators already produce batches, so `batch_size` is not passed to fit().
# NOTE(review): the test set is used as validation_data purely for per-epoch monitoring.
fit_kwargs = dict(epochs=n_epochs, validation_data=test_gen)
if use_class_weights:
    y_train = train_gen.classes
    classes_present = np.unique(y_train)
    balanced_weights = compute_class_weight('balanced',
                                            classes=classes_present,
                                            y=y_train)
    # Key the weights by the actual class id rather than by position.
    class_weights = {int(c): w for c, w in zip(classes_present, balanced_weights)}
    fit_kwargs['class_weight'] = class_weights

history = model.fit(train_gen, **fit_kwargs)

train_f1, train_acc, train_auc, train_precision, train_recall, prediction_train, y_true_train = calculate_scores(train_gen, model)

test_f1, test_acc, test_auc, test_precision, test_recall, prediction_test, y_true_val = calculate_scores(test_gen, model)

# Per-epoch curves recorded by Keras.
epoch_losses = history.history['loss']
epoch_accuracy = history.history['Accuracy']
epoch_val_accuracy = history.history['val_Accuracy']
epoch_val_losses = history.history['val_loss']

epoch_info.append({
            'params': param_dict,
            'fold_number': -1,  # -1 marks "no fold": full train set vs. test set
            'loss': epoch_losses,
            'acc_train': epoch_accuracy,
            'acc_val': epoch_val_accuracy,
            'epoch_val_losses':epoch_val_losses,
            'train_f1': train_f1,
            'train_acc': train_acc,
            'test_f1': test_f1,
            'test_acc': test_acc,
            'train_auc':train_auc,
            'test_auc':test_auc,
            'train_precision':train_precision,
            'train_recall':train_recall,
            'test_precision':test_precision,
            'test_recall':test_recall,
            'y_true_train':y_true_train,
            'y_true_val':y_true_val,
            'prediction_train': prediction_train,
            'prediction_test': prediction_test,
            'train_group':train_group,
            'test_group':val_group

        })


detailed_results.extend(epoch_info)

# Convert detailed_results to a DataFrame
detailed_results_df = pd.DataFrame(detailed_results)

print(detailed_results_df)
now = datetime.now()
# dd_mm_YYYY__HH_MM_SS
dt_string = now.strftime("%d_%m_%Y__%H_%M_%S")
print("date and time =", dt_string)

# Use the same 'test_results' suffix as the pickle: the previous name
# (main_folder+'_vgg.csv') is also the cross-validation results file, so the two
# experiments overwrote each other.
detailed_results_df.to_csv(main_folder+'_test_results_vgg.csv')
with open (main_folder+dt_string+'test_results_vgg.pkl', 'wb') as f:
    pickle.dump(detailed_results_df, f)


Found 37345 validated image filenames belonging to 2 classes.
Found 15957 validated image filenames belonging to 2 classes.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
                                              params  fold_number  \
0  {'batch_size': 32, 'epochs': 30, 'optimizer': ...           -1   

                                                loss  \
0  [1.6673976182937622, 0.6598511338233948, 0.579...   

                                           acc_train  \
0  [0.6000803112983704, 0.6643727421760559, 0.707...   

                                             acc_val  \
0  [0.676630973815918, 0.7070878148078918, 0.7317...   

                                    epoch_val_l

In [23]:
# Define your model creation function
def create_model_single_mode(optimizer='adam', learning_rate=0.001, dropout = 0.5, neurons = 128):
    """Build and compile a binary classifier with a fully trainable VGG16 base.

    No layer of the ImageNet-pretrained VGG16 base is frozen. A new head
    (Flatten -> Dense(neurons, relu) -> Dropout -> Dense(1, sigmoid)) is added on top.

    Parameters
    ----------
    optimizer : str
        Either 'adam' or 'sgd'.
    learning_rate : float
        Learning rate handed to the chosen optimizer.
    dropout : float
        Dropout rate applied after the hidden dense layer.
    neurons : int
        Width of the hidden dense layer.

    Returns
    -------
    Model
        Compiled with binary cross-entropy loss and the 'Accuracy' and 'AUC' metrics.

    Raises
    ------
    ValueError
        If `optimizer` is not one of the supported names.
    """
    # Validate first: previously an unknown name left `opt` unbound and the call
    # only failed at compile time, after the whole VGG16 base had been built.
    if optimizer not in ('adam', 'sgd'):
        raise ValueError(f"Unsupported optimizer {optimizer!r}; expected 'adam' or 'sgd'.")
    optimizer_cls = Adam if optimizer == 'adam' else SGD
    opt = optimizer_cls(learning_rate=learning_rate)

    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

    # Every base layer stays trainable (full fine-tuning).
    base_model.trainable = True

    x = base_model.output
    x = Flatten()(x)  # Flatten the convolutional feature map (this is not global average pooling)
    x = Dense(neurons, activation='relu')(x)  # Fully connected hidden layer
    x = Dropout(dropout)(x)
    predictions = Dense(1, activation='sigmoid')(x)  # Single sigmoid unit for binary classification

    model = Model(inputs=base_model.input, outputs=predictions)

    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['Accuracy', 'AUC'])
    return model

In [None]:
# Build the fully trainable single-modality model; the arguments repeat the function's defaults.
m = create_model_single_mode(optimizer='adam', learning_rate=0.001, dropout = 0.5, neurons = 128)

In [25]:
# Print the layer stack, output shapes and parameter counts of `m`.
m.summary()

 input_16 (InputLayer)       [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 56, 56, 256)       295168    
          