In [None]:

import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import precision_recall_fscore_support
import pickle
from keras.models import load_model
from keras.optimizers import Adam
from keras.regularizers import l2


# Root directory holding one sub-directory per fold (fold1 ... fold10).
# The location can be overridden by setting the FOLD_DATA_DIR environment
# variable; when it is unset, the original hard-coded path is used so existing
# runs keep working unchanged.
fold_data_dir = os.environ.get(
    'FOLD_DATA_DIR',
    '/home/wangg/REU-Hearing-Loss-Project-1/machine learning/allFolds/10folds - 80-20 train test split (includes all subjects)',
)

# CNN Model
def build_model(input_shape, learning_rate = 0.001, weight_decay = 0.0001):
    """Build and compile the CNN used for every fold.

    The network is three Conv2D + MaxPooling2D stages (32, 64, 128 filters),
    followed by Flatten, a 512-unit Dense layer, Dropout(0.5) and a 2-unit
    softmax output. It is compiled with Adam, categorical cross-entropy loss
    and the accuracy metric.

    Args:
        input_shape: Shape tuple forwarded to the first Conv2D layer as
            ``input_shape``.
        learning_rate: Learning rate handed to the Adam optimizer.
        weight_decay: L2 regularization factor applied to the kernels of all
            Conv2D layers and of the hidden Dense layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # Convolutional feature extractor; every kernel carries an L2 penalty.
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape, kernel_regularizer=l2(weight_decay)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(weight_decay)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2(weight_decay)))
    model.add(MaxPooling2D((2, 2)))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(512, activation='relu', kernel_regularizer=l2(weight_decay)))
    model.add(Dropout(0.5))
    model.add(Dense(2, activation='softmax'))

    # Use the caller-supplied learning rate (it was previously ignored in
    # favour of a hard-coded 0.001).
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Input geometry: 224x224 images with 3 channels.
img_width = 224
img_height = 224
input_shape = (img_width, img_height, 3)

# The model itself is not created here; a fresh one is built inside the
# per-fold loop further down.

# Generator used for the test images: pixel values are only rescaled by 1/255,
# no augmentation is applied.
data_generator = ImageDataGenerator(rescale=1.0/255.0)

# Generator used for the training images: same rescaling plus random
# rotations, shifts, shear, zoom and horizontal flips.
train_data_generator = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=20,
    width_shift_range=0.2, height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Per-fold metrics, appended to once per fold by the loop below.
fold_accuracies, precision_list, recall_list, f1_list = [], [], [], []

for fold_number in range(1, 11):
    # Build a fresh model for this fold so every fold starts from new weights.
    model = build_model(input_shape, learning_rate=0.001)
    model.summary()

    # Each fold lives in <fold_data_dir>/fold<N>/{Training,Testing}.
    fold_path = os.path.join(fold_data_dir, f'fold{fold_number}')

    # Training data (augmented).
    train_data_dir = os.path.join(fold_path, 'Training')
    print(f"\nFold {fold_number} - Training Data Directory:", train_data_dir)

    train_generator = train_data_generator.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode='categorical'
    )
    print("Number of Training Samples:", train_generator.samples)

    # Testing data (rescaled only). shuffle=False keeps the generator order
    # aligned with test_generator.filenames / test_generator.labels.
    test_data_dir = os.path.join(fold_path, 'Testing')
    print(f"\nFold {fold_number} - Testing Data Directory:", test_data_dir)

    test_generator = data_generator.flow_from_directory(
        test_data_dir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode='categorical',
        shuffle=False)
    print("Number of Test Samples:", test_generator.samples)

    # Train on the augmented data. Floor division means a trailing partial
    # batch is not counted towards the steps of an epoch.
    history = model.fit(
        train_generator,
        steps_per_epoch=train_generator.samples // train_generator.batch_size,
        epochs=100
    )

    # Print the contents of history.history
    print(history.history)
    train_loss = history.history['loss']
    train_accuracies = history.history['accuracy']

    # Save the per-epoch training loss/accuracy. The fold number is part of
    # the file name so one fold does not overwrite the log of the previous one.
    losses_filename = f'losses_pretrained_model10FOLD100EPOCHS_fold_{fold_number}.txt'
    with open(losses_filename, 'w') as losses_file:
        losses_file.write("Epoch\tTraining Loss\tTraining Accuracy\n")
        for epoch, (epoch_loss, epoch_accuracy) in enumerate(zip(train_loss, train_accuracies), start=1):
            losses_file.write(f"{epoch}\t{epoch_loss}\t{epoch_accuracy}\n")

    # Save the trained model, then reload it so the evaluation below actually
    # exercises the artifact written to disk.
    model_filename = f'10fold_model_fold_DataAugment_100epochs_{fold_number}.h5'
    model.save(model_filename)
    loaded_model = load_model(model_filename)

    # Ceil division so the final partial batch is included: every test image
    # is evaluated and predictions line up one-to-one with the true labels.
    test_steps = int(np.ceil(test_generator.samples / test_generator.batch_size))

    # Evaluate the model on test data
    test_loss, test_accuracy = loaded_model.evaluate(test_generator, steps=test_steps)
    print(f'\nEvaluation for Fold {fold_number}:')
    print("Test Loss:", test_loss)
    print("Test Accuracy:", test_accuracy)

    # Store the accuracy for this fold
    fold_accuracies.append(test_accuracy)

    # Evaluate precision, recall, and F1 score
    predictions = loaded_model.predict(test_generator, steps=test_steps)
    # Labels may be one-hot (2-D) or already class indices (1-D).
    true_labels = np.argmax(test_generator.labels, axis=1) if len(test_generator.labels.shape) > 1 else test_generator.labels
    predicted_labels = np.argmax(predictions, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predicted_labels, average='binary')

    # Store the results for this fold
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)

    # Save per-image classification results to a file
    results_filename = f'10fold_classification_resultsDataAugment100epochs_fold_{fold_number}.txt'
    with open(results_filename, 'w') as results_file:
        results_file.write("Image Name\tTrue Label\tPredicted Label\n")
        for image_path, true_label, predicted_label in zip(test_generator.filenames, true_labels, predicted_labels):
            image_name = os.path.basename(image_path)
            results_file.write(f"{image_name}\t{true_label}\t{predicted_label}\n")

# Persist the per-fold precision / recall / F1 lists as one pickled dict.
results = dict(
    precision_list=precision_list,
    recall_list=recall_list,
    f1_list=f1_list,
)

with open('evaluation_results_10FOLD100EPOCHS.pkl', 'wb') as pkl_out:
    pickle.dump(results, pkl_out)

# Seaborn boxplot with one box per metric, each summarising the per-fold values.
df_boxplot = pd.DataFrame(
    dict(zip(
        ('Precision', 'Recall', 'F1 Score'),
        (precision_list, recall_list, f1_list),
    ))
)

plt.figure(figsize=(10, 6))
sns.boxplot(data=df_boxplot)
plt.title('Precision, Recall, and F1 Score Across Folds')
plt.show()

# Summary table: one row per evaluated fold.
# The fold numbers are derived from the number of collected results so the
# 'Fold' column always has the same length as the metric lists (a fixed
# range(1, 10) yields only 9 entries for 10 folds and makes DataFrame raise).
df_summary = pd.DataFrame({
    'Fold': range(1, len(fold_accuracies) + 1),
    'Accuracy': fold_accuracies,
    'Precision': precision_list,
    'Recall': recall_list,
    'F1 Score': f1_list
})

# Display the summary table
print("\nSummary of Results:")
print(df_summary)

import pickle

# Read back the metrics dict that was pickled earlier in this cell.
with open('evaluation_results_10FOLD100EPOCHS.pkl', 'rb') as pkl_in:
    loaded_results = pickle.load(pkl_in)

# Unpack the three metric lists from the loaded dict.
precision_list_loaded, recall_list_loaded, f1_list_loaded = (
    loaded_results[key] for key in ('precision_list', 'recall_list', 'f1_list')
)

# Echo what was loaded.
print("Precision List:", precision_list_loaded)
print("Recall List:", recall_list_loaded)
print("F1 List:", f1_list_loaded)


# Matplotlib boxplot of the three metrics. Note that it is drawn from the
# in-memory lists, not from the *_loaded copies read above.
data = [precision_list, recall_list, f1_list]

fig, ax = plt.subplots(figsize=(10, 6))
bp = ax.boxplot(data, labels=['Precision', 'Recall', 'F1 Score'])
ax.set(ylabel='Score', title='Boxplot of Precision, Recall, and F1 Score')

plt.show()

# # Create a DataFrame for easy summary and visualization
# df_results = pd.DataFrame({
#     'Fold': np.tile(range(1, 11), 3),  # folds 1..10 repeated once per metric, matching the Metric/Score order
#     'Metric': ['Precision'] * 10 + ['Recall'] * 10 + ['F1 Score'] * 10,
#     'Score': precision_list + recall_list + f1_list
# })

# # Visualize the results with a single boxplot
# plt.figure(figsize=(10, 6))
# sns.boxplot(x='Metric', y='Score', data=df_results)
# plt.title('Performance Across 10 Folds')
# plt.show()

# # Summary table
# summary_table = df_results.groupby('Metric')['Score'].describe()[['min', 'mean', 'max']]
# summary_table.columns = ['Worst Fold', 'Avg. Fold', 'Best Fold']
# print("\nSummary of Results:")
# print(summary_table)



In [2]:
import pickle

# Pickled evaluation metrics of the "100epochsDataAugLRScheduler" experiment.
file_path = "/home/wangg/REU-Hearing-Loss-Project-1/machine learning/checkpoint results/10folds - 80-20 train test split (includes all subjects)/experiments/100epochsDataAugLRScheduler/results/evaluation_results.pkl"

# NOTE: pickle.load can execute arbitrary code while unpickling; only open
# files that were produced by a trusted run.
with open(file_path, 'rb') as pkl_in:
    evaluation_results = pickle.load(pkl_in)

# Show the loaded object; `evaluation_results` stays available for further
# inspection in later cells.
print(evaluation_results)


{'precision_list': [0.5567010309278351, 0.9772727272727273, 0.8362573099415205, 0.9815950920245399, 0.9754601226993865, 0.99375, 0.9696969696969697, 1.0, 1.0, 1.0], 'recall_list': [0.675, 0.80625, 0.89375, 1.0, 0.99375, 0.99375, 1.0, 1.0, 1.0, 1.0], 'f1_list': [0.6101694915254238, 0.8835616438356164, 0.8640483383685801, 0.9907120743034056, 0.9845201238390093, 0.99375, 0.9846153846153847, 1.0, 1.0, 1.0]}
