In [None]:
from keras.layers import Reshape, InputLayer, Input, MaxPooling2D, Conv2D, Dense, Flatten, Dropout
from statsmodels.stats.contingency_tables import mcnemar
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from keras.utils import to_categorical
from keras.models import Sequential, load_model
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns
import pandas as pd
import numpy as np
import os

In [None]:
# Check tensorflow version
tf.__version__

In [None]:
label_file_path = '../data/train/outputs/image_descriptions.txt'

In [None]:
# Define a directory for saved files
output_dir = '../data/train/snippets/'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

In [None]:
# Constants
img_shape_full = (150, 150, 1)
num_classes = 2

In [None]:
# Get a list of all .npy files in the directory
npy_files = [f for f in os.listdir(output_dir) if f.endswith('.npy')]

In [None]:
# Read labels from the text file
labels = {}
with open(label_file_path, 'r') as file:
    for line in file:
        filename, label = line.strip().split(',')
        labels[filename] = int(label)

# Load images and their labels based on the filenames
images = []
image_labels = []
for npy_file in sorted(os.listdir(output_dir)): # SUS
    if npy_file.endswith('.npy') and npy_file in labels:
        img_array = np.load(os.path.join(output_dir, npy_file))
        images.append(img_array)
        image_labels.append(labels[npy_file])


In [None]:
print("Number of images:", len(images))
print("Number of labels:", len(image_labels))
assert len(images) == len(image_labels)

print("Image shape:", images[0].shape)

In [None]:
# Assuming all images are the same size and reshaped properly for input to a CNN
images = np.array(images).reshape(-1, 150, 150, 1)  # Reshape for CNN, change shape as necessary

# Start constructing the Keras Sequential model.
model = Sequential()

# Add an input layer which is similar to a feed_dict in TensorFlow.
model.add(InputLayer(input_shape=img_shape_full))

# First convolutional layer with ReLU-activation and max-pooling.
model.add(Conv2D(kernel_size=3, strides=1, filters=1, padding='same',
                activation='relu', name='layer_conv1'))
# model.add(Dropout(0.1))
model.add(MaxPooling2D(pool_size=2, strides=2))


model.add(Conv2D(kernel_size=3, strides=1, filters=1, padding='same',
                activation='relu', name='layer_conv2'))
# model.add(Dropout(0.1))
model.add(MaxPooling2D(pool_size=2, strides=2))

# Flatten the output of the convolutional layers
model.add(Flatten())

# Last fully-connected layer with softmax-activation for use in classification.
model.add(Dense(num_classes, activation='softmax'))

# Compile the model (assuming this is for a classification task)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Show a summary of the model's architecture
model.summary()

In [None]:
base_path = '../models/model_no_dropout/'
path = base_path + 'model_no_dropout'

In [None]:
# Assuming 'model' is your Keras model

# Open a file in write mode
with open(path + '.txt', 'w') as f:
    # Pass the file handle to the print function of model.summary()
    model.summary(print_fn=lambda x: f.write(x + '\n'))

print("Model summary saved to %s", path + ".txt")


In [None]:
# Convert lists to numpy arrays
image_labels = np.array(image_labels)
print(image_labels)

# Convert labels to one-hot encoding
image_labels = to_categorical(image_labels)
print(image_labels)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, image_labels, test_size=0.3, random_state=42)

# Standardize pixel values to mean 0 and standard deviation 1
mean = np.mean(X_train, axis=(0, 1, 2), keepdims=True)
std = np.std(X_train, axis=(0, 1, 2), keepdims=True)

# Save to a file
np.save(base_path + 'mean.npy', mean)
np.save(base_path + 'std.npy', std)

X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

In [None]:
# Fit the model
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=5,
    epochs=6,
    verbose=1,
    validation_data=(X_test, y_test)  # Use test data for validation
)

model.save(path + '.keras')

In [None]:
# Set larger font sizes and bold fonts
plt.rc('font', size=14)  # controls default text sizes
plt.rc('axes', titlesize=16, titleweight='bold', labelsize=14, labelweight='bold')  # fontsize of the axes title and labels
plt.rc('xtick', labelsize=12)  # fontsize of the tick labels
plt.rc('ytick', labelsize=12)  # fontsize of the tick labels
plt.rc('legend', fontsize=12)  # legend fontsize

# Plot training & validation accuracy values
plt.figure(figsize=(8, 8))
plt.plot(history.history['accuracy'], marker='o', linestyle='-', color='blue')
plt.plot(history.history['val_accuracy'], marker='o', linestyle='--', color='green')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.savefig(path + '_accuracy.png')
plt.close()

# Plot training & validation loss values
plt.figure(figsize=(8, 8))
plt.plot(history.history['loss'], marker='o', linestyle='-', color='red')
plt.plot(history.history['val_loss'], marker='o', linestyle='--', color='purple')
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.savefig(path + '_loss.png')
plt.close()

# Predict the values from the test dataset
Y_pred = model.predict(X_test)
Y_pred_classes = np.argmax(Y_pred, axis=1) 
Y_true = np.argmax(y_test, axis=1)

# Compute the confusion matrix
cm = confusion_matrix(Y_true, Y_pred_classes)

# Plot confusion matrix
plt.figure(figsize=(8, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap='viridis')  # Using 'viridis' for better visibility
plt.title('Confusion Matrix')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.savefig(path + '_confusion_matrix.png')
plt.close()

print("Plots saved in the 'models' directory.")

In [113]:
# Load models
model1 = load_model('../models/model_dropout/model_dropout.keras')
model2 = load_model('../models/model_no_dropout/model_no_dropout.keras')

In [114]:
# Assuming X_test and y_test are your test datasets
y_pred1 = model1.predict(X_test)
y_pred2 = model2.predict(X_test)



In [115]:
y_pred1_classes = np.argmax(y_pred1, axis=1)
y_pred2_classes = np.argmax(y_pred2, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

In [116]:
cm1 = confusion_matrix(y_true_classes, y_pred1_classes)
cm2 = confusion_matrix(y_true_classes, y_pred2_classes)

In [117]:
confusion_matrix_path = '../models/confusion_matrices/'

In [118]:
pd.DataFrame(cm1).to_csv(confusion_matrix_path + 'model_dropout.csv', index=False)
pd.DataFrame(cm2).to_csv(confusion_matrix_path + 'model_no_dropout.csv', index=False)

In [119]:
# Load the confusion matrices
cm1 = pd.read_csv(confusion_matrix_path + 'model_dropout.csv').to_numpy()
cm2 = pd.read_csv(confusion_matrix_path + 'model_no_dropout.csv').to_numpy()

In [120]:
print(cm1)
print(cm2)

[[7 0]
 [0 2]]
[[6 1]
 [0 2]]


In [121]:
# Extract b and c for McNemar's test
# b: False Positives in cm1 not present in cm2
# c: False Positives in cm2 not present in cm1
b = cm1[0, 1] - cm2[0, 1]  # FP in Matrix 1 that are TP/TN in Matrix 2
c = cm2[0, 1] - cm1[0, 1]  # FP in Matrix 2 that are TP/TN in Matrix 1

print("b:", b)
print("c:", c)
# Create the contingency table for McNemar's test
contingency_table = np.array([[0, max(0, b)], [max(0, c), 0]])

# Perform McNemar's test
result = mcnemar(contingency_table, exact=False)
print("McNemar's Test Statistic:", result.statistic)
print("P-value:", result.pvalue)

# Optional: Save the p-value to a file
mcnemar_path = '../models/mcnemar_test_results/'
with open(mcnemar_path + 'mcnemar_1_conv.txt', 'w') as file:
    file.write(f"McNemar's Test Statistic: {result.statistic}\n")
    file.write(f"P-value: {result.pvalue}")

b: -1
c: 1
McNemar's Test Statistic: 0.0
P-value: 1.0
