In [33]:
# Takes the SHFT images that are generated in the file "Capstone_CNN_SHFT_CREATION"; the images are sorted in the ZIP file under test, train and validate.
# The SHFTs are created using the raw data columns in the raw dataset folder.
# The purpose of this model is to take the images as inputs into the binary CNN classification model.
# The output of this model is the accuracy with which it can predict whether a participant is doing a problem-solving task or a recall task.
# Note that many parameters of this model were tweaked to increase the accuracy, such as the split of the test data and the number of epochs.

import numpy as np
import pandas as pd
import seaborn as sb
from matplotlib import pyplot as plt
from google.colab import drive
from sklearn.model_selection import cross_val_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
import os

# This notebook was created on Google Colab and needs access to the files/images stored on Google Drive.
# Mount the Drive so its contents are reachable under /content/drive.
drive.mount("/content/drive")

# IPython magic: make the Colab Notebooks folder the working directory.
%cd /content/drive/MyDrive/Colab Notebooks

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks


# Image Augmentation and Sourcing the Images

In [34]:
# Fraction of the images under train/ that is held out for validation (30%).
VALIDATION_SPLIT = 0.3
TRAIN_DIR = '/content/drive/MyDrive/Colab Notebooks/ManuCapstone/train'

# Training images are rescaled to [0, 1] and randomly sheared, zoomed and
# horizontally flipped to augment the small data set.
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True,
                                   validation_split = VALIDATION_SPLIT)

# Validation images are only rescaled: augmenting them would make the
# validation metrics describe distorted images rather than the real ones.
# The same validation_split is used on both generators so the 'training'
# and 'validation' subsets are carved from the same directory consistently.
validation_datagen = ImageDataGenerator(rescale = 1./255,
                                        validation_split = VALIDATION_SPLIT)

training_set = train_datagen.flow_from_directory(TRAIN_DIR,
                                                 target_size = (64, 64),
                                                 batch_size = 32,
                                                 class_mode = 'binary',
                                                 subset = 'training')  # the 70% used for fitting
validation_set = validation_datagen.flow_from_directory(TRAIN_DIR,
                                                        target_size = (64, 64),
                                                        batch_size = 32,
                                                        class_mode = 'binary',
                                                        subset = 'validation')  # the 30% held out

Found 14 images belonging to 2 classes.
Found 6 images belonging to 2 classes.


# Sourcing the Test Folder Which Contains the Images


In [35]:
# The test images are only rescaled to [0, 1]; no augmentation is applied.
test_datagen = ImageDataGenerator(rescale=1./255)

# Same image size, batch size and binary label mode as the training generators.
test_flow_options = dict(target_size=(64, 64), batch_size=32, class_mode='binary')
test_set = test_datagen.flow_from_directory(
    '/content/drive/MyDrive/Colab Notebooks/ManuCapstone/test',
    **test_flow_options,
)

Found 81 images belonging to 2 classes.


# CNN Model Creation

In [36]:
# Small CNN for binary classification of 64x64 RGB images.
Model = tf.keras.models.Sequential([
    # Convolutional layer: 4 filters of size 5x5 with ReLU activation.
    tf.keras.layers.Conv2D(filters=4, kernel_size=5, activation='relu', input_shape=[64, 64, 3]),
    # Max pooling over 4x4 windows moved with a stride of 2.
    tf.keras.layers.MaxPool2D(pool_size=4, strides=2),
    # Flatten the feature maps into a vector for the dense layers.
    tf.keras.layers.Flatten(),
    # First dense layer: 16 neurons, tanh activation.
    tf.keras.layers.Dense(units=16, activation='tanh'),
    # Second dense layer: 16 neurons, ReLU activation.
    tf.keras.layers.Dense(units=16, activation='relu'),
    # Output layer: a single sigmoid neuron giving the score for the binary class.
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])
Model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [37]:
# 20 epochs yielded the same results as 50, 80 and 100, so 20 is used to save time.
history = Model.fit(
    training_set,
    validation_data=validation_set,
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# Validating Select Images for Debugging Purposes

In [38]:
import os
from keras.preprocessing import image

# Folder with a few hand-picked images used for internal testing / debugging.
path = r'/content/drive/MyDrive/Colab Notebooks/ManuCapstone/Validate'

# Iterate over every file in the folder and print the predicted class.
for img in os.listdir(path):
  test_image = image.load_img(os.path.join(path, img), target_size = (64, 64))
  # Scale pixels to [0, 1]: the model was trained on generators that use
  # rescale = 1./255, so raw 0-255 values would not match the training inputs.
  test_image = image.img_to_array(test_image) / 255.0
  # Add the batch dimension expected by Model.predict.
  test_image = np.expand_dims(test_image, axis = 0)
  predict = Model.predict(test_image)
  # NOTE(review): assumes class index 1 corresponds to 'Recall' and 0 to 'Maze';
  # confirm against training_set.class_indices.
  prediction = 'Recall' if predict[0][0] > 0.5 else 'Maze'
  print(img + " = "+ prediction)

Copy of Copy of spectrogram_JasmineMaze_RAW_TP10_maze.png = Maze
Copy of Copy of spectrogram_JasmineRecall_RAW_TP9_recall.png = Maze


# Final Metric of the Model in Terms of Accuracy -> ~58%

In [39]:
# Model.evaluate returns [loss, accuracy] because the model was compiled with
# metrics=['accuracy']; index 1 is therefore the accuracy.
test_accuracy = Model.evaluate(test_set)
print(f'Test Accuracy: {test_accuracy[1]}')

from sklearn.metrics import confusion_matrix, classification_report

# Collect the true and predicted labels batch by batch so that each prediction
# stays paired with the label delivered in the same batch.
true_labels = []
predicted_labels = []

# Start from the first batch and take exactly one pass over the test set.
# len(test_set) is the number of batches per pass, so no extra (duplicated)
# batch is drawn when the image count is an exact multiple of the batch size.
test_set.reset()
for _ in range(len(test_set)):
    batch_data, batch_labels = next(test_set)
    true_labels.extend(batch_labels.astype(int))
    batch_predictions = Model.predict(batch_data)
    # Round the sigmoid outputs to 0/1 and flatten (batch, 1) -> (batch,)
    # so the list holds plain integer labels.
    predicted_labels.extend(np.round(batch_predictions).astype(int).ravel())

# Use a distinct name so the imported confusion_matrix function is not overwritten.
conf_matrix = confusion_matrix(true_labels, predicted_labels)

print(conf_matrix)

Test Accuracy: 0.5555555820465088
[[24 19]
 [17 21]]


The CNN model had an average accuracy slightly over 58%; however, it fluctuated by around ~5%. With more images this model would become much more accurate. Also, more filtering and increasing the number of pixels in the model may yield better accuracy.