In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

# For my first CNN model
import tensorflow as tf
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# For the CNN model using VGG16
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Performance metric analysis
from sklearn.metrics import classification_report

# Importing and Loading the Data

In [2]:
directory = "../plots/spectograms"

# Options shared by the training and validation subsets. The split fraction,
# shuffle flag and seed are passed identically to both calls; only `subset`
# differs between them.
split_options = dict(
    labels='inferred',
    label_mode='int',
    class_names=None,
    color_mode='rgb',
    batch_size=32,
    image_size=(128, 128),  # images are loaded at this size; (256, 256) is an option if results are poor
    shuffle=True,
    seed=42,
    validation_split=0.2,
    interpolation='bilinear',
    follow_links=False,
    crop_to_aspect_ratio=False,
)

# Training subset (80% of the files)
train = tf.keras.utils.image_dataset_from_directory(
    directory, subset='training', **split_options
)

# Validation (= testing) subset (remaining 20% of the files)
test = tf.keras.utils.image_dataset_from_directory(
    directory, subset='validation', **split_options
)

Found 37175 files belonging to 6 classes.
Using 29740 files for training.
Found 37175 files belonging to 6 classes.
Using 7435 files for validation.


In [3]:
# Class-balance check for the 6 target classes.
# If the counts turn out to be imbalanced, class weights should be used.

base_directory = "../plots/spectograms"

# Map every class subfolder to the number of .png files directly inside it
image_counts = {}
for entry in os.listdir(base_directory):
    entry_path = os.path.join(base_directory, entry)
    if not os.path.isdir(entry_path):
        continue  # ignore stray files next to the class folders
    image_counts[entry] = sum(
        1 for name in os.listdir(entry_path) if name.endswith('.png')
    )

# Report one line per class folder
for class_name, n_images in image_counts.items():
    print(f"Subfolder '{class_name}': {n_images} images")

Subfolder 'ANG': 6345 images
Subfolder 'NEU': 5435 images
Subfolder 'SAD': 6355 images
Subfolder 'HAP': 6355 images
Subfolder 'FEA': 6340 images
Subfolder 'DIS': 6345 images


---

# Creating the first CNN model:

In [None]:
# Build the CNN as one ordered stack of layers
cnn = Sequential([
    # Conv block 1: 32 filters of 3x3 over the 128x128 RGB input.
    # 'same' padding with stride 1 keeps the spatial size unchanged.
    Conv2D(filters=32,
           kernel_size=(3, 3),
           strides=(1, 1),
           padding='same',
           activation='relu',
           input_shape=(128, 128, 3),
           kernel_regularizer=l2(0.01)),
    # Downsample by 2 in each spatial dimension
    MaxPooling2D(pool_size=(2, 2)),

    # Conv block 2: filter count doubled to 64
    Conv2D(filters=64,
           kernel_size=(3, 3),
           strides=(1, 1),
           padding='same',
           activation='relu',
           kernel_regularizer=l2(0.01)),
    # Downsample again
    MaxPooling2D(pool_size=(2, 2)),

    # Collapse the feature maps into a single vector per image
    Flatten(),

    # Output layer: one softmax probability per class (6 classes)
    Dense(units=6,
          activation='softmax',
          kernel_regularizer=l2(0.01)),
])

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the CNN
cnn.summary()

In [None]:
# Configure training: sparse (integer-label) cross-entropy loss, Adam optimizer,
# and accuracy reported as the tracked metric
cnn.compile(optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])

In [None]:
# Early Stopping callback.
# Monitor the *validation* metric: training accuracy keeps improving while the
# model overfits, so it is not a useful signal for stopping. Using the same
# metric as the checkpoint below keeps "best weights" and "best saved model"
# referring to the same criterion.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,                # Number of epochs with no improvement after which training will stop
    restore_best_weights=True  # Restore the weights from the epoch with the best validation accuracy
)

# ModelCheckpoint callback (saves the best model seen so far)
model_checkpoint = ModelCheckpoint(
    'best_cnn.h5',           # Filepath to save the best model
    monitor='val_accuracy',  # Metric used to decide what "best" means
    save_best_only=True,     # Only overwrite the file when the metric improves
    verbose=1
)

In [None]:
# Fit the model.
# `train` and `test` are already batched (batch_size=32) by
# image_dataset_from_directory, so no batch_size is passed here: Keras
# documents that batch_size must not be specified for dataset inputs.
history = cnn.fit(
    train,
    validation_data=test,
    epochs=25,
    callbacks=[early_stopping, model_checkpoint],
    verbose=1,
)

In [None]:
# Plot training vs. validation accuracy for each epoch that actually ran

train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# Early stopping can end training before the configured maximum, so the tick
# range is taken from the recorded history instead of being hard-coded.
n_epochs = len(train_acc)

plt.plot(train_acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.xticks(range(n_epochs), range(1, n_epochs + 1))  # label epochs starting at 1
plt.legend()
plt.title('CNN Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy');

In [None]:
# Run the CNN over the validation dataset. The final layer is a 6-unit softmax,
# so each prediction row holds one probability per class.
# NOTE(review): `test` was created with shuffle=True — presumably its order
# changes between passes, so these rows may not line up with labels read in a
# separate pass over `test`; confirm before comparing against ground truth.
predictions = cnn.predict(test)

---

# Now using a VGG16 model with Transfer Learning

In [4]:
# Loading the pre-trained VGG16 model without the top classification layers.
# weights='imagenet' loads the ImageNet pre-trained weights; include_top=False
# drops the original classifier so a custom head can be attached.
# input_shape matches the 128x128 RGB images produced by the data-loading cell.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

In [5]:
# Custom classification head stacked on the VGG16 base,
# sized for this multi-class problem with 6 target classes.

# Collapse the VGG16 feature maps into one vector per image
flattened = Flatten()(base_model.output)
# Hidden fully-connected layer
x = Dense(128, activation='relu')(flattened)
# Output layer: one softmax probability per class
output = Dense(6, activation='softmax')(x)

In [6]:
# Assemble the full VGG16-based classifier: VGG16 input -> custom 6-class head.
# tf.keras.Model is used so the model class comes from the same Keras
# implementation as the tensorflow.keras layers it connects (mixing in the
# standalone `keras` package can break when the two versions differ).
vgg16_model = tf.keras.Model(inputs=base_model.input, outputs=output)

In [7]:
# Prepare the model for fine-tuning

# Keras layers are trainable by default, so the pre-trained base has to be
# frozen explicitly. Without this step, marking the last layers as trainable
# changes nothing and every VGG16 weight is updated during training.
for layer in base_model.layers[:-4]:
    layer.trainable = False

# Fine-tune only the last 4 layers of the base
for layer in base_model.layers[-4:]:
    layer.trainable = True

# Compile the model (done after the trainable flags are set so they take effect)
vgg16_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.001),
    # 0.001 is Adam's default learning rate.
    # NOTE(review): fine-tuning pre-trained layers is usually done with a much
    # smaller learning rate — consider lowering it if training is unstable.
    # The legacy Adam is used because the newer tf.keras.optimizers.Adam runs slowly on M1/M2 Macs.
    metrics=['accuracy']
)

In [8]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the VGG16-based model
vgg16_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 128, 128, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 128, 128, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 128, 128, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 64, 64, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 64, 64, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 64, 64, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 32, 32, 128)       0     

In [None]:
# Early Stopping callback.
# Monitor the validation metric (training accuracy keeps rising while the model
# overfits, so it is not a useful stopping signal).
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=3,  # must be smaller than `epochs` below, otherwise early stopping can never trigger
    restore_best_weights=True
)

# ModelCheckpoint callback (saves the best model seen so far)
model_checkpoint = ModelCheckpoint(
    'best_vgg16.h5',         # Filepath to save the best model
    monitor='val_accuracy',  # Metric to monitor
    save_best_only=True,     # Only overwrite the file when the metric improves
    verbose=1
)

# `train` and `test` are already batched, so no batch_size is given
history = vgg16_model.fit(
    train,
    validation_data=test,
    epochs=10,
    verbose=1,
    callbacks=[early_stopping, model_checkpoint]
)

# Chance-level baseline accuracy for 6 classes: 100% / 6 = ~17%

Epoch 1/10


2023-09-19 12:00:50.197029: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz




In [None]:
# Evaluate the VGG16 model on `test`; the result is unpacked as (loss, accuracy),
# i.e. the loss plus the single 'accuracy' metric set at compile time.
# NOTE(review): `test` is the same dataset passed as validation_data during fit,
# so this is a validation score rather than a score on unseen data.
test_loss, test_accuracy = vgg16_model.evaluate(test)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy*100:.2f}%")

In [None]:
# Display the epoch that produced the best model.
# len(history) only gives the number of epochs that ran; the best model is the
# one with the highest validation accuracy, which is also what the checkpoint
# callback saves. np.argmax returns the first maximum, matching
# save_best_only (later ties do not overwrite the file).
val_accuracy_per_epoch = history.history['val_accuracy']
best_model_epochs = int(np.argmax(val_accuracy_per_epoch)) + 1  # epochs are 1-based
print("Number of epochs in the best model:", best_model_epochs)
print("Total epochs run:", len(val_accuracy_per_epoch))

In [None]:
# Plot training vs. validation accuracy for each epoch that actually ran

train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# The tick range is taken from the recorded history: this model trains for at
# most 10 epochs (fewer if early stopping fires), not 25.
n_epochs = len(train_acc)

plt.plot(train_acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.xticks(range(n_epochs), range(1, n_epochs + 1))  # label epochs starting at 1
plt.legend()
plt.title('VGG16 Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy');

In [None]:
# Run the VGG16 model over the validation dataset. The final layer is a 6-unit
# softmax, so each prediction row holds one probability per class.
# NOTE(review): `test` was created with shuffle=True — presumably its order
# changes between passes, so these rows may not line up with labels read in a
# separate pass over `test`; confirm before comparing against ground truth.
predictions = vgg16_model.predict(test)

### Generating a multi-class classification report for the VGG16 model:

In [None]:
# Classification report (per-class precision / recall / F1)

# Class folders are read in alphanumeric order, which is the order of the integer labels
target_names = ['ANG', 'DIS', 'FEA', 'HAP', 'NEU', 'SAD']

# classification_report needs 1-D arrays of integer class ids, not a
# tf.data.Dataset and a matrix of softmax probabilities. Labels and predictions
# are collected in the SAME pass over `test`: the dataset was built with
# shuffle=True, so two separate passes would not be aligned row-for-row.
y_true_batches = []
y_pred_batches = []
for images, labels in test:
    class_probabilities = vgg16_model.predict_on_batch(images)
    y_true_batches.append(labels.numpy())
    y_pred_batches.append(np.argmax(class_probabilities, axis=1))  # most probable class id

y_true = np.concatenate(y_true_batches)
y_pred = np.concatenate(y_pred_batches)

print(classification_report(
    y_true,
    y_pred,
    labels=list(range(len(target_names))),  # keep all 6 rows even if a class is never predicted
    target_names=target_names,
))

# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html