In [None]:
# Mount Google Drive into the Colab filesystem; later cells read the dataset
# from, and save the figure to, paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
import shutil
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from matplotlib import pyplot
import matplotlib.image as mpimg

from sklearn.model_selection import train_test_split
import keras
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.layers import BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import model_from_json
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPool2D, Dropout, MaxPooling2D, Activation, ZeroPadding2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from PIL import Image
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy

In [None]:
base_frame_path = '/content/drive/MyDrive/toybox_5fold'

def load_data_for_fold(fold):
    """Load one cross-validation fold fully into memory as grayscale arrays.

    Walks ``<base_frame_path>/fold_<fold>/train`` and ``.../val``, one
    sub-directory per class, and reads every ``*.jpg`` frame. Each frame is
    converted to single-channel ('L'), resized to 224x224 and scaled to
    [0, 1]. The label is the part of the *file name* before the first ``_``
    (not the directory name).

    Args:
        fold: Fold identifier used to build the ``fold_<fold>`` directory name.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where the ``x`` arrays are
        float32 with shape ``(N, 224, 224, 1)`` and the ``y`` arrays hold
        integer class indices. Indices follow the alphabetical order of the
        labels seen across both splits, so the mapping is identical on every
        run and for every fold that contains the same classes.

    Raises:
        ValueError: If a split directory contains no ``.jpg`` frames.
    """
    split_dirs = {
        'train': os.path.join(base_frame_path, f'fold_{fold}', 'train'),
        'val': os.path.join(base_frame_path, f'fold_{fold}', 'val'),
    }
    samples = {'train': [], 'val': []}
    unique_labels = set()

    for split_name, split_dir in split_dirs.items():
        # Sorted listings make the sample order reproducible across runs.
        for class_folder in sorted(os.listdir(split_dir)):
            class_path = os.path.join(split_dir, class_folder)
            if not os.path.isdir(class_path):
                continue

            for frame_file in sorted(os.listdir(class_path)):
                if not frame_file.endswith('.jpg'):
                    continue

                label = frame_file.split('_')[0]
                unique_labels.add(label)

                # The context manager closes the underlying file handle even
                # if decoding or resizing fails.
                with Image.open(os.path.join(class_path, frame_file)) as image_pil:
                    image_resized = image_pil.convert('L').resize((224, 224))

                # float32 halves the memory footprint compared with the
                # float64 that plain `/ 255.0` on a uint8 array would produce.
                image = np.asarray(image_resized, dtype=np.float32) / 255.0
                samples[split_name].append((image, label))

    for split_name, split_dir in split_dirs.items():
        if not samples[split_name]:
            raise ValueError(f"no .jpg frames found under {split_dir}")

    # Iterating a set of strings has no stable order between interpreter
    # runs; sorting pins every class to the same index each time.
    label_to_index = {label: idx for idx, label in enumerate(sorted(unique_labels))}

    def _to_arrays(pairs):
        # Stack the per-image arrays and add the trailing channel axis.
        images = np.array([image for image, _ in pairs], dtype=np.float32)
        indices = np.array([label_to_index[label] for _, label in pairs])
        return images.reshape(-1, 224, 224, 1), indices

    x_train, y_train = _to_arrays(samples['train'])
    x_test, y_test = _to_arrays(samples['val'])

    return (x_train, y_train), (x_test, y_test)

In [None]:
def create_tf_dataset(x_data, y_data, batch_size=32, shuffle=True):
    """Wrap paired in-memory arrays in a batched ``tf.data.Dataset``.

    Args:
        x_data: Samples; sliced along the first axis.
        y_data: Targets aligned with ``x_data`` along the first axis.
        batch_size: Number of samples per emitted batch.
        shuffle: When truthy, shuffle with a buffer as large as the data.

    Returns:
        A dataset yielding ``(x_batch, y_batch)`` pairs.
    """
    pairs = tf.data.Dataset.from_tensor_slices((x_data, y_data))

    # The buffer covers every element, so the shuffle spans the whole dataset.
    ordered = pairs.shuffle(buffer_size=len(x_data)) if shuffle else pairs

    return ordered.batch(batch_size)

In [None]:
# Define the AlexNet model
def alexnet_model(img_shape=(224, 224, 1), n_classes=12):
    """Build an (uncompiled) AlexNet-style ``Sequential`` classifier.

    Five convolutional stages (each Conv2D -> BatchNormalization -> ReLU,
    with 2x2 max-pooling after stages 1, 2, 3 and 5) are followed by two
    fully connected stages with dropout and a softmax output stage.

    Args:
        img_shape: Shape of one input image as ``(height, width, channels)``.
        n_classes: Number of output units / classes.

    Returns:
        The assembled model; the caller is responsible for ``compile``.
    """
    # Initialize model
    alexnet = Sequential()

    # Declare the input explicitly instead of passing `input_shape=` to the
    # first layer, which newer Keras versions warn against for Sequential.
    alexnet.add(tf.keras.Input(shape=img_shape))

    # Layer 1 (the only layer carrying an L2 kernel regularizer)
    alexnet.add(Conv2D(96, (11, 11), padding='same', kernel_regularizer='l2'))
    alexnet.add(BatchNormalization())
    alexnet.add(Activation('relu'))
    alexnet.add(MaxPooling2D(pool_size=(2, 2)))

    # Layer 2
    alexnet.add(Conv2D(256, (5, 5), padding='same'))
    alexnet.add(BatchNormalization())
    alexnet.add(Activation('relu'))
    alexnet.add(MaxPooling2D(pool_size=(2, 2)))

    # Layer 3
    # NOTE(review): explicit zero padding is stacked on top of padding='same'
    # here and in layers 4-5, which enlarges the feature map before each
    # conv; kept as in the original architecture - confirm it is intended.
    alexnet.add(ZeroPadding2D((1, 1)))
    alexnet.add(Conv2D(512, (3, 3), padding='same'))
    alexnet.add(BatchNormalization())
    alexnet.add(Activation('relu'))
    alexnet.add(MaxPooling2D(pool_size=(2, 2)))

    # Layer 4 (no pooling)
    alexnet.add(ZeroPadding2D((1, 1)))
    alexnet.add(Conv2D(1024, (3, 3), padding='same'))
    alexnet.add(BatchNormalization())
    alexnet.add(Activation('relu'))

    # Layer 5
    alexnet.add(ZeroPadding2D((1, 1)))
    alexnet.add(Conv2D(1024, (3, 3), padding='same'))
    alexnet.add(BatchNormalization())
    alexnet.add(Activation('relu'))
    alexnet.add(MaxPooling2D(pool_size=(2, 2)))

    # Layer 6
    alexnet.add(Flatten())
    alexnet.add(Dense(3072))
    alexnet.add(BatchNormalization())
    alexnet.add(Activation('relu'))
    alexnet.add(Dropout(0.5))

    # Layer 7
    alexnet.add(Dense(4096))
    alexnet.add(BatchNormalization())
    alexnet.add(Activation('relu'))
    alexnet.add(Dropout(0.5))

    # Layer 8
    # NOTE(review): batch-normalising the logits right before softmax is
    # unusual; kept unchanged so the architecture stays the same.
    alexnet.add(Dense(n_classes))
    alexnet.add(BatchNormalization())
    alexnet.add(Activation('softmax'))

    return alexnet

In [None]:
## TOO MUCH RAM
# 5-fold cross-validation with every fold loaded fully into memory.
# This cell needs the array-returning load_data_for_fold / create_tf_dataset;
# a later cell in this notebook redefines both names to work on file paths.

# Per-fold training curves: one list of per-epoch values per fold
fold_train_accuracies = []
fold_val_accuracies = []
fold_train_losses = []
fold_val_losses = []

# Parameters
batch_size = 32
epochs = 10
learning_rate = 0.001
num_classes = 12

# Loop through each fold
for fold in range(1, 6):  # Assuming folds are numbered 1 through 5
    # Drop the Keras state left over from the previous fold's model so it
    # does not keep accumulating across folds.
    tf.keras.backend.clear_session()

    # Load the data for the current fold
    (x_train, y_train), (x_test, y_test) = load_data_for_fold(fold)

    # Convert labels to one-hot encoding (matches 'categorical_crossentropy')
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    # Create TensorFlow datasets
    train_dataset = create_tf_dataset(x_train, y_train, batch_size=batch_size, shuffle=True)
    val_dataset = create_tf_dataset(x_test, y_test, batch_size=batch_size, shuffle=False)

    # Initialize a fresh model so no weights carry over between folds
    model = alexnet_model(img_shape=(224, 224, 1), n_classes=num_classes)

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Train exactly once per fold and keep the history; fitting twice would
    # train for 2 * epochs and discard the first run's metrics.
    print(f"Training fold {fold}...")
    history = model.fit(train_dataset, epochs=epochs, validation_data=val_dataset)

    # Save the metrics for each epoch
    fold_train_accuracies.append(history.history['accuracy'])
    fold_val_accuracies.append(history.history['val_accuracy'])
    fold_train_losses.append(history.history['loss'])
    fold_val_losses.append(history.history['val_loss'])

    # Release this fold's arrays and datasets before the next fold is loaded,
    # so two folds are never resident in memory at the same time.
    del x_train, y_train, x_test, y_test, train_dataset, val_dataset

KeyboardInterrupt: 

In [None]:
# ONE FOLD
# Hyperparameters for the single-fold, in-memory run
num_classes = 12
batch_size = 32
learning_rate = 0.001
epochs = 10

# Read fold 1 into memory, then one-hot encode both label vectors
(x_train, y_train), (x_test, y_test) = load_data_for_fold(1)
y_train, y_test = (to_categorical(y, num_classes) for y in (y_train, y_test))

# Batched input pipelines: only the training split is shuffled
train_dataset = create_tf_dataset(
    x_train, y_train, batch_size=batch_size, shuffle=True
)
val_dataset = create_tf_dataset(
    x_test, y_test, batch_size=batch_size, shuffle=False
)

# Build and compile a fresh AlexNet for single-channel 224x224 input
model = alexnet_model(img_shape=(224, 224, 1), n_classes=num_classes)
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit and keep the per-epoch metrics for the plotting cell
print(f"Training...")
history = model.fit(train_dataset, epochs=epochs, validation_data=val_dataset)

In [None]:
# no preprocessing, return image paths and labels only
def load_data_for_fold(fold, base_path='/content/drive/MyDrive/toybox_5fold'):
    """Collect image paths and integer labels for one cross-validation fold.

    Scans ``<base_path>/fold_<fold>/train`` and ``.../val``, one
    sub-directory per class, and records every ``*.jpg`` file. No image is
    opened. The label is the part of the *file name* before the first ``_``
    (not the directory name).

    Args:
        fold: Fold identifier used to build the ``fold_<fold>`` directory name.
        base_path: Root directory that contains the ``fold_<n>`` folders.

    Returns:
        ``((train_image_paths, y_train), (val_image_paths, y_val))`` - four
        plain lists; the ``y`` lists hold integer class indices aligned with
        the path lists. Indices follow the alphabetical order of the labels
        seen across both splits, so the mapping is the same on every run.
    """
    split_dirs = {
        'train': os.path.join(base_path, f'fold_{fold}', 'train'),
        'val': os.path.join(base_path, f'fold_{fold}', 'val'),
    }
    image_paths = {'train': [], 'val': []}
    raw_labels = {'train': [], 'val': []}
    unique_labels = set()

    for split_name, split_dir in split_dirs.items():
        # Sorted listings make the sample order reproducible across runs.
        for class_folder in sorted(os.listdir(split_dir)):
            class_path = os.path.join(split_dir, class_folder)
            if not os.path.isdir(class_path):
                continue

            for frame_file in sorted(os.listdir(class_path)):
                if not frame_file.endswith('.jpg'):
                    continue

                label = frame_file.split('_')[0]
                unique_labels.add(label)
                image_paths[split_name].append(os.path.join(class_path, frame_file))
                raw_labels[split_name].append(label)

    # Map labels to indices. Iterating a set of strings has no stable order
    # between interpreter runs; sorting pins each class to a fixed index.
    label_to_index = {label: idx for idx, label in enumerate(sorted(unique_labels))}
    print(label_to_index)

    # Convert labels to their respective indices
    y_train = [label_to_index[label] for label in raw_labels['train']]
    y_val = [label_to_index[label] for label in raw_labels['val']]

    return (image_paths['train'], y_train), (image_paths['val'], y_val)

def preprocess_image(image_path, label, target_size=(224, 224), num_classes=12):
    """Read one JPEG from disk and return ``(image, one_hot_label)`` tensors.

    Intended to be used inside ``tf.data.Dataset.map``, so it only uses
    TensorFlow ops that work on symbolic tensors.

    Args:
        image_path: Scalar string tensor (or str) with the file path.
        label: Integer class index.
        target_size: ``(height, width)`` the decoded image is resized to.
        num_classes: Length of the one-hot label vector.

    Returns:
        ``(image, label)`` where ``image`` is single-channel with values
        scaled by 1/255 and ``label`` is a one-hot vector of length
        ``num_classes``.
    """
    # Load the image
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=1)  # Use channels=3 for RGB
    image = tf.image.resize(image, target_size)
    image = image / 255.0  # Normalize pixel values

    # tf.one_hot is a graph op, so it works on the symbolic label tensor that
    # Dataset.map passes in; the NumPy-oriented to_categorical only does so
    # on Keras versions that special-case tensors.
    label = tf.one_hot(label, depth=num_classes)

    return image, label

def create_tf_dataset(image_paths, labels, batch_size=32, shuffle=True, target_size=(224, 224)):
    """Build a batched, prefetching ``tf.data.Dataset`` from image file paths.

    Images are decoded lazily by ``preprocess_image`` as batches are
    requested, rather than being loaded up front.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of integer labels aligned with ``image_paths``.
        batch_size: Number of samples per emitted batch.
        shuffle: When truthy, shuffle the (path, label) pairs with a buffer
            as large as the whole list, before any image is decoded.
        target_size: ``(height, width)`` forwarded to ``preprocess_image``.

    Returns:
        A dataset yielding ``(image_batch, label_batch)`` pairs.
    """
    def _load(path, label):
        # Bind target_size so Dataset.map can call this with (path, label).
        return preprocess_image(path, label, target_size)

    records = tf.data.Dataset.from_tensor_slices((image_paths, labels))

    if shuffle:
        records = records.shuffle(buffer_size=len(image_paths))

    decoded = records.map(_load, num_parallel_calls=tf.data.AUTOTUNE)
    batched = decoded.batch(batch_size)

    return batched.prefetch(buffer_size=tf.data.AUTOTUNE)

In [None]:
# Parameters
num_classes = 12
batch_size = 32
learning_rate = 0.001
epochs = 2

# Gather file paths and integer labels for fold 1 (images are not read here)
(x_train_paths, y_train), (x_val_paths, y_val) = load_data_for_fold(1)

# Streaming input pipelines: only the training split is shuffled
train_dataset = create_tf_dataset(
    x_train_paths, y_train, batch_size=batch_size, shuffle=True
)
val_dataset = create_tf_dataset(
    x_val_paths, y_val, batch_size=batch_size, shuffle=False
)

# Build and compile a fresh AlexNet for single-channel 224x224 input
model = alexnet_model(img_shape=(224, 224, 1), n_classes=num_classes)
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit and keep the per-epoch metrics for the plotting cell
print(f"Training...")
history = model.fit(train_dataset, epochs=epochs, validation_data=val_dataset)

{'helicopter': 0, 'spoon': 1, 'airplane': 2, 'mug': 3, 'duck': 4, 'cup': 5, 'car': 6, 'truck': 7, 'ball': 8, 'cat': 9, 'giraffe': 10, 'horse': 11}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training...
[1m  78/1460[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m8:34:38[0m 22s/step - accuracy: 0.4204 - loss: 1.8783

In [None]:
plt.figure(figsize=(12, 6))

# One stacked panel per metric:
# (subplot position, training key, validation key, panel title, y-axis label)
panels = (
    (211, 'accuracy', 'val_accuracy', 'AlexNet model accuracy', 'accuracy'),
    (212, 'loss', 'val_loss', 'AlexNet model loss', 'loss'),
)

for position, train_key, val_key, panel_title, y_label in panels:
    plt.subplot(position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')

# Write the figure to Drive before showing it
plt.savefig('/content/drive/MyDrive/alexnet_loss_accuracy.png')

plt.show()