In [83]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Data Preparation

In [84]:
# Load and preprocess images from file paths
def load_image(file_path, target_size=(64, 64)):
    image = load_img(file_path, target_size=target_size)
    image_array = img_to_array(image) / 255.0
    return image_array

In [85]:
def prepare_data(file_path):
    # Read the csv file
    df = pd.read_csv(file_path)

    # Trim dataset for faster testing for now
    #df = df.sample(n=100, random_state=42)

    # Append data/ in front of every image file path
    df['image:FILE'] = 'data/' + df['image:FILE']
    
    # Add new column with loaded image
    df['image'] = df['image:FILE'].apply(lambda x: load_image(x))

    X = np.stack(df['image'].to_numpy())
    y = df['category'].to_numpy()

    return X, y


In [86]:
X_train, y_train = prepare_data('data/test.csv')
X_val, y_val = prepare_data('data/val.csv')
X_test, y_test = prepare_data('data/test.csv')

# Train Model

In [87]:
def CNN_train():
    # Define the CNN architecture
    model = models.Sequential([
        layers.Input(shape=(64, 64, 3)),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(30)  # 30 unique labels in dataset
    ])

    # Compile the model
    model.compile(optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])
    
    # Train the model
    model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

    # Evaluate the model
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    print(f'Test accuracy: {test_acc}')
    print(f'Test loss: {test_loss}')

    # Save the model
    #model.save('CNN_model.h5')

In [88]:
CNN_train()

Epoch 1/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 95ms/step - accuracy: 0.0733 - loss: 3.2438 - val_accuracy: 0.1907 - val_loss: 2.7107
Epoch 2/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 104ms/step - accuracy: 0.2883 - loss: 2.3674 - val_accuracy: 0.2750 - val_loss: 2.3981
Epoch 3/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 105ms/step - accuracy: 0.4047 - loss: 1.9799 - val_accuracy: 0.3410 - val_loss: 2.3149
Epoch 4/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 105ms/step - accuracy: 0.4650 - loss: 1.7442 - val_accuracy: 0.3870 - val_loss: 2.1682
Epoch 5/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 97ms/step - accuracy: 0.5465 - loss: 1.4720 - val_accuracy: 0.4313 - val_loss: 2.0039
Epoch 6/10
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 98ms/step - accuracy: 0.6131 - loss: 1.2330 - val_accuracy: 0.4403 - val_loss: 2.1059
Epoch 7/10
[