In [33]:
import pandas as pd
import numpy as np

# Training split: the metadata table followed by the three feature tables.
train_meta, train_add_features, train_color_hist, train_hog_pca = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./train/train_metadata.csv",
        "./train/features/additional_features.csv",
        "./train/features/color_histogram.csv",
        "./train/features/hog_pca.csv",
    )
)

# Test split: the same four tables, read from the ./test tree.
# test_meta.drop("ClassId", inplace=True, axis=1) # useless for now
test_meta, test_add_features, test_color_hist, test_hog_pca = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./test/test_metadata.csv",
        "./test/features/additional_features.csv",
        "./test/features/color_histogram.csv",
        "./test/features/hog_pca.csv",
    )
)

In [34]:
# Left-join every feature table onto its metadata frame, keyed on image_path.

train_df = train_meta
for feature_table in (train_add_features, train_color_hist, train_hog_pca):
    train_df = train_df.merge(feature_table, on="image_path", how="left")

test_df = test_meta
for feature_table in (test_add_features, test_color_hist, test_hog_pca):
    test_df = test_df.merge(feature_table, on="image_path", how="left")


# Prepend the split folder to each path; this is just better for reading files.
train_df["image_path"] = train_df["image_path"].map(lambda rel_path: "train/" + rel_path)
test_df["image_path"] = test_df["image_path"].map(lambda rel_path: "test/" + rel_path)

In [35]:
# Narrow both frames to the path, label and id columns (same order for each).
cnn_columns = ["image_path", "ClassId", "id"]
train_df = train_df.loc[:, cnn_columns]

test_df = test_df.loc[:, cnn_columns]

In [36]:
# train a CNN to classify the images into classes
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
import numpy as np
import cv2
from PIL import Image

# Define image dimensions - smaller for faster training
IMG_HEIGHT, IMG_WIDTH = 96, 96


def load_images(image_paths, img_height=IMG_HEIGHT, img_width=IMG_WIDTH):
    """Load images from disk as normalized grayscale arrays.

    Each file is read with OpenCV, resized to ``(img_height, img_width)``,
    converted from BGR to grayscale and scaled into ``[0, 1]`` by dividing
    by 255.

    The output has exactly one row per input path, in input order, so it can
    be paired positionally with labels or ids taken from the same frame.

    Args:
        image_paths: Iterable of file paths accepted by ``cv2.imread``.
        img_height: Output height in pixels.
        img_width: Output width in pixels.

    Returns:
        ``np.ndarray`` of dtype float64 with shape
        ``(len(image_paths), img_height, img_width, 1)``. Empty input yields
        an array with a leading dimension of 0.

    Raises:
        OSError: If ``cv2.imread`` cannot read one of the paths. Skipping the
            file instead would shift every later row relative to its label.
    """
    # Materialize once so generators are supported and the length is known.
    paths = list(image_paths)

    # Preallocate the final batch; this also gives empty input a proper 4-D shape.
    images = np.empty((len(paths), img_height, img_width, 1), dtype=np.float64)

    for row, path in enumerate(paths):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f"cv2.imread could not read image: {path!r}")

        # cv2.resize takes (width, height), the reverse of the array shape.
        img = cv2.resize(img, (img_width, img_height))
        # Convert to grayscale
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Normalize and store in the single channel slot
        images[row, :, :, 0] = img / 255.0

    return images

# Pull the file paths and class labels out of train_df as NumPy arrays
image_paths, labels = (train_df[column].values for column in ('image_path', 'ClassId'))

# Read the training images from disk
X_images = load_images(image_paths)

# Hold out 20% for validation, stratified by label, with a fixed seed
split_kwargs = {'test_size': 0.2, 'random_state': 42, 'stratify': labels}
X_train_img, X_val_img, y_train_img, y_val_img = train_test_split(
    X_images, labels, **split_kwargs
)

# Data augmentation with fewer transformations for speed
# NOTE(review): as written, datagen is never passed to cnn_model.fit in this
# cell, so training does not see augmented samples.
# NOTE(review): horizontal_flip mirrors images; confirm no class depends on
# left/right orientation before wiring this generator into training.
augmentation_kwargs = {
    'rotation_range': 10,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'zoom_range': 0.1,
    'horizontal_flip': True,
}
datagen = ImageDataGenerator(**augmentation_kwargs)

# Build a custom CNN model for grayscale images
def _conv_block(filters):
    """Return the layers of one Conv-BN-Conv-BN-MaxPool-Dropout stage."""
    return [
        layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(filters, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.25),
    ]


def build_cnn_model(num_classes=43, input_shape=(IMG_HEIGHT, IMG_WIDTH, 1)):
    """Build the (uncompiled) convolutional classifier.

    The network is three convolutional stages with 32, 64 and 128 filters,
    followed by a 256-unit dense layer and a softmax output.

    Args:
        num_classes: Number of units in the softmax output layer.
        input_shape: ``(height, width, channels)`` of one input image.
            Defaults to the module-level image size with a single channel.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    model = models.Sequential([
        # Declare the input explicitly instead of passing `input_shape` to the
        # first Conv2D, which newer Keras versions warn about.
        layers.Input(shape=input_shape),

        # Three convolutional blocks with increasing filter counts
        *_conv_block(32),
        *_conv_block(64),
        *_conv_block(128),

        # Dense Layers
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])

    return model

# Seed Python, NumPy and TensorFlow before the model is built so weight
# initialisation, dropout masks and batch shuffling repeat across reruns.
# NOTE(review): some GPU kernels may stay nondeterministic unless op
# determinism is also enabled - confirm if bit-exact reruns are required.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

cnn_model = build_cnn_model()
cnn_model.compile(
    optimizer=Adam(learning_rate=0.0005),  # Lower learning rate for better generalization
    # Labels are integer class ids, hence the sparse variant of the loss.
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Add callbacks. All three monitor validation accuracy, so each one states
# mode='max' explicitly rather than relying on name-based inference.
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',  # Monitor validation accuracy instead of loss
    mode='max',
    factor=0.2,
    patience=2,
    min_lr=1e-6,
    verbose=1
)

early_stopping = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

# Add model checkpoint to save the best model based on validation accuracy
checkpoint = ModelCheckpoint(
    'best_cnn_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

# Train model with focus on validation metrics
history = cnn_model.fit(
    X_train_img, y_train_img,
    batch_size=32,  # Smaller batch size for better generalization
    validation_data=(X_val_img, y_val_img),
    epochs=50,
    callbacks=[reduce_lr, early_stopping, checkpoint],
    verbose=1
)

# Reload the checkpoint written by ModelCheckpoint (best validation accuracy)
cnn_model = models.load_model('best_cnn_model.h5')

# Score the reloaded model on the validation split
val_loss, val_acc = cnn_model.evaluate(X_val_img, y_val_img)
print(f"Validation accuracy: {val_acc:.4f}")

# Run the model on the test images and keep the most probable class per row
test_image_paths = test_df['image_path'].values
X_test_img = load_images(test_image_paths)
cnn_predictions = cnn_model.predict(X_test_img)
cnn_pred_classes = cnn_predictions.argmax(axis=1)

# Write the predictions to CSV, indexed by id
cnn_pred_df = pd.DataFrame(
    {'id': test_df['id'], 'ClassId': cnn_pred_classes}
).set_index('id')
cnn_pred_df.to_csv('cnn_predictions.csv')

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 330ms/step - accuracy: 0.1830 - loss: 3.6869
Epoch 1: val_accuracy improved from -inf to 0.04736, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 351ms/step - accuracy: 0.1842 - loss: 3.6800 - val_accuracy: 0.0474 - val_loss: 3.9812 - learning_rate: 5.0000e-04
Epoch 2/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 337ms/step - accuracy: 0.7575 - loss: 0.9663
Epoch 2: val_accuracy improved from 0.04736 to 0.20856, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 356ms/step - accuracy: 0.7577 - loss: 0.9652 - val_accuracy: 0.2086 - val_loss: 3.1661 - learning_rate: 5.0000e-04
Epoch 3/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 323ms/step - accuracy: 0.8900 - loss: 0.4718
Epoch 3: val_accuracy improved from 0.20856 to 0.72951, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 347ms/step - accuracy: 0.8901 - loss: 0.4714 - val_accuracy: 0.7295 - val_loss: 1.2831 - learning_rate: 5.0000e-04
Epoch 4/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 441ms/step - accuracy: 0.9509 - loss: 0.2400
Epoch 4: val_accuracy improved from 0.72951 to 0.90073, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 460ms/step - accuracy: 0.9509 - loss: 0.2398 - val_accuracy: 0.9007 - val_loss: 0.4091 - learning_rate: 5.0000e-04
Epoch 5/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 333ms/step - accuracy: 0.9800 - loss: 0.1220
Epoch 5: val_accuracy improved from 0.90073 to 0.96812, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 352ms/step - accuracy: 0.9799 - loss: 0.1220 - val_accuracy: 0.9681 - val_loss: 0.1768 - learning_rate: 5.0000e-04
Epoch 6/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 337ms/step - accuracy: 0.9807 - loss: 0.1106
Epoch 6: val_accuracy did not improve from 0.96812
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 356ms/step - accuracy: 0.9808 - loss: 0.1105 - val_accuracy: 0.9672 - val_loss: 0.1494 - learning_rate: 5.0000e-04
Epoch 7/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 358ms/step - accuracy: 0.9946 - loss: 0.0642
Epoch 7: val_accuracy improved from 0.96812 to 0.97177, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 378ms/step - accuracy: 0.9946 - loss: 0.0642 - val_accuracy: 0.9718 - val_loss: 0.1337 - learning_rate: 5.0000e-04
Epoch 8/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 333ms/step - accuracy: 0.9931 - loss: 0.0591
Epoch 8: val_accuracy did not improve from 0.97177
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 351ms/step - accuracy: 0.9931 - loss: 0.0591 - val_accuracy: 0.9709 - val_loss: 0.1250 - learning_rate: 5.0000e-04
Epoch 9/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 341ms/step - accuracy: 0.9951 - loss: 0.0389
Epoch 9: val_accuracy improved from 0.97177 to 0.97632, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 361ms/step - accuracy: 0.9951 - loss: 0.0389 - val_accuracy: 0.9763 - val_loss: 0.1097 - learning_rate: 5.0000e-04
Epoch 10/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 334ms/step - accuracy: 0.9986 - loss: 0.0235
Epoch 10: val_accuracy did not improve from 0.97632
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 353ms/step - accuracy: 0.9986 - loss: 0.0235 - val_accuracy: 0.9654 - val_loss: 0.1560 - learning_rate: 5.0000e-04
Epoch 11/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 330ms/step - accuracy: 0.9956 - loss: 0.0300
Epoch 11: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.

Epoch 11: val_accuracy did not improve from 0.97632
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 348ms/step - accuracy: 0.9956 - loss: 0.0300 - val_accuracy: 0.9736 - val_loss: 0.1089 - learning_rate: 5.0000e-04
Epoch 12/50
[1m138/138[



[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 348ms/step - accuracy: 0.9975 - loss: 0.0194 - val_accuracy: 0.9818 - val_loss: 0.0887 - learning_rate: 1.0000e-04
Epoch 13/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322ms/step - accuracy: 0.9980 - loss: 0.0172
Epoch 13: val_accuracy improved from 0.98179 to 0.98361, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 340ms/step - accuracy: 0.9980 - loss: 0.0172 - val_accuracy: 0.9836 - val_loss: 0.0859 - learning_rate: 1.0000e-04
Epoch 14/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 321ms/step - accuracy: 0.9998 - loss: 0.0147
Epoch 14: val_accuracy improved from 0.98361 to 0.98543, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 340ms/step - accuracy: 0.9998 - loss: 0.0146 - val_accuracy: 0.9854 - val_loss: 0.0791 - learning_rate: 1.0000e-04
Epoch 15/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 342ms/step - accuracy: 0.9996 - loss: 0.0118
Epoch 15: val_accuracy did not improve from 0.98543
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 361ms/step - accuracy: 0.9996 - loss: 0.0118 - val_accuracy: 0.9836 - val_loss: 0.0786 - learning_rate: 1.0000e-04
Epoch 16/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 325ms/step - accuracy: 0.9995 - loss: 0.0111
Epoch 16: ReduceLROnPlateau reducing learning rate to 2.0000000949949027e-05.

Epoch 16: val_accuracy did not improve from 0.98543
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 343ms/step - accuracy: 0.9995 - loss: 0.0111 - val_accuracy: 0.9854 - val_loss: 0.0777 - learning_rate: 1.0000e-04
Epoch 17/50
[1m138/138[



[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 77ms/step - accuracy: 0.9806 - loss: 0.1045
Validation accuracy: 0.9854
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 77ms/step
