In [4]:
import pandas as pd
import numpy as np

train_meta = pd.read_csv("./train/train_metadata.csv")
train_add_features = pd.read_csv("./train/features/additional_features.csv")
train_color_hist = pd.read_csv("./train/features/color_histogram.csv")
train_hog_pca = pd.read_csv("./train/features/hog_pca.csv")


test_meta = pd.read_csv("./test/test_metadata.csv")
# test_meta.drop("ClassId", inplace=True, axis=1) # useless for now
test_add_features = pd.read_csv("./test/features/additional_features.csv")
test_color_hist = pd.read_csv("./test/features/color_histogram.csv")
test_hog_pca = pd.read_csv("./test/features/hog_pca.csv")

In [5]:
# merging all the dataframes

train_df = pd.merge(train_meta, train_add_features, on = "image_path", how = "left")
train_df = pd.merge(train_df, train_color_hist, on = "image_path", how = "left")
train_df = pd.merge(train_df, train_hog_pca, on = "image_path", how = "left")

test_df = pd.merge(test_meta, test_add_features, on = "image_path", how = "left")
test_df = pd.merge(test_df, test_color_hist, on = "image_path", how = "left")
test_df = pd.merge(test_df, test_hog_pca, on = "image_path", how = "left")


# this is just better for reading files
train_df["image_path"] = train_df["image_path"].apply(lambda x : "train/" + x)
test_df["image_path"] = test_df["image_path"].apply(lambda x : "test/" + x)

In [6]:
train_df = train_df[["image_path", "ClassId", "id"]]

test_df = test_df[["image_path", "ClassId", "id"]]

In [36]:
# train a CNN to classify the images into classes
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
import numpy as np
import cv2
from PIL import Image

# Define image dimensions - smaller for faster training
IMG_HEIGHT, IMG_WIDTH = 128, 128

# Function to load and preprocess images
def load_images(image_paths, img_height=IMG_HEIGHT, img_width=IMG_WIDTH):
    images = []
    for path in image_paths:
        img = cv2.imread(path)
        if img is not None:
            img = cv2.resize(img, (img_width, img_height))
            img = img / 255.0  # Normalize
            images.append(img)
    return np.array(images)

# Get image paths and labels from train_df
image_paths = train_df['image_path'].values
labels = train_df['ClassId'].values

# Convert labels to one-hot encoding
num_classes = 43
labels = tf.keras.utils.to_categorical(labels, num_classes=num_classes)

# Load images
X_images = load_images(image_paths)

# Split data
X_train_img, X_val_img, y_train_img, y_val_img = train_test_split(
    X_images, labels, test_size=0.2, random_state=42, stratify=np.argmax(labels, axis=1)
)

def mixup(x, y, alpha=0.2):
    lam = np.random.beta(alpha, alpha)
    idx = np.random.permutation(len(x))
    x_mix = lam * x + (1 - lam) * x[idx]
    y_mix = lam * y + (1 - lam) * y[idx]
    return x_mix, y_mix

# Data augmentation with fewer transformations for speed
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

# Build a custom CNN model
def build_cnn_model(num_classes=43):
    model = models.Sequential([
        # First Convolutional Block
        layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
        layers.BatchNormalization(),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.25),

        # Second Convolutional Block
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.25),

        # Third Convolutional Block
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Dropout(0.25),

        # Dense Layers
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    
    return model

cnn_model = build_cnn_model()
loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)

INITIAL_LR = 0.00005
EPOCHS = 50
BATCH_SIZE = 32


cnn_model.compile(
    optimizer=Adam(learning_rate=0.00005),
    loss=loss,
    metrics=['accuracy']
)

# Add callbacks
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.2,
    patience=2,
    min_lr=1e-6,
    verbose=1
)

early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

checkpoint = ModelCheckpoint(
    'best_cnn_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

# Create a custom data generator that applies mixup
class MixupDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, x, y, batch_size=32, alpha=0.2):
        self.x = x
        self.y = y
        self.batch_size = batch_size
        self.alpha = alpha
        
    def __len__(self):
        return int(np.ceil(len(self.x) / self.batch_size))
    
    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        
        # Apply mixup to the batch
        batch_x, batch_y = mixup(batch_x, batch_y, self.alpha)
        return batch_x, batch_y

# Create the mixup data generator
train_generator = MixupDataGenerator(X_train_img, y_train_img, batch_size=32)

# Train model with mixup
history = cnn_model.fit(
    X_train_img, y_train_img,
    validation_data=(X_val_img, y_val_img),
    epochs=50,
    callbacks=[reduce_lr, early_stopping, checkpoint],
    verbose=1
)

# Load the best model saved during training
cnn_model = tf.keras.models.load_model('best_cnn_model.h5')

# Evaluate model
val_loss, val_acc = cnn_model.evaluate(X_val_img, y_val_img)
print(f"Validation accuracy: {val_acc:.4f}")

# Make predictions on test set
test_image_paths = test_df['image_path'].values
X_test_img = load_images(test_image_paths)
cnn_predictions = cnn_model.predict(X_test_img)
cnn_pred_classes = np.argmax(cnn_predictions, axis=1)

# Save CNN predictions
cnn_pred_df = pd.DataFrame({'id': test_df['id'], 'ClassId': cnn_pred_classes})
cnn_pred_df.set_index('id', inplace=True)
cnn_pred_df.to_csv('cnn_predictions.csv')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 227ms/step - accuracy: 0.1335 - loss: 4.1946
Epoch 1: val_accuracy improved from -inf to 0.02004, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 242ms/step - accuracy: 0.1340 - loss: 4.1905 - val_accuracy: 0.0200 - val_loss: 4.0999 - learning_rate: 5.0000e-05
Epoch 2/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233ms/step - accuracy: 0.4287 - loss: 2.5934
Epoch 2: val_accuracy improved from 0.02004 to 0.02732, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 244ms/step - accuracy: 0.4291 - loss: 2.5921 - val_accuracy: 0.0273 - val_loss: 3.8108 - learning_rate: 5.0000e-05
Epoch 3/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step - accuracy: 0.5960 - loss: 1.9801
Epoch 3: val_accuracy improved from 0.02732 to 0.12659, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 243ms/step - accuracy: 0.5962 - loss: 1.9795 - val_accuracy: 0.1266 - val_loss: 3.2571 - learning_rate: 5.0000e-05
Epoch 4/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step - accuracy: 0.7087 - loss: 1.6546
Epoch 4: val_accuracy improved from 0.12659 to 0.59290, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 243ms/step - accuracy: 0.7088 - loss: 1.6544 - val_accuracy: 0.5929 - val_loss: 1.9254 - learning_rate: 5.0000e-05
Epoch 5/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - accuracy: 0.8076 - loss: 1.4240
Epoch 5: val_accuracy improved from 0.59290 to 0.81785, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 242ms/step - accuracy: 0.8076 - loss: 1.4239 - val_accuracy: 0.8179 - val_loss: 1.3510 - learning_rate: 5.0000e-05
Epoch 6/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - accuracy: 0.8407 - loss: 1.3067
Epoch 6: val_accuracy improved from 0.81785 to 0.88707, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 242ms/step - accuracy: 0.8407 - loss: 1.3067 - val_accuracy: 0.8871 - val_loss: 1.1543 - learning_rate: 5.0000e-05
Epoch 7/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - accuracy: 0.8835 - loss: 1.2227
Epoch 7: val_accuracy improved from 0.88707 to 0.90346, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 242ms/step - accuracy: 0.8834 - loss: 1.2228 - val_accuracy: 0.9035 - val_loss: 1.1248 - learning_rate: 5.0000e-05
Epoch 8/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - accuracy: 0.8990 - loss: 1.1774
Epoch 8: val_accuracy did not improve from 0.90346
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 240ms/step - accuracy: 0.8990 - loss: 1.1774 - val_accuracy: 0.9016 - val_loss: 1.1128 - learning_rate: 5.0000e-05
Epoch 9/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - accuracy: 0.9224 - loss: 1.1226
Epoch 9: val_accuracy improved from 0.90346 to 0.91257, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 241ms/step - accuracy: 0.9224 - loss: 1.1226 - val_accuracy: 0.9126 - val_loss: 1.0964 - learning_rate: 5.0000e-05
Epoch 10/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step - accuracy: 0.9242 - loss: 1.1038
Epoch 10: val_accuracy improved from 0.91257 to 0.93352, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 243ms/step - accuracy: 0.9242 - loss: 1.1038 - val_accuracy: 0.9335 - val_loss: 1.0508 - learning_rate: 5.0000e-05
Epoch 11/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - accuracy: 0.9475 - loss: 1.0700
Epoch 11: val_accuracy did not improve from 0.93352
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 240ms/step - accuracy: 0.9474 - loss: 1.0700 - val_accuracy: 0.9335 - val_loss: 1.0356 - learning_rate: 5.0000e-05
Epoch 12/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - accuracy: 0.9510 - loss: 1.0460
Epoch 12: val_accuracy improved from 0.93352 to 0.93898, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 242ms/step - accuracy: 0.9510 - loss: 1.0461 - val_accuracy: 0.9390 - val_loss: 1.0416 - learning_rate: 5.0000e-05
Epoch 13/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 230ms/step - accuracy: 0.9519 - loss: 1.0466
Epoch 13: val_accuracy improved from 0.93898 to 0.93989, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 241ms/step - accuracy: 0.9519 - loss: 1.0466 - val_accuracy: 0.9399 - val_loss: 1.0196 - learning_rate: 5.0000e-05
Epoch 14/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step - accuracy: 0.9625 - loss: 1.0138
Epoch 14: val_accuracy improved from 0.93989 to 0.94353, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 243ms/step - accuracy: 0.9624 - loss: 1.0138 - val_accuracy: 0.9435 - val_loss: 0.9992 - learning_rate: 5.0000e-05
Epoch 15/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step - accuracy: 0.9584 - loss: 1.0192
Epoch 15: val_accuracy improved from 0.94353 to 0.95173, saving model to best_cnn_model.h5




[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 242ms/step - accuracy: 0.9585 - loss: 1.0191 - val_accuracy: 0.9517 - val_loss: 0.9886 - learning_rate: 5.0000e-05
Epoch 16/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - accuracy: 0.9699 - loss: 0.9931
Epoch 16: val_accuracy did not improve from 0.95173
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 241ms/step - accuracy: 0.9699 - loss: 0.9931 - val_accuracy: 0.9508 - val_loss: 0.9847 - learning_rate: 5.0000e-05
Epoch 17/50
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step - accuracy: 0.9775 - loss: 0.9863
Epoch 17: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.

Epoch 17: val_accuracy did not improve from 0.95173
[1m138/138[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 240ms/step - accuracy: 0.9775 - loss: 0.9863 - val_accuracy: 0.9463 - val_loss: 1.0019 - learning_rate: 5.0000e-05
Epoch 18/50
[1m138/138[0



[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 39ms/step - accuracy: 0.9488 - loss: 0.9974
Validation accuracy: 0.9517
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 43ms/step
