In [None]:
# ======================================
# 1️⃣ Imports
# ======================================
import os, shutil, random
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import scipy.io
import kagglehub

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.metrics import TopKCategoricalAccuracy


In [None]:
# ======================================
# 2️⃣ Download Dataset
# ======================================
# Download the Stanford Cars dataset through kagglehub, then walk the
# downloaded tree to find the training-image folder and the training
# annotation file.
print("Downloading dataset...")
DATASET_PATH = kagglehub.dataset_download("eduardo4jesus/stanford-cars-dataset")
print("Dataset path:", DATASET_PATH)

IMAGES_PATH, ANNOS_PATH = None, None
for root, dirs, files in os.walk(DATASET_PATH):
    if "cars_train_annos.mat" in files:
        ANNOS_PATH = os.path.join(root, "cars_train_annos.mat")
    # The last directory named "cars_train" that is visited wins. os.walk is
    # top-down by default, so a nested cars_train/cars_train replaces its parent.
    if os.path.basename(root) == "cars_train":
        IMAGES_PATH = root

print("Images path:", IMAGES_PATH)
print("Annotations path:", ANNOS_PATH)

# Fail fast with a clear message: a None path would otherwise only surface
# later as an opaque error when the annotations are loaded or images copied.
if IMAGES_PATH is None or ANNOS_PATH is None:
    raise FileNotFoundError(
        "Could not locate the 'cars_train' folder and/or 'cars_train_annos.mat' "
        f"under {DATASET_PATH!r}"
    )


Downloading dataset...
Using Colab cache for faster access to the 'stanford-cars-dataset' dataset.
Dataset path: /kaggle/input/stanford-cars-dataset
Images path: /kaggle/input/stanford-cars-dataset/cars_train/cars_train
Annotations path: /kaggle/input/stanford-cars-dataset/car_devkit/devkit/cars_train_annos.mat


In [None]:
# ======================================
# 3️⃣ Load Annotations
# ======================================
# Read the MATLAB annotation file and group image file names by class id.
annos = scipy.io.loadmat(ANNOS_PATH)
annotations = annos["annotations"][0]

# class_dict maps an integer class id to the list of image names in that class.
class_dict = {}
for ann in annotations:
    # Field 5 is used as the image file name, field 4 as the class id.
    img_name = ann[5][0]
    class_id = int(ann[4][0][0])
    if class_id not in class_dict:
        class_dict[class_id] = []
    class_dict[class_id].append(img_name)

print("Total classes:", len(class_dict))


Total classes: 196


In [None]:
# ======================================
# 4️⃣ Select Top 40 Classes
# ======================================
# Keep the classes with the most images and copy their images into one
# folder per class ("class_<id>") under OUTPUT_PATH.

# How many of the most-populated classes to keep.
N_SELECTED_CLASSES = 40

sorted_classes = sorted(class_dict.items(), key=lambda x: len(x[1]), reverse=True)
selected_classes = [cls for cls, imgs in sorted_classes[:N_SELECTED_CLASSES]]

OUTPUT_PATH = "/kaggle/working/cars_40_classes"
os.makedirs(OUTPUT_PATH, exist_ok=True)

# Remove class folders left by an earlier run that are not part of the current
# selection; the DataFrame cell lists every class_* folder under OUTPUT_PATH,
# so leftovers would silently become extra labels.
wanted_folders = {f"class_{cls}" for cls in selected_classes}
for entry in os.listdir(OUTPUT_PATH):
    entry_path = os.path.join(OUTPUT_PATH, entry)
    if (
        entry.startswith("class_")
        and entry not in wanted_folders
        and os.path.isdir(entry_path)
    ):
        shutil.rmtree(entry_path)

# Count annotated images that are absent from IMAGES_PATH instead of failing.
missing = 0
for cls in selected_classes:
    class_folder = os.path.join(OUTPUT_PATH, f"class_{cls}")
    os.makedirs(class_folder, exist_ok=True)
    for img in class_dict[cls]:
        src = os.path.join(IMAGES_PATH, img)
        dst = os.path.join(class_folder, img)
        if os.path.exists(src):
            shutil.copy(src, dst)
        else:
            missing += 1

# Report the number of classes actually selected (fewer than
# N_SELECTED_CLASSES if the annotations contain fewer classes).
print(f"✅ Created dataset with {len(selected_classes)} classes at:", OUTPUT_PATH)
print("⚠️ Missing images:", missing)


✅ Created dataset with 40 classes at: /kaggle/working/cars_40_classes
⚠️ Missing images: 0


In [None]:
# ======================================
# 5️⃣ Build DataFrame
# ======================================
# Index every image under OUTPUT_PATH as one row: "filename" is the full path,
# "label" is the name of the class_* folder that contains the image.
DATA_PATH = OUTPUT_PATH
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

data = []
for cls in sorted(os.listdir(DATA_PATH)):
    cls_path = os.path.join(DATA_PATH, cls)
    # Only class_* directories are class folders; a stray file with that
    # prefix would make the inner listdir raise.
    if not cls.startswith("class_") or not os.path.isdir(cls_path):
        continue
    # os.listdir returns entries in arbitrary order. Sorting fixes the row
    # order so the seeded train/validation split is reproducible across runs.
    for img in sorted(os.listdir(cls_path)):
        if img.lower().endswith(IMAGE_EXTENSIONS):
            data.append({"filename": os.path.join(cls_path, img), "label": cls})

# Passing the columns explicitly keeps "filename"/"label" present even when no
# image was found, so the summary below prints zeros instead of raising KeyError.
df = pd.DataFrame(data, columns=["filename", "label"])
print("Total images:", len(df))
print("Total classes:", df["label"].nunique())


Total images: 1847
Total classes: 40


In [None]:
# ======================================
# 6️⃣ Train / Validation Split
# ======================================
# Stratified 80/20 split on the label column with a fixed random_state.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df["label"],
)

IMG_SIZE = 299 # Updated for InceptionV3
BATCH_SIZE = 32

# Random augmentations applied to training images only.
_augmentation = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.25,
    shear_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.7,1.3],
)
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, **_augmentation)

# Validation images receive the preprocessing function but no augmentation.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Arguments shared by the training and validation iterators.
_flow_kwargs = dict(
    x_col="filename",
    y_col="label",
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

train_gen = train_datagen.flow_from_dataframe(train_df, shuffle=True, **_flow_kwargs)

# The validation iterator is not shuffled.
val_gen = val_datagen.flow_from_dataframe(val_df, shuffle=False, **_flow_kwargs)

NUM_CLASSES = len(train_gen.class_indices)
print("NUM_CLASSES:", NUM_CLASSES)


Found 1477 validated image filenames belonging to 40 classes.
Found 370 validated image filenames belonging to 40 classes.
NUM_CLASSES: 40


In [None]:
import os, shutil
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.inception_v3 import preprocess_input

# Paths
# This cell rebuilds the DataFrame, the split and the generators from the
# already-prepared class folders, using a literal path.
# NOTE(review): this rebinds DATASET_PATH, which the download cell set to the
# kagglehub download location — confirm nothing later needs the original value.
DATASET_PATH = "/kaggle/working/cars_40_classes"

# Build DataFrame: one row per image, labelled with its class_* folder name.
data = []
for cls in sorted(os.listdir(DATASET_PATH)):
    cls_path = os.path.join(DATASET_PATH, cls)
    # Skip anything that is not a class_* directory; a stray file with that
    # prefix would make the inner listdir raise.
    if not cls.startswith("class_") or not os.path.isdir(cls_path):
        continue
    # os.listdir order is arbitrary; sort so the row order, and therefore the
    # seeded split below, is the same on every run.
    for img in sorted(os.listdir(cls_path)):
        if img.lower().endswith(('.jpg','.jpeg','.png')):
            data.append({"filename": os.path.join(cls_path, img), "label": cls})
df = pd.DataFrame(data, columns=["filename", "label"])

# Train/Validation Split (stratified 80/20 with a fixed random_state)
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["label"], random_state=42)

# Data Augmentation
IMG_SIZE = 299  # InceptionV3 default
BATCH_SIZE = 32

# Training images get random augmentation plus the preprocessing function.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.25,
    shear_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.7,1.3]
)

# Validation images get the preprocessing function only.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

train_gen = train_datagen.flow_from_dataframe(
    train_df, x_col="filename", y_col="label",
    target_size=(IMG_SIZE, IMG_SIZE), batch_size=BATCH_SIZE,
    class_mode="categorical", shuffle=True
)

# The validation iterator is not shuffled.
val_gen = val_datagen.flow_from_dataframe(
    val_df, x_col="filename", y_col="label",
    target_size=(IMG_SIZE, IMG_SIZE), batch_size=BATCH_SIZE,
    class_mode="categorical", shuffle=False
)

NUM_CLASSES = len(train_gen.class_indices)
print("NUM_CLASSES:", NUM_CLASSES)


Found 1477 validated image filenames belonging to 40 classes.
Found 370 validated image filenames belonging to 40 classes.
NUM_CLASSES: 40


In [None]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import TopKCategoricalAccuracy

# InceptionV3 with ImageNet weights and without its top classification layers.
base_model = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

# Freeze every layer of the base model so only the new head is trainable.
for layer in base_model.layers:
    layer.trainable = False

# Custom head: global average pooling -> dropout -> softmax over NUM_CLASSES.
x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.5)(x)
preds = Dense(NUM_CLASSES, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=preds)

# Compile with Adam, categorical cross-entropy, and top-1 / top-5 accuracy.
top5_metric = TopKCategoricalAccuracy(k=5)
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy', top5_metric],
)

model.summary()


In [None]:
# Rebuild the base model (ImageNet weights, no top layers); this replaces any
# previously defined base_model and model.
base_model = InceptionV3(
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    include_top=False,
)

# Stack the classifier head on the base output: pooling, then dropout,
# then a softmax layer with one unit per class.
x = base_model.output
for head_layer in (GlobalAveragePooling2D(), Dropout(0.5)):
    x = head_layer(x)
outputs = Dense(NUM_CLASSES, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=outputs)

# Freeze all base-model layers (the base acts as a fixed feature extractor).
for layer in base_model.layers:
    layer.trainable = False

# Compile for the initial training stage.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy', TopKCategoricalAccuracy(k=5)],
    loss='categorical_crossentropy',
)


In [None]:
# Save the model to disk whenever validation accuracy improves.
checkpoint = ModelCheckpoint(
    filepath="inceptionv3_best_model.keras",
    monitor="val_accuracy",
    save_best_only=True,
    verbose=1,
)

# Stop after 5 epochs without a val_accuracy improvement and restore the best weights.
early_stop = EarlyStopping(
    monitor="val_accuracy",
    patience=5,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 3 epochs without a val_loss improvement.
reduce_lr = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=3,
    verbose=1,
)


In [None]:
# First training stage: up to 10 epochs with checkpointing, early stopping
# and learning-rate reduction; the returned History object is kept in `history`.
stage_callbacks = [checkpoint, early_stop, reduce_lr]
history = model.fit(
    train_gen,
    epochs=10,
    validation_data=val_gen,
    callbacks=stage_callbacks,
)


Epoch 1/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.0275 - loss: 3.9512 - top_k_categorical_accuracy: 0.1329
Epoch 1: val_accuracy improved from -inf to 0.04054, saving model to inceptionv3_best_model.keras
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 2s/step - accuracy: 0.0275 - loss: 3.9500 - top_k_categorical_accuracy: 0.1329 - val_accuracy: 0.0405 - val_loss: 3.6606 - val_top_k_categorical_accuracy: 0.1919 - learning_rate: 1.0000e-04
Epoch 2/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 946ms/step - accuracy: 0.0413 - loss: 3.7549 - top_k_categorical_accuracy: 0.1777
Epoch 2: val_accuracy improved from 0.04054 to 0.09459, saving model to inceptionv3_best_model.keras
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 1s/step - accuracy: 0.0413 - loss: 3.7551 - top_k_categorical_accuracy: 0.1777 - val_accuracy: 0.0946 - val_loss: 3.5370 - val_top_k_categorical_accuracy: 0.2865 - learni

In [None]:
from tensorflow.keras.layers import BatchNormalization

# Unfreeze the last 50 layers of InceptionV3 for fine-tuning, but leave any
# BatchNormalization layers among them frozen. The Keras transfer-learning
# guide recommends keeping BatchNormalization layers frozen while fine-tuning,
# because letting them update their moving statistics can undo what the
# pretrained model has learned.
for layer in base_model.layers[-50:]:  # fine-tune last 50 layers
    if isinstance(layer, BatchNormalization):
        continue
    layer.trainable = True

# Recompile so the changed trainable flags take effect, with a lower LR
# than the first stage (1e-5 instead of 1e-4).
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy', TopKCategoricalAccuracy(k=5)]
)

# Continue training with the same callback instances as the first stage.
history_ft = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    callbacks=[checkpoint, early_stop, reduce_lr]
)


Epoch 1/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1570 - loss: 3.2907 - top_k_categorical_accuracy: 0.4214
Epoch 1: val_accuracy improved from 0.31081 to 0.32973, saving model to inceptionv3_best_model.keras
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 2s/step - accuracy: 0.1568 - loss: 3.2903 - top_k_categorical_accuracy: 0.4212 - val_accuracy: 0.3297 - val_loss: 2.8464 - val_top_k_categorical_accuracy: 0.7162 - learning_rate: 1.0000e-05
Epoch 2/10
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 945ms/step - accuracy: 0.1695 - loss: 3.1525 - top_k_categorical_accuracy: 0.4502
Epoch 2: val_accuracy improved from 0.32973 to 0.34595, saving model to inceptionv3_best_model.keras
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 1s/step - accuracy: 0.1696 - loss: 3.1518 - top_k_categorical_accuracy: 0.4508 - val_accuracy: 0.3459 - val_loss: 2.7907 - val_top_k_categorical_accuracy: 0.7162 - lea

In [None]:
# Evaluate on the validation generator; the result unpacks in the order
# loss, accuracy, top-5 accuracy (the metrics given to compile()).
eval_results = model.evaluate(val_gen, verbose=1)
val_loss, val_acc, val_top5 = eval_results

# Report both accuracies as percentages.
print(f"Validation Accuracy: {val_acc*100:.2f}%")
print(f"Validation Top-5 Accuracy: {val_top5*100:.2f}%")


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 195ms/step - accuracy: 0.4988 - loss: 2.2029 - top_k_categorical_accuracy: 0.8154
Validation Accuracy: 49.46%
Validation Top-5 Accuracy: 82.16%
