<a href="https://colab.research.google.com/github/ahmedwalidahmad-debug/car-type-classification/blob/main/resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ======================================
# 1️⃣ Imports
# ======================================
import os, shutil, random
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import scipy.io
import kagglehub

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.metrics import TopKCategoricalAccuracy


In [2]:
# ======================================
# 2️⃣ Download Dataset
# ======================================
print("Downloading dataset...")
DATASET_PATH = kagglehub.dataset_download("eduardo4jesus/stanford-cars-dataset")
print("Dataset path:", DATASET_PATH)

IMAGES_PATH, ANNOS_PATH = None, None
for root, dirs, files in os.walk(DATASET_PATH):
    if "cars_train_annos.mat" in files:
        ANNOS_PATH = os.path.join(root, "cars_train_annos.mat")
    if os.path.basename(root) == "cars_train":
        IMAGES_PATH = root

print("Images path:", IMAGES_PATH)
print("Annotations path:", ANNOS_PATH)


Downloading dataset...
Using Colab cache for faster access to the 'stanford-cars-dataset' dataset.
Dataset path: /kaggle/input/stanford-cars-dataset
Images path: /kaggle/input/stanford-cars-dataset/cars_train/cars_train
Annotations path: /kaggle/input/stanford-cars-dataset/car_devkit/devkit/cars_train_annos.mat


In [3]:
# ======================================
# 3️⃣ Load Annotations
# ======================================
annos = scipy.io.loadmat(ANNOS_PATH)
annotations = annos["annotations"][0]

class_dict = {}
for ann in annotations:
    img_name = ann[5][0]
    class_id = int(ann[4][0][0])
    class_dict.setdefault(class_id, []).append(img_name)

print("Total classes:", len(class_dict))


Total classes: 196


In [5]:
# ======================================
# 4️⃣ Select Top 40 Classes
# ======================================
# ترتيب الكلاسات حسب عدد الصور
sorted_classes = sorted(class_dict.items(), key=lambda x: len(x[1]), reverse=True)
selected_classes = [cls for cls, imgs in sorted_classes[:40]]

OUTPUT_PATH = "/kaggle/working/cars_40_classes"
os.makedirs(OUTPUT_PATH, exist_ok=True)

missing = 0
for cls in selected_classes:
    class_folder = os.path.join(OUTPUT_PATH, f"class_{cls}")
    os.makedirs(class_folder, exist_ok=True)
    for img in class_dict[cls]:
        src = os.path.join(IMAGES_PATH, img)
        dst = os.path.join(class_folder, img)
        if os.path.exists(src):
            shutil.copy(src, dst)
        else:
            missing += 1

print("✅ Created dataset with 40 classes at:", OUTPUT_PATH)
print("⚠️ Missing images:", missing)


✅ Created dataset with 40 classes at: /kaggle/working/cars_40_classes
⚠️ Missing images: 0


In [6]:
# ======================================
# 5️⃣ Build DataFrame
# ======================================
DATA_PATH = OUTPUT_PATH
data = []
for cls in sorted(os.listdir(DATA_PATH)):
    if not cls.startswith("class_"):
        continue
    cls_path = os.path.join(DATA_PATH, cls)
    for img in os.listdir(cls_path):
        if img.lower().endswith(('.jpg','.jpeg','.png')):
            data.append({"filename": os.path.join(cls_path, img), "label": cls})

df = pd.DataFrame(data)
print("Total images:", len(df))
print("Total classes:", df["label"].nunique())


Total images: 1847
Total classes: 40


In [7]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# ======================================
# 6️⃣ Train / Validation / Test Split (70% / 10% / 20%)
# ======================================

# الطريقة البديلة: نفصل test أولًا، ثم val، ثم train
# بس بنفس النتيجة

# أولًا: فصل 20% test
df_temp, test_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df["label"],
    random_state=42
)

# ثانيًا: من الباقي (80%)، فصل 10% val (يعني test_size=0.1 / 0.8 = 0.125 زي ما كان)
train_df, val_df = train_test_split(
    df_temp,
    test_size=0.125,
    stratify=df_temp["label"],
    random_state=42
)

# أو طريقة تانية مباشرة بدون حسابات: استخدم proportions
# بس sklearn مش بيدعم multi-split مباشر، فهنعملها stepwise زي فوق

IMG_SIZE = 299
BATCH_SIZE = 32

# Data augmentation للـ train فقط
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.25,
    shear_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.7, 1.3]
)

# Validation و Test بدون augmentation
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Generators
train_gen = train_datagen.flow_from_dataframe(
    train_df,
    x_col="filename",
    y_col="label",
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=True
)

val_gen = val_datagen.flow_from_dataframe(
    val_df,
    x_col="filename",
    y_col="label",
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)

test_gen = test_datagen.flow_from_dataframe(
    test_df,
    x_col="filename",
    y_col="label",
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)

NUM_CLASSES = len(train_gen.class_indices)
print("NUM_CLASSES:", NUM_CLASSES)

total_samples = len(df)
print(f"Train samples: {len(train_df)} ({len(train_df)/total_samples*100:.1f}%)")
print(f"Validation samples: {len(val_df)} ({len(val_df)/total_samples*100:.1f}%)")
print(f"Test samples: {len(test_df)} ({len(test_df)/total_samples*100:.1f}%)")

Found 1292 validated image filenames belonging to 40 classes.
Found 185 validated image filenames belonging to 40 classes.
Found 370 validated image filenames belonging to 40 classes.
NUM_CLASSES: 40
Train samples: 1292 (70.0%)
Validation samples: 185 (10.0%)
Test samples: 370 (20.0%)


In [8]:
# ======================================
# 7️⃣ Build Model (ResNet50)
# ======================================
base_model = ResNet50(weights="imagenet", include_top=False,
                      input_shape=(IMG_SIZE, IMG_SIZE, 3))

for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.4)(x)
output = Dense(NUM_CLASSES, activation="softmax")(x)

model = Model(base_model.input, output)

model.compile(
    optimizer=Adam(1e-4),
    loss="categorical_crossentropy",
    metrics=["accuracy", TopKCategoricalAccuracy(k=5, name="top5_accuracy")]
)

model.summary()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [9]:
# ======================================
# 8️⃣ Training Stage 1 (Feature Extraction)
# ======================================
callbacks = [
    EarlyStopping(patience=8, restore_best_weights=True),
    ReduceLROnPlateau(patience=4, factor=0.2, min_lr=1e-6),
    ModelCheckpoint("resnet50_best_stage1.keras", monitor="val_accuracy", save_best_only=True)
]

history_1 = model.fit(
    train_gen, validation_data=val_gen,
    epochs=25, callbacks=callbacks
)


  self._warn_if_super_not_called()


Epoch 1/25
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 1s/step - accuracy: 0.0257 - loss: 3.9641 - top5_accuracy: 0.1166 - val_accuracy: 0.0649 - val_loss: 3.5225 - val_top5_accuracy: 0.2919 - learning_rate: 1.0000e-04
Epoch 2/25
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 936ms/step - accuracy: 0.0666 - loss: 3.5915 - top5_accuracy: 0.2410 - val_accuracy: 0.1351 - val_loss: 3.3516 - val_top5_accuracy: 0.4486 - learning_rate: 1.0000e-04
Epoch 3/25
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 938ms/step - accuracy: 0.1422 - loss: 3.3496 - top5_accuracy: 0.3813 - val_accuracy: 0.2054 - val_loss: 3.1918 - val_top5_accuracy: 0.4811 - learning_rate: 1.0000e-04
Epoch 4/25
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 932ms/step - accuracy: 0.1548 - loss: 3.2097 - top5_accuracy: 0.4485 - val_accuracy: 0.2378 - val_loss: 3.0457 - val_top5_accuracy: 0.5405 - learning_rate: 1.0000e-04
Epoch 5/25
[1m41/41[0m [32m━

In [10]:
# ======================================
# 9️⃣ Fine-Tuning Stage 2
# ======================================
for layer in base_model.layers[-160:]:
    layer.trainable = True

model.compile(
    optimizer=Adam(1e-5),
    loss="categorical_crossentropy",
    metrics=["accuracy", TopKCategoricalAccuracy(k=5, name="top5_accuracy")]
)

callbacks_ft = [
    EarlyStopping(patience=10, restore_best_weights=True),
    ReduceLROnPlateau(patience=5, factor=0.2, min_lr=1e-7),
    ModelCheckpoint("resnet50_best_stage2.keras", monitor="val_accuracy", save_best_only=True)
]

history_2 = model.fit(
    train_gen, validation_data=val_gen,
    epochs=50, callbacks=callbacks_ft
)


Epoch 1/50
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 2s/step - accuracy: 0.4753 - loss: 1.8747 - top5_accuracy: 0.7970 - val_accuracy: 0.5189 - val_loss: 1.6925 - val_top5_accuracy: 0.8486 - learning_rate: 1.0000e-05
Epoch 2/50
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 1s/step - accuracy: 0.6205 - loss: 1.2955 - top5_accuracy: 0.9170 - val_accuracy: 0.4973 - val_loss: 1.6238 - val_top5_accuracy: 0.8649 - learning_rate: 1.0000e-05
Epoch 3/50
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 1s/step - accuracy: 0.7010 - loss: 1.1256 - top5_accuracy: 0.9403 - val_accuracy: 0.5622 - val_loss: 1.4989 - val_top5_accuracy: 0.8811 - learning_rate: 1.0000e-05
Epoch 4/50
[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 1s/step - accuracy: 0.7225 - loss: 0.9944 - top5_accuracy: 0.9535 - val_accuracy: 0.5892 - val_loss: 1.3642 - val_top5_accuracy: 0.8865 - learning_rate: 1.0000e-05
Epoch 5/50
[1m41/41[0m [32m━━━━━━━━━