In [5]:
from pathlib import Path

# Root folder that holds the raw images for this task.
DATA_DIR = Path("dataset_task44")

# Sanity check before any processing. The count is guarded so that a missing
# folder reports 0 entries instead of raising FileNotFoundError right after
# "Exists? False" has been printed. Every directory entry is counted, not only
# image files.
n_files = sum(1 for _ in DATA_DIR.iterdir()) if DATA_DIR.is_dir() else 0

print("Exists?", DATA_DIR.exists())
print("Number of files:", n_files)

Exists? True
Number of files: 263


In [6]:
# Cell — Generate labels.csv for multi-label dataset

from pathlib import Path
import pandas as pd

DATA_DIR = Path("dataset_task44")
exts = {".jpg", ".jpeg", ".png", ".webp"}

# One binary column per class; a single filename may match several of them
# (multi-label). Column order in labels.csv follows this list.
LABEL_COLS = ["car", "bicycle", "motorcycle"]

rows = []

# iterdir() yields entries in arbitrary, filesystem-dependent order. Sorting
# keeps labels.csv (and therefore any seeded split derived from it) identical
# across runs and machines.
for p in sorted(DATA_DIR.iterdir()):
    # Skip sub-directories and anything that is not a known image extension
    # (for example a labels.csv written by a previous run of this cell).
    if not p.is_file() or p.suffix.lower() not in exts:
        continue

    name = p.name.lower()

    # NOTE(review): this is a plain substring match, so any filename that merely
    # contains "car" (e.g. "scarf.jpg") is tagged as a car — confirm the dataset's
    # naming convention rules that out.
    rows.append({
        "filename": p.name,
        **{label: int(label in name) for label in LABEL_COLS},
    })

# Explicit columns keep the frame well-formed (and the summary below working)
# even when no image was found.
df = pd.DataFrame(rows, columns=["filename", *LABEL_COLS])

print("Total images:", len(df))
print("\nLabel distribution:")
print(df[LABEL_COLS].sum())

df.to_csv(DATA_DIR / "labels.csv", index=False)

print("\nSaved labels.csv successfully.")

Total images: 262

Label distribution:
car           98
bicycle       99
motorcycle    97
dtype: int64

Saved labels.csv successfully.


In [8]:
# Cell — Split labels.csv into train/val 

import pandas as pd
from sklearn.model_selection import train_test_split
from pathlib import Path

DATA_DIR = Path("dataset_task44")
train_csv = DATA_DIR / "train.csv"
val_csv = DATA_DIR / "val.csv"

# Full label table written by the labelling cell.
df = pd.read_csv(DATA_DIR / "labels.csv")

# Seeded, shuffled 80/20 split so the same rows land in each part on every run.
split_options = dict(test_size=0.2, random_state=42, shuffle=True)
train_df, val_df = train_test_split(df, **split_options)

# Persist both parts next to labels.csv.
for part, target in ((train_df, train_csv), (val_df, val_csv)):
    part.to_csv(target, index=False)

print("Train:", len(train_df), " Val:", len(val_df))
print("Saved:", train_csv, "and", val_csv)

Train: 209  Val: 53
Saved: dataset_task44\train.csv and dataset_task44\val.csv


In [9]:
# Cell — tf.data loader from CSV 

import tensorflow as tf
import pandas as pd
from pathlib import Path

# Images sit directly inside the dataset folder, next to the CSV files.
DATA_DIR = Path("dataset_task44")
IMG_DIR = DATA_DIR

# Target image resolution and batch size for the tf.data pipeline.
IMG_SIZE = (224, 224)
BATCH_SIZE = 16

# The order of these columns fixes the order of the entries in each label vector.
LABEL_COLS = ["car", "bicycle", "motorcycle"]

# Load the two splits written by the split cell.
train_df, val_df = (
    pd.read_csv(DATA_DIR / csv_name) for csv_name in ("train.csv", "val.csv")
)

def load_and_preprocess(filename, label_vec):
    """Read one image from IMG_DIR and turn it into a model-ready tensor.

    Args:
        filename: string tensor holding the image file name, relative to IMG_DIR.
        label_vec: label vector for that image; returned unchanged.

    Returns:
        Tuple ``(img, label_vec)`` where ``img`` is a float32 tensor resized to
        IMG_SIZE with pixel values scaled to the [0, 1] range.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # Join with the platform's own separator; a hard-coded "\\" only resolves
    # on Windows and breaks the loader on Linux/macOS.
    img_path = tf.strings.join([str(IMG_DIR), filename], separator=os.sep)
    img = tf.io.read_file(img_path)
    # decode_image picks the decoder from the file contents rather than assuming
    # JPEG. expand_animations=False makes it return a single 3-D image, which
    # tf.image.resize requires.
    img = tf.io.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(img, tf.float32) / 255.0
    return img, label_vec

def make_ds(df, shuffle=True):
    """Build a batched, prefetched tf.data pipeline from a labels DataFrame.

    Args:
        df: DataFrame with a "filename" column plus one column per entry of
            LABEL_COLS.
        shuffle: when True, shuffle the examples with a buffer of ``len(df)``
            elements (seed 42), reshuffled on every iteration.

    Returns:
        A ``tf.data.Dataset`` of ``load_and_preprocess`` outputs grouped into
        batches of BATCH_SIZE.
    """
    image_names = df["filename"].astype(str).values
    label_matrix = df[LABEL_COLS].astype("float32").values
    pipeline = tf.data.Dataset.from_tensor_slices((image_names, label_matrix))

    # Shuffle the lightweight (name, label) pairs before any image is decoded.
    if shuffle:
        pipeline = pipeline.shuffle(
            buffer_size=len(df),
            seed=42,
            reshuffle_each_iteration=True,
        )

    pipeline = pipeline.map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    return pipeline.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Only the training split is shuffled; validation keeps the CSV row order.
train_ds = make_ds(train_df, shuffle=True)
val_ds = make_ds(val_df, shuffle=False)

# Report how many batches each split produces per epoch.
for split_name, split_ds in (("Train", train_ds), ("Val", val_ds)):
    print(f"{split_name} batches:", split_ds.cardinality().numpy())

Train batches: 14
Val batches: 4


In [10]:
# Cell — Multi-label transfer learning model (ResNet50)

import tensorflow as tf
from tensorflow.keras import layers

# Derive the head size and input shape from the data-pipeline constants instead
# of repeating the literals, so the model cannot silently drift away from what
# the loader actually produces.
NUM_LABELS = len(LABEL_COLS)
INPUT_SHAPE = (*IMG_SIZE, 3)

# ImageNet-pretrained ResNet50 backbone without its classification head;
# pooling="avg" makes it emit one feature vector per image.
base = tf.keras.applications.ResNet50(
    weights="imagenet",
    include_top=False,
    pooling="avg",
    input_shape=INPUT_SHAPE,
)
base.trainable = False  # start frozen

inputs = layers.Input(shape=INPUT_SHAPE)
# The loader scales pixels by 1/255, so multiply back by 255 before applying
# the ResNet50 preprocessing function.
x = tf.keras.applications.resnet50.preprocess_input(inputs * 255.0)  # match ImageNet preprocessing
# training=False runs the backbone in inference mode.
x = base(x, training=False)
x = layers.Dropout(0.2)(x)
# One independent sigmoid per label: several labels can be active at once.
outputs = layers.Dense(NUM_LABELS, activation="sigmoid")(x)  # sigmoid for multi-label

model = tf.keras.Model(inputs, outputs)

# Binary cross-entropy treats every label as its own yes/no problem.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss="binary_crossentropy",
    metrics=[tf.keras.metrics.BinaryAccuracy(name="bin_acc")],
)

model.summary()

In [11]:
# Cell — Train (multi-label)

# Stop once validation binary accuracy has gone 4 epochs without improving,
# then roll back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_bin_acc",
    patience=4,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.3 after 2 epochs without val_loss improvement,
# never going below 1e-6.
lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    patience=2,
    factor=0.3,
    min_lr=1e-6,
)

callbacks = [early_stopping, lr_on_plateau]

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15,
    callbacks=callbacks,
    verbose=1,
)

Epoch 1/15
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1s/step - bin_acc: 0.6762 - loss: 0.6111 - val_bin_acc: 0.8994 - val_loss: 0.3253 - learning_rate: 0.0010
Epoch 2/15
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - bin_acc: 0.9171 - loss: 0.2428 - val_bin_acc: 0.9371 - val_loss: 0.1685 - learning_rate: 0.0010
Epoch 3/15
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - bin_acc: 0.9617 - loss: 0.1404 - val_bin_acc: 0.9497 - val_loss: 0.1280 - learning_rate: 0.0010
Epoch 4/15
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1s/step - bin_acc: 0.9649 - loss: 0.1118 - val_bin_acc: 0.9623 - val_loss: 0.1188 - learning_rate: 0.0010
Epoch 5/15
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - bin_acc: 0.9841 - loss: 0.0840 - val_bin_acc: 0.9686 - val_loss: 0.0913 - learning_rate: 0.0010
Epoch 6/15
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - bin_

In [12]:
# Cell — Evaluate with thresholded predictions (multi-label report)

import numpy as np
from sklearn.metrics import classification_report

# Probability cut-off above which a label counts as predicted.
threshold = 0.5

# val_ds is built with shuffle=False, so the pass that collects the labels and
# the pass made by model.predict see the examples in the same order.
y_true = np.vstack([y for _, y in val_ds])
y_prob = model.predict(val_ds, verbose=0)
y_pred = (y_prob >= threshold).astype(int)

print("Labels order:", LABEL_COLS)
# Interpolate the actual cut-off so the header cannot go stale when `threshold`
# is changed.
print(f"\nClassification report (threshold = {threshold}):")
print(classification_report(y_true, y_pred, target_names=LABEL_COLS, zero_division=0))

Labels order: ['car', 'bicycle', 'motorcycle']

Classification report (threshold = 0.5):
              precision    recall  f1-score   support

         car       1.00      1.00      1.00        15
     bicycle       1.00      0.95      0.98        21
  motorcycle       0.91      1.00      0.95        20

   micro avg       0.96      0.98      0.97        56
   macro avg       0.97      0.98      0.98        56
weighted avg       0.97      0.98      0.97        56
 samples avg       0.98      0.99      0.98        56



In [13]:
# Cell — Save multi-label model + label names

import json
# Output artefacts: the trained model and the label order its outputs follow.
MODEL_PATH = "vehicle_multilabel_resnet50.keras"
LABELS_PATH = "vehicle_multilabel_labels.json"

model.save(MODEL_PATH)

# Store the label names so predictions can be mapped back to class names later.
with open(LABELS_PATH, "w") as labels_file:
    json.dump(LABEL_COLS, labels_file)

print("Saved:", MODEL_PATH, "and", LABELS_PATH)

Saved: vehicle_multilabel_resnet50.keras and vehicle_multilabel_labels.json
