In [55]:
import pandas as pd
import re
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Chuẩn bị dữ liệu 

In [2]:
tag_mapping = {
    "Others": 0,
    "Honda": 1,
    "Hyundai": 2,
    "KIA": 3,
    "Mazda": 4,
    "Mitsubishi" : 5,
    "Toyota": 6,
    "Suzuki": 7,
    "Vinfast": 8
}

In [10]:
def extract_brand_from_path(path, tag_mapping):
    for brand in tag_mapping.keys():
        if re.search(fr'\b{brand}\b', path, re.IGNORECASE):
            return brand
    return "Unknown"

In [11]:
data_path = "clustering_results.csv"
data = pd.read_csv(data_path)

In [13]:
data['BrandName'] = data['ImageFullPath'].apply(lambda x:extract_brand_from_path(x, tag_mapping))
data['Label'] = data['BrandName'].map(tag_mapping).fillna(-1).astype(int)

In [14]:
data

Unnamed: 0,ImageFullPath,ClusterID,BrandName,Label
0,D:/dataset/CS114_ML\Others/21522373-21522499.L...,1,Others,0
1,D:/dataset/CS114_ML\Others/21522373-21522499.L...,1,Others,0
2,D:/dataset/CS114_ML\Others/21522373-21522499.L...,4,Others,0
3,D:/dataset/CS114_ML\Others/21522373-21522499.L...,4,Others,0
4,D:/dataset/CS114_ML\Others/21522373-21522499.L...,1,Others,0
...,...,...,...,...
31793,D:/dataset/CS114_ML\Vinfast/22521692-22521676....,2,Vinfast,8
31794,D:/dataset/CS114_ML\Vinfast/22521692-22521676....,4,Vinfast,8
31795,D:/dataset/CS114_ML\Vinfast/22521692-22521676....,2,Vinfast,8
31796,D:/dataset/CS114_ML\Vinfast/22521692-22521676....,4,Vinfast,8


In [15]:
data['Label'].unique()

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [20]:
brand_counts = data['BrandName'].value_counts()

brand_counts_df = brand_counts.reset_index()
brand_counts_df.columns = ['Brand', 'ImageCount']

ordered_brands = list(tag_mapping.keys())
ordered_brand_counts = brand_counts.reindex(ordered_brands).fillna(0).astype(int)

ordered_brand_counts_df = ordered_brand_counts.reset_index()
ordered_brand_counts_df.columns = ['Brand', 'ImageCount']
ordered_brand_counts_df

Unnamed: 0,Brand,ImageCount
0,Others,4469
1,Honda,2769
2,Hyundai,3088
3,KIA,2529
4,Mazda,2989
5,Mitsubishi,2689
6,Toyota,5092
7,Suzuki,5965
8,Vinfast,2208


In [None]:
def load_and_preprocess_images(file_paths, img_size):
    images = []
    for path in file_paths:
        try:
            img = load_img(path, target_size=img_size)
            img = img_to_array(img) / 255.0
            images.append(img)
        except Exception as e:
            print(f'Error loading image {path}: {e}')
    return np.array(images)

In [None]:
def prepare_data(data, img_size=(224, 224)):
    # Ensure file paths exist
    data = data[data['ImageFullPath'].apply(os.path.exists)]

    # Extract file paths and labels
    file_paths = data['ImageFullPath'].values
    labels = to_categorical(data['Label'], num_classes=len(tag_mapping))

    # Split into train and test sets
    train_paths, test_paths, train_labels, test_labels = train_test_split(
        file_paths, labels, test_size=0.2, random_state=42, stratify=data['Label']
    )

    # Load and preprocess images
    X_train = load_and_preprocess_images(train_paths, img_size)
    X_test = load_and_preprocess_images(test_paths, img_size)

    return X_train, X_test, train_labels, test_labels

In [None]:
X_train, X_test, y_train, y_test = prepare_data(data)

## First: 
* Only MobileNetV2, trainable = False
* Learning rate = 0.0001

In [59]:
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3), pooling='avg')
base_model.trainable = False

In [60]:
model = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(len(tag_mapping), activation='softmax')
])

In [61]:
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [63]:
checkpoint = ModelCheckpoint('best_car_brand_model_1601.keras', monitor='val_accuracy', save_best_only=True)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)

In [64]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=40,
    batch_size=32,
    callbacks=[checkpoint, early_stopping]
)

Epoch 1/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m425s[0m 523ms/step - accuracy: 0.2051 - loss: 2.5277 - val_accuracy: 0.3569 - val_loss: 1.8558
Epoch 2/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m342s[0m 430ms/step - accuracy: 0.3349 - loss: 1.9791 - val_accuracy: 0.3994 - val_loss: 1.7075
Epoch 3/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 405ms/step - accuracy: 0.3753 - loss: 1.8178 - val_accuracy: 0.4272 - val_loss: 1.6443
Epoch 4/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 400ms/step - accuracy: 0.4037 - loss: 1.7215 - val_accuracy: 0.4431 - val_loss: 1.6051
Epoch 5/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m309s[0m 389ms/step - accuracy: 0.4267 - loss: 1.6361 - val_accuracy: 0.4585 - val_loss: 1.5683
Epoch 6/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 389ms/step - accuracy: 0.4487 - loss: 1.5945 - val_accuracy: 0.4607 - val_loss: 1.5489
Epoc

In [65]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f'Test accuracy: {accuracy}')

[1m199/199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 307ms/step - accuracy: 0.5101 - loss: 1.4994
Test accuracy: 0.5154088139533997


In [66]:
y_pred = model.predict(X_test).argmax(axis=1)
y_test_labels = y_test.argmax(axis=1)
print("Classification Report:")
print(classification_report(y_test_labels, y_pred, target_names=list(tag_mapping.keys())))

[1m199/199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 266ms/step
Classification Report:
              precision    recall  f1-score   support

      Others       0.49      0.48      0.48       894
       Honda       0.44      0.34      0.39       554
     Hyundai       0.48      0.45      0.47       618
         KIA       0.51      0.30      0.38       506
       Mazda       0.48      0.60      0.53       598
  Mitsubishi       0.37      0.37      0.37       538
      Toyota       0.47      0.58      0.52      1018
      Suzuki       0.70      0.71      0.71      1193
     Vinfast       0.56      0.54      0.55       441

    accuracy                           0.52      6360
   macro avg       0.50      0.48      0.49      6360
weighted avg       0.52      0.52      0.51      6360



# Second: Freezing earlier layers
* Freeze first 100 layers
* Learning rate: 0.00001

In [67]:
for layer in base_model.layers[:100]:  # Freeze the first 100 layers
    layer.trainable = False

In [68]:
model = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(len(tag_mapping), activation='softmax')
])

In [69]:
model.compile(optimizer=Adam(learning_rate=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])

In [70]:
checkpoint = ModelCheckpoint('best_car_brand_model_1601_1.keras', monitor='val_accuracy', save_best_only=True)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)

In [71]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=40,
    batch_size=32,
    callbacks=[checkpoint, early_stopping]
)

Epoch 1/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m305s[0m 375ms/step - accuracy: 0.1205 - loss: 2.9536 - val_accuracy: 0.1890 - val_loss: 2.4057
Epoch 2/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m278s[0m 350ms/step - accuracy: 0.1784 - loss: 2.6321 - val_accuracy: 0.2384 - val_loss: 2.2341
Epoch 3/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 343ms/step - accuracy: 0.2156 - loss: 2.4618 - val_accuracy: 0.2698 - val_loss: 2.1345
Epoch 4/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 343ms/step - accuracy: 0.2434 - loss: 2.3382 - val_accuracy: 0.2895 - val_loss: 2.0645
Epoch 5/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 343ms/step - accuracy: 0.2656 - loss: 2.2702 - val_accuracy: 0.3093 - val_loss: 2.0022
Epoch 6/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 343ms/step - accuracy: 0.2710 - loss: 2.2055 - val_accuracy: 0.3182 - val_loss: 1.9660
Epoc

In [72]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f'Test accuracy: {accuracy}')

[1m199/199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 285ms/step - accuracy: 0.4465 - loss: 1.5979
Test accuracy: 0.448427677154541


In [73]:
y_pred = model.predict(X_test).argmax(axis=1)
y_test_labels = y_test.argmax(axis=1)
print("Classification Report:")
print(classification_report(y_test_labels, y_pred, target_names=list(tag_mapping.keys())))

[1m199/199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 280ms/step
Classification Report:
              precision    recall  f1-score   support

      Others       0.39      0.48      0.43       894
       Honda       0.35      0.35      0.35       554
     Hyundai       0.40      0.33      0.37       618
         KIA       0.43      0.21      0.28       506
       Mazda       0.44      0.45      0.44       598
  Mitsubishi       0.33      0.32      0.32       538
      Toyota       0.44      0.45      0.45      1018
      Suzuki       0.61      0.70      0.65      1193
     Vinfast       0.44      0.42      0.43       441

    accuracy                           0.45      6360
   macro avg       0.43      0.41      0.41      6360
weighted avg       0.44      0.45      0.44      6360



# Third (Final Model)
* Class Weighting
* base_model.trainable=True
* Freeze first 100 layers
* Learning rate: 0.00001

In [88]:
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.models import load_model

In [75]:
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.arange(len(tag_mapping)),
    y=data['Label'].values
)

In [146]:
print("Class weights:")
for class_index, weight in class_weights.items():
    print(f"Class {class_index}: Weight {weight}")

Class weights:
Class 0: Weight 0.7905820342607096
Class 1: Weight 1.2759520083463745
Class 2: Weight 1.1441421991940126
Class 3: Weight 1.3970387944290672
Class 4: Weight 1.182037842459388
Class 5: Weight 1.3139126482376762
Class 6: Weight 0.6938552849786157
Class 7: Weight 0.5923069758778057
Class 8: Weight 1.6001409017713366


In [76]:
class_weights = {i: weight for i, weight in enumerate(class_weights)}

In [77]:
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3), pooling='avg')
base_model.trainable = True

In [78]:
for layer in base_model.layers[:100]:  # Freeze the first 100 layers
    layer.trainable = False


In [79]:
model = Sequential([
    base_model,
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(len(tag_mapping), activation='softmax')
])

In [80]:
model.compile(optimizer=Adam(learning_rate=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])

In [82]:
checkpoint = ModelCheckpoint('best_car_brand_model_classweight.keras', monitor='val_accuracy', save_best_only=True)
early_stopping = EarlyStopping(monitor='val_accuracy', patience=7, restore_best_weights=True)

In [83]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=40,
    batch_size=32,
    class_weight=class_weights,
    callbacks=[checkpoint, early_stopping]
)

Epoch 1/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m637s[0m 786ms/step - accuracy: 0.1391 - loss: 2.9155 - val_accuracy: 0.2299 - val_loss: 2.2843
Epoch 2/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m560s[0m 704ms/step - accuracy: 0.2152 - loss: 2.4708 - val_accuracy: 0.3052 - val_loss: 2.0430
Epoch 3/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m551s[0m 693ms/step - accuracy: 0.2725 - loss: 2.2227 - val_accuracy: 0.3385 - val_loss: 1.9297
Epoch 4/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m593s[0m 746ms/step - accuracy: 0.3143 - loss: 2.0538 - val_accuracy: 0.3678 - val_loss: 1.8424
Epoch 5/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m456s[0m 574ms/step - accuracy: 0.3613 - loss: 1.9007 - val_accuracy: 0.4057 - val_loss: 1.7637
Epoch 6/40
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m460s[0m 578ms/step - accuracy: 0.3863 - loss: 1.8010 - val_accuracy: 0.4280 - val_loss: 1.6950
Epoc

In [84]:
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f'Test accuracy: {accuracy}')

[1m199/199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 312ms/step - accuracy: 0.6465 - loss: 1.2439
Test accuracy: 0.6435534358024597


In [85]:
y_pred = model.predict(X_test).argmax(axis=1)
y_test_labels = y_test.argmax(axis=1)
print("Classification Report:")
print(classification_report(y_test_labels, y_pred, target_names=list(tag_mapping.keys())))

[1m199/199[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 315ms/step
Classification Report:
              precision    recall  f1-score   support

      Others       0.60      0.58      0.59       894
       Honda       0.61      0.54      0.57       554
     Hyundai       0.61      0.57      0.59       618
         KIA       0.56      0.56      0.56       506
       Mazda       0.70      0.74      0.72       598
  Mitsubishi       0.46      0.59      0.52       538
      Toyota       0.66      0.63      0.65      1018
      Suzuki       0.79      0.75      0.77      1193
     Vinfast       0.71      0.76      0.73       441

    accuracy                           0.64      6360
   macro avg       0.63      0.64      0.63      6360
weighted avg       0.65      0.64      0.64      6360



In [86]:
model.save('best_car_brand_model_classweight.keras')

In [87]:
def predict_car_brand(image_path, model, tag_mapping):
    try:
        # Load and preprocess the image
        img = load_img(image_path, target_size=(224, 224))
        img_array = img_to_array(img) / 255.0
        img_array = np.expand_dims(img_array, axis=0)

        # Predict the class
        predictions = model.predict(img_array)
        predicted_class = np.argmax(predictions, axis=1)[0]
        predicted_label = list(tag_mapping.keys())[predicted_class]

        return predicted_label
    except Exception as e:
        print(f"Error predicting car brand: {e}")
        return None

In [90]:
# model = load_model('best_car_brand_model_classweight.keras')
# result = predict_car_brand('test_2.jpg', model, tag_mapping)
# print(f"Predicted Car Brand: {result}")

# Test with internet image

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 929ms/step
Predicted Car Brand: Mitsubishi
