This notebook implements a traditional machine learning model using KNN to classify car damage types. 

**Original dataset**: Car damage severity (minor, moderate, severe) from https://www.kaggle.com/datasets/prajwalbhamere/car-damage-severity-dataset/data

**New dataset** (the one used in this notebook): CarDD dataset with 6 damage types (dent, scratch, crack, glass shatter, lamp broken, tire flat) from CarDD_release/CarDD_COCO

In [1]:
%matplotlib inline
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
import matplotlib.pyplot as plt
import numpy as np
import json
import os
from collections import defaultdict

In [2]:
# Location of the CarDD release in COCO layout, given relative to the
# directory the notebook is run from.
COCO_DIR = '../CarDD_release/CarDD_COCO'

# Image folders for the two splits used in this notebook.
TRAIN_IMG_DIR = os.path.join(COCO_DIR, 'train2017')
VAL_IMG_DIR = os.path.join(COCO_DIR, 'val2017')

# Matching COCO instance-annotation files.
TRAIN_JSON = os.path.join(COCO_DIR, 'annotations', 'instances_train2017.json')
VAL_JSON = os.path.join(COCO_DIR, 'annotations', 'instances_val2017.json')
    
# Load COCO annotations
def load_coco_data(json_path, img_dir, target_size=(224, 224), color_mode='grayscale'):
    """Load images and one class label per image from a COCO-format annotation file.

    Only images that have at least one annotation are considered. Each such
    image gets a single label: the category of the first annotation listed
    for it in the JSON file (other annotations on the same image are ignored).

    Args:
        json_path: Path to the COCO annotation JSON (must contain the
            'categories', 'annotations' and 'images' keys).
        img_dir: Directory containing the image files named in the JSON.
        target_size: Size passed to ``load_img`` for resizing every image.
        color_mode: Colour mode passed to ``load_img``.

    Returns:
        A tuple ``(X, y, category_names)``:
        X is an array of the loaded images with pixel values divided by 255.0,
        y is an array of integer class indices (positions in category_names),
        category_names is the sorted list of category names from the JSON.

    Annotated images that cannot be loaded because the file is missing from
    ``img_dir`` (or because the annotation refers to an image id that is not
    listed under 'images') are skipped; the number skipped is printed so the
    loss of data is visible.
    """
    with open(json_path, 'r') as f:
        data = json.load(f)

    # Category id -> name, and name -> index in sorted-name order so the
    # integer labels do not depend on the order of entries in the JSON.
    category_map = {cat['id']: cat['name'] for cat in data['categories']}
    category_names = sorted([cat['name'] for cat in data['categories']])
    category_to_idx = {name: idx for idx, name in enumerate(category_names)}

    # Collect every annotated category per image, in annotation order.
    image_categories = defaultdict(list)
    for ann in data['annotations']:
        image_categories[ann['image_id']].append(category_map[ann['category_id']])

    # Image id -> file name
    image_map = {img['id']: img['file_name'] for img in data['images']}

    X = []
    y = []
    skipped = 0

    for image_id, categories in image_categories.items():
        filename = image_map.get(image_id)
        img_path = os.path.join(img_dir, filename) if filename is not None else None

        if img_path is None or not os.path.exists(img_path):
            # Keep going (best effort), but remember that data was dropped.
            skipped += 1
            continue

        # Load, resize and scale pixel values
        img = load_img(img_path, color_mode=color_mode, target_size=target_size)
        X.append(img_to_array(img) / 255.0)

        # Single-label strategy: use the first annotated category of the image.
        y.append(category_to_idx[categories[0]])

    if skipped:
        print(f"Warning: skipped {skipped} annotated image(s) with no file under {img_dir}")

    return np.array(X), np.array(y), category_names

print("Loading CarDD training data...")
X_train, y_train, class_names = load_coco_data(TRAIN_JSON, TRAIN_IMG_DIR)
print(f"Loaded {len(X_train)} training images with {len(class_names)} classes: {class_names}")

print("\nLoading CarDD validation data...")
X_val, y_val, val_class_names = load_coco_data(VAL_JSON, VAL_IMG_DIR)
print(f"Loaded {len(X_val)} validation images")

# Each call builds its own name -> index mapping from its own JSON file, so the
# integer labels of the two splits are only comparable if the class lists match.
assert val_class_names == class_names, (
    f"Train/validation class lists differ: {class_names} vs {val_class_names}"
)

# Reshape for KNN (flatten each image into a single feature vector)
X_train = X_train.reshape(X_train.shape[0], -1)
X_val = X_val.reshape(X_val.shape[0], -1)

Loading CarDD training data...
Loaded 2816 training images with 6 classes: ['crack', 'dent', 'glass shatter', 'lamp broken', 'scratch', 'tire flat']

Loading CarDD validation data...
Loaded 810 validation images


In [3]:
# The validation split doubles as the test set in this notebook. Note that k
# and the distance metric are later chosen by comparing scores on this same
# split, so the reported test accuracy is not an independent estimate.
X_test, y_test = X_val, y_val

In [12]:
def run_knn(X_train, y_train, X_test, y_test, k=3, class_names=None, metric='minkowski'):
    """Fit a k-nearest-neighbours classifier and print its train/test metrics.

    Prints the training accuracy, the test accuracy and the test confusion
    matrix. When ``class_names`` is given, a per-class classification report
    is printed as well.

    Args:
        X_train, y_train: Training features and labels.
        X_test, y_test: Evaluation features and labels.
        k: Number of neighbours.
        class_names: Optional sequence of display names. When the labels are
            integer indices into this sequence (label i is class_names[i]),
            the confusion matrix and report are pinned to all of its classes,
            so they keep one row per class even if a class never occurs in
            ``y_test`` or in the predictions.
        metric: Distance metric passed to ``KNeighborsClassifier``.

    Returns:
        None. All results are printed.
    """
    knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
    knn.fit(X_train, y_train)
    print('Accuracy of K-NN classifier on training set: {:.2f}'
          .format(knn.score(X_train, y_train)))

    # Predict once and reuse the result: calling knn.score(X_test, ...) and
    # then knn.predict(X_test) would run the neighbour search twice.
    pred = knn.predict(X_test)
    test_accuracy = np.mean(pred == np.asarray(y_test))
    print('Accuracy of K-NN classifier on test set: {:.2f}'
          .format(test_accuracy))

    # `is not None` + len() instead of truthiness so array-like names work too.
    has_names = class_names is not None and len(class_names) > 0

    # Pin the label set only when the labels really are indices into
    # class_names; otherwise fall back to the library default (labels=None).
    labels = None
    if has_names:
        observed = np.union1d(y_test, pred)
        if (observed.size
                and np.issubdtype(observed.dtype, np.integer)
                and observed.min() >= 0
                and observed.max() < len(class_names)):
            labels = np.arange(len(class_names))

    print('\nConfusion Matrix:')
    print(confusion_matrix(y_test, pred, labels=labels))

    if has_names:
        print('\nClassification Report:')
        # zero_division=0 reports 0.00 for classes that are never predicted
        # without emitting one warning per affected metric.
        print(classification_report(y_test, pred, labels=labels,
                                    target_names=list(class_names),
                                    zero_division=0))

In [None]:
# Sweep the number of neighbours; every run prints accuracy, the confusion
# matrix and the per-class report for the CarDD class names.
k_vals = [3, 5, 7, 9, 11, 15, 21]
for k in k_vals:
    print(f"K = {k}")
    run_knn(
        X_train, y_train, X_test, y_test,
        k=k,
        class_names=class_names,
    )

Accuracy of K-NN classifier on training set: 0.63
Accuracy of K-NN classifier on test set: 0.41

Confusion Matrix:
[[  0  10   0   0  20   0]
 [  4 160   7   0 181   0]
 [  0  35  12   0  63   0]
 [  0  10   1   0  14   0]
 [  1  88   6   1 155   0]
 [  1  12   5   0  20   4]]

Classification Report:
               precision    recall  f1-score   support

        crack       0.00      0.00      0.00        30
         dent       0.51      0.45      0.48       352
glass shatter       0.39      0.11      0.17       110
  lamp broken       0.00      0.00      0.00        25
      scratch       0.34      0.62      0.44       251
    tire flat       1.00      0.10      0.17        42

     accuracy                           0.41       810
    macro avg       0.37      0.21      0.21       810
 weighted avg       0.43      0.41      0.38       810

Accuracy of K-NN classifier on training set: 0.56
Accuracy of K-NN classifier on test set: 0.40

Confusion Matrix:
[[  0   8   0   0  22   0]
 [ 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Accuracy of K-NN classifier on training set: 0.54
Accuracy of K-NN classifier on test set: 0.39

Confusion Matrix:
[[  0  10   1   0  19   0]
 [  0 147   5   0 199   1]
 [  0  30   9   0  70   1]
 [  0   8   2   0  15   0]
 [  0  89   7   0 155   0]
 [  0  11   6   0  22   3]]

Classification Report:
               precision    recall  f1-score   support

        crack       0.00      0.00      0.00        30
         dent       0.50      0.42      0.45       352
glass shatter       0.30      0.08      0.13       110
  lamp broken       0.00      0.00      0.00        25
      scratch       0.32      0.62      0.42       251
    tire flat       0.60      0.07      0.13        42

     accuracy                           0.39       810
    macro avg       0.29      0.20      0.19       810
 weighted avg       0.39      0.39      0.35       810



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Accuracy of K-NN classifier on training set: 0.52
Accuracy of K-NN classifier on test set: 0.37

Confusion Matrix:
[[  0  14   0   0  16   0]
 [  0 145   2   0 204   1]
 [  0  29   5   0  75   1]
 [  0  10   1   0  14   0]
 [  0  97   8   0 146   0]
 [  0   8   7   0  26   1]]

Classification Report:
               precision    recall  f1-score   support

        crack       0.00      0.00      0.00        30
         dent       0.48      0.41      0.44       352
glass shatter       0.22      0.05      0.08       110
  lamp broken       0.00      0.00      0.00        25
      scratch       0.30      0.58      0.40       251
    tire flat       0.33      0.02      0.04        42

     accuracy                           0.37       810
    macro avg       0.22      0.18      0.16       810
 weighted avg       0.35      0.37      0.33       810



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Accuracy of K-NN classifier on training set: 0.50
Accuracy of K-NN classifier on test set: 0.37

Confusion Matrix:
[[  0  16   0   0  14   0]
 [  0 130   4   0 217   1]
 [  0  26   7   0  76   1]
 [  0  10   1   0  14   0]
 [  0  86   5   0 160   0]
 [  0   8   5   0  27   2]]

Classification Report:
               precision    recall  f1-score   support

        crack       0.00      0.00      0.00        30
         dent       0.47      0.37      0.41       352
glass shatter       0.32      0.06      0.11       110
  lamp broken       0.00      0.00      0.00        25
      scratch       0.31      0.64      0.42       251
    tire flat       0.50      0.05      0.09        42

     accuracy                           0.37       810
    macro avg       0.27      0.19      0.17       810
 weighted avg       0.37      0.37      0.33       810



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Accuracy of K-NN classifier on training set: 0.48
Accuracy of K-NN classifier on test set: 0.38

Confusion Matrix:
[[  0  15   0   0  15   0]
 [  0 147   3   0 202   0]
 [  0  30   5   0  74   1]
 [  0   9   0   0  16   0]
 [  0  98   3   0 150   0]
 [  0   9   3   0  28   2]]

Classification Report:
               precision    recall  f1-score   support

        crack       0.00      0.00      0.00        30
         dent       0.48      0.42      0.45       352
glass shatter       0.36      0.05      0.08       110
  lamp broken       0.00      0.00      0.00        25
      scratch       0.31      0.60      0.41       251
    tire flat       0.67      0.05      0.09        42

     accuracy                           0.38       810
    macro avg       0.30      0.18      0.17       810
 weighted avg       0.39      0.38      0.34       810



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Accuracy of K-NN classifier on training set: 0.47
Accuracy of K-NN classifier on test set: 0.38

Confusion Matrix:
[[  0  18   0   0  12   0]
 [  0 146   2   0 203   1]
 [  0  26   3   0  80   1]
 [  0   9   0   0  16   0]
 [  0  95   1   0 155   0]
 [  0   8   4   0  27   3]]

Classification Report:
               precision    recall  f1-score   support

        crack       0.00      0.00      0.00        30
         dent       0.48      0.41      0.45       352
glass shatter       0.30      0.03      0.05       110
  lamp broken       0.00      0.00      0.00        25
      scratch       0.31      0.62      0.42       251
    tire flat       0.60      0.07      0.13        42

     accuracy                           0.38       810
    macro avg       0.28      0.19      0.17       810
 weighted avg       0.38      0.38      0.34       810



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


K = 3 achieved the best test-set accuracy (0.41), so we now compare different distance metrics with k = 3.

In [14]:
# Compare distance metrics at the chosen k = 3.
# NOTE(review): 'minkowski' is run_knn's default metric; with scikit-learn's
# default exponent it should reproduce the 'euclidean' results — confirm
# whether both entries are needed.
metrics = ['euclidean', 'manhattan', 'minkowski', 'cosine']
for metric in metrics:
    print(f"Metric: {metric}")
    run_knn(
        X_train, y_train, X_test, y_test,
        k=3,
        metric=metric,
        class_names=class_names,
    )

Metric: euclidean
Accuracy of K-NN classifier on training set: 0.63
Accuracy of K-NN classifier on test set: 0.41

Confusion Matrix:
[[  0  10   0   0  20   0]
 [  4 160   7   0 181   0]
 [  0  35  12   0  63   0]
 [  0  10   1   0  14   0]
 [  1  88   6   1 155   0]
 [  1  12   5   0  20   4]]

Classification Report:
               precision    recall  f1-score   support

        crack       0.00      0.00      0.00        30
         dent       0.51      0.45      0.48       352
glass shatter       0.39      0.11      0.17       110
  lamp broken       0.00      0.00      0.00        25
      scratch       0.34      0.62      0.44       251
    tire flat       1.00      0.10      0.17        42

     accuracy                           0.41       810
    macro avg       0.37      0.21      0.21       810
 weighted avg       0.43      0.41      0.38       810

Metric: manhattan
Accuracy of K-NN classifier on training set: 0.66
Accuracy of K-NN classifier on test set: 0.40

Confusion Ma