In [3]:
pip install --pre scikit-learn opencv-contrib-python joblib matplotlib gradio pandas


Collecting opencv-contrib-python
  Downloading opencv_contrib_python-4.12.0.88-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_contrib_python-4.12.0.88-cp37-abi3-win_amd64.whl (45.3 MB)
   ---------------------------------------- 0.0/45.3 MB ? eta -:--:--
   --- ------------------------------------ 3.4/45.3 MB 22.7 MB/s eta 0:00:02
   ----- ---------------------------------- 6.3/45.3 MB 16.8 MB/s eta 0:00:03
   ------- -------------------------------- 8.9/45.3 MB 15.4 MB/s eta 0:00:03
   --------- ------------------------------ 10.5/45.3 MB 13.2 MB/s eta 0:00:03
   ------------ --------------------------- 14.4/45.3 MB 14.5 MB/s eta 0:00:03
   --------------- ------------------------ 17.3/45.3 MB 14.2 MB/s eta 0:00:02
   ----------------- ---------------------- 19.9/45.3 MB 14.1 MB/s eta 0:00:02
   -------------------- ------------------- 22.8/45.3 MB 14.0 MB/s eta 0:00:02
   ---------------------- ----------------- 25.4/45.3 MB 13.8 MB/s eta 0:00:02
   ---------------------

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'c:\\Users\\ESHA\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\cv2\\cv2.pyd'
Consider using the `--user` option or check the permissions.


[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.cluster import MiniBatchKMeans
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib
import matplotlib.pyplot as plt

# Path to your dataset
TRAIN_PATH = r"traffic_Data/DATA"
TEST_PATH  = r"traffic_Data/TEST"
LABELS_CSV = r"labels.csv"

VOCAB_SIZE = 200
MAX_SIFT_PER_IMAGE = 400
RANDOM_STATE = 42

print("✅ Imports OK")


✅ Imports OK


In [5]:
labels_df = pd.read_csv(LABELS_CSV)
class_names = labels_df.set_index("ClassId")["Name"].to_dict()

num_classes = len(class_names)
print(f"✅ Found {num_classes} classes")
print(class_names)


✅ Found 58 classes
{0: 'Speed limit (5km/h)', 1: 'Speed limit (15km/h)', 2: 'Speed limit (30km/h)', 3: 'Speed limit (40km/h)', 4: 'Speed limit (50km/h)', 5: 'Speed limit (60km/h)', 6: 'Speed limit (70km/h)', 7: 'speed limit (80km/h)', 8: 'Dont Go straight or left', 9: 'Dont Go straight or Right', 10: 'Dont Go straight', 11: 'Dont Go Left', 12: 'Dont Go Left or Right', 13: 'Dont Go Right', 14: 'Dont overtake from Left', 15: 'No Uturn', 16: 'No Car', 17: 'No horn', 18: 'Speed limit (40km/h)', 19: 'Speed limit (50km/h)', 20: 'Go straight or right', 21: 'Go straight', 22: 'Go Left', 23: 'Go Left or right', 24: 'Go Right', 25: 'keep Left', 26: 'keep Right', 27: 'Roundabout mandatory', 28: 'watch out for cars', 29: 'Horn', 30: 'Bicycles crossing', 31: 'Uturn', 32: 'Road Divider', 33: 'Traffic signals', 34: 'Danger Ahead', 35: 'Zebra Crossing', 36: 'Bicycles crossing', 37: 'Children crossing', 38: 'Dangerous curve to the left', 39: 'Dangerous curve to the right', 40: 'Unknown1', 41: 'Unknown2

In [6]:
def load_train_images(path):
    images = []
    labels = []
    for class_id in sorted(os.listdir(path), key=lambda x: int(x)):
        class_folder = os.path.join(path, class_id)
        if not os.path.isdir(class_folder):
            continue
        for fname in os.listdir(class_folder):
            fpath = os.path.join(class_folder, fname)
            img = cv2.imread(fpath)
            if img is None:
                continue
            images.append((fpath, img))
            labels.append(int(class_id))
    return images, np.array(labels)

def load_test_images(path):
    images = []
    for fname in os.listdir(path):
        fpath = os.path.join(path, fname)
        img = cv2.imread(fpath)
        if img is None:
            continue
        images.append((fpath, img))
    return images

train_images, train_labels = load_train_images(TRAIN_PATH)
test_images  = load_test_images(TEST_PATH)

print("✅ Training images:", len(train_images))
print("✅ Test images:", len(test_images))


✅ Training images: 4170
✅ Test images: 1994


In [7]:
sift = cv2.SIFT_create()

all_desc = []
image_descs = []
missing = 0

for path, img in train_images:
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kps, des = sift.detectAndCompute(gray, None)

    if des is None:
        image_descs.append(None)
        missing += 1
        continue

    if len(des) > MAX_SIFT_PER_IMAGE:
        des = des[:MAX_SIFT_PER_IMAGE]

    image_descs.append(des)
    all_desc.append(des)

stacked = np.vstack(all_desc)
print("✅ Total descriptors for clustering:", stacked.shape)
print("❗ Images without descriptors:", missing)


✅ Total descriptors for clustering: (466044, 128)
❗ Images without descriptors: 14


In [8]:
print("⏳ Building BoVW vocabulary...")

kmeans = MiniBatchKMeans(
    n_clusters=VOCAB_SIZE,
    batch_size=VOCAB_SIZE * 20,
    random_state=RANDOM_STATE
)

kmeans.fit(stacked)
joblib.dump(kmeans, "bovw_kmeans.joblib")

print("✅ Saved vocabulary → bovw_kmeans.joblib")


⏳ Building BoVW vocabulary...
✅ Saved vocabulary → bovw_kmeans.joblib


In [9]:
def descriptors_to_bovw(des, kmeans_model, vocab_size):
    hist = np.zeros(vocab_size, dtype=np.float32)
    if des is None:
        return hist
    idxs = kmeans_model.predict(des)
    for i in idxs:
        hist[i] += 1
    if hist.sum() > 0:
        hist /= hist.sum()
    return hist

bovw = np.array([descriptors_to_bovw(d, kmeans, VOCAB_SIZE) for d in image_descs])
print("✅ BoVW feature matrix:", bovw.shape)


✅ BoVW feature matrix: (4170, 200)


In [10]:
X_train, X_val, y_train, y_val = train_test_split(
    bovw, train_labels,
    test_size=0.2,
    stratify=train_labels,
    random_state=RANDOM_STATE
)

scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s   = scaler.transform(X_val)

joblib.dump(scaler, "bovw_scaler.joblib")

print("✅ Train:", X_train_s.shape)
print("✅ Val:",   X_val_s.shape)


✅ Train: (3336, 200)
✅ Val: (834, 200)


In [11]:
print("⏳ Training SVM ...")

svm = LinearSVC(random_state=RANDOM_STATE, max_iter=20000)
svm.fit(X_train_s, y_train)
joblib.dump(svm, "sift_bovw_svm.joblib")

print("✅ SVM saved → sift_bovw_svm.joblib")

y_pred = svm.predict(X_val_s)
print("✅ Validation accuracy:", accuracy_score(y_val, y_pred))
print(classification_report(y_val, y_pred))


⏳ Training SVM ...
✅ SVM saved → sift_bovw_svm.joblib
✅ Validation accuracy: 0.9448441247002398
              precision    recall  f1-score   support

           0       1.00      0.92      0.96        24
           1       1.00      1.00      1.00         8
           2       0.89      1.00      0.94        16
           3       1.00      1.00      1.00        52
           4       1.00      0.90      0.95        20
           5       0.95      0.90      0.92        39
           6       0.89      1.00      0.94        16
           7       1.00      1.00      1.00        30
           8       1.00      1.00      1.00         2
          10       0.88      1.00      0.93        14
          11       0.93      0.93      0.93        28
          12       1.00      1.00      1.00        19
          13       1.00      1.00      1.00         7
          14       1.00      0.85      0.92        26
          15       1.00      1.00      1.00         4
          16       0.93      0.93      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [12]:
def predict_image_sift_bovw(img_bgr):
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    kps, des = sift.detectAndCompute(gray, None)

    hist = descriptors_to_bovw(des, kmeans, VOCAB_SIZE).reshape(1, -1)
    hist_s = scaler.transform(hist)

    pid = svm.predict(hist_s)[0]
    name = class_names.get(pid, "Unknown")

    return pid, name


In [13]:
if len(test_images) > 0:
    path, img = test_images[0]
    pid, name = predict_image_sift_bovw(img)
    print("✅ Example prediction:", name)


✅ Example prediction: Speed limit (30km/h)


In [14]:
import gradio as gr

def gradio_predict(pil_img):
    img = np.array(pil_img)[:, :, ::-1]  # RGB → BGR
    pid, name = predict_image_sift_bovw(img)
    return name

ui = gr.Interface(
    fn=gradio_predict,
    inputs=gr.Image(type="pil", label="Upload Traffic Sign"),
    outputs=gr.Textbox(label="Predicted Class"),
    title="SIFT + BoVW + SVM Traffic Sign Classifier",
    description="Upload an image to classify it."
)

# Uncomment to run:
# ui.launch(share=True)


In [15]:
ui.launch(share=True)

* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://91acc1e36c12b5b09b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


