<a href="https://colab.research.google.com/github/nazalnihad/CV_LAB/blob/main/HOG_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install scikit-image scikit-learn opencv-python matplotlib



In [2]:
import kagglehub

path = kagglehub.dataset_download("jcoral02/inriaperson")

print("Path to dataset files:", path)



Using Colab cache for faster access to the 'inriaperson' dataset.
Path to dataset files: /kaggle/input/inriaperson


In [9]:
!cp -r "/kaggle/input/inriaperson" /content/INRIA

In [10]:
import os
import cv2
import glob
import xml.etree.ElementTree as ET
import numpy as np

def extract_pos_neg_samples(img_dir, ann_dir):
    pos_samples = []
    neg_samples = []

    for xml_file in glob.glob(os.path.join(ann_dir, '*.xml')):
        tree = ET.parse(xml_file)
        root = tree.getroot()

        img_name = root.find('filename').text
        img_path = os.path.join(img_dir, img_name)

        img = cv2.imread(img_path)
        if img is None:
            continue

        h, w = img.shape[:2]

        for obj in root.findall('object'):
            bbox = obj.find('bndbox')
            x1 = int(bbox.find('xmin').text)
            y1 = int(bbox.find('ymin').text)
            x2 = int(bbox.find('xmax').text)
            y2 = int(bbox.find('ymax').text)

            crop = img[y1:y2, x1:x2]
            crop = cv2.resize(crop, (64,128))
            crop = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
            pos_samples.append(crop)

        for _ in range(2):  # 2 negatives per image
            rx = np.random.randint(0, w-64)
            ry = np.random.randint(0, h-128)
            patch = img[ry:ry+128, rx:rx+64]
            patch = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
            neg_samples.append(patch)

    return pos_samples, neg_samples


In [11]:


train_img = "/content/INRIA/Train/JPEGImages"
train_ann = "/content/INRIA/Train/Annotations"

pos_train, neg_train = extract_pos_neg_samples(train_img, train_ann)

print("Pos:", len(pos_train), "Neg:", len(neg_train))




Pos: 1240 Neg: 1228


In [12]:
from skimage.feature import hog

def hog_features(images):
    feats = []
    for img in images:
        f = hog(img,
                orientations=9,
                pixels_per_cell=(8,8),
                cells_per_block=(2,2),
                block_norm='L2-Hys')
        feats.append(f)
    return np.array(feats)

X_pos = hog_features(pos_train)
X_neg = hog_features(neg_train)

X_train = np.vstack((X_pos, X_neg))
y_train = np.hstack((np.ones(len(X_pos)), np.zeros(len(X_neg))))

print(X_train.shape)

(2468, 3780)


In [15]:
from sklearn.svm import LinearSVC

svm = LinearSVC(C=0.01)
svm.fit(X_train, y_train)

print("SVM done")

SVM done


In [16]:


test_img = "/content/INRIA/Test/JPEGImages"
test_ann = "/content/INRIA/Test/Annotations"

pos_test, neg_test = extract_pos_neg_samples(test_img, test_ann)

X_pos_t = hog_features(pos_test)
X_neg_t = hog_features(neg_test)

X_test = np.vstack((X_pos_t, X_neg_t))
y_test = np.hstack((np.ones(len(X_pos_t)), np.zeros(len(X_neg_t))))




In [17]:


from sklearn.metrics import accuracy_score, classification_report

y_pred = svm.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))




Accuracy: 0.9531116794543905
              precision    recall  f1-score   support

         0.0       0.95      0.96      0.95       576
         1.0       0.96      0.95      0.95       597

    accuracy                           0.95      1173
   macro avg       0.95      0.95      0.95      1173
weighted avg       0.95      0.95      0.95      1173

