In [None]:
import numpy as np
import time
from sklearn.svm import SVC, LinearSVC
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

def run_rbf_svm(X_train, y_train, X_test, y_test, gamma=0.05, C=1.0):
    """Fit an exact RBF-kernel SVM and report its test accuracy and fit time.

    Args:
        X_train, y_train: training features and labels passed to ``SVC.fit``.
        X_test, y_test: held-out features and labels used for scoring.
        gamma: RBF kernel coefficient forwarded to ``SVC``.
        C: regularization strength forwarded to ``SVC``.

    Returns:
        Tuple ``(model, acc, train_time)``: the fitted ``SVC``, the accuracy
        on the test split, and the wall-clock seconds spent in ``fit`` only
        (prediction time is not included).
    """
    print("\nRunning Standard RBF SVM...")

    # Only the fit is timed; scoring happens after the clock is stopped.
    t0 = time.time()
    classifier = SVC(kernel='rbf', gamma=gamma, C=C).fit(X_train, y_train)
    train_time = time.time() - t0

    predictions = classifier.predict(X_test)
    acc = accuracy_score(y_test, predictions)
    print(f"RBF-SVM Accuracy: {acc:.4f} | Time: {train_time:.2f}s")
    return classifier, acc, train_time

def run_nystroem_linear_svm(X_train, y_train, X_test, y_test, n_components=500, gamma=0.05, C=1.0):
    """Fit a Nystroem RBF feature map followed by a linear SVM and score it.

    Args:
        X_train, y_train: training features and labels.
        X_test, y_test: held-out features and labels used for scoring.
        n_components: requested number of Nystroem components; capped at the
            number of training rows before being handed to ``Nystroem``.
        gamma: RBF kernel coefficient for the Nystroem approximation.
        C: regularization strength for ``LinearSVC``.

    Returns:
        Tuple ``(pipeline, acc, train_time)``: the fitted pipeline, the test
        accuracy, and the wall-clock seconds for building and fitting the
        pipeline (prediction time is not included).
    """
    print(f"\nRunning Nystroem + LinearSVC | n_components={n_components}")
    t0 = time.time()

    # Never ask for more components than there are training samples.
    n_landmarks = min(n_components, X_train.shape[0])
    feature_map = Nystroem(kernel='rbf', gamma=gamma,
                           n_components=n_landmarks, random_state=1)
    linear_clf = LinearSVC(C=C, max_iter=5000)
    approx_svm = Pipeline([
        ('nystroem', feature_map),
        ('linear_svc', linear_clf),
    ])
    approx_svm.fit(X_train, y_train)
    train_time = time.time() - t0

    predictions = approx_svm.predict(X_test)
    acc = accuracy_score(y_test, predictions)
    print(f"Nystroem + LinearSVC Accuracy: {acc:.4f} | Time: {train_time:.2f}s")
    return approx_svm, acc, train_time

INRIA Pedestrian Dataset (gamma=175)

In [None]:
import cv2
import os
import xml.etree.ElementTree as ET
from skimage.feature import hog
from tqdm import tqdm

def parse_annotation(xml_path):
    """Extract object bounding boxes from an XML annotation file.

    The file is expected to contain ``<object>`` elements directly under the
    root, each with a ``<bndbox>`` child holding ``xmin``, ``ymin``, ``xmax``
    and ``ymax`` elements.

    Args:
        xml_path: path (or file object) accepted by ``ET.parse``.

    Returns:
        List of ``(xmin, ymin, xmax, ymax)`` integer tuples, one per object
        that carries a complete bounding box. Objects without a ``<bndbox>``
        or with a missing/empty coordinate are skipped instead of raising.

    Raises:
        ValueError: if a coordinate is present but not numeric.
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
    """
    root = ET.parse(xml_path).getroot()
    boxes = []
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        if bbox is None:
            # Object without localisation info: nothing to crop.
            continue
        coords = []
        for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
            text = bbox.findtext(tag)
            if text is None or not text.strip():
                break
            # Go through float() so values written as "12.0" are accepted;
            # the fractional part is truncated.
            coords.append(int(float(text)))
        else:
            # Only reached when all four coordinates were read.
            boxes.append(tuple(coords))
    return boxes

def extract_hog_features(img, resize_size=(64,128)):
    """Resize an image and return its HOG descriptor as a flat vector.

    Args:
        img: image array accepted by ``cv2.resize`` and ``hog``.
        resize_size: target size handed to ``cv2.resize`` as its ``dsize``
            argument, i.e. ``(width, height)``.

    Returns:
        1-D feature vector computed with 8x8-pixel cells and 2x2-cell blocks.
    """
    resized = cv2.resize(img, resize_size)
    return hog(
        resized,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        feature_vector=True,
    )

def prepare_inria_dataset(root_dir, resize_size=(64,128), neg_per_image=5, random_state=None):
    """Build a HOG feature matrix of pedestrian / background windows.

    Positives are the annotated bounding boxes found under
    ``root_dir/Annotations``; negatives are random ``resize_size`` windows
    cut from every ``.png`` under ``root_dir/JPEGImages``.

    Args:
        root_dir: dataset split directory containing ``Annotations`` and
            ``JPEGImages`` sub-directories.
        resize_size: ``(width, height)`` of the detection window; positives
            are resized to it and negatives are cropped at exactly this size.
        neg_per_image: number of random negative windows drawn per image.
        random_state: optional integer seed for the negative sampling. When
            ``None`` the global ``np.random`` state is used.

    Returns:
        Tuple ``(X, y)`` of numpy arrays: HOG feature rows and their labels
        (1 = annotated person crop, 0 = random window).
    """
    win_w, win_h = resize_size
    # Both objects expose ``randint``; a private RandomState keeps seeded runs
    # independent of whatever else touched the global generator.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    X, y = [], []
    ann_dir = os.path.join(root_dir, 'Annotations')
    img_dir = os.path.join(root_dir, 'JPEGImages')

    # --- Positive samples: one crop per annotated bounding box -------------
    # sorted() makes the sample order independent of the filesystem.
    for xml_file in tqdm(sorted(os.listdir(ann_dir))):
        if not xml_file.endswith('.xml'):
            continue
        objects = parse_annotation(os.path.join(ann_dir, xml_file))
        img_file = os.path.join(img_dir, os.path.splitext(xml_file)[0] + '.png')
        image = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
        if image is None:
            continue
        img_h, img_w = image.shape
        for (xmin, ymin, xmax, ymax) in objects:
            # Clamp to the image: a negative coordinate would otherwise be
            # interpreted as "count from the end" by numpy slicing and yield
            # an empty (silently dropped) crop.
            x0, y0 = max(xmin, 0), max(ymin, 0)
            x1, y1 = min(xmax, img_w), min(ymax, img_h)
            if x1 <= x0 or y1 <= y0:
                continue
            person_crop = image[y0:y1, x0:x1]
            X.append(extract_hog_features(person_crop, resize_size))
            y.append(1)

    # --- Negative samples: random windows from every image -----------------
    # NOTE(review): windows are drawn from all images, including those that
    # contain annotated people, so some "negatives" may overlap a pedestrian.
    for img_file in tqdm(sorted(os.listdir(img_dir))):
        if not img_file.endswith('.png'):
            continue
        image = cv2.imread(os.path.join(img_dir, img_file), cv2.IMREAD_GRAYSCALE)
        if image is None:
            continue
        img_h, img_w = image.shape
        if img_w < win_w or img_h < win_h:
            # Image cannot hold a single window; randint would raise here.
            continue
        for _ in range(neg_per_image):
            # randint's upper bound is exclusive, hence the +1 so the last
            # valid top-left position can be drawn too.
            x = rng.randint(0, img_w - win_w + 1)
            y_pos = rng.randint(0, img_h - win_h + 1)
            neg_crop = image[y_pos:y_pos + win_h, x:x + win_w]
            X.append(extract_hog_features(neg_crop, resize_size))
            y.append(0)

    return np.array(X), np.array(y)

In [None]:
# Build HOG features for the INRIA train and test splits.
X_train, y_train = prepare_inria_dataset(root_dir='./INRIAPerson/Train')
X_test, y_test = prepare_inria_dataset(root_dir='./INRIAPerson/Test')

# Sanity check: feature matrices are (num_samples, feature_dim),
# label vectors are (num_samples,).
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

In [None]:
# Grid search over the exact RBF SVM's hyper-parameters.
# `param_grid_rbf` is defined here and reused by the later grid-search cells.
param_grid_rbf = dict(
    C=[0.1, 1, 10, 100],
    gamma=np.logspace(-4, 3, 8),  # 1e-4 ... 1e3, one value per decade
)

svc = SVC(kernel='rbf')

grid_search = GridSearchCV(
    estimator=svc,
    param_grid=param_grid_rbf,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

print("Best rbf params:", grid_search.best_params_)
print("Best rbf accuracy:", grid_search.best_score_)

In [None]:
# Recorded result of the RBF grid search:
#   Best rbf params: {'C': 10, 'gamma': 0.01}
#   Best rbf accuracy: 0.9668219344308691
# Pass the tuned C explicitly; otherwise run_rbf_svm falls back to C=1.0 and
# the reported accuracy does not correspond to the parameters recorded here.
rbf_model, rbf_acc, rbf_time = run_rbf_svm(
    X_train, y_train, X_test, y_test, gamma=0.01, C=10
)

# Recorded result of a Nystroem grid search (the search itself is not visible
# in this section):
#   Best Nystroem gamma: {'nystroem__gamma': 0.001, 'svm__C': 10}
#   Best accuracy: 0.9621815088563362
# NOTE(review): gamma is left at 0.01, the same kernel width as the exact RBF
# run, although the recorded Nystroem optimum is 0.001 - confirm which one is
# intended. C follows the recorded optimum (10) instead of the default 1.0.
nys_model, nys_acc, nys_time = run_nystroem_linear_svm(
    X_train, y_train, X_test, y_test, gamma=0.01, C=10
)

Caltech-101 (gamma=250)

In [None]:
import random
from sklearn.preprocessing import LabelEncoder

def _read_flat_gray(img_path, resize):
    """Read one image as grayscale, resize it and flatten it; None if unreadable."""
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return None
    return cv2.resize(img, resize).flatten()


def load_caltech101_balanced(root_dir, resize=(128, 128), per_class_train=30, per_class_test=30, seed=None):
    """Load a class-balanced train/test split of raw grayscale pixel vectors.

    Every non-hidden sub-directory of ``root_dir`` is treated as one class.
    Classes with fewer than ``per_class_train + per_class_test`` image files
    are skipped entirely; for the others the files are shuffled and the first
    ``per_class_train`` go to the training set and the next ``per_class_test``
    to the test set. Files that OpenCV cannot read are dropped.

    Args:
        root_dir: directory holding one sub-directory per class.
        resize: size passed to ``cv2.resize`` before flattening.
        per_class_train: number of training images taken per class.
        per_class_test: number of test images taken per class.
        seed: optional seed for the per-class shuffle. When ``None`` the
            global ``random`` state is used.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, classes)`` where the ``X``
        arrays hold flattened pixel rows, the ``y`` arrays hold integer labels
        produced by ``LabelEncoder`` and ``classes`` is ``le.classes_``.
    """
    # The `random` module and a `random.Random` instance both offer shuffle().
    shuffler = random if seed is None else random.Random(seed)
    needed = per_class_train + per_class_test

    X_train, y_train, X_test, y_test = [], [], [], []
    # Sort directory listings: os.listdir order is arbitrary, which would make
    # even a seeded shuffle produce different splits on different machines.
    classes = sorted(
        cls for cls in os.listdir(root_dir)
        if not cls.startswith('.') and os.path.isdir(os.path.join(root_dir, cls))
    )

    for cls in tqdm(classes):
        cls_path = os.path.join(root_dir, cls)
        images = sorted(
            img for img in os.listdir(cls_path)
            if img.lower().endswith(('.jpg', '.png', '.jpeg'))
        )
        if len(images) < needed:
            continue

        shuffler.shuffle(images)
        splits = (
            (images[:per_class_train], X_train, y_train),
            (images[per_class_train:needed], X_test, y_test),
        )
        for files, X_out, y_out in splits:
            for img_file in files:
                pixels = _read_flat_gray(os.path.join(cls_path, img_file), resize)
                if pixels is None:
                    continue
                X_out.append(pixels)
                y_out.append(cls)

    # Encode class names as integers; the encoder is fitted on the training
    # labels and then applied to the test labels.
    le = LabelEncoder()
    y_train = le.fit_transform(y_train)
    y_test = le.transform(y_test)

    return np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test), le.classes_

15 samples per class

In [None]:
# Caltech-101 split with 15 training and 15 test images per class.
X_train, y_train, X_test, y_test, class_names = load_caltech101_balanced(
    './caltech-101/',
    per_class_train=15,
    per_class_test=15,
)

In [None]:
# Grid search for the RBF SVM on the 15-per-class split.
# Reuses `param_grid_rbf` from the INRIA grid-search cell.
svc = SVC(kernel='rbf')

grid_search = GridSearchCV(
    estimator=svc,
    param_grid=param_grid_rbf,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

print("Best rbf params:", grid_search.best_params_)
print("Best rbf accuracy:", grid_search.best_score_)

In [None]:
# Run the standard (exact) RBF SVM.
rbf_model, rbf_acc, rbf_time = run_rbf_svm(
    X_train, y_train, X_test, y_test,
    gamma=0.0001,
)

# Run Nystroem + LinearSVC with the same kernel width.
nys_model, nys_acc, nys_time = run_nystroem_linear_svm(
    X_train, y_train, X_test, y_test,
    gamma=0.0001,
)

30 samples per class

In [None]:
# Caltech-101 split with 30 training and 30 test images per class.
X_train, y_train, X_test, y_test, class_names = load_caltech101_balanced(
    './caltech-101/',
    per_class_train=30,
    per_class_test=30,
)

In [None]:
# Grid search for the RBF SVM on the 30-per-class split.
# Reuses `param_grid_rbf` from the INRIA grid-search cell.
svc = SVC(kernel='rbf')

grid_search = GridSearchCV(
    estimator=svc,
    param_grid=param_grid_rbf,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

print("Best rbf params:", grid_search.best_params_)
print("Best rbf accuracy:", grid_search.best_score_)

In [None]:
# NOTE(review): gamma=250 is not one of the values in `param_grid_rbf`
# (1e-4 ... 1e3, one per decade) and differs from the 0.0001 used for the
# 15-per-class run on the same raw-pixel features - confirm it is intended.

# Run the standard (exact) RBF SVM.
rbf_model, rbf_acc, rbf_time = run_rbf_svm(
    X_train, y_train, X_test, y_test,
    gamma=250,
)

# Run Nystroem + LinearSVC with the same kernel width.
nys_model, nys_acc, nys_time = run_nystroem_linear_svm(
    X_train, y_train, X_test, y_test,
    gamma=250,
)

UIUC Cars (gamma=2.0)

In [None]:
from skimage.feature import hog

def load_uiuc_car_data(folder, resize=(64,64)):
    """Load ``.pgm`` images from a folder as HOG features with binary labels.

    The label is derived from the file name: names containing ``'pos'`` are
    labelled 1, names containing ``'neg'`` are labelled 0. Files matching
    neither pattern are skipped so that features and labels stay aligned.

    Args:
        folder: directory containing the ``.pgm`` images.
        resize: size passed to ``cv2.resize`` before HOG extraction.

    Returns:
        Tuple ``(X, y)`` of numpy arrays with one feature row and one label
        per successfully loaded, labelled image.
    """
    X, y = [], []
    # sorted() gives a filesystem-independent sample order, so a later split
    # with a fixed random_state selects the same images everywhere.
    for file in tqdm(sorted(os.listdir(folder))):
        if not file.endswith('.pgm'):
            continue

        # Decide the label before touching X: appending the feature first and
        # the label only conditionally would desynchronise the two lists.
        if 'pos' in file:
            label = 1
        elif 'neg' in file:
            label = 0
        else:
            continue

        img = cv2.imread(os.path.join(folder, file), cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue
        img = cv2.resize(img, resize)
        feature = hog(img, pixels_per_cell=(8,8), cells_per_block=(2,2), feature_vector=True)
        X.append(feature)
        y.append(label)
    return np.array(X), np.array(y)

# Load the UIUC car images and report the feature-matrix and label shapes.
X_uiuc, y_uiuc = load_uiuc_car_data('./CarData/TrainImages/')
# The second value is the label shape (the feature shape used to be printed twice).
print(X_uiuc.shape, y_uiuc.shape)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the UIUC samples for testing (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X_uiuc, y_uiuc,
    test_size=0.3,
    random_state=42,
)

# Grid search for the RBF SVM on the training part.
# Reuses `param_grid_rbf` from the INRIA grid-search cell.
svc = SVC(kernel='rbf')

grid_search = GridSearchCV(
    estimator=svc,
    param_grid=param_grid_rbf,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)

print("Best rbf params:", grid_search.best_params_)
print("Best rbf accuracy:", grid_search.best_score_)

In [None]:
# Exact RBF SVM on the UIUC split.
rbf_model, rbf_acc, rbf_time = run_rbf_svm(
    X_train, y_train, X_test, y_test,
    gamma=0.01,
)

# Nystroem approximation with at most 300 components. The helper caps the
# value again internally; capping here keeps the printed n_components accurate.
n_components = min(300, X_train.shape[0])
nys_model, nys_acc, nys_time = run_nystroem_linear_svm(
    X_train, y_train, X_test, y_test,
    n_components=n_components,
    gamma=0.01,
)