# Histogram of Oriented Gradients (HOG) with MNIST Dataset with `sklearn`

In [1]:
import torch
from torchvision.datasets import MNIST
from torchvision import transforms
from skimage.feature import hog
from sklearn import metrics, svm
import matplotlib.pyplot as plt
import numpy as np
import random

In [2]:
def apply_hog(img, o = 9, ppc = (8, 8), cpb = (2, 2)):
    # apply HOG to image
    features = hog(
        img,
        orientations = o,
        pixels_per_cell = ppc,
        cells_per_block = cpb,
    )

    return features

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


The `MNIST` class loads in the data. Additionally, we extract HOG features.
- Train set size: 60000
- Test set size: 10000

All **original** images have size $28\times 28$ pixels.

In [4]:
# load datasets and transform to tensors
# mnist_train = MNIST("./data/", download = True, transform = transforms.ToTensor())
mnist_train = MNIST("./data/", download = True, transform = transforms.Lambda(apply_hog))
mnist_test = MNIST("./data/", train = False, download = True, transform = transforms.Lambda(apply_hog))

print("Train Set (Image):", mnist_train.data.shape)
print("Train Set (Target):", mnist_train.targets.shape)
print("Test Set Size:", len(mnist_test))

Train Set (Image): torch.Size([60000, 28, 28])
Train Set (Target): torch.Size([60000])
Test Set Size: 10000


After transformation, the data (with the extracted HOG features) is a 1D array and its label is a scalar.

In [5]:
print(len(mnist_train[0][0]))
print(mnist_train[0][1])

144
5


**NOTE:** The `ToTensor()` method transform a `PIL` image into a Torch tensor!

In [6]:
# split data and label
train_data, train_label = zip(*mnist_train)
train_data = np.vstack(train_data)
print(train_data.shape)

(60000, 144)


In [7]:
NUM_SAMPLES = 5000

train_data = train_data[:NUM_SAMPLES]
train_label = train_label[:NUM_SAMPLES]

test_data, test_label = zip(*mnist_test)
test_data = np.vstack(test_data)
test_data = test_data[:int(0.1 * NUM_SAMPLES)]
test_label = test_label[:int(0.1 * NUM_SAMPLES)]

In [8]:
# create SVM classifier
clf = svm.SVC(gamma = 0.001)
clf.fit(train_data, train_label)

SVC(gamma=0.001)

In [9]:
predicted = clf.predict(test_data)
print("Accuracy:", metrics.accuracy_score(test_label, predicted))

Accuracy: 0.442
