In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import cifar10
from sklearn.metrics import accuracy_score

Using TensorFlow backend.


## 準備資料

In [2]:
# Load the CIFAR-10 train and test splits (images and labels) via keras.datasets.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [3]:
def normalize_cifar10_data(x, y):
    """Prepare a CIFAR-10 split for the OpenCV SVM used in this notebook.

    Args:
        x: image array; returned unchanged (no pixel scaling is applied).
        y: label array; converted to 32-bit integers.

    Returns:
        Tuple ``(x, y)`` where ``y`` is a new ``int32`` array with the same
        shape as the input labels.
    """
    # Use an explicit 32-bit type: 'int' maps to int64 on most non-Windows
    # platforms (and on NumPy 2 everywhere), whereas OpenCV's ml module
    # expects categorical responses as 32-bit integers (CV_32S).
    return x, y.astype(np.int32)

def normalize_result(x, y):
    """Print the shapes of the feature array ``x`` and label array ``y``."""
    for label, arr in (('x.shape:', x), ('y.shape:', y)):
        print(label, arr.shape)

In [4]:
# Show the array shapes of the training split as loaded, before normalize_cifar10_data is applied.
print('Before normalization:')
normalize_result(X_train, y_train)

Before normalization:
x.shape: (50000, 32, 32, 3)
y.shape: (50000, 1)


In [5]:
print('After normalization:')
# Rebind both splits to the output of normalize_cifar10_data, then show the training shapes again.
X_train, y_train = normalize_cifar10_data(X_train, y_train)
X_test, y_test = normalize_cifar10_data(X_test, y_test)
normalize_result(X_train, y_train)

After normalization:
x.shape: (50000, 32, 32, 3)
y.shape: (50000, 1)


## 定義函數

#### SVM model

* SVM 是機器學習中一個經典的分類算法，具體細節有興趣可以參考[該知乎上的解釋](https://www.zhihu.com/question/21094489)，我們這裡直接調用 opencv 中實現好的函數。
* HOG 特徵通過計算和統計圖像局部區域的梯度方向直方圖來構建特徵，具體細節不在我們涵蓋的範圍裡面，有興趣的同學請參考[補充資料](https://www.cnblogs.com/zyly/p/9651261.html)。

補充：有關 HOG (Histogram of Oriented Gradients) 的算法：先將圖像轉成灰階，以索伯 (Sobel) 算子分別求 x、y 兩個方向的梯度，再轉換成極坐標，得到每個像素的梯度幅值 (magnitude) 與梯度方向角 (angle，單位為弧度)。接著將角度由 \[0, 2π) 線性映射並取整到 \[0, 16) 的區間編號 (bin)，再把與圖片同尺寸的幅值陣列和區間編號陣列各切成 2 x 2 = 4 個區塊，分別統計每個區塊的方向直方圖，故應得到 4 x 16 維度的陣列。這裡呼叫 bincount 時代入了幅值作為權重，所以每個 bin 的值不是出現次數，而是落在該方向區間內所有像素梯度幅值的總和。最後，將結果攤平成 1 x 64 的陣列，即為該圖片的 HOG 特徵。

In [6]:
# Number of splits per image axis in make_hog_data (the image is cut into N_BLK x N_BLK blocks).
N_BLK = 2
# Number of histogram bins: orientation bins per block in make_hog_data,
# and intensity bins per channel in make_hist_data.
N_BIN = 16

def make_hog_data(x):
    """Compute a simple HOG descriptor for every image in ``x``.

    Each image is converted to grayscale, Sobel gradients are taken along
    both axes and converted to polar form, the gradient angle is quantized
    into ``N_BIN`` orientation bins, and a magnitude-weighted orientation
    histogram is accumulated for each of the ``N_BLK x N_BLK`` blocks.

    Args:
        x: iterable of RGB images whose height and width are divisible by
            ``N_BLK`` (np.vsplit / np.hsplit require an even split).

    Returns:
        ``float32`` array of shape ``(len(x), N_BLK ** 2 * N_BIN)``; an
        empty input yields shape ``(0, N_BLK ** 2 * N_BIN)``.
    """
    def split_blocks(mtx):
        # Tiles in row-major order: top band left-to-right, then the next band.
        return [tile for band in np.vsplit(mtx, N_BLK) for tile in np.hsplit(band, N_BLK)]

    # Collect one feature row per image and stack once at the end; growing the
    # result with np.vstack inside the loop recopies the array every iteration.
    rows = []
    for x_img in x:
        gray = cv2.cvtColor(x_img, cv2.COLOR_RGB2GRAY)
        grad_x = cv2.Sobel(gray, cv2.CV_32F, 1, 0)
        grad_y = cv2.Sobel(gray, cv2.CV_32F, 0, 1)
        mag, ang = cv2.cartToPolar(grad_x, grad_y)
        # Quantize the angle to a bin index. The modulo wraps an angle that
        # rounds up to a full turn back to bin 0, so every index stays below
        # N_BIN and each per-block bincount has exactly N_BIN entries.
        bins = np.int32(N_BIN * ang / 2 / np.pi) % N_BIN
        # Weighted bincount: each bin holds the summed gradient magnitude of
        # the pixels falling in that orientation bin, not a pixel count.
        hog = np.array([
            np.bincount(b.ravel(), w.ravel(), N_BIN)
            for b, w in zip(split_blocks(bins), split_blocks(mag))
        ]).ravel()
        rows.append(hog)
    return np.array(rows, dtype=np.float32).reshape(-1, N_BLK ** 2 * N_BIN)

def make_hist_data(x):
    """Compute per-channel intensity histograms for every image in ``x``.

    For each image, every channel gets an ``N_BIN``-bin histogram over the
    value range [0, 256); the channel histograms are concatenated into one
    feature row.

    Args:
        x: iterable of images accepted by ``cv2.split`` / ``cv2.calcHist``.

    Returns:
        ``float32`` array with one row per image. A single-image input now
        yields a ``(1, d)`` matrix (previously a 1-D vector), matching
        ``make_hog_data``.
    """
    # Build all rows first and convert once; stacking inside the loop with
    # np.vstack recopies the accumulated array on every iteration.
    rows = [
        np.array([
            cv2.calcHist([chan], [0], None, [N_BIN], [0, 256])
            for chan in cv2.split(x_img)
        ]).ravel()
        for x_img in x
    ]
    return np.array(rows, dtype=np.float32)

def build_svm():
    """Create an untrained OpenCV C-SVC classifier with a linear kernel.

    Returns:
        A ``cv2.ml`` SVM object configured with C = 2.67 and gamma = 5.383.
    """
    model = cv2.ml.SVM_create()
    model.setType(cv2.ml.SVM_C_SVC)
    model.setKernel(cv2.ml.SVM_LINEAR)
    model.setC(2.67)
    # NOTE(review): OpenCV documents gamma as a parameter of the POLY / RBF /
    # SIGMOID / CHI2 kernels, so it is presumably unused with the linear
    # kernel selected above — confirm before tuning it.
    model.setGamma(5.383)
    return model

def train_data(svm, x, y):
    """Train ``svm`` on samples ``x`` with responses ``y`` and return it.

    The same ``svm`` object that was passed in is returned after training.
    """
    layout = cv2.ml.ROW_SAMPLE  # each row of x is one sample
    svm.train(x, layout, y)
    return svm

def evaluate_data(svm, x, y):
    """Return the accuracy of ``svm`` on samples ``x`` against labels ``y``."""
    # predict() returns a pair; the per-sample predictions are the second item.
    raw_pred = svm.predict(x)[1]
    y_pred = raw_pred.astype('int')
    return accuracy_score(y, y_pred)

## 用 histogram 特徵訓練 SVM 模型

In [7]:
# Build per-channel histogram features for both splits, then display the training matrix.
X_train_hists, X_test_hists = make_hist_data(X_train), make_hist_data(X_test)
X_train_hists

array([[  1.,   4.,   9., ...,   9.,   5.,   0.],
       [ 13.,  22.,  54., ...,  42.,  57.,  35.],
       [  0.,  24.,  87., ...,  15.,  12., 273.],
       ...,
       [ 72., 146., 186., ...,  40.,  67., 168.],
       [ 17.,  11.,  15., ..., 105., 152., 158.],
       [  2.,  20.,  30., ...,  29.,  95.,  37.]], dtype=float32)

In [8]:
# Create a fresh SVM and train it on the features currently held in X_train_hists.
svm = build_svm()
svm = train_data(svm, X_train_hists, y_train)
svm

<ml_SVM 000001E06B2D9D30>

In [9]:
# Report accuracy of the current `svm` on the training split and on the test split.
print('Train accuracy:', evaluate_data(svm, X_train_hists, y_train))
print('Test accuracy:', evaluate_data(svm, X_test_hists, y_test))

Train accuracy: 0.14846
Test accuracy: 0.1448


## 用 HOG 特徵訓練 SVM 模型

In [10]:
# Build HOG features for both splits, then display the training matrix.
# NOTE: this rebinds X_train_hists / X_test_hists, so the colour-histogram
# features computed earlier are overwritten and these names now hold HOG features.
X_train_hists, X_test_hists = make_hog_data(X_train), make_hog_data(X_test)
X_train_hists

array([[2635.8762  , 5540.079   , 2825.9854  , ..., 2130.2808  ,
        3270.286   ,  732.6842  ],
       [3161.2834  , 1962.0333  , 8758.989   , ..., 4804.9917  ,
        2960.6658  , 3142.0178  ],
       [ 108.26818 ,  125.94612 ,  756.45856 , ..., 1016.5663  ,
         846.55817 , 1138.8684  ],
       ...,
       [ 840.3656  ,  553.963   , 1155.7039  , ..., 1733.1053  ,
        1114.1395  ,  585.9903  ],
       [ 158.14745 ,  222.85387 ,   25.495094, ..., 4541.2095  ,
        1387.7686  , 2718.4321  ],
       [ 808.257   , 1677.5273  , 1390.5697  , ..., 5229.7544  ,
        4387.689   , 1623.7395  ]], dtype=float32)

In [11]:
# Create a fresh SVM (replacing the previous `svm`) and train it on the features
# currently held in X_train_hists.
svm = build_svm()
svm = train_data(svm, X_train_hists, y_train)
svm

<ml_SVM 000001E06B2D9E50>

In [12]:
# Report accuracy of the current `svm` on the training split and on the test split.
print('Train accuracy:', evaluate_data(svm, X_train_hists, y_train))
print('Test accuracy:', evaluate_data(svm, X_test_hists, y_test))

Train accuracy: 0.19588
Test accuracy: 0.2052


## 作業

嘗試比較用 Color Histogram 和 HOG 特徵（本範例的 HOG 是在灰階影像上計算）來訓練的 SVM 分類器，在 cifar10 訓練集和測試集上準確度的差別。