In [1]:
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt

In [2]:
import os
import urllib.request

DIGITS_URL = 'https://raw.githubusercontent.com/opencv/opencv/master/samples/data/digits.png'
DIGITS_PATH = 'digits.png'

# Fetch the sample image only when it is missing, so re-running this cell
# neither hits the network again nor leaves numbered duplicate copies
# (digits.png.1, ...) next to the original.
if not os.path.exists(DIGITS_PATH):
    try:
        urllib.request.urlretrieve(DIGITS_URL, DIGITS_PATH)
    except OSError as exc:  # URLError / HTTPError are OSError subclasses
        print(f"Download error: {exc}")
        # Remove any partial file so the readiness check below stays truthful.
        if os.path.exists(DIGITS_PATH):
            os.remove(DIGITS_PATH)

if not os.path.exists(DIGITS_PATH):
    print("digits.png download failed. Please upload manually.")
else:
    print("digits.png file is ready.")

digits.png file is ready.


In [3]:
# Load the digit sheet as a single-channel image; a None result is treated
# as a missing file.
img = cv.imread('digits.png', cv.IMREAD_GRAYSCALE)
if img is None:
    raise FileNotFoundError("digits.png not found.")
print("digits.png shape:", img.shape)

# Cut the sheet into 50 horizontal bands (vsplit), then cut every band
# into 100 cells (hsplit).
cells = [np.hsplit(band, 100) for band in np.vsplit(img, 50)]
print("len(cells)=", len(cells), "  len(cells[0])=", len(cells[0]))

# Within each band:
#   train -> the first 50 cells (columns), i.e. band_cells[:50]
#   test  -> the last 50 cells (columns),  i.e. band_cells[50:]
train_cells = []
test_cells = []
for band_cells in cells:
    train_cells.append(band_cells[:50])
    test_cells.append(band_cells[50:])

print("train_cells shape:", len(train_cells), "×", len(train_cells[0]))
print("test_cells shape:", len(test_cells), "×", len(test_cells[0]))

digits.png shape: (1000, 2000)
len(cells)= 50   len(cells[0])= 100
train_cells shape: 50 × 50
test_cells shape: 50 × 50


In [4]:
# Side length, in pixels, of the square image that deskew() outputs.
SZ = 20
# Number of orientation bins per quadrant histogram in hog().
bin_n = 16
# Flags handed to cv.warpAffine by deskew(): linear interpolation, with the
# matrix interpreted as the inverse map.
affine_flags = cv.INTER_LINEAR | cv.WARP_INVERSE_MAP

def deskew(img):
  """Return a shear-corrected version of a digit image.

  The shear factor is estimated from the image moments as mu11 / mu02 and
  applied through an affine warp onto an SZ x SZ canvas. When mu02 is
  (almost) zero the estimate is undefined, so an unmodified copy of the
  input is returned instead.
  """
  moments = cv.moments(img)
  mu02 = moments['mu02']
  # Guard against dividing by a vanishing second-order moment.
  if abs(mu02) < 1e-2:
    return img.copy()

  shear = moments['mu11'] / mu02
  # 2x3 affine matrix: horizontal shear plus an x-offset of -0.5*SZ*shear.
  warp = np.float32([
      [1, shear, -0.5 * SZ * shear],
      [0, 1, 0],
  ])
  return cv.warpAffine(img, warp, (SZ, SZ), flags=affine_flags)

In [5]:
def hog(img):
  """Compute a histogram-of-oriented-gradients descriptor for one image.

  Sobel gradients are converted to magnitude/angle, the angles are quantized
  into bin_n orientation bins, and a magnitude-weighted histogram is built
  for each of the four quadrants obtained by splitting at row/column 10.

  Returns:
    1-D array of length 4 * bin_n: the quadrant histograms concatenated in
    the order top-left, bottom-left, top-right, bottom-right.
  """
  gx = cv.Sobel(img, cv.CV_32F, 1, 0)
  gy = cv.Sobel(img, cv.CV_32F, 0, 1)
  mag, ang = cv.cartToPolar(gx, gy)

  # Quantize angles to bin indices. The modulo wraps an angle that rounds to
  # a full turn (2*pi) back onto bin 0; without it the index would equal
  # bin_n, np.bincount would return bin_n + 1 entries, and the descriptor
  # would no longer have its fixed length.
  bins = np.int32(bin_n*ang/(2*np.pi)) % bin_n

  bin_cells = bins[:10,:10], bins[10:,:10], bins[:10,10:], bins[10:,10:]
  mag_cells = mag[:10,:10], mag[10:,:10], mag[:10,10:], mag[10:,10:]

  # One magnitude-weighted orientation histogram per quadrant.
  hists = [
      np.bincount(b.ravel(), weights=m.ravel(), minlength=bin_n)
      for b, m in zip(bin_cells, mag_cells)
  ]

  return np.hstack(hists)

In [7]:
# Straighten every training cell, keeping the row-by-row grid layout.
train_deskewed = [[deskew(cell) for cell in row] for row in train_cells]

# One HOG descriptor per straightened cell, same grid layout.
train_hogdata = [[hog(cell) for cell in row] for row in train_deskewed]

# Flatten the grid into one 64-value float32 feature row per sample.
trainData = np.float32(train_hogdata).reshape(-1, 64)
# Labels 0..9, each repeated 250 times, shaped as a column vector.
responses = np.arange(10).repeat(250).reshape(-1, 1)

# Build a C-support vector classifier with a linear kernel.
svm = cv.ml.SVM_create()
svm.setType(cv.ml.SVM_C_SVC)
svm.setKernel(cv.ml.SVM_LINEAR)
svm.setC(2.67)
# NOTE(review): gamma is presumably unused by the linear kernel; kept so the
# configuration matches the original exactly - confirm against OpenCV docs.
svm.setGamma(5.383)

# Each row of trainData is one sample; persist the fitted model afterwards.
svm.train(trainData, cv.ml.ROW_SAMPLE, responses)
svm.save('svm_digits.dat')
print('SVM training complete.')

# Straighten every test cell, keeping the row-by-row grid layout.
test_deskewed = [[deskew(cell) for cell in row] for row in test_cells]

# One HOG descriptor per straightened cell, same grid layout.
test_hogdata = [[hog(cell) for cell in row] for row in test_deskewed]

# Flatten the grid into one 64-value float32 feature row per sample.
testData = np.float32(test_hogdata).reshape(-1, 64)
# Classify every test sample with the trained SVM.
ret, result = svm.predict(testData)

# Expected labels: 0..9, each repeated 250 times, as a column vector.
responses_test = np.arange(10).repeat(250).reshape(-1, 1)

# Element-wise comparison of predictions against the expected labels.
mask = np.equal(result, responses_test)
correct = np.count_nonzero(mask)
# Share of matching predictions, expressed as a percentage.
accuracy = correct * (100.0 / result.size)
print(f"Accuracy= {accuracy:.2f}%")

SVM training complete.
Accuracy= 93.80%
