In [16]:
import numpy as np
import matplotlib.pyplot as plt
from emnist import extract_training_samples
from functools import partial

from vertices_generator import vertices
from kernel import Gaussian_kernel
from mdwsvm_ad import mdwsvm_ad
from mdwsvm import mdwsvm
from one_class_svm import one_class_svm
from metric import within_class_error

In [2]:
# Load data
# Label convention relied on below: 0-9 are the digits, 36-61 are lowercase a-z.
digits_images, digits_labels = extract_training_samples('digits')
letters_images, letters_labels = extract_training_samples('byclass')

N_TRAIN = 4000            # digit samples used for training
N_TEST_DIGITS = 2000      # digit samples placed at the start of the test set
N_TEST_TOTAL = 40000      # test digits plus lowercase letters
N_PIXELS = 28 * 28        # length of one flattened image
LOWERCASE_MIN_LABEL = 36  # smallest label treated as a lowercase letter

# Training set: the first N_TRAIN digits, divided by 255, stored one column per sample.
X_train = (digits_images[:N_TRAIN] / 255).reshape(N_TRAIN, N_PIXELS).T
y_train = digits_labels[:N_TRAIN].astype(int)

# Test digits are the samples immediately after the training slice, so the two never overlap.
digit_stop = N_TRAIN + N_TEST_DIGITS

# Test letters: the first lowercase samples in dataset order. Selecting them with a
# boolean mask replaces the per-sample scan and lets us fail with a clear message
# (instead of an IndexError) when the dataset holds too few lowercase samples.
n_test_letters = N_TEST_TOTAL - N_TEST_DIGITS
lowercase_idx = np.flatnonzero(letters_labels >= LOWERCASE_MIN_LABEL)[:n_test_letters]
if lowercase_idx.size < n_test_letters:
    raise ValueError(
        f"expected at least {n_test_letters} lowercase samples, found {lowercase_idx.size}"
    )

# 2000 digits followed by 38000 letters, divided by 255, stored one column per sample.
X_test = np.concatenate(
    [digits_images[N_TRAIN:digit_stop], letters_images[lowercase_idx]]
) / 255
X_test = X_test.reshape(N_TEST_TOTAL, N_PIXELS).T
y_test = np.concatenate(
    [digits_labels[N_TRAIN:digit_stop], letters_labels[lowercase_idx]]
).astype(int)

In [3]:
# Sanity check: list the distinct labels in the test set and how many samples each has.
np.unique(y_test, return_counts=True)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 36, 37, 38, 39, 40, 41, 42,
        43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
        60, 61]),
 array([ 192,  209,  192,  192,  201,  214,  190,  201,  203,  206, 2302,
        1184,  611, 2406, 5675,  565,  859, 2065,  667,  415,  572, 3558,
         615, 2583,  618,  594,  681, 3198,  670, 4286,  682,  697,  640,
         665,  525,  667]))

In [11]:
# Train on the digits 0 and 1 only (labels <= 1).
index_train = np.flatnonzero(y_train <= 1)
X_train_2 = X_train[:, index_train]
y_train_2 = y_train[index_train]

# Letter class added to the test subset as data the model never saw in training.
# It must be the same label that the per-class breakdown cell further down inspects
# (y_test_2 == 36); selecting a different label here leaves that breakdown empty.
UNSEEN_LABEL = 36

# Test on the digits 0 and 1 followed by every sample of the unseen letter class.
index_test = np.concatenate(
    [np.flatnonzero(y_test <= 1), np.flatnonzero(y_test == UNSEEN_LABEL)]
)
X_test_2 = X_test[:, index_test]
y_test_2 = y_test[index_test]

In [13]:
# Fit the mdwsvm model on the 0-vs-1 training subset, then predict the test subset.
w = vertices(2)  # presumably the class vertices for the 2 training classes — TODO confirm against vertices_generator
c = 10  # constant passed to mdwsvm; presumably its cost/regularization parameter — TODO confirm
model = mdwsvm(X_train_2, y_train_2, w, c)
y_pred = model.predict(X_test_2)

In [17]:
# Score the predictions against the true test labels with the project's metric.
# NOTE(review): y_test_2 contains a letter label the model was never trained on —
# confirm how within_class_error treats that class.
within_class_error(y_test_2, y_pred)

0.338400451887294

In [19]:
# Distribution of predicted labels over the test samples whose true label is 0.
np.unique(y_pred[y_test_2 == 0], return_counts=True)

(array([0, 1]), array([190,   2]))

In [20]:
# Distribution of predicted labels over the test samples whose true label is 1.
np.unique(y_pred[y_test_2 == 1], return_counts=True)

(array([0, 1]), array([  1, 208]))

In [21]:
# Distribution of predicted labels over the test samples whose true label is 36
# (a lowercase letter, per the 36-61 = a-z convention noted in the loading cell).
# This returns empty arrays unless label 36 was included when y_test_2 was built.
np.unique(y_pred[y_test_2 == 36], return_counts=True)

(array([0, 1]), array([1765,  537]))