In [1]:
import numpy as np
import matplotlib.pyplot as plt
from emnist import extract_training_samples
from functools import partial

from vertices_generator import vertices
from kernel import Gaussian_kernel
from mdwsvm_ad import mdwsvm_ad
from mdwsvm import mdwsvm
from one_class_svm import one_class_svm
from hybrid import hybrid
from metric import within_class_error

In [2]:
# Load data
# Builds a digits-only training set and a mixed test set (digits followed by
# lowercase letters). Every image is flattened to 784 pixels, divided by 255,
# and stored as one COLUMN of the resulting matrix:
#   X_train: (784, 4000)    y_train: (4000,)
#   X_test:  (784, 40000)   y_test:  (40000,)
digits_images, digits_labels = extract_training_samples('digits')
letters_images, letters_labels = extract_training_samples('byclass')

N_TRAIN = 4000               # digits used for training
N_TEST_DIGITS = 2000         # digits placed at the front of the test set
N_TEST_LETTERS = 38000       # lowercase letters appended after the test digits
N_TEST = N_TEST_DIGITS + N_TEST_LETTERS
N_PIXELS = 28 * 28
FIRST_LOWERCASE_LABEL = 36   # in 'byclass': 0-9 are digits, 36-61 are a-z

# 4000 digits normalized training data, one flattened image per column.
X_train = np.zeros((N_TRAIN, 28, 28))
X_train[:] = digits_images[:N_TRAIN] / 255
X_train = X_train.reshape(N_TRAIN, N_PIXELS).T
# Cast to a signed int so later label arithmetic (e.g. `y - 1`) cannot wrap
# around the way it would on an unsigned label dtype.
y_train = digits_labels[:N_TRAIN].astype(int)

X_test = np.zeros((N_TEST, 28, 28))
y_test = np.zeros(N_TEST, dtype=int)

# The next 2000 digits (disjoint from the training slice) open the test set.
X_test[:N_TEST_DIGITS] = digits_images[N_TRAIN:N_TRAIN + N_TEST_DIGITS] / 255
y_test[:N_TEST_DIGITS] = digits_labels[N_TRAIN:N_TRAIN + N_TEST_DIGITS]

# The first 38000 lowercase letters, in dataset order, fill the remainder.
lowercase_idx = np.flatnonzero(letters_labels >= FIRST_LOWERCASE_LABEL)[:N_TEST_LETTERS]
if lowercase_idx.size < N_TEST_LETTERS:
    # Fail with a clear message instead of running off the end of the array.
    raise ValueError(
        f"expected at least {N_TEST_LETTERS} lowercase samples in 'byclass', "
        f"found {lowercase_idx.size}"
    )
X_test[N_TEST_DIGITS:] = letters_images[lowercase_idx] / 255
y_test[N_TEST_DIGITS:] = letters_labels[lowercase_idx]

# 2000 digits and 38000 letters normalized data, 0-9 are 0-9, 36-61 are a-z
X_test = X_test.reshape(N_TEST, N_PIXELS).T

In [5]:
# Training subset: digits 1 and 2 only, grouped by class (all 1s, then all 2s).
# Samples are stored as columns, so the selection indexes the second axis.
index_train = np.concatenate([np.flatnonzero(y_train == digit) for digit in (1, 2)])
X_train_2 = X_train[:, index_train]
# Shift labels down by one: digit 1 -> 0, digit 2 -> 1.
y_train_2 = y_train[index_train] - 1

# Test subset: the same two digits plus label 46 (a lowercase letter that is
# absent from training), again grouped by class in that order.
index_test = np.concatenate([np.flatnonzero(y_test == label) for label in (1, 2, 46)])
X_test_2 = X_test[:, index_test]
# Same shift as for training, so label 46 becomes 45.
y_test_2 = y_test[index_test] - 1

In [18]:
# Hyperparameters and kernel for the hybrid run on the digit-1-vs-2 subset.
# NOTE(review): the exact meaning of v, w and c is defined inside hybrid(),
# which is not visible in this notebook — confirm against hybrid.py.
v = 1
w = vertices(2)  # presumably the class vertices for the 2-class problem — TODO confirm
c = 10
k = partial(Gaussian_kernel, sigma2=10)  # Gaussian kernel with sigma2 fixed to 10
# Presumably fits on the two-digit training subset and returns one prediction
# per column of X_test_2, which also contains label-46 samples never seen in
# training — verify against hybrid().
y_pred = hybrid(X_train_2, y_train_2, X_test_2, v, w, c, k)

In [19]:
# Score the hybrid predictions against the shifted test labels (0, 1 and 45).
within_class_error(y_test_2, y_pred)

0.8277511961722488

In [20]:
# Predicted-label counts for test samples whose shifted label is 0 (digit 1).
# NOTE(review): -1 presumably marks samples rejected as anomalies — confirm in hybrid().
np.unique(y_pred[y_test_2 == 0], return_counts=True)

(array([-1,  0]), array([101, 108]))

In [21]:
# Predicted-label counts for test samples whose shifted label is 1 (digit 2).
np.unique(y_pred[y_test_2 == 1], return_counts=True)

(array([-1]), array([192]))

In [22]:
# Predicted-label counts for test samples whose shifted label is 45, i.e. the
# original label 46 (a lowercase letter that was not part of the training subset).
np.unique(y_pred[y_test_2 == 45], return_counts=True)

(array([-1]), array([572]))

In [5]:
# Gaussian kernel with sigma2=10, the same setting used for the hybrid run.
k = partial(Gaussian_kernel, sigma2=10)
# Build the project's one-class SVM from the full mixed test matrix
# (2000 digits + 38000 lowercase letters, one sample per column).
# NOTE(review): this uses X_test rather than the digits-only X_train — confirm
# that fitting on the mixed set is intended.
# NOTE(review): the matrix printed under this cell is presumably emitted from
# inside one_class_svm — verify and silence it there if it is leftover debug output.
model = one_class_svm(X_test, v=0.1, K=k)

[[-1.13686838e-13  1.14857193e+02  1.04279047e+02 ...  9.84472280e+01
   1.30978839e+02  8.61607382e+01]
 [ 1.14857193e+02  0.00000000e+00  1.46941207e+02 ...  1.33740377e+02
   8.85964475e+01  1.37503376e+02]
 [ 1.04279047e+02  1.46941207e+02 -1.13686838e-13 ...  1.12470527e+02
   1.00850104e+02  8.59885121e+01]
 ...
 [ 9.84472280e+01  1.33740377e+02  1.12470527e+02 ... -2.84217094e-14
   1.12636955e+02  1.45572211e+02]
 [ 1.30978839e+02  8.85964475e+01  1.00850104e+02 ...  1.12636955e+02
  -2.84217094e-14  1.28390111e+02]
 [ 8.61607382e+01  1.37503376e+02  8.59885121e+01 ...  1.45572211e+02
   1.28390111e+02 -1.70530257e-13]]


: 

: 

In [1]:
from sklearn import svm

In [5]:
# scikit-learn one-class SVM baseline (RBF kernel, gamma='scale', nu=0.2).
# Note that `model` and `y_pred` overwrite the objects produced by earlier cells.
model = svm.OneClassSVM(kernel='rbf', gamma='scale', nu=0.2)
# X_test is stored one sample per COLUMN, shape (784, 40000), after the
# reshape/transpose in the data-loading cell. scikit-learn estimators expect
# (n_samples, n_features), so transpose back to one sample per row; without
# this the model is fitted on 784 "samples" of 40000 features and returns only
# 784 predictions.
X_test_rows = X_test.T
model.fit(X_test_rows)
# One prediction per test sample: +1 for inliers, -1 for outliers.
y_pred = model.predict(X_test_rows)