In [1]:
import numpy as np
import matplotlib.pyplot as plt
from emnist import extract_training_samples
from sklearn.utils import shuffle
from functools import partial
import time

from vertices_generator import vertices
from kernel import Gaussian_kernel
from mdwsvm import mdwsvm
from mdwsvm_ad import mdwsvm_ad
from one_class_svm import one_class_svm
from hybrid import hybrid
from metric import within_class_error

In [2]:
# Load the EMNIST training samples: the 'digits' split and the 'byclass' split
# (the latter is used below only as a source of letter images).
digits_images, digits_labels = extract_training_samples('digits')
letters_images, letters_labels = extract_training_samples('byclass')

# Boolean masks for the four digit classes used in this notebook (1, 3, 5, 7).
mask_1, mask_3, mask_5, mask_7 = (digits_labels == digit for digit in (1, 3, 5, 7))

# Per-digit image/label subsets, consumed when the data splits are assembled.
digits_images_1, digits_labels_1 = digits_images[mask_1], digits_labels[mask_1]
digits_images_3, digits_labels_3 = digits_images[mask_3], digits_labels[mask_3]
digits_images_5, digits_labels_5 = digits_images[mask_5], digits_labels[mask_5]
digits_images_7, digits_labels_7 = digits_images[mask_7], digits_labels[mask_7]

# Keep only the samples labelled 56..61, i.e. the letters u, v, w, x, y, z.
mask_uvwxyz = np.isin(letters_labels, np.arange(56, 62))
letters_images = letters_images[mask_uvwxyz]
letters_labels = letters_labels[mask_uvwxyz]
print(len(letters_labels))

16349


In [3]:
# Assemble the training, validation and test arrays.
#
# Each digit source is paired with the offset that turns its raw label into a
# class index: 1 -> 0, 3 -> 1, 5 -> 2, 7 -> 3.
_digit_sources = (
    (digits_images_1, digits_labels_1, 1),
    (digits_images_3, digits_labels_3, 2),
    (digits_images_5, digits_labels_5, 3),
    (digits_images_7, digits_labels_7, 4),
)


def _fill_digits(X, y, first, counts):
    """Write scaled digit images and class indices into the leading rows of X and y.

    For the k-th digit source, ``counts[k]`` samples starting at index ``first``
    are copied. Images are divided by 255 and the source's label offset is
    subtracted from the labels. Returns the number of rows written.
    """
    row = 0
    for (images, labels, offset), count in zip(_digit_sources, counts):
        X[row:row + count] = images[first:first + count] / 255
        y[row:row + count] = labels[first:first + count] - offset
        row += count
    return row


def _mixed_split(digit_start, letter_start):
    """Build an 8000-sample split: 400 digits followed by 7600 letters.

    Returns ``(X, y, y_true_hybrid, y_true_mdwsvm_ad)`` where ``X`` has shape
    (784, 8000), ``y`` holds digit class indices 0..3 and raw letter labels,
    ``y_true_hybrid`` marks every letter as -1 and ``y_true_mdwsvm_ad`` marks
    every letter as 4.
    """
    X = np.zeros((8000, 28, 28))
    y = np.zeros((8000), dtype=int)

    # 100 samples from each of the four digit classes.
    n_digits = _fill_digits(X, y, digit_start, (100, 100, 100, 100))

    # 7600 letters, scaled like the digits, keeping their raw labels.
    X[n_digits:8000] = letters_images[letter_start:letter_start + 7600] / 255
    y[n_digits:8000] = letters_labels[letter_start:letter_start + 7600]

    # Reference labels for the hybrid error: letters are -1.
    y_hybrid = -np.ones((8000), dtype=int)
    y_hybrid[0:n_digits] = y[0:n_digits]

    # Reference labels for the mdwsvm_ad error: letters are 4.
    y_ad = 4 * np.ones((8000), dtype=int)
    y_ad[0:n_digits] = y[0:n_digits]

    # Flatten each image and put samples in columns.
    return X.reshape(8000, 784).T, y, y_hybrid, y_ad


# 800 normalized training digits: 150 ones, 150 threes, 250 fives, 250 sevens.
X_train = np.zeros((800, 28, 28))
y_train = np.zeros((800), dtype=int)
_fill_digits(X_train, y_train, 0, (150, 150, 250, 250))
X_train = X_train.reshape(800, 784).T

# Validation split (used for hybrid): digits 1000..1099 of each class, letters 0..7599.
X_val, y_val, y_val_true_hybrid, y_val_true_mdwsvm_ad = _mixed_split(1000, 0)

# Test split: digits 1100..1199 of each class, letters 8000..15599.
X_test, y_test, y_test_true_hybrid, y_test_true_mdwsvm_ad = _mixed_split(1100, 8000)

# y_test: 0,1,2,3,56-61
# y_test_true_hybrid: -1,0,1,2,3
# y_test_true_mdwsvm_ad: 0,1,2,3,4

In [5]:
# MDWSVM: fit on the digit-only training set, then predict every test sample.
best_c = 1
w1 = vertices(4)  # presumably one vertex per digit class -- confirm in vertices_generator
model1 = mdwsvm(X_train, y_train, w1, best_c)
y_pred_1 = model1.predict(X_test)

In [6]:
# Hybrid model: reuses the vertices `w1` built for MDWSVM and a Gaussian kernel.
best_v = 0.3
best_c = 1
best_sigma2 = 0.01
best_k = partial(Gaussian_kernel, sigma2=best_sigma2)  # kernel with sigma2 fixed

# hybrid() receives train and test data together and returns the test predictions.
y_pred_2 = hybrid(X_train, y_train, X_test, best_v, w1, best_c, best_k)

In [7]:
# MDWSVM_ad: fit with five vertices and a Gaussian kernel, then predict the test set.
# Recorded tuning result (presumably from a parameter search -- confirm):
# v=0.100 sigma2=12.000 c=1.000 score:0.8743947368421052
best_c_2 = 1
best_v_2 = 0.1
best_sigma2_2 = 12
best_k_2 = partial(Gaussian_kernel, sigma2=best_sigma2_2)  # kernel with sigma2 fixed
w2 = vertices(5)
model3 = mdwsvm_ad(X_train, y_train, w2, best_c_2, best_v_2, best_k_2)
y_pred_3 = model3.predict(X_test)

In [27]:
# Confusion table for MDWSVM on the test set, in percent of each true class.
#
# Label encodings used in this notebook:
#   digits  : 1 -> 0, 3 -> 1, 5 -> 2, 7 -> 3
#   letters : u = 56, v = 57, w = 58, x = 59, y = 60, z = 61
#   hybrid predictions    : -1, 0, 1, 2, 3
#   MDWSVM-ad predictions : 0, 1, 2, 3, 4
#
# Rows follow `true_labels` (u, v, w, x, y, z, then digits 1, 3, 5, 7);
# columns follow `pred_labels` (the four digit classes MDWSVM can output).
true_labels = np.array([56, 57, 58, 59, 60, 61, 0, 1, 2, 3])
pred_labels = np.array([0, 1, 2, 3])

# Flatten so the comparisons below are element-wise whatever the array layout.
y_true_flat = np.asarray(y_test).ravel()
y_pred_flat = np.asarray(y_pred_1).ravel()
assert y_pred_flat.shape == y_true_flat.shape, "one prediction per test sample expected"

# Number of test samples in each true class, in row order.
class_counts = np.array([np.count_nonzero(y_true_flat == t) for t in true_labels])

# Samples per percentage point for each letter class; kept as plain floats
# under their original names because later cells read them.
total_u, total_v, total_w, total_x, total_y, total_z = (class_counts[:6] / 100).tolist()

# counts[r, c] = number of samples with true label r that were predicted as c.
counts = np.array(
    [[np.count_nonzero((y_true_flat == t) & (y_pred_flat == p)) for p in pred_labels]
     for t in true_labels],
    dtype=float,
)

# Convert every row to percentages of its own class size. Rows for classes
# with no test samples are left at zero instead of dividing by zero.
scale = (class_counts / 100)[:, None]
result_matrix = np.divide(counts, scale, out=np.zeros_like(counts), where=scale > 0)

print(result_matrix)
print(np.sum(result_matrix))

[[ 5.97460792  1.56833458 77.74458551 14.71247199]
 [ 7.98794273  0.97965335 56.44310475 34.58929917]
 [12.98804781  0.         86.61354582  0.39840637]
 [18.78326996  6.69201521 61.90114068 12.62357414]
 [22.78597786  8.85608856 25.09225092 43.26568266]
 [ 9.53125    14.921875   54.140625   21.40625   ]
 [94.          0.          5.          1.        ]
 [ 0.         90.          6.          4.        ]
 [ 2.         16.         81.          1.        ]
 [ 1.          0.          0.         99.        ]]
1000.0


In [32]:
# Confusion table for the hybrid model on the test set, in percent of each
# true class.
#
# Rows follow `true_labels`: letters u..z (56..61), then digits 1, 3, 5, 7
# (encoded 0..3). Columns follow `pred_labels`: the digit classes 0..3, with
# the -1 prediction (the label y_test_true_hybrid gives to letters) last.
true_labels = np.array([56, 57, 58, 59, 60, 61, 0, 1, 2, 3])
pred_labels = np.array([0, 1, 2, 3, -1])

# Flatten so the comparisons below are element-wise whatever the array layout.
y_true_flat = np.asarray(y_test).ravel()
y_pred_flat = np.asarray(y_pred_2).ravel()
assert y_pred_flat.shape == y_true_flat.shape, "one prediction per test sample expected"

# Number of test samples in each true class, in row order. Computed here so
# the cell does not depend on totals defined in another cell.
class_counts = np.array([np.count_nonzero(y_true_flat == t) for t in true_labels])

# counts[r, c] = number of samples with true label r that were predicted as c.
counts = np.array(
    [[np.count_nonzero((y_true_flat == t) & (y_pred_flat == p)) for p in pred_labels]
     for t in true_labels],
    dtype=float,
)

# Convert every row to percentages of its own class size. Rows for classes
# with no test samples are left at zero instead of dividing by zero.
scale = (class_counts / 100)[:, None]
result_matrix2 = np.divide(counts, scale, out=np.zeros_like(counts), where=scale > 0)

print(result_matrix2)
print(np.sum(result_matrix2))

[[ 4.92905153  1.34428678 68.85735624 13.21882001 11.65048544]
 [ 7.00828937  0.9042954  50.18839488 30.59532781 11.30369254]
 [10.91633466  0.         73.86454183  0.23904382 14.98007968]
 [15.81749049  6.08365019 57.11026616 11.40684411  9.58174905]
 [18.63468635  7.84132841 20.4797048  35.70110701 17.34317343]
 [ 8.203125   14.140625   48.203125   19.765625    9.6875    ]
 [82.          0.          4.          1.         13.        ]
 [ 0.         81.          4.          4.         11.        ]
 [ 1.         14.         63.          1.         21.        ]
 [ 1.          0.          0.         79.         20.        ]]
1000.0


In [34]:
# Confusion table for MDWSVM-ad on the test set, in percent of each true class.
#
# Rows follow `true_labels`: letters u..z (56..61), then digits 1, 3, 5, 7
# (encoded 0..3). Columns follow `pred_labels`: the digit classes 0..3, then
# class 4 (the label y_test_true_mdwsvm_ad gives to letters).
true_labels = np.array([56, 57, 58, 59, 60, 61, 0, 1, 2, 3])
pred_labels = np.array([0, 1, 2, 3, 4])

# Flatten so the comparisons below are element-wise whatever the array layout.
y_true_flat = np.asarray(y_test).ravel()
y_pred_flat = np.asarray(y_pred_3).ravel()
assert y_pred_flat.shape == y_true_flat.shape, "one prediction per test sample expected"

# Number of test samples in each true class, in row order. Computed here so
# the cell does not depend on totals defined in another cell.
class_counts = np.array([np.count_nonzero(y_true_flat == t) for t in true_labels])

# counts[r, c] = number of samples with true label r that were predicted as c.
counts = np.array(
    [[np.count_nonzero((y_true_flat == t) & (y_pred_flat == p)) for p in pred_labels]
     for t in true_labels],
    dtype=float,
)

# Convert every row to percentages of its own class size. Rows for classes
# with no test samples are left at zero instead of dividing by zero.
scale = (class_counts / 100)[:, None]
result_matrix3 = np.divide(counts, scale, out=np.zeros_like(counts), where=scale > 0)

print(result_matrix3)
print(np.sum(result_matrix3))

[[0.00000000e+00 7.46825990e-02 3.21135176e+00 2.98730396e-01
  9.64152353e+01]
 [4.52147702e-01 0.00000000e+00 1.15297664e+01 7.00828937e+00
  8.10097965e+01]
 [0.00000000e+00 0.00000000e+00 2.70916335e+00 0.00000000e+00
  9.72908367e+01]
 [1.06463878e+00 1.52091255e-01 2.08365019e+01 3.72623574e+00
  7.42205323e+01]
 [6.54981550e+00 1.75276753e+00 1.30073801e+01 3.65313653e+01
  4.21586716e+01]
 [7.81250000e-02 2.26562500e+00 9.06250000e+00 3.67187500e+00
  8.49218750e+01]
 [9.60000000e+01 0.00000000e+00 3.00000000e+00 1.00000000e+00
  0.00000000e+00]
 [0.00000000e+00 7.50000000e+01 6.00000000e+00 2.00000000e+00
  1.70000000e+01]
 [0.00000000e+00 3.00000000e+00 8.70000000e+01 0.00000000e+00
  1.00000000e+01]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 9.30000000e+01
  7.00000000e+00]]
1000.0
