In [21]:
# Load IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all imported modules before each cell execution.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [22]:
import numpy as np


In [23]:
def load_data(f):
    """
    Load a whitespace-delimited numeric dataset and split it into features and labels.

    Assumed data format (one sample per line):
    feature1 feature2 ... label

    Parameters
    ----------
    f : str, path-like or file-like
        Source handed directly to ``np.genfromtxt``.

    Returns
    -------
    X : np.ndarray, shape (n_samples, n_features)
        All columns except the last.
    y : np.ndarray, shape (n_samples,)
        The last column, cast to ``int``.

    Notes
    -----
    A 1-D parse result is interpreted as ONE sample row (a one-line file),
    not as a single-column file.
    """
    data = np.genfromtxt(f)
    # genfromtxt collapses a one-line file into a 1-D array, which would make
    # the 2-D column slicing below raise IndexError. Restore the row axis.
    if data.ndim == 1:
        data = data.reshape(1, -1)
    # every column but the last is a feature; the last one is the label
    X = data[:, :-1]
    y = data[:, -1].astype(int)
    return X, y

In [24]:
def standardize(X, mean, std):
    """
    Standardize the dataset feature-wise: ``(X - mean) / std``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Samples in rows, features in columns.
    mean : array-like with n_features elements
        Per-feature mean. May be 1-D or a column vector such as the
        ``(n_features, 1)`` arrays stored by ``get_stats``.
    std : array-like with n_features elements
        Per-feature standard deviation, same accepted shapes as ``mean``.

    Returns
    -------
    np.ndarray
        Array with the same shape as ``X`` holding the standardized values.
        Features whose ``std`` is exactly 0 are only centred (divided by 1)
        instead of producing NaN/inf.
    """
    # Flatten so that both 1-D vectors and (n_features, 1) columns broadcast
    # along the rows of X; a column vector would otherwise broadcast against
    # each row into an (n_features, n_features) matrix.
    mean = np.ravel(mean)
    std = np.ravel(std).astype(float)
    # A constant feature has std == 0; dividing by it yields NaN/inf, so use a
    # unit scale for those columns.
    std = np.where(std == 0, 1.0, std)
    # Plain broadcasting replaces the per-row Python callback.
    return (np.asarray(X) - mean) / std

def get_stats(X, y=None):
    """
    Collect the summary statistics of a dataset in one dictionary.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Samples in rows, features in columns.
    y : array-like, shape (n_samples,), optional
        Class label of each sample. When omitted, no per-class statistics
        are computed.

    Returns
    -------
    dict with keys
        'mean'    : (n_features, 1) column of per-feature means.
        'std'     : (n_features, 1) column of per-feature standard deviations
                    (``np.std`` default, i.e. population std).
        'cov'     : (n_features, n_features) covariance of all samples
                    (``np.cov`` default normalisation); always 2-D.
        'classes' : sorted unique labels from ``np.unique(y)``, or 0 if ``y`` is None.
        'classn'  : number of distinct labels, or 0 if ``y`` is None.
        'covs'    : {label: (n_features, n_features) covariance of that class};
                    empty if ``y`` is None.
        'c_means' : {label: (n_features, 1) mean of that class};
                    empty if ``y`` is None.
    """
    X = np.asarray(X)

    mean = np.mean(X, axis=0).reshape(-1, 1)
    std = np.std(X, axis=0).reshape(-1, 1)
    # np.cov returns a 0-d array for a single feature; force a matrix so the
    # global covariance has the same layout as the per-class ones below.
    cov = np.atleast_2d(np.cov(X.T))

    covs, c_means = {}, {}
    # placeholders kept as 0 for callers that pass no labels
    classes, classn = 0, 0

    if y is not None:
        # a plain list would make `y == c` a scalar instead of a row mask
        y = np.asarray(y)
        classes = np.unique(y)
        classn = len(classes)
        for c in classes:
            arr = X[y == c]  # rows belonging to class c
            covs[c] = np.atleast_2d(np.cov(arr.T))
            c_means[c] = np.mean(arr, axis=0).reshape(-1, 1)

    return {
        'covs': covs,
        'c_means': c_means,
        'classes': classes,
        'classn': classn,
        'mean': mean,
        'std': std,
        'cov': cov,
    }

In [25]:
# Load the Pima train/test splits; load_data returns (features, integer labels).
X_train_pima, y_train_pima = load_data('pima.tr')
X_test_pima, y_test_pima = load_data('pima.te')
print(y_test_pima.shape)
# print dataset shapes
print('-' * 20)
print("Pima dataset")
print(f"The dimension of the Pima training data is: {X_train_pima.shape}")
print(f"The dimension of the Pima testing data is: {X_test_pima.shape}")
print('-' * 20)

# Standardize both splits with statistics estimated on the TRAINING split
# only, so nothing from the test split influences the scaling.
pima_stats = get_stats(X_train_pima, y_train_pima)
# get_stats stores mean/std as column vectors; standardize is called with
# flat per-feature vectors, so flatten once and reuse for both splits.
pima_mean = pima_stats['mean'].flatten()
pima_std = pima_stats['std'].flatten()
nX_train_pima = standardize(X_train_pima, pima_mean, pima_std)
nX_test_pima = standardize(X_test_pima, pima_mean, pima_std)

(332,)
--------------------
Pima dataset
The dimension of the synth training data is: (200, 7)
The dimension of the synth testing data is: (332, 7)
--------------------


In [27]:
from svm import SVM
from evaluation import *

# Build an RBF-kernel SVM and train it on the Pima training split.
# NOTE(review): the model is fit on the raw features (X_train_pima) although
# the standardized copy nX_train_pima is computed earlier in this notebook --
# confirm this is intended; if it is changed, the predict call must use
# nX_test_pima as well.
# NOTE(review): the wildcard import hides which names come from `evaluation`;
# none of them are referenced in the visible cells.
svm = SVM(
    kernel='rbf',
    C=0.1,
    max_iter=10_000,
)
svm.fit(X_train_pima, y_train_pima)

starting training
starting epoch # 0
examine all
examine example  0
k=0, num_changed=0
examine example  1
k=1, num_changed=1
examine example  2
k=2, num_changed=2
examine example  3
k=3, num_changed=3
examine example  4
k=4, num_changed=4
examine example  5
k=5, num_changed=5
examine example  6
k=6, num_changed=6
examine example  7
k=7, num_changed=7
examine example  8
k=8, num_changed=8
examine example  9
k=9, num_changed=9
examine example  10
k=10, num_changed=10
examine example  11
k=11, num_changed=11
examine example  12
k=12, num_changed=12
examine example  13
k=13, num_changed=13
examine example  14
k=14, num_changed=14
examine example  15
k=15, num_changed=15
examine example  16
k=16, num_changed=16
examine example  17
k=17, num_changed=17
examine example  18
k=18, num_changed=18
examine example  19
k=19, num_changed=19
examine example  20
k=20, num_changed=20
examine example  21
k=21, num_changed=21
examine example  22
k=22, num_changed=22
examine example  23
k=23, num_changed=

In [28]:
# Run the fitted model on the Pima test features; the returned array is the
# cell's displayed output. The raw (unstandardized) features are passed here,
# matching what `svm.fit` was given.
# NOTE(review): the recorded output holds tiny real values (magnitudes such as
# 1e-43), not discrete class labels -- presumably raw decision values; confirm
# against SVM.predict and check whether standardized inputs were intended.
svm.predict(X_test_pima)

starting predicting
finished predicting


array([-3.14039638e-043,  3.44002741e-038,  1.40937431e-025,
        5.57663297e-046, -2.92306689e-091, -8.34021908e-156,
        2.45149785e-100,  0.00000000e+000, -6.42308815e-055,
        7.59269347e-054,  9.87596235e-015, -5.20492116e-093,
        4.50014315e-057, -1.55849821e-019,  4.71927247e-051,
       -8.41589250e-032,  6.20431142e-038, -3.63129301e-160,
       -9.83839081e-035, -1.05196232e-230, -1.36081893e-296,
       -1.00899798e-095,  1.09763147e-051,  3.22489043e-033,
        1.32332719e-035,  1.00701610e-056, -1.51602770e-050,
        2.28154593e-036,  2.15144352e-055,  7.37559251e-068,
       -2.48700849e-061,  8.71422253e-006, -5.21235076e-067,
        3.29939070e-033,  1.88620269e-051,  9.12062495e-048,
        1.33592766e-011,  4.26256034e-044,  1.55221061e-056,
        6.29686259e-027, -2.54552538e-120,  1.96665650e-038,
        4.98648802e-064, -1.31294707e-058,  1.11521619e-090,
        2.59476355e-016,  1.87419521e-037, -1.17986576e-105,
        9.12057806e-046,