In [1]:
import numpy as np
import json
import matplotlib.pyplot as plt
import math
from mpl_toolkits.mplot3d import Axes3D

In [2]:
# Helper function for printing tables
def printTable(header, data):
    """
    Prints a pipe-delimited table: a header row, a dashed separator row,
    and one row per data line.

    Every column is as wide as its widest cell (header label or data value)
    and each cell is centered in its column.  Cells are converted with str(),
    so labels and values may be of any type.

    Parameters
    ----------
    header : []
        list of header labels
        ex. header = ["one","two","three"]
    data : [[],[],...,[]]
        list of lists, each inner list is a data line
        data line must index with header appropriately
        ex. data = [[1, 2, 3],[1, 2, 3]]
        A line longer than the header produces extra, unlabeled columns
        instead of raising an IndexError.
    """

    # Stringify everything once so widths and printed text always agree
    header_cells = [str(label) for label in header]
    rows = [[str(value) for value in line] for line in data]

    # Column width = widest cell in that column, header included
    col_widths = [len(cell) for cell in header_cells]
    for row in rows:
        for col, cell in enumerate(row):
            if col < len(col_widths):
                col_widths[col] = max(col_widths[col], len(cell))
            else:
                col_widths.append(len(cell))

    # Blank labels for any column that only exists in the data
    header_cells += [""] * (len(col_widths) - len(header_cells))

    def _format_row(cells, fill=" "):
        # An empty row is printed as a blank line
        if not cells:
            return ""
        body = "".join(
            cell.center(width, fill) + " | "
            for cell, width in zip(cells, col_widths)
        )
        return "| " + body

    # print headers
    print(_format_row(header_cells))

    # print separating line
    print(_format_row([""] * len(col_widths), "-"))

    # print data
    for row in rows:
        print(_format_row(row))

In [3]:
# One matplotlib color per class
colors = ['r', 'g', 'b']

# Cluster centers, one [x, y] pair per class
centers = [[4, 1],
           [1, 7],
           [5, 6]]

# Standard deviations of each cluster along x and y, respectively
sigmas = [[0.8, 0.5],
          [0.5, 1.1],
          [0.7, 0.7]]

# Seed the global NumPy RNG so every run draws the same points
np.random.seed(2)

# Draw 200 points per cluster (600 in total for the 3 clusters).
# For each cluster the x values are drawn before the y values; that order
# determines which random numbers land where, so it must not change.
x_parts = []
y_parts = []
label_parts = []
for i, ((x_center, y_center), (x_sigma, y_sigma)) in enumerate(zip(centers, sigmas)):
    x_parts.append(np.random.standard_normal(200) * x_sigma + x_center)
    y_parts.append(np.random.standard_normal(200) * y_sigma + y_center)
    # class label of these 200 points, stored as float
    label_parts.append(np.ones(200) * i)

# Join the per-cluster pieces into flat 1-D arrays
xpts = np.hstack(x_parts)
ypts = np.hstack(y_parts)
labels = np.hstack(label_parts)

# Visualize the test data, one color per class
for j, class_color in enumerate(colors):
    in_class = labels == j
    plt.plot(xpts[in_class], ypts[in_class], '.', color=class_color, label='Class %s' % j)

plt.xlabel('x')
plt.ylabel('y')
plt.title('Sample 2 feature input with 3 classes')
plt.legend(loc='lower left')
plt.show()

# Stack into a 2 x N array: row 0 holds x, row 1 holds y
xy = np.vstack((xpts, ypts))

In [4]:
# Arrange the data one sample per row: two columns = two features = two axes
X = xy.T
b = labels.T

# model parameters
num_samples, num_features = len(b), len(X[0])

# Append a column of ones so the last weight plays the role of the bias
X_svm = np.hstack((X, np.ones((num_samples, 1))))
w_k = np.zeros((X_svm.shape[1], 1))

# regularizer (aka lambda); 0 switches the penalty term off
r = 0

# Step size: 1 / (largest singular value of X_svm)^2.
# np.linalg.svd returns the singular values in descending order, so s[0] is the largest.
U, s, V = np.linalg.svd(X_svm)
tau = 1 / (s[0] ** 2)  # max stepsize

# Per-class storage, one column per class
classes = [0, 1, 2]
num_classes = len(classes)
b_classes = np.zeros((num_samples, num_classes))    # +1 / -1 training labels
b_test_svm = np.zeros((num_samples, num_classes))   # classifier outputs
w_svm = np.zeros((len(w_k), num_classes))           # trained weights


# Sample grid intended for drawing a decision boundary.  It does not depend on
# the class, so it is built once instead of on every loop iteration.  Nothing
# in this cell consumes it yet.
x_range = np.linspace(0, 8, num=50)
y_range = np.linspace(0, 10, num=50)
const_range = np.ones(50)
plot_range = np.vstack((x_range, y_range, const_range)).T

# Number of subgradient-descent steps used for each classifier
num_steps = 10

# Train one one-vs-all (OvA) linear SVM per class
for c in classes:
    # form binary labels, assign +1 to one class, -1 to all others
    b_svm = np.where(b == c, 1, -1)
    b_classes[:, c] = b_svm.reshape(len(b_svm))

    # Start every classifier from zero weights.  Without this reset each
    # class would continue from the weights of the previously trained class.
    w_k = np.zeros((num_features + 1, 1))

    # train svm
    for step in range(num_steps):
        # margin b_i * (x_i . w) of every sample under the current weights
        margins = b_svm * (X_svm @ w_k).reshape(len(b_svm))
        # indicator function: only samples with margin <= 1 contribute
        active = margins <= 1
        # hinge-loss subgradient: sum of -b_i * x_i over the active samples
        l_hinge = -(X_svm[active].T @ b_svm[active]).reshape(w_k.shape)
        w_k = w_k - tau * (l_hinge + 2 * r * w_k)

    # save weights
    w_svm[:, c] = w_k.reshape(len(w_k))  # svm

    # Graph: current class against all the others
    fig = plt.figure()
    plt.plot(X[(c == b), 0], X[(c == b), 1], '.', color='r', label='Positive Class')
    plt.plot(X[(c != b), 0], X[(c != b), 1], '.', color='grey', label='Negative Class')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('OvA with Class %s Positive, all other Negative' % c)
    plt.legend(loc='lower left')
    plt.show()

    # 3-D view: the height of each point is its +1 / -1 label for class c
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], b_svm, c='c', marker='s')
    plt.show()
    
# Score each trained classifier on the same data it was trained on
header = ["class", "  svm  "]
data = []
for c in classes:
    # Signed decision values of classifier c; their sign is the predicted label
    b_hat_svm = X_svm @ w_svm[:, c]
    b_test_svm[:, c] = b_hat_svm.reshape(len(b))

    # A sample is wrong when the predicted sign differs from its +1 / -1 label
    # (a decision value of exactly 0 therefore also counts as wrong)
    is_wrong = np.sign(b_test_svm[:, c]) != b_classes[:, c]
    num_incorrect_svm = np.sum(is_wrong)

    # find percent incorrect
    percent_incorrect_svm = num_incorrect_svm / len(b) * 100
    row = [c, "{:0.2f} %".format(percent_incorrect_svm)]
    data.append(row)

printTable(header, data)

## Single Class SVM