# Math 582 Miniproject 3 - Model Development

The purpose of this notebook is to implement the dual SVM convex quadratic optimization problem for binary classification.

In [3]:
# imports

from sklearn import svm
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import numpy as np
from cvxopt import matrix, solvers
import matplotlib
from cvxopt import matrix as cvxopt_matrix
from cvxopt import solvers as cvxopt_solvers
from sklearn.svm import SVC
import matplotlib.pyplot as plt

In [4]:
# kernel function definitions

def polynomial_kernel (x_i, x_j, d):
	"""
	polynomial_kernel evaluates the polynomial SVM kernel for one pair of samples

	x_i: first sample (feature vector)
	x_j: second sample (feature vector)
	d: polynomial degree

	:return: (x_i . x_j + 1/2) raised to the power d
	"""
	# Inner product of the two samples, shifted by the fixed offset of one half
	shifted_similarity = np.dot(x_i, x_j) + 1/2
	return shifted_similarity ** d

def linear_kernel (x_i, x_j):
	"""
	linear_kernel evaluates the linear SVM kernel for one pair of samples

	x_i: first sample (feature vector)
	x_j: second sample (feature vector)
	:return: the dot product of x_i and x_j
	"""
	similarity = np.dot(x_i, x_j)
	return similarity

def rbf_kernel (x_i, x_j,gamma=1):
	"""
	rbf_kernel evaluates the radial basis function (Gaussian) SVM kernel for one pair of samples

	x_i: first sample (feature vector)
	x_j: second sample (feature vector)
	gamma: defines how far the influence of a single training example reaches
	:return: exp(-gamma * ||x_j - x_i||^2)
	"""
	# Euclidean distance between the two samples
	separation = np.linalg.norm(x_j - x_i)
	return np.exp(-gamma * separation ** 2)

In [5]:
# build the kernel matrix from a kernel spec and data

def build_k (X, kernel_type='linear_kernel', poly_power=3, gamma=1):
	"""
	build_k generates the kernel (Gram) matrix to use inside of an SVM calculation
	X: Training data for our calculations, one sample per row
	kernel_type: Specifies the type of kernel to use: linear_kernel, polynomial_kernel, rbf_kernel
	poly_power: An optional parameter to define to what degree the polynomial should be calculated
	gamma: An optional parameter that defines how far the influence of a single training example reaches
	:return: N x N numpy array K with K[i][j] = kernel(X[i], X[j]), where N = X.shape[0]

	Raises ValueError when kernel_type is not one of the supported names.
	"""
	# Resolve the kernel once, before touching the data, so that an unknown name
	# fails immediately (even for empty X) instead of being re-checked per pair.
	if kernel_type == 'linear_kernel':
		pair_kernel = linear_kernel
	elif kernel_type == 'polynomial_kernel':
		pair_kernel = lambda x_i, x_j: polynomial_kernel(x_i, x_j, poly_power)
	elif kernel_type == 'rbf_kernel':
		pair_kernel = lambda x_i, x_j: rbf_kernel(x_i, x_j, gamma)
	else:
		raise ValueError('Use kernal type polynomial_kernel, linear_kernel or rbf_kernel')

	N = X.shape[0]
	K = np.zeros((N, N))
	for i in range(N):
		x_i = X[i]
		for j in range(N):
			K[i][j] = pair_kernel(x_i, X[j])

	return K

In [8]:
# This outputs weights, biases, and lagrange multipliers of our SVM Classifier
def SVM(X, y, kernel_type='linear_kernel', C=10):
	"""
	SVM will calculate the weight and bias using the SVM quadratic method (soft margin)
	X: Training data used for calculations (2D numpy array, one sample per row)
	y: results of training data (numpy array of labels, expected to be -1 or 1)
	kernel_type: Specifies the type of kernel to use: linear_kernel, polynomial_kernel, rbf_kernel
	C: Trades off misclassification of training examples against simplicity of the decision surface
	:return: (w, b, alphas) where
		w is a (features, 1) column computed as sum_i alpha_i * y_i * x_i; this is the
		separating hyperplane's normal only for the linear kernel,
		b is a one-element list holding the bias,
		alphas is an (s, 1) array with the multipliers of the s support vectors (alpha > 1e-4)

	Help used: https://stats.stackexchange.com/questions/23391/how-does-a-support-vector-machine-svm-work/353605#353605
	"""

	# Number of training samples
	m, _ = X.shape

	# Labels as a float column vector so that the products below broadcast as intended
	y = y.reshape(-1,1) * 1.

	# Kernel (Gram) matrix for the requested kernel
	K = build_k(X, kernel_type=kernel_type)

	# Quadratic term of the dual: H[i][j] = y_i * y_j * K(x_i, x_j)
	H = np.matmul(y,y.T) * K * 1.

	# Dual problem in cvxopt form:
	#   minimise 1/2 a^T P a + q^T a   subject to   G a <= h,   A a = b_eq
	P = cvxopt_matrix(H)
	q = cvxopt_matrix(-np.ones((m, 1)))
	# Box constraint 0 <= alpha_i <= C, written as -alpha <= 0 stacked on alpha <= C
	G = cvxopt_matrix(np.vstack((np.eye(m)*-1,np.eye(m))))
	h = cvxopt_matrix(np.hstack((np.zeros(m), np.ones(m) * C)))
	# Equality constraint sum_i alpha_i * y_i = 0
	A = cvxopt_matrix(y.reshape(1, -1))
	b_eq = cvxopt_matrix(np.zeros(1))

	# Run solver
	sol = cvxopt_solvers.qp(P, q, G, h, A, b_eq)
	alphas = np.array(sol['x'])

	# Primal weight vector (only a valid decision direction for the linear kernel)
	w = ((y * alphas).T @ X).reshape(-1,1)

	# Support vectors are the samples whose multiplier is numerically non-zero
	S = (alphas > 1e-4).flatten()
	sv_y = y[S]
	alphas = alphas[S]

	# Reuse the Gram matrix computed above, restricted to the support vectors, so the
	# bias is evaluated with the SAME kernel that was used to train.
	K_sv = K[S][:, S]

	# decision[j] = sum_i alpha_i * y_i * K(x_i, x_j) for every support vector x_j
	decision = np.sum(K_sv * alphas * sv_y, axis=0)

	# Bias is the mean residual y_j - decision[j] over the support vectors.
	# NOTE(review): this averages over all support vectors, including any at the
	# bound alpha == C; confirm whether only margin vectors should be used.
	bias = np.mean(sv_y.ravel() - decision)

	return w, [bias], alphas

In [None]:
# Train an SVM and wrap the result in a function that labels new samples as +1 or -1
def SVM_classifier(X_train, y_train, kernel, C):
    """
    SVM_classifier trains the dual SVM on the given data and returns a prediction function

    X_train: Training data (2D numpy array, one sample per row)
    y_train: Training labels, expected to be -1 or 1
    kernel: Kernel name forwarded to SVM as kernel_type
    C: Soft-margin penalty forwarded to SVM
    :return: function taking a 2D collection of samples and returning a numpy array of +1 / -1

    Note: predictions use sign(w . x + b) with the weight vector returned by SVM, so they
    are only meaningful when kernel is 'linear_kernel'.
    """
    # Forward the caller's kernel and C; previously both were silently ignored.
    w, b, _ = SVM(X_train, y_train, kernel_type=kernel, C=C)

    # SVM returns w as a (features, 1) column and b as a one-element list.
    # Flatten both so that the dot product with a 1-D sample is a plain scalar.
    w_vec = np.asarray(w, dtype=float).ravel()
    bias = float(np.ravel(b)[0])

    # classifies a single sample as +1 or -1
    def classify_sample(x):
        return 1 if np.dot(w_vec, x) + bias >= 0 else -1

    # classifies multiple samples as +1 or -1
    def classify_samples(X_test):
        return np.array([classify_sample(x) for x in X_test])

    return classify_samples

In [7]:
# Read the labelled samples from a CSV using pandas
df = pd.read_csv('./data/test-data/test_data.csv', encoding='utf8')

# replace 0 by -1 in the label to conform to y being in {-1,1}
df["success"] = df["success"].replace(0, -1)

# Features: combine the two input columns into a single 2D numpy array (one row per sample)
X = df[['age', 'interest']].to_numpy()
# Labels: kept as an (n, 1) column; later cells call y.ravel() where a 1D array is needed
y = df[["success"]].to_numpy()

# Split the data into training and testing.
# A fixed random_state makes the split (and every number printed below) reproducible
# across "Restart Kernel -> Run All".
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# This function performs the following:
# - maps classifiers values -1 or +1
# - separates feature columns from the classifier column
# - splits the data into training and testing sets
# returns: xs_train, xs_test, ys_train, ys_test

def prep_data(data, classifier_column_name, classifier_vals, train_size=0.75, random_state=None):
    """
    prep_data turns a labelled DataFrame into numpy training and testing arrays

    data: pandas DataFrame holding the feature columns and one label column (left unmodified)
    classifier_column_name: name of the label column
    classifier_vals: the two label values; the first maps to -1, any other value maps to +1
    train_size: share of the rows placed in the training set
    random_state: optional seed forwarded to train_test_split for a reproducible split
    :return: X_train, X_test, y_train, y_test as numpy arrays

    Raises ValueError when classifier_vals does not contain exactly two values.
    """
    if len(classifier_vals) != 2:
        raise ValueError("classifier_vals argument must be length 2 (binary classifier)")

    # map each binary classifier value to either 1 or -1
    # Built as a new Series: the caller's frame is not written to, so calling this
    # function twice on the same frame gives the same labels both times.
    negative_value = classifier_vals[0]
    ys = data[classifier_column_name].apply(lambda value: -1 if value == negative_value else 1)

    # separate the features from the classifications
    feature_column_names = [name for name in data.columns.tolist() if name != classifier_column_name]
    xs = data[feature_column_names]

    # split the data into training and testing data
    datasets = train_test_split(xs, ys, train_size=train_size, random_state=random_state)

    # map all the training and testing data into numpy arrays
    X_train, X_test, y_train, y_test = [part.to_numpy() for part in datasets]

    # return the training and testing data
    return X_train, X_test, y_train, y_test

In [None]:
# Calculate our weight, bias and alphas using our SVM function
# (defaults: linear kernel, C=10) on the training split
w, b, alphas = SVM(X_train, y_train)

# Display results
print("------------------- FROM OUR CALCULATIONS -----------------------")
print('Alphas = ',alphas[alphas > 1e-4])
print('w = ', w.flatten())
print('b = ', b)

# Here, we look at the scikit-learn SVM calculations for a sanity check.
# It is fitted on the SAME training split with the same C and a linear kernel,
# so that w and b are directly comparable with the values printed above.
print("------------------- FROM SVM CALCULATIONS -----------------------")
clf = SVC(C = 10, kernel = 'linear')
clf.fit(X_train, y_train.ravel())
w_svm=clf.coef_[0]
b_svm=clf.intercept_
print("w = ",w_svm)
print("b = ",b_svm)