## Objective: Learn how to train a soft-margin SVM with an RBF kernel using the SMO algorithm, and how to predict with it.

In [None]:
# Data Pre-processing:
# 1) The data was split into features and labels (X and Y).
# 2) The data was shuffled (not done in the code below; presumably the CSV was shuffled beforehand).
# 3) The features were scaled.
# 4) The data was split into a 70% training set and a 30% test set.

# Modelling:
# The solution uses an RBF kernel for the SVM classifier. The data is read from a CSV file and the result is printed to standard output.

import numpy as np
import pandas as pd
import random as rnd

def getscaleddata(path="/content/heartDiseases.csv", train_fraction=0.7):
	'''
	Load the heart-disease CSV and build the training and test arrays.

	Only the columns f1 and f4 are used as features. Each of them is
	mean-centred and divided by its range (max - min).
	Labels in column y that equal 0 become -1; every other label becomes 1.
	Rows are split in file order (no shuffling is done here): the first
	int(train_fraction * n) rows form the training set, the rest the test set.

	Parameters:
		path: CSV file to read; it must contain the columns f1, f4 and y.
		train_fraction: share of the rows assigned to the training set.

	Returns:
		(X_train, Y_train, X_test, Y_test) as NumPy arrays. The X arrays hold
		one row per sample with the columns [f1, f4].

	Note: a constant feature column has zero range, so its scaled values
	come out as NaN.
	'''
	input_data = pd.read_csv(path)
	n = input_data.shape[0]
	split = int(train_fraction * n)

	# Scale both feature columns at once; positional arrays avoid depending
	# on the DataFrame's index labels.
	features = input_data[['f1', 'f4']].to_numpy(dtype=float)
	spread = features.max(axis=0) - features.min(axis=0)
	features = (features - features.mean(axis=0)) / spread

	# SVM labels must be -1/+1 rather than 0/1.
	labels = np.where(input_data['y'].to_numpy() == 0, -1, 1)

	return features[:split], labels[:split], features[split:], labels[split:]

def rbfKernel(x1, x2, gamma=1.0):
	'''
	Gaussian (RBF) kernel: exp(-gamma * ||x1 - x2||^2).

	Parameters:
		x1, x2: array-likes of the same shape.
		gamma: width coefficient applied to the squared distance; the
			default of 1.0 gives exp(-||x1 - x2||^2).

	Returns:
		The kernel value as a scalar (1.0 when x1 equals x2).
	'''
	delta = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
	return np.exp(-gamma * np.square(np.linalg.norm(delta)))

def getB(X, y, w):
	'''
	Bias term: the average of (label - X.w) taken over all samples.

	y is turned into a column so it lines up with the column of scores
	produced by X.w.
	'''
	targets = y.reshape((len(y), 1))
	scores = X.dot(w)
	residuals = targets - scores
	return residuals.mean()

def getW(alpha, y, X):
	'''
	Weight vector of the linear model: the sum over samples of
	alpha_i * y_i * x_i, returned as a column with one row per feature.
	'''
	n_samples = X.shape[0]
	coefficients = np.multiply(alpha, y)
	column = coefficients.reshape((n_samples, 1))
	return X.T.dot(column)

def getH(X, w, b):
	'''
	Predicted sign of w.x + b for one sample or for a batch.

	A 1-D X is treated as a single sample and yields a (1, 1) array;
	otherwise each row of X is a sample and the result has one row per
	sample. Values are -1, 0 or 1 as produced by np.sign.
	'''
	# Lay the samples out as columns so that w.T can be applied to all of them.
	if X.ndim == 1:
		samples = X.reshape(X.shape[0], 1)
	else:
		samples = X.T
	scores = np.dot(w.T, samples) + b
	return np.sign(scores).T

def getLH(alphaJ, alphaI, yj, yi, C=1):
	'''
	Lower and upper clipping bounds (L, H) for the new value of alphaJ.

	The bounds keep both multipliers inside the box [0, C] while the pair
	is updated together.

	Parameters:
		alphaJ, alphaI: current multipliers of the two chosen samples.
		yj, yi: their labels.
		C: upper limit of the box constraint (soft-margin penalty);
			defaults to 1.

	Returns:
		The tuple (L, H).
	'''
	if yi != yj:
		# Opposite labels: the difference alphaJ - alphaI stays fixed.
		return (max(0, alphaJ - alphaI), min(C, C - alphaI + alphaJ))
	# Equal labels: the sum alphaI + alphaJ stays fixed.
	return (max(0, alphaI + alphaJ - C), min(C, alphaI + alphaJ))

def getParameters(X, y, epoch = 1000):
	'''
	Fit the multipliers with a simplified SMO loop and return the model.

	In every pass each sample j is paired with a randomly drawn sample i.
	The pair is skipped when the kernel curvature
	k = K(i,i) + K(j,j) - 2*K(i,j) is zero (which includes i == j);
	otherwise alpha[j] is moved, clipped to the bounds from getLH, and
	alpha[i] is adjusted by the matching amount. Passes stop early once
	alpha changes by less than 0.001 (Euclidean norm) within a pass.

	Parameters:
		X: 2-D array with one row per training sample.
		y: 1-D array of labels (-1 / 1), aligned with the rows of X.
		epoch: maximum number of passes over the data.

	Returns:
		(b, w): the bias from getB and the weight column from getW, both
		computed from the final alpha.

	NOTE(review): the errors eI/eJ are built from the sign output of getH,
	and the returned model is the linear (w, b) pair; the RBF kernel only
	enters through k. Left unchanged so results stay comparable.
	'''
	n = X.shape[0]
	alpha = np.zeros((n))
	for _ in range(epoch):
		alphaTemp = np.copy(alpha)
		for j in range(n):
			i = rnd.randint(0, n - 1)
			xI, xJ, yI, yJ = X[i,:], X[j,:], y[i], y[j]
			k = rbfKernel(xI, xI) + rbfKernel(xJ, xJ) - 2 * rbfKernel(xI, xJ)
			if k == 0:
				# No curvature along this pair (identical points or i == j).
				continue

			alphaJOld = float(alpha[j])
			(L, H) = getLH(alpha[j], alpha[i], yJ, yI)

			# Model implied by the current multipliers.
			w = getW(alpha, y, X)
			b = getB(X, y, w)

			# getH returns a (1, 1) array for a single sample; .item()
			# extracts the scalar without relying on array-to-float casts.
			eI = getH(xI, w, b).item() - yI
			eJ = getH(xJ, w, b).item() - yJ

			# Unconstrained step for alpha[j], then clip into [L, H].
			alpha[j] = alpha[j] + float(yJ * (eI - eJ)) / k
			alpha[j] = max(alpha[j], L)
			alpha[j] = min(alpha[j], H)

			# Compensate on alpha[i] by the amount alpha[j] actually moved.
			alpha[i] = alpha[i] + yI * yJ * (alphaJOld - alpha[j])

		# Checking convergence
		diff = np.linalg.norm(alpha - alphaTemp)
		if diff < 0.001:
			break

	# Computing and returning final model parameters. w is rebuilt from the
	# final alpha so the bias matches the returned weights (and so w is
	# always defined, even if no pair was ever updated).
	w = getW(alpha, y, X)
	return getB(X, y, w), w

def getAccuracy(X_test, Y_test, w, b):
	'''
	Fraction of test samples whose predicted label matches the true label.

	Parameters:
		X_test: 2-D array with one row per test sample.
		Y_test: true labels (-1 / 1), aligned with the rows of X_test.
		w, b: model parameters passed on to getH.

	Returns:
		(true positives + true negatives) / len(Y_test), a value in [0, 1].
		A prediction of 0 (a score exactly on the boundary) never counts
		as correct.
	'''
	yPred = getH(X_test, w, b).flatten()
	yTrue = np.asarray(Y_test)[:len(yPred)]

	# Each correct prediction counts exactly once.
	TP = np.count_nonzero((yPred == 1) & (yTrue == 1))
	TN = np.count_nonzero((yPred == -1) & (yTrue == -1))
	return (TP + TN) / len(Y_test)

# Load and split the data, fit the model on the training part, then report
# the accuracy on the held-out part as a percentage.
X_train, Y_train, X_test, Y_test = getscaleddata()
b, w = getParameters(X_train, Y_train)
accuracy_percent = getAccuracy(X_test, Y_test, w, b) * 100
print("The accuracy is : " + str(accuracy_percent) + " %")

# Both algorithms were run 10 times, because the data was shuffled before each run. The results obtained were as follows:
# 1) The average accuracy of the algorithm was around 75%.
# 2) The maximum accuracy obtained was around 81% and the minimum was around 62%.

The accuracy is : 76.92307692307693 %
