# KNN and SVM
### Dano Gillam

In [2]:
from __future__ import division
import csv
import random
import operator
from sklearn.datasets import load_iris
import numpy as np

def splitIris(split=1):
    """Randomly partition the iris dataset into a training and a test set.

    Every returned row is a plain list: the feature values taken from
    ``load_iris()['data']`` followed by the matching entry of
    ``load_iris()['target']`` as the last element.

    Args:
        split: Probability that a row is assigned to the training set. Each
            row is compared against its own ``random.random()`` draw, so the
            default of 1 sends every row to the training set.

    Returns:
        A ``(trainingSet, testSet)`` tuple of lists of rows.
    """
    # Load the bunch once instead of once for the data and once for the labels.
    iris = load_iris()
    rows = [list(features) + [label]
            for features, label in zip(iris['data'], iris['target'])]

    trainingSet = []
    testSet = []
    # Walk over *every* row: the earlier ``range(len(dataset)-1)`` silently
    # dropped the last sample from both partitions.
    for row in rows:
        if random.random() < split:
            trainingSet.append(row)
        else:
            testSet.append(row)
    return trainingSet, testSet
 
class knn_iris():
    """k-nearest-neighbours classifier over rows shaped ``features + [label]``.

    The last element of every training and test row is treated as its class
    label; all preceding elements are treated as numeric features.
    """

    def __init__(self, trainingSet):
        """Keep a reference to the labelled training rows.

        Args:
            trainingSet: Sequence of rows whose last element is the label.
        """
        self.trainingSet = trainingSet
        # Filled by classify(); one predicted label per test row.
        self.predictions = []

    def euclideanDistance(self, instance1, instance2, length):
        """Return the Euclidean distance over the first ``length`` components.

        Args:
            instance1: First row (indexable).
            instance2: Second row (indexable).
            length: Number of leading components to compare.

        Returns:
            The square root (via ``np.sqrt``) of the summed squared differences.
        """
        squared = sum((instance1[i] - instance2[i])**2 for i in range(length))
        return np.sqrt(squared)

    def getNeighbors(self, trainingSet, testInstance, k):
        """Return the ``k`` training rows closest to ``testInstance``.

        Args:
            trainingSet: Rows to search.
            testInstance: Row to classify; its last element is skipped when
                measuring distance.
            k: Number of neighbours wanted. If it exceeds the number of
                training rows, all rows are returned instead of raising.

        Returns:
            List of training rows ordered from nearest to farthest.
        """
        # The final element of a row is its label, not a feature.
        length = len(testInstance) - 1
        distances = [(row, self.euclideanDistance(testInstance, row, length))
                     for row in trainingSet]
        distances.sort(key=operator.itemgetter(1))
        # Slicing clamps k to the available rows; indexing distances[x] for
        # x in range(k) raised IndexError whenever k > len(trainingSet).
        return [row for row, _ in distances[:max(k, 0)]]

    def getResponse(self, neighbors):
        """Return the label that occurs most often among ``neighbors``.

        Args:
            neighbors: Rows whose last element is the label.

        Returns:
            The label with the highest vote count.

        Raises:
            IndexError: If ``neighbors`` is empty.
        """
        classVotes = {}
        for neighbor in neighbors:
            label = neighbor[-1]
            classVotes[label] = classVotes.get(label, 0) + 1
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1),
                             reverse=True)
        return sortedVotes[0][0]

    def classify(self, testSet, k=3):
        """Predict a label for every row of ``testSet`` and report accuracy.

        The predictions are stored in ``self.predictions`` (replacing any
        earlier ones) and the accuracy is printed by ``getAccuracy``.

        Args:
            testSet: Rows to classify; the last element of each is the true
                label used for scoring.
            k: Number of neighbours that vote on each prediction.

        Returns:
            The accuracy as a fraction, as returned by ``getAccuracy``.
        """
        self.predictions = [
            self.getResponse(self.getNeighbors(self.trainingSet, row, k))
            for row in testSet
        ]
        # Hand the score back to the caller instead of discarding it.
        return self.getAccuracy(testSet)

    def getAccuracy(self, testSet):
        """Print and return the fraction of rows predicted correctly.

        Compares ``self.predictions[i]`` with the last element of
        ``testSet[i]`` for every row of ``testSet``.

        Args:
            testSet: Rows whose last element is the true label.

        Returns:
            Fraction of matching predictions (0.0 to 1.0).

        Raises:
            ZeroDivisionError: If ``testSet`` is empty.
        """
        correct = sum(1 for i, row in enumerate(testSet)
                      if row[-1] == self.predictions[i])
        accuracy = (correct/float(len(testSet)))
        print('Accuracy: ' + repr(accuracy*100.) + '%')
        return accuracy

### Problems 1, 2, 3 KNN

In [3]:
def prob123():
    """Run k-NN on a random iris split and print the accuracy for several k.

    Roughly 67% of the rows go to the training set; the remaining rows are
    classified with k = 1, 11, 21, ..., 91 and one line is printed per k.
    """
    # Local import: the file's import header does not bring in sys.
    import sys

    split = 0.67
    trainingSet, testSet = splitIris(split)

    my_knn = knn_iris(trainingSet)
    for k in range(1, 100, 10):
        # Emit the prefix without a newline so the "Accuracy: ..." line that
        # classify() prints ends up on the same output line. This replaces a
        # Python-2-only print statement while producing the same text.
        sys.stdout.write("k= %d  >>  " % k)
        my_knn.classify(testSet, k)

prob123()

k= 1  >>  Accuracy: 100.0%
k= 11  >>  Accuracy: 100.0%
k= 21  >>  Accuracy: 100.0%
k= 31  >>  Accuracy: 100.0%
k= 41  >>  Accuracy: 100.0%
k= 51  >>  Accuracy: 89.47368421052632%
k= 61  >>  Accuracy: 63.1578947368421%
k= 71  >>  Accuracy: 63.1578947368421%
k= 81  >>  Accuracy: 59.64912280701754%
k= 91  >>  Accuracy: 22.807017543859647%


### Problems 4, 5 SVM

In [4]:
import numpy as np
import scipy as sp
from scipy import linalg as la
import cvxopt
import pandas as pd
cvxopt.solvers.options['show_progress']=False
class SVM():
    """Kernel support vector machine trained by solving the dual QP with cvxopt.

    NOTE(review): ``predictone`` answers with +1 / -1, so the targets are
    presumably expected to be encoded as +1 / -1 as well -- confirm against
    the data that is fed in.
    """

    def __init__(self, data, target):
        """Store the training samples and default to the linear kernel.

        Args:
            data: Training samples, indexable by row (``data[i]``).
            target: One label per training sample; stored as a float array.
        """
        self.data = data
        # np.array accepts plain sequences as well as ndarrays and always
        # makes a float copy, like the previous target.astype(float).
        self.target = np.array(target, dtype=float)
        self.N = len(target)
        self.kernel = lambda x, y: np.dot(x, y)

    def setKernel(self, kernelname, a=1.5, d=.5, gam=.5, r=.5):
        """Select the kernel function used by ``train`` and ``predictone``.

        Args:
            kernelname: One of ``"linear"``, ``"polynomial"``, ``"rbf"`` or
                ``"sigmoid"``.
            a: Additive constant of the polynomial kernel.
            d: Exponent of the polynomial kernel.
            gam: Scale of the squared distance in the rbf kernel.
            r: Additive constant inside ``tanh`` for the sigmoid kernel.

        Raises:
            ValueError: If ``kernelname`` is not one of the names above.
                Previously an unknown name was ignored and the old kernel
                silently stayed active.
        """
        if kernelname == "linear":
            self.kernel = lambda x, y: np.dot(x, y)
        elif kernelname == "polynomial":
            self.kernel = lambda x, y: (np.dot(x, y) + a)**d
        elif kernelname == "rbf":
            self.kernel = lambda x, y: np.exp(-gam*la.norm(x-y)**2)
        elif kernelname == "sigmoid":
            self.kernel = lambda x, y: np.tanh(np.dot(x, y) + r)
        else:
            raise ValueError("unknown kernel name: %r" % (kernelname,))

    def train(self):
        """Solve the dual quadratic program and store the multipliers.

        Builds the N x N kernel matrix of the training samples, passes the
        problem to ``cvxopt.solvers.qp`` with the constraints ``a >= 0`` and
        ``target . a == 0``, and keeps the flattened solution in ``self.a``.
        """
        n = self.N
        K = np.array([[self.kernel(self.data[i], self.data[j])
                       for j in range(n)] for i in range(n)])

        Q = cvxopt.matrix(np.outer(self.target, self.target)*K)
        q = cvxopt.matrix(np.ones(n) * -1)
        # Equality constraint: sum_i a_i * target_i == 0.
        A = cvxopt.matrix(self.target, (1, n))
        b = cvxopt.matrix(0.0)
        # Inequality constraint -a <= 0, i.e. every multiplier is non-negative.
        G = cvxopt.matrix(-np.eye(n))
        h = cvxopt.matrix(np.zeros(n))

        solution = cvxopt.solvers.qp(Q, q, G, h, A, b)
        self.a = np.ravel(solution['x'])

    def predictone(self, x):
        """Classify a single sample.

        Args:
            x: Sample to classify.

        Returns:
            1 if ``sum_i a_i * target_i * kernel(x, data_i)`` is positive,
            otherwise -1. No intercept term is added to that sum.
        """
        kernel_values = np.array([self.kernel(x, self.data[i])
                                  for i in range(self.N)])
        score = (self.a*self.target).dot(kernel_values)
        # Compare the raw float: wrapping it in int() truncated every score
        # in (0, 1) to 0 and flipped those samples to the negative class.
        if score > 0:
            return 1
        else:
            return -1

    def predict(self, X):
        """Return a list with the ``predictone`` result for every row of ``X``."""
        return [self.predictone(x) for x in X]

def getAccuracy(prediction, testtarget):
    """Print and return the fraction of predictions that match the targets.

    ``prediction[i]`` is compared with ``testtarget[i]`` for every index of
    ``prediction``; the number of matches is divided by ``len(testtarget)``.

    Args:
        prediction: Sequence of predicted labels.
        testtarget: Indexable sequence of true labels.

    Returns:
        The accuracy as a fraction (0.0 to 1.0).

    Raises:
        ZeroDivisionError: If ``testtarget`` is empty.
        IndexError: If ``prediction`` is longer than ``testtarget``.
    """
    # Indented with spaces only: the previous mix of tabs and spaces is a
    # TabError on Python 3.
    correct = sum(1 for i, predicted in enumerate(prediction)
                  if testtarget[i] == predicted)
    accuracy = (correct/float(len(testtarget)))
    print('Accuracy: ' + repr(accuracy*100.) + '%')
    return accuracy

def splitCancer(split=1):
    """Read ``cancer.csv`` and randomly partition its rows into two arrays.

    The file is read with ``pd.read_csv`` from the current working directory
    and its values are cast to ``int``.

    Args:
        split: Probability that a row is assigned to the training set. Each
            row is compared against its own uniform draw from
            ``np.random.random``, so the default of 1 keeps every row in the
            training set.

    Returns:
        A ``(trainingSet, testSet)`` tuple of 2-D integer arrays. An empty
        partition keeps the column count (shape ``(0, n_columns)``), so
        column slicing such as ``arr[:, :-1]`` still works on it.
    """
    dataset = pd.read_csv("cancer.csv").values.astype(int)
    # One draw per row, covering *all* rows: the earlier
    # ``xrange(len(dataset)-1)`` loop never looked at the last row.
    in_training = np.random.random(len(dataset)) < split
    return dataset[in_training], dataset[~in_training]


# Randomly hold out part of the cancer data; the last column of each row is
# used as the label and the remaining columns as features.
train, test = splitCancer(.67)
traindata = train[:, :-1]
traintarget = train[:, -1]
testdata = test[:, :-1]
testtarget = test[:, -1]

# NOTE(review): SVM.predict answers with +1 / -1, so the accuracy below is
# only meaningful if the label column uses the same encoding -- confirm.
mySVM = SVM(traindata, traintarget)

# Retrain the same model once per kernel and report its test accuracy.
# print(...) with a single argument emits the same text on Python 2 and 3.
for kernelname in ("polynomial", "rbf", "sigmoid"):
    print("\n" + kernelname + ":")
    mySVM.setKernel(kernelname)
    mySVM.train()
    predictions = mySVM.predict(testdata)
    getAccuracy(predictions, testtarget)


polynomial:
Terminated (singular KKT matrix).
Accuracy: 67.69911504424779%

rbf:
Accuracy: 69.02654867256636%

sigmoid:
Terminated (singular KKT matrix).
Accuracy: 67.69911504424779%


0.6769911504424779