In [171]:
import numpy as np
import pandas as pd
import cvxopt

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification
from scipy.spatial.distance import cdist

In [172]:
dataset = pd.read_csv('dataset/Cancer_Data.csv', sep=',')

In [173]:
X = np.zeros((len(dataset['radius_mean'].values), 2))

In [174]:
X[:,0] = dataset['radius_mean'].values

In [175]:
X[:,1] = dataset['texture_mean'].values

In [176]:
y = np.where(dataset['diagnosis'].values == 'M', -1, 1)

In [177]:
def RBF(X, gamma):
    dist_cdist = cdist(X, X, 'euclidean')
    K = np.exp(-gamma*(dist_cdist)**2)
    return K

In [181]:
class SVM(object):

    def __init__(self, kernel=RBF, C=None):
        self.kernel = kernel
        self.C = C
        if self.C is not None: self.C = float(self.C)

    def fit(self, X, y):
        n_samples, n_features = X.shape

    
        K = self.kernel(X, 1.0)

        P = cvxopt.matrix(np.outer(y,y) * K)
        q = cvxopt.matrix(np.ones(n_samples) * -1)
        A = cvxopt.matrix(y, (1,n_samples)) * 1.0
        b = cvxopt.matrix(0.0)

        if self.C is None:
            G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))
            h = cvxopt.matrix(np.zeros(n_samples))
        else:
            tmp1 = np.diag(np.ones(n_samples) * -1)
            tmp2 = np.identity(n_samples)
            G = cvxopt.matrix(np.vstack((tmp1, tmp2)))
            tmp1 = np.zeros(n_samples)
            tmp2 = np.ones(n_samples) * self.C
            h = cvxopt.matrix(np.hstack((tmp1, tmp2)))

        solution = cvxopt.solvers.qp(P, q, G, h, A, b)

        a = np.ravel(solution['x'])

        sv = a > 1e-5
        ind = np.arange(len(a))[sv]
        self.a = a[sv]
        self.sv = X[sv]
        self.sv_y = y[sv]
        print("%d support vectors out of %d points" % (len(self.a), n_samples))
        
         # Intercept
        self.b = 0
        for n in range(len(self.a)):
            self.b += self.sv_y[n]
            self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])
        self.b /= len(self.a)

        # Weight vector
        self.w = None

    def project(self, X):
        if self.w is not None:
            return np.dot(X, self.w) + self.b
        else:
            y_predict = np.zeros(len(X))
            for i in range(len(X)):
                s = 0
                for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):
                    s += a * sv_y * self.kernel((X[i], sv), 1.0)
                y_predict[i] = s[0][0]
            return y_predict + self.b

    def predict(self, X):
        return np.sign(self.project(X))

In [182]:
clf = SVM()
clf.fit(X,y)
pred = clf.predict(X)
print(pred)

     pcost       dcost       gap    pres   dres
 0: -1.6013e+02 -5.4550e+02  2e+03  2e+01  3e+00
 1: -4.3924e+02 -1.0327e+03  1e+03  1e+01  2e+00
 2: -9.4759e+02 -1.6203e+03  1e+03  1e+01  1e+00
 3: -2.1908e+03 -3.0615e+03  1e+03  1e+01  1e+00
 4: -4.8343e+03 -5.9963e+03  2e+03  1e+01  1e+00
 5: -9.3362e+03 -1.0971e+04  2e+03  9e+00  1e+00
 6: -2.1115e+04 -2.3803e+04  3e+03  9e+00  1e+00
 7: -3.8586e+04 -4.3212e+04  5e+03  9e+00  1e+00
 8: -6.9148e+04 -7.9149e+04  1e+04  9e+00  1e+00
 9: -1.5020e+05 -1.8393e+05  3e+04  8e+00  9e-01
10: -2.3112e+05 -2.8695e+05  6e+04  5e+00  5e-01
11: -2.7058e+05 -2.8330e+05  1e+04  7e-02  8e-03
12: -2.7213e+05 -2.7337e+05  1e+03  7e-03  8e-04
13: -2.7232e+05 -2.7242e+05  1e+02  4e-04  5e-05
14: -2.7234e+05 -2.7236e+05  2e+01  4e-06  4e-07
15: -2.7235e+05 -2.7235e+05  2e+00  3e-07  4e-08
16: -2.7235e+05 -2.7235e+05  3e-01  5e-08  5e-09
17: -2.7235e+05 -2.7235e+05  3e-02  1e-09  1e-10
Optimal solution found.
353 support vectors out of 569 points
[1. 1. 1