In [1]:
import pandas as pd
import numpy as np
from cvxopt import matrix, solvers
from cvxopt.modeling import variable
from sklearn.svm import SVC

In [3]:
data = pd.read_csv('2017EE30515.csv', header = None)

t1 = 0
t2 = 1
num_f = 25
train_ex = 480

bin_data = data[(data[25] == t1) | (data[25] == t2)].sample(frac=1)
X_bin = np.array(bin_data.loc[:,:num_f-1])
t_bin = np.array(bin_data.loc[:,25])
t_bin[t_bin == 0] = -1

X_bin_train = X_bin[:train_ex]
t_bin_train = t_bin[:train_ex]
X_bin_val = X_bin[train_ex:]
t_bin_val = t_bin[train_ex:]

In [4]:
ker = 'linear'
if ker is 'linear':
    C = 1
if ker is 'rbf':
    C = 10
    gamma = 0.001

n_samples, n_features = X_bin_train.shape
K = np.zeros((n_samples, n_samples))

for i in range(n_samples):
    for j in range(n_samples):
        if ker is 'linear':
            K[i,j] = np.dot(X_bin_train[i],np.transpose(X_bin_train[j]))
        if ker is 'rbf':
            K[i,j] = np.exp(-1*gamma*np.sum(np.square(X_bin_train[i]-X_bin_train[j])))

P = matrix(np.outer(t_bin_train,t_bin_train) * K)
q = matrix(np.ones(n_samples) * -1)
A = matrix(t_bin_train, (1,n_samples))
A = matrix(A,(1,n_samples),'d')
b = matrix(0.0)
G = matrix(np.vstack((np.diag(np.ones(n_samples) * -1), np.identity(n_samples))))
h = matrix(np.hstack((np.zeros(n_samples), np.ones(n_samples) * C)))
solution = solvers.qp(P, q, G, h, A, b)
a = np.ravel(solution['x'])
sv = a > 1e-5

     pcost       dcost       gap    pres   dres
 0: -2.5168e+01 -8.1979e+02  4e+03  2e+00  5e-14
 1: -1.4000e+01 -3.9861e+02  7e+02  3e-01  3e-14
 2: -4.7815e+00 -9.4528e+01  1e+02  6e-02  2e-14
 3: -1.7157e+00 -5.2999e+01  8e+01  3e-02  1e-14
 4: -4.4910e-02 -8.0963e+00  1e+01  3e-03  3e-15
 5: -4.1307e-02 -6.4587e-01  6e-01  3e-05  2e-15
 6: -1.6313e-01 -3.8430e-01  2e-01  1e-05  2e-15
 7: -2.1837e-01 -3.8816e-01  2e-01  2e-06  9e-16
 8: -2.5237e-01 -3.3668e-01  8e-02  2e-07  1e-15
 9: -2.7716e-01 -2.9191e-01  1e-02  2e-08  1e-15
10: -2.8315e-01 -2.8364e-01  5e-04  1e-10  2e-15
11: -2.8338e-01 -2.8339e-01  5e-06  1e-12  2e-15
12: -2.8338e-01 -2.8338e-01  5e-08  1e-14  2e-15
Optimal solution found.


In [14]:
np.vstack((np.diag(np.ones(n_samples) * -1), np.identity(n_samples)))

array([[-1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0., -1.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0., -1., ...,  0.,  0.,  0.],
       ...,
       [ 0.,  0.,  0., ...,  1.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  1.]])

In [18]:
np.vstack((np.zeros(n_samples), np.ones(n_samples) * C)).shape

(2, 480)

In [20]:
np.ones(n_samples).shape

(480,)

In [15]:
a.shape

(480,)

In [71]:
lm = a[sv]
lm = lm.reshape(len(lm),1)
sv_t = t_bin_train[sv]
sv_t = sv_t.reshape(len(sv_t),1)
sv_x = X_bin_train[sv]

In [72]:
w = np.sum(lm*sv_t*sv_x,axis=0)
print('w =', w)

w = [ 0.47607378 -0.22507329  0.06984814 -0.13698964  0.14598698 -0.02710312
 -0.07673886  0.21814866  0.01722041 -0.04195948 -0.03062127  0.18486051
 -0.04923804 -0.18138636 -0.15827904 -0.01466951  0.0393599  -0.02507239
 -0.22059507 -0.03347074  0.01356878 -0.09083855 -0.01472464 -0.096191
 -0.19745122]


In [73]:
b = np.mean(t_bin_train - np.dot(X_bin_train,w))
print('b =', b)

b = 0.17967036136665873


In [74]:
y_train = np.dot(X_bin_train,w) + b
y_val = np.dot(X_bin_val,w)+b
y_train[y_train<0] = -1
y_train[y_train>=0] = 1
y_val[y_val<0] = -1
y_val[y_val>=0] = 1
y_train = y_train.astype(int)
y_val = y_val.astype(int)

In [75]:
res1 = (y_train == t_bin_train)
res2 = (y_val == t_bin_val)
print('Training score =',(res1==True).sum()/len(res1))
print('Validation score =',(res2==True).sum()/len(res2))
print(str(len(sv_x))+' support vectors using CVXOPT:', np.where(sv==True)[0].tolist())

Training score = 1.0
Validation score = 1.0
13 support vectors using CVXOPT: [20, 50, 97, 259, 286, 297, 319, 321, 327, 329, 430, 443, 469]


In [76]:
if ker is 'linear':
    s = SVC(kernel='linear',C = 1)
if ker is 'rbf':
    s = SVC(kernel='rbf', C=10, gamma=0.001)
s.fit(X_bin_train,t_bin_train)
print('Training score using SVC =',s.score(X_bin_train,t_bin_train))
print('Validation score using SVC =',s.score(X_bin_val,t_bin_val))
print(str(len(s.support_))+' support vectors using SVC:', sorted(s.support_))

Training score using SVC = 1.0
Validation score using SVC = 1.0
13 support vectors using SVC: [20, 50, 97, 259, 286, 297, 319, 321, 327, 329, 430, 443, 469]
