# Multiclass SVM 구현

In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics.pairwise import *
from sklearn.datasets import load_iris
import numpy as np
import cvxopt as cv
# Load the iris dataset through seaborn.
iris = sns.load_dataset('iris')
# Target labels: the last column.
y = iris.iloc[:, -1]
# Features to learn from: the first four columns.
X = iris.iloc[:, :4]
print(y)

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object


In [28]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=48,
)

In [29]:
def standardization(train, test):
    """Standardize two feature sets with a scaler fitted on ``train`` only.

    Args:
        train: Feature matrix used to fit the ``StandardScaler``.
        test: Feature matrix transformed with the statistics learned from
            ``train``; it does not influence the fit.

    Returns:
        A ``(train_scaled, test_scaled)`` tuple of transformed arrays.
    """
    fitted_scaler = StandardScaler().fit(train)
    train_scaled = fitted_scaler.transform(train)
    test_scaled = fitted_scaler.transform(test)
    return train_scaled, test_scaled

# Standardize both splits; the scaler inside `standardization` is fitted on
# the training split only and then applied to the test split.
X_train, X_test = standardization(X_train, X_test)

In [30]:
# Preview the first ten rows of the standardized training features.
X_train[:10]

array([[ 0.78522493,  0.32015325,  0.77221097,  1.04726529],
       [-0.26563371, -1.29989934,  0.0982814 , -0.11996537],
       [ 0.43493872,  0.78302542,  0.94069336,  1.43634218],
       [-0.84944407,  0.78302542, -1.24957775, -1.28719604],
       [-0.38239578, -1.7627715 ,  0.15444219,  0.13941922],
       [ 0.55170079, -0.374155  ,  1.05301496,  0.7878807 ],
       [ 0.31817664, -0.14271892,  0.65988937,  0.7878807 ],
       [ 0.20141457, -0.374155  ,  0.43524618,  0.39880381],
       [-1.66677857, -0.14271892, -1.36189934, -1.28719604],
       [-0.14887164, -0.60559109,  0.21060299,  0.13941922]])

In [31]:
# Preview the first ten rows of the standardized test features.
X_test[:10]

array([[-0.14887164, -0.374155  ,  0.26676379,  0.13941922],
       [ 0.31817664, -0.60559109,  0.54756778,  0.00972692],
       [ 0.31817664, -1.06846325,  1.05301496,  0.26911151],
       [-1.5500165 , -1.7627715 , -1.36189934, -1.15750374],
       [ 0.0846525 ,  0.32015325,  0.60372857,  0.7878807 ],
       [ 0.78522493, -0.14271892,  0.99685416,  0.7878807 ],
       [-0.84944407,  1.70876975, -1.24957775, -1.15750374],
       [ 0.20141457, -0.14271892,  0.60372857,  0.7878807 ],
       [-0.38239578,  2.63451409, -1.30573855, -1.28719604],
       [-0.38239578, -1.29989934,  0.15444219,  0.13941922]])

In [32]:
# Distinct class labels present in the training split.
set(y_train)

{'setosa', 'versicolor', 'virginica'}

In [33]:
# Registry mapping a kernel name to the pairwise-kernel function that
# computes its Gram matrix.
kernel_dct = dict(rbf=rbf_kernel, linear=linear_kernel)

# Numerical tolerance: dual variables at or below this value are treated as zero.
eps = 10 ** -7

In [34]:
class SVM:
    """Binary soft-margin kernel SVM trained by solving the dual QP with cvxopt.

    Labels passed to ``fit`` must be encoded as +1 / -1, and ``predict``
    returns labels in the same encoding.
    """

    def __init__(self, C=1.0, kernel="rbf"):
        """Configure the classifier.

        Args:
            C: Upper bound of the box constraint ``0 <= alpha_i <= C`` on the
                dual variables.
            kernel: Either a key of the module-level ``kernel_dct`` or a
                callable ``kernel(X, Y=None)`` returning the Gram matrix.

        Raises:
            ValueError: If ``kernel`` is neither a registered name nor callable.
        """
        self.C = C
        # Accept a registered kernel name or a user-supplied callable.
        self.kernel = kernel_dct[kernel] if kernel in kernel_dct else kernel
        if not callable(self.kernel):
            raise ValueError(
                "unknown kernel %r; expected one of %s or a callable"
                % (kernel, sorted(kernel_dct))
            )
        # alpha_i * y_i for every support vector (set by fit).
        self.w = None
        # Training rows whose dual variable exceeds eps (set by fit).
        self.support_vector = None
        # Scalar intercept of the decision function (set by fit).
        self.b = None

    def fit(self, X, y):
        """Solve the dual problem and store support vectors, weights and bias.

        Args:
            X: Array-like feature matrix with one row per sample.
            y: Array-like of labels in {+1, -1}, same length as ``X``.

        Raises:
            AssertionError: If ``X`` and ``y`` differ in length.
            ValueError: If ``y`` contains anything other than +1 / -1.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        # Validate before paying for the kernel evaluation.
        assert len(X) == len(y), "not same shape label and feature"
        if not np.isin(y, (-1, 1)).all():
            raise ValueError("labels must be encoded as +1 / -1")
        y = y.astype("double")
        N = len(X)

        gram_matrix = np.asarray(self.kernel(X), dtype="double")

        # Dual objective: minimise 1/2 a^T P a + q^T a with P_ij = y_i y_j K_ij.
        P = cv.matrix(gram_matrix * np.outer(y, y))
        q = cv.matrix(-np.ones(N))
        # Box constraint 0 <= a_i <= C written as G a <= h.
        G = cv.matrix(np.r_[np.identity(N), -np.identity(N)])
        h = cv.matrix(np.r_[self.C * np.ones(N), np.zeros(N)])
        # Equality constraint sum_i a_i y_i = 0.
        A = cv.matrix(np.array([y], dtype="double"))
        b = cv.matrix(0.0)

        sol = cv.solvers.qp(P, q, G=G, h=h, A=A, b=b)
        alphas = np.array(sol["x"]).reshape(N)

        # Support vectors: dual variable numerically greater than zero.
        is_sv = alphas > eps
        self.w = alphas[is_sv] * y[is_sv]
        # Kept from the original implementation so the notebook output stays comparable.
        print(self.w)
        self.support_vector = X[is_sv]

        # Only support vectors strictly inside the box (0 < a_i < C) lie exactly
        # on the margin; vectors at the bound C have non-zero slack and would
        # bias the intercept. Fall back to every support vector if none is free.
        on_margin = is_sv & (alphas < self.C - eps)
        if not on_margin.any():
            on_margin = is_sv
        kernel_part = gram_matrix[np.ix_(on_margin, is_sv)].dot(self.w)
        # For y in {+1, -1}: y_i * f(x_i) = 1  =>  b = y_i - sum_j a_j y_j K_ij.
        self.b = np.mean(y[on_margin] - kernel_part)

    def predict(self, X):
        """Classify samples with the fitted decision function.

        Args:
            X: Array-like feature matrix with one row per sample.

        Returns:
            Integer array with 1 where the decision value is strictly positive
            and -1 otherwise.

        Raises:
            AssertionError: If the model has not been (fully) fitted.
        """
        # All three fitted attributes are required; a partially initialised
        # model must be rejected here rather than failing later.
        assert (
            self.w is not None
            and self.support_vector is not None
            and self.b is not None
        ), "not call fit method yet"
        scores = np.dot(self.w, self.kernel(X, Y=self.support_vector).T) + self.b
        scores = np.asarray(scores).reshape(len(X))
        return np.where(scores > 0, 1, -1)

In [35]:
def main():
    """Compare the hand-written SVM with scikit-learn's SVC on a binary iris task.

    Class 0 forms the positive class (+1). An equally sized random sample of
    the remaining rows forms the negative class (-1). The data is split 80/20,
    both classifiers are fitted on the training part, and their predictions
    and test accuracies are printed.
    """
    # SVC is not imported anywhere else in the notebook; import it here so the
    # cell survives a fresh "Restart & Run All".
    from sklearn.svm import SVC

    np.random.seed(1)
    iris = load_iris()
    N_pos = int(np.sum(iris.target == 0))
    # Draw as many negatives as there are positives, without replacement.
    tmp_list = np.random.choice(list(range(N_pos, len(iris.target))), N_pos, replace=False)

    print(N_pos)
    # The first N_pos selected rows are positives, the sampled rows negatives.
    label = [1] * N_pos + [-1] * N_pos

    index_list = list(range(N_pos))
    index_list.extend(tmp_list)
    x = iris.data[index_list]
    x_train, x_test, y_train, y_test = train_test_split(x, label, test_size=0.2, random_state=2)

    # Hand-written SVM.
    clf = SVM()
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    print(pred)
    print(y_test)
    print("Accuray: " + str(accuracy_score(y_test, pred)))

    # Reference implementation from scikit-learn.
    clf = SVC(gamma='auto')
    clf.fit(x_train, y_train)
    print(accuracy_score(y_test, clf.predict(x_test)))
    
# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

50
     pcost       dcost       gap    pres   dres
 0: -1.2202e+00 -9.3465e+01  3e+02  1e+00  4e-16
 1:  4.5970e-01 -3.3948e+01  3e+01  3e-16  4e-16
 2: -1.8034e+00 -6.3749e+00  5e+00  2e-16  5e-16
 3: -2.3449e+00 -3.6235e+00  1e+00  2e-16  2e-16
 4: -2.4895e+00 -2.9615e+00  5e-01  2e-16  2e-16
 5: -2.5729e+00 -2.7008e+00  1e-01  2e-16  2e-16
 6: -2.6078e+00 -2.6359e+00  3e-02  2e-16  2e-16
 7: -2.6172e+00 -2.6185e+00  1e-03  1e-16  2e-16
 8: -2.6177e+00 -2.6178e+00  1e-04  3e-16  2e-16
 9: -2.6178e+00 -2.6178e+00  3e-06  1e-16  2e-16
10: -2.6178e+00 -2.6178e+00  4e-08  2e-16  2e-16
Optimal solution found.
[ 7.42645602e-01  3.31661977e-02  5.05636233e-06 -1.60984646e-01
  9.99999999e-01  9.44281876e-06 -1.86754395e-01 -9.99999999e-01
 -4.18707026e-01 -3.13518142e-02 -2.15188348e-01 -1.96562211e-01
 -1.84042304e-01  6.17764495e-01]
[-1  1 -1  1  1  1  1  1  1  1 -1 -1 -1  1  1  1  1  1 -1 -1]
[-1, 1, -1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, -1, -1]
Accuray: 1.0
1.0
