# Multiclass SVM 구현

In [23]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the Iris dataset via seaborn.
iris =  sns.load_dataset('iris') 


In [24]:
# Wrap the loaded data in a DataFrame and preview the first rows.
# NOTE(review): load_dataset presumably already returns a DataFrame, which
# would make this wrap a no-op -- confirm before removing.
iris = pd.DataFrame(iris)
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [25]:
X= iris.iloc[:,:4]  # features to train on (first four columns)
y = iris.iloc[:,-1]  # target (last column, `species`)
print(y)

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object


In [26]:
# One-hot encode the species labels: one indicator column per class
# (setosa, versicolor, virginica), as MultiSVM.fit expects.
y_onehot = pd.get_dummies(y)
y_onehot.head()

Unnamed: 0,setosa,versicolor,virginica
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0


In [27]:
# Hold out 20% of the rows as a test set; random_state is pinned so the
# split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=48)

In [28]:
def standardization(train, test):
    """Standardize both splits using statistics learned from ``train`` only.

    The scaler is fitted on the training split and then applied to both
    splits, so no information from the test split leaks into the scaling.

    Args:
        train: Training feature matrix used to fit the scaler.
        test: Test feature matrix, transformed with the fitted scaler.

    Returns:
        A ``(train_scaled, test_scaled)`` tuple.
    """
    fitted = StandardScaler().fit(train)
    return fitted.transform(train), fitted.transform(test)

# Replace both feature splits with their standardized versions.
X_train, X_test = standardization(X_train, X_test)

In [29]:
X_train

array([[ 0.78522493,  0.32015325,  0.77221097,  1.04726529],
       [-0.26563371, -1.29989934,  0.0982814 , -0.11996537],
       [ 0.43493872,  0.78302542,  0.94069336,  1.43634218],
       [-0.84944407,  0.78302542, -1.24957775, -1.28719604],
       [-0.38239578, -1.7627715 ,  0.15444219,  0.13941922],
       [ 0.55170079, -0.374155  ,  1.05301496,  0.7878807 ],
       [ 0.31817664, -0.14271892,  0.65988937,  0.7878807 ],
       [ 0.20141457, -0.374155  ,  0.43524618,  0.39880381],
       [-1.66677857, -0.14271892, -1.36189934, -1.28719604],
       [-0.14887164, -0.60559109,  0.21060299,  0.13941922],
       [-0.14887164, -1.06846325, -0.12636179, -0.24965767],
       [ 0.31817664, -0.60559109,  0.15444219,  0.13941922],
       [ 0.66846286, -0.83702717,  0.88453256,  0.91757299],
       [ 0.0846525 , -0.14271892,  0.77221097,  0.7878807 ],
       [-0.49915786, -0.14271892,  0.43524618,  0.39880381],
       [-0.26563371, -0.60559109,  0.65988937,  1.04726529],
       [ 2.18636979,  1.

In [30]:
X_test

array([[-0.14887164, -0.374155  ,  0.26676379,  0.13941922],
       [ 0.31817664, -0.60559109,  0.54756778,  0.00972692],
       [ 0.31817664, -1.06846325,  1.05301496,  0.26911151],
       [-1.5500165 , -1.7627715 , -1.36189934, -1.15750374],
       [ 0.0846525 ,  0.32015325,  0.60372857,  0.7878807 ],
       [ 0.78522493, -0.14271892,  0.99685416,  0.7878807 ],
       [-0.84944407,  1.70876975, -1.24957775, -1.15750374],
       [ 0.20141457, -0.14271892,  0.60372857,  0.7878807 ],
       [-0.38239578,  2.63451409, -1.30573855, -1.28719604],
       [-0.38239578, -1.29989934,  0.15444219,  0.13941922],
       [ 0.66846286,  0.08871717,  0.99685416,  0.7878807 ],
       [-0.38239578,  1.0144615 , -1.36189934, -1.28719604],
       [-0.49915786,  0.78302542, -1.13725615, -1.28719604],
       [ 0.43493872, -0.60559109,  0.60372857,  0.7878807 ],
       [ 0.55170079, -1.7627715 ,  0.37908538,  0.13941922],
       [ 0.55170079,  0.55158933,  0.54756778,  0.52849611],
       [-1.19973028,  0.

In [43]:
'''
아이디어:

one vs rest 방식으로 각 class에 대하여 0을 뱉는지 1을 뱉는지 계산한다.
모든 클래스에 대하여 그 합계가 1이 아닌 경우, decision_function 값이 최대인 class를 뽑는다.
'''

class MultiSVM:
    """One-vs-rest multiclass classifier built from one binary ``SVC`` per class.

    Each class gets its own binary SVM trained on that class's 0/1 indicator
    column. At prediction time, a sample that exactly one SVM claims (predicts
    1 for) is assigned to that class. Otherwise (no SVM or several SVMs claim
    it) the class whose SVM reports the largest ``decision_function`` value
    wins.

    Attributes:
        C: Regularization strength passed to every underlying ``SVC``.
        svms: Fitted binary classifiers, one per class, in column order.
    """

    def __init__(self, C):
        self.C = C
        self.svms = []

    @staticmethod
    def _label_columns(y):
        """Split a 2-D indicator matrix into a list of per-class label columns."""
        if hasattr(y, "iloc"):
            # pandas input: slice positionally so each column keeps its dtype.
            return [y.iloc[:, k] for k in range(y.shape[1])]
        labels = np.asarray(y)
        if labels.ndim != 2:
            raise ValueError(
                "y must be a 2-D one-hot indicator matrix with one column per class"
            )
        return [labels[:, k] for k in range(labels.shape[1])]

    def fit(self, X, y):
        """Train one binary SVM per column of the one-hot matrix ``y``.

        Args:
            X: Feature matrix of shape (n_samples, n_features).
            y: One-hot indicator matrix (DataFrame or 2-D array) with one
                0/1 column per class.
        """
        # Build a fresh list so that calling fit() again replaces the previous
        # models instead of leaving extra, never-fitted estimators behind.
        self.svms = [
            SVC(C=self.C).fit(X, column) for column in self._label_columns(y)
        ]

    def predict(self, X, y=None):
        """Return the predicted class index (column position) for each row of ``X``.

        Args:
            X: Feature matrix of shape (n_samples, n_features).
            y: Ignored. Accepted only for backward compatibility with callers
                that passed the label matrix; the number of classes is taken
                from the fitted models.

        Returns:
            1-D integer array of class indices, one per sample.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if not self.svms:
            raise RuntimeError("MultiSVM.predict() called before fit()")

        # Columns are classes, rows are samples.
        votes = np.column_stack([svm.predict(X) for svm in self.svms])
        margins = np.column_stack(
            [svm.decision_function(X) for svm in self.svms]
        )

        # Trust the vote only when exactly one classifier claims the sample;
        # otherwise fall back to the largest decision-function value.
        single_vote = votes.sum(axis=1) == 1
        return np.where(
            single_vote, votes.argmax(axis=1), margins.argmax(axis=1)
        )





In [44]:
# Build the one-vs-rest classifier with C=1.0.
msvm = MultiSVM(1.0)

# Train on the standardized training features and their one-hot labels.
msvm.fit(X_train, y_train)

In [51]:
# Predicted class indices for the test set.
y_hat = msvm.predict(X_test, y_test)

In [52]:
# Ground truth: position of the hot column in each one-hot test row.
gt = np.argmax(y_test.to_numpy(), axis = 1)

In [55]:
# Fraction of test samples whose predicted index matches the ground truth.
accuracy_rate = np.sum(y_hat == gt)/y_hat.shape[0]
accuracy_rate
# The MultiClass SVM built above reaches 96.7% accuracy on the Iris dataset.

0.9666666666666667