# Multiclass SVM 구현

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the iris dataset
iris = sns.load_dataset('iris')
X = iris.loc[:, iris.columns[:4]]  # first four columns: features used for training
y = iris[iris.columns[-1]]  # last column: target labels
print(y)

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object


In [2]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=48,
)

In [3]:
def standardization(train, test):
    """Standardize two feature sets with a scaler fitted on ``train`` only.

    The ``StandardScaler`` learns its statistics from ``train`` and the same
    fitted scaler is then applied to both ``train`` and ``test``, so no
    information from ``test`` is used when fitting.

    Args:
        train: Training features; used to fit the scaler.
        test: Test features; transformed with the scaler fitted on ``train``.

    Returns:
        A ``(train_scaled, test_scaled)`` tuple.
    """
    fitted_scaler = StandardScaler().fit(train)
    return fitted_scaler.transform(train), fitted_scaler.transform(test)

# Replace the raw feature splits with their standardized versions.
X_train, X_test = standardization(train=X_train, test=X_test)

In [4]:
# Sanity check: print the shape of every split in one list.
print([X_train.shape, X_test.shape, y_train.shape, y_test.shape])

[(120, 4), (30, 4), (120,), (30,)]


In [5]:
# Inspect the container type of X_test after it was reassigned from standardization().
type(X_test)

numpy.ndarray

## One vs Rest

In [6]:
class OVR_SVM:
    """One-vs-rest multiclass classifier assembled from binary RBF-kernel SVCs.

    ``fit`` trains one binary ``SVC`` per class (that class against all the
    others). ``predict`` assigns every sample to the class whose binary
    classifier returns the largest ``decision_function`` value.
    """

    def __init__(self, n_classes):
        """Create an unfitted model.

        Args:
            n_classes: Number of binary classifiers to train. It is expected
                to equal the number of distinct labels passed to ``fit``.
        """
        self.n_classes = n_classes
        self.classifier = []  # fitted binary SVCs, one per class
        self.classes_ = []    # label belonging to each entry of self.classifier
        self.y_pred = []      # predictions from the most recent predict() call

    def one_hot(self, y):
        """Return the one-hot (indicator) encoding of ``y`` via ``pd.get_dummies``.

        For iris, the three species become three indicator columns.
        """
        return pd.get_dummies(y)

    def fit(self, X_train, y_train, C=5, gamma=5):
        """Train one binary RBF ``SVC`` per class.

        Args:
            X_train: Training features.
            y_train: Training labels; one-hot encoded internally.
            C: Regularization parameter forwarded to every ``SVC``.
            gamma: RBF kernel coefficient forwarded to every ``SVC``.

        Raises:
            ValueError: If ``n_classes`` is larger than the number of distinct
                labels found in ``y_train``.
        """
        y_one_hot = self.one_hot(y_train)
        n_labels = y_one_hot.shape[1]
        if self.n_classes > n_labels:
            raise ValueError(
                'n_classes={} but y_train contains only {} distinct labels'.format(
                    self.n_classes, n_labels
                )
            )

        # Start from a clean slate so that calling fit() again does not leave
        # classifiers from a previous fit at the front of the list.
        self.classifier = []
        self.classes_ = []

        # One binary problem per class: indicator column i is "class i vs. rest".
        for i in range(self.n_classes):
            clf = SVC(kernel='rbf', C=C, gamma=gamma)
            clf.fit(X_train, y_one_hot.iloc[:, i])
            self.classifier.append(clf)
            # Remember the label of this column so predict() can map scores
            # back to labels without hard-coding class names.
            self.classes_.append(y_one_hot.columns[i])

    def predict(self, X_test):
        """Predict a label for every row of ``X_test`` using the fitted classifiers.

        Args:
            X_test: Features to classify.

        Returns:
            A single-column ``pd.DataFrame`` (column ``0``) of predicted
            labels. The same object is stored in ``self.y_pred`` for
            ``evaluate``.

        Raises:
            RuntimeError: If no classifiers have been fitted yet.
        """
        if not self.classifier:
            raise RuntimeError('fit must be called before predict')

        if len(X_test) == 0:
            # Nothing to score; mirror the empty frame an empty loop would give.
            self.y_pred = pd.DataFrame([])
            return self.y_pred

        # Evaluate each binary classifier once over the whole test set
        # (one column of scores per class) instead of once per sample.
        scores = np.column_stack(
            [clf.decision_function(X_test) for clf in self.classifier]
        )
        # argmax returns the first maximal index, so ties go to the earlier class.
        winners = np.argmax(scores, axis=1)
        labels = [self.classes_[k] for k in winners]

        # Rebuild y_pred from scratch so repeated predict() calls are independent.
        self.y_pred = pd.DataFrame(labels)
        return self.y_pred

    def evaluate(self, y_test):
        """Print the accuracy of the most recent predictions against ``y_test``.

        Args:
            y_test: True labels, in the same order as the rows given to
                ``predict``.
        """
        # NOTE: the message text (including its spelling) is kept unchanged so
        # previously recorded output stays identical.
        print('Accuacy : {: .5f}'.format(accuracy_score(y_test, self.y_pred)))
        
        
        

In [9]:
# Train the one-vs-rest model on the training split, then predict the test split.
onevsrest = OVR_SVM(n_classes=3)
onevsrest.fit(X_train, y_train)
y_pred_rest = onevsrest.predict(X_test)
y_pred_rest

Unnamed: 0,0
0,versicolor
1,versicolor
2,versicolor
3,virginica
4,virginica
5,virginica
6,setosa
7,virginica
8,setosa
9,versicolor


In [10]:
# Print the accuracy of the stored one-vs-rest predictions against the test labels.
onevsrest.evaluate(y_test)


Accuacy :  0.86667


30