# Multiclass SVM 구현

In [90]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from itertools import combinations

In [91]:
# Load the iris dataset
iris =  sns.load_dataset('iris') 
X= iris.iloc[:,:4] # features to train on (the first four columns)
y = iris.iloc[:,-1] # target (the last column)
print(y)

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object


In [92]:
# Hold out 20% of the samples as the test split; random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=48)

In [93]:
def standardization(train, test):
    """Standardize both splits using statistics learned from ``train`` only.

    A ``StandardScaler`` is fitted on ``train`` and then applied to both
    ``train`` and ``test``, so nothing from the test split influences the
    scaling parameters.

    Args:
        train: Training features the scaler is fitted on.
        test: Held-out features, transformed with the training statistics.

    Returns:
        A ``(train, test)`` tuple holding the transformed splits.
    """
    fitted_scaler = StandardScaler().fit(train)
    scaled_train = fitted_scaler.transform(train)
    scaled_test = fitted_scaler.transform(test)
    return scaled_train, scaled_test

# Replace both feature splits with their standardized versions (the scaler is fitted on X_train only).
X_train, X_test = standardization(X_train, X_test)

In [94]:
# Inspect the standardized training features.
X_train

array([[ 0.78522493,  0.32015325,  0.77221097,  1.04726529],
       [-0.26563371, -1.29989934,  0.0982814 , -0.11996537],
       [ 0.43493872,  0.78302542,  0.94069336,  1.43634218],
       [-0.84944407,  0.78302542, -1.24957775, -1.28719604],
       [-0.38239578, -1.7627715 ,  0.15444219,  0.13941922],
       [ 0.55170079, -0.374155  ,  1.05301496,  0.7878807 ],
       [ 0.31817664, -0.14271892,  0.65988937,  0.7878807 ],
       [ 0.20141457, -0.374155  ,  0.43524618,  0.39880381],
       [-1.66677857, -0.14271892, -1.36189934, -1.28719604],
       [-0.14887164, -0.60559109,  0.21060299,  0.13941922],
       [-0.14887164, -1.06846325, -0.12636179, -0.24965767],
       [ 0.31817664, -0.60559109,  0.15444219,  0.13941922],
       [ 0.66846286, -0.83702717,  0.88453256,  0.91757299],
       [ 0.0846525 , -0.14271892,  0.77221097,  0.7878807 ],
       [-0.49915786, -0.14271892,  0.43524618,  0.39880381],
       [-0.26563371, -0.60559109,  0.65988937,  1.04726529],
       [ 2.18636979,  1.

In [95]:
# Inspect the standardized test features.
X_test

array([[-0.14887164, -0.374155  ,  0.26676379,  0.13941922],
       [ 0.31817664, -0.60559109,  0.54756778,  0.00972692],
       [ 0.31817664, -1.06846325,  1.05301496,  0.26911151],
       [-1.5500165 , -1.7627715 , -1.36189934, -1.15750374],
       [ 0.0846525 ,  0.32015325,  0.60372857,  0.7878807 ],
       [ 0.78522493, -0.14271892,  0.99685416,  0.7878807 ],
       [-0.84944407,  1.70876975, -1.24957775, -1.15750374],
       [ 0.20141457, -0.14271892,  0.60372857,  0.7878807 ],
       [-0.38239578,  2.63451409, -1.30573855, -1.28719604],
       [-0.38239578, -1.29989934,  0.15444219,  0.13941922],
       [ 0.66846286,  0.08871717,  0.99685416,  0.7878807 ],
       [-0.38239578,  1.0144615 , -1.36189934, -1.28719604],
       [-0.49915786,  0.78302542, -1.13725615, -1.28719604],
       [ 0.43493872, -0.60559109,  0.60372857,  0.7878807 ],
       [ 0.55170079, -1.7627715 ,  0.37908538,  0.13941922],
       [ 0.55170079,  0.55158933,  0.54756778,  0.52849611],
       [-1.19973028,  0.

In [96]:
# Distinct class labels found in the training targets.
classes = y_train.unique()
classes

array(['virginica', 'versicolor', 'setosa'], dtype=object)

In [97]:
# Number of distinct classes; determines how many pairwise classifiers are built below.
num_classes = len(classes)
# Hyperparameters shared by every pairwise SVC: kernel type, regularization
# strength C, and kernel coefficient gamma.
kernel = 'rbf'
C = 1
gamma = 1

In [98]:
# Order within a pair does not matter, so count combinations rather than
# permutations: K * (K - 1) / 2 pairwise classifiers for K classes.
Cases = (num_classes - 1) * num_classes // 2

# One slot per class pair: 'class' is filled in during training, 'clf' is a
# fresh, unfitted SVC sharing the hyperparameters defined above.
Clfs = []
for _ in range(Cases):
    Clfs.append({'class': None, 'clf': SVC(kernel=kernel, gamma=gamma, C=C)})

In [99]:
# Fit one binary SVC per unordered pair of classes (one-vs-one scheme).
i = 0
grid = []  # class pairs, in the same order as the classifiers in Clfs
for pair in combinations(classes, 2):
    first_label, second_label = pair
    # Boolean mask selecting the training rows that belong to either class of the pair.
    in_pair = (y_train == first_label) | (y_train == second_label)
    slot = Clfs[i]
    slot['clf'].fit(X_train[in_pair], y_train[in_pair])
    slot['class'] = pair
    grid.append(pair)
    i += 1

In [100]:
# Collect, for every pairwise classifier, its predicted labels and its signed
# decision values on the test split (one list entry per classifier).
pairwise_votes = [entry['clf'].predict(X_test) for entry in Clfs]
pairwise_margins = [entry['clf'].decision_function(X_test) for entry in Clfs]

# Transpose so that rows are test samples and columns are classifiers.
preds_df = pd.DataFrame(pairwise_votes).T
decisions = pd.DataFrame(pairwise_margins).T
# Label each decision column with the class pair its classifier was trained on.
decisions.columns = grid
decisions

Unnamed: 0,"(virginica, versicolor)","(virginica, setosa)","(versicolor, setosa)"
0,-0.623474,0.429296,0.891338
1,-0.477598,0.632637,1.071622
2,-0.199587,0.884016,1.232032
3,-0.816122,-0.476538,-0.152223
4,0.050279,0.708855,0.927655
5,0.383792,1.015004,1.137268
6,-0.455297,-1.059373,-1.079478
7,0.006216,0.82863,1.091255
8,-0.208432,-0.894255,-0.926495
9,-0.802101,0.462727,1.018599


In [101]:
## Final prediction
# Majority vote over the pairwise classifiers; when the vote is tied, the
# class with the largest accumulated decision-function margin wins.
predictions = []
for i in range(len(preds_df)):
    # Votes per class for this sample, sorted by count in descending order.
    votes = preds_df.iloc[i].value_counts()

    # Exactly one class leads the vote. The length check also covers the case
    # where every classifier voted for the same class (e.g. a two-class
    # problem), in which there is no second entry to compare against.
    if len(votes) == 1 or votes.iloc[0] > votes.iloc[1]:
        label = votes.index[0]

    # Two or more classes share first place: break the tie with the margins.
    else:
        decision_for_row = {key : 0 for key in classes}
        # decisions has one column per entry of Clfs, in the same order.
        for svm, d in zip(Clfs, decisions.iloc[i]):
            # For a binary SVC a positive decision value favours
            # clf.classes_[1] and a non-positive one favours clf.classes_[0].
            # classes_ holds the sorted labels, so take the mapping from the
            # fitted classifier instead of assuming it matches the order of
            # the pair produced by combinations().
            negative_label, positive_label = svm['clf'].classes_
            if d > 0:
                decision_for_row[positive_label] += d
            else:
                decision_for_row[negative_label] -= d
        # max() returns the first maximal item, matching a stable descending sort.
        label = max(decision_for_row.items(), key = lambda x : x[1])[0]

    predictions.append(label)

In [102]:
# Ground-truth labels of the test split, for comparison with the predictions.
y_test

96     versicolor
73     versicolor
134     virginica
41         setosa
70     versicolor
116     virginica
19         setosa
138     virginica
33         setosa
89     versicolor
137     virginica
36         setosa
20         setosa
126     virginica
87     versicolor
56     versicolor
11         setosa
62     versicolor
72     versicolor
120     virginica
8          setosa
147     virginica
77     versicolor
86     versicolor
129     virginica
4          setosa
31         setosa
136     virginica
132     virginica
88     versicolor
Name: species, dtype: object

In [103]:
# Predicted labels, one per row of X_test and in the same order.
predictions

['versicolor',
 'versicolor',
 'versicolor',
 'setosa',
 'virginica',
 'virginica',
 'setosa',
 'virginica',
 'setosa',
 'versicolor',
 'virginica',
 'setosa',
 'setosa',
 'virginica',
 'versicolor',
 'virginica',
 'setosa',
 'versicolor',
 'versicolor',
 'virginica',
 'setosa',
 'virginica',
 'virginica',
 'virginica',
 'virginica',
 'setosa',
 'setosa',
 'virginica',
 'virginica',
 'versicolor']

In [104]:
# Fraction of test samples whose predicted label equals the true label.
accuracy_score(y_test, predictions)

0.8333333333333334