## Multiclass SVM 

### Import Module

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

### Load Data

In [2]:
iris =  sns.load_dataset('iris') 
X= iris.iloc[:,:4]
y = iris.iloc[:,-1]
print(y)

0         setosa
1         setosa
2         setosa
3         setosa
4         setosa
         ...    
145    virginica
146    virginica
147    virginica
148    virginica
149    virginica
Name: species, Length: 150, dtype: object


#### Scaling 

In [3]:
scal = StandardScaler()

X = scal.fit_transform(X)

In [4]:
y.value_counts()

setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64

### One vs Rest

- setosa or not
- veriscolor or not
- virginica or not

In [5]:
svm_1_ovr = SVC(kernel = 'rbf', C = 5, gamma = 5)
svm_2_ovr = SVC(kernel = 'rbf', C = 5, gamma = 5)
svm_3_ovr = SVC(kernel = 'rbf', C = 5, gamma = 5)

#### Split Data

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1212)

#### Encoding Target

In [7]:
y_train = pd.get_dummies(y_train)

In [8]:
y_train

Unnamed: 0,setosa,versicolor,virginica
1,1,0,0
13,1,0,0
53,0,1,0
4,1,0,0
126,0,0,1
...,...,...,...
105,0,0,1
141,0,0,1
101,0,0,1
66,0,1,0


#### Training Data

In [9]:
svm_1_ovr.fit(X_train, y_train.iloc[:,0])
svm_2_ovr.fit(X_train, y_train.iloc[:,1])
svm_3_ovr.fit(X_train, y_train.iloc[:,2])

SVC(C=5, gamma=5)

In [10]:
def one_rest_svm(models, data, labels): 
    distance = None 
    
    for model in models: 
        if distance is None:
            distance = model.decision_function(data) 
        else:
            distance = np.vstack((distance, model.decision_function(data))) 
            
    distance = distance.T
    
    result = [] 
    
    for pred in distance:
        result.append(labels[pred.argmax()])

    
    print(f"prediction : \n {result}")
    return result

models = [svm_1_ovr, svm_2_ovr, svm_3_ovr]
labels = ["setosa", "versicolor", "virginica"]

preds = one_rest_svm(models, X_test, labels)

accuracy_score(y_test, preds)

prediction : 
 ['setosa', 'versicolor', 'setosa', 'setosa', 'virginica', 'virginica', 'virginica', 'setosa', 'versicolor', 'versicolor', 'setosa', 'virginica', 'setosa', 'virginica', 'virginica', 'versicolor', 'versicolor', 'setosa', 'virginica', 'setosa', 'versicolor', 'setosa', 'setosa', 'versicolor', 'versicolor', 'setosa', 'virginica', 'setosa', 'versicolor', 'versicolor']


0.9333333333333333

- 0.9333.. 으로 잘 예측


### One vs One

- versicolor / virginica
- setosa / virginica
- setosa / versicolor 

#### Split Data

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1212)

In [12]:
no_set = y_train[y_train != 'setosa']
no_ver = y_train[y_train != 'versicolor']
no_vir = y_train[y_train != 'virginica']

In [13]:
x_no_set = X_train[y_train != 'setosa']
x_no_ver = X_train[y_train != 'versicolor']
x_no_vir = X_train[y_train != 'virginica']

#### Encoding Target

In [14]:
enc_no_set = pd.get_dummies(no_set, drop_first = True, columns = 'Verginica')
enc_no_ver = pd.get_dummies(no_ver, drop_first = True, columns = 'Verginica')
enc_no_vir = pd.get_dummies(no_vir, drop_first = True, columns = 'Setosa')

#### Training Data

In [15]:
svm_1_ovo = SVC(kernel = 'rbf', C = 5, gamma = 5)
svm_2_ovo = SVC(kernel = 'rbf', C = 5, gamma = 5)
svm_3_ovo = SVC(kernel = 'rbf', C = 5, gamma = 5)

In [16]:
svm_1_ovo.fit(x_no_set, no_set)
svm_2_ovo.fit(x_no_vir, no_vir)
svm_3_ovo.fit(x_no_ver, no_ver)

SVC(C=5, gamma=5)

In [22]:
def one_one_svm(models, data, labels):
    result = []
    
    for model in models: 
        pred = model.predict(data)
        result.append(pred)
    
    result = np.array(result).T 
    
    pred = []
    
   
    for row in range(len(result)):
        score = np.array([0, 0, 0])
        
        if result[row][0] == 1: 
            score[1] += 1
        elif result[row][0] == 0: 
            score[0] += 1
            
        if result[row][1] == 1: 
            score[2] += 1
        elif result[row][1] == 0:
            score[0] += 1
            
        if result[row][2] == 1: 
            score[2] += 1
        elif result[row][2] == 0:
            score[1] += 1
    
        pred.append(labels[score.argmax()]) 
    
    
    print(f"prediction : \n {result}")
    
    return pred

In [23]:
models = [svm_1_ovo, svm_2_ovo, svm_3_ovo]
labels = ['setosa', 'versicolor', 'virginica']

prediction = one_one_svm(models, X_test, labels)
accuracy_score(y_test, prediction)

prediction : 
 [['virginica' 'setosa' 'setosa']
 ['versicolor' 'versicolor' 'virginica']
 ['virginica' 'setosa' 'setosa']
 ['virginica' 'setosa' 'setosa']
 ['virginica' 'versicolor' 'virginica']
 ['virginica' 'versicolor' 'virginica']
 ['virginica' 'versicolor' 'virginica']
 ['virginica' 'setosa' 'setosa']
 ['versicolor' 'versicolor' 'virginica']
 ['versicolor' 'versicolor' 'virginica']
 ['virginica' 'setosa' 'setosa']
 ['virginica' 'versicolor' 'virginica']
 ['virginica' 'setosa' 'setosa']
 ['virginica' 'versicolor' 'virginica']
 ['virginica' 'versicolor' 'virginica']
 ['versicolor' 'versicolor' 'virginica']
 ['versicolor' 'versicolor' 'virginica']
 ['virginica' 'setosa' 'setosa']
 ['virginica' 'versicolor' 'virginica']
 ['virginica' 'setosa' 'setosa']
 ['versicolor' 'versicolor' 'virginica']
 ['virginica' 'setosa' 'virginica']
 ['virginica' 'setosa' 'setosa']
 ['versicolor' 'versicolor' 'virginica']
 ['versicolor' 'versicolor' 'virginica']
 ['virginica' 'setosa' 'setosa']
 ['virginic

0.4

- 0.4로 낮은 성능이 나옴.