In [1]:
#make sure you have read binary svm before moving onto multiclass
# https://github.com/tattooday/machine-learning/blob/master/binary%20support%20vector%20machine.ipynb

import cvxopt.solvers
import pandas as pd
import numpy as np
import copy
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
import os
os.chdir('d:/python/data')

In [2]:
#plz refer to binary svm for this function
def binary_svm(x_train,y_train,kernel='linear',poly_constant=0.0,poly_power=1,gamma=5):

    y_product=np.outer(y_train,y_train)
    
    if kernel=='linear':
        x_product=np.outer(x_train,x_train)
    elif kernel=='polynomial':
        temp=np.outer(x_train,x_train)
        x_product=np.apply_along_axis(lambda x:(x+poly_constant)**poly_power,0,temp.ravel()).reshape(temp.shape)
    else:
        temp=np.mat([i-j for j in x_train for i in x_train]).reshape(len(x_train),len(x_train))
        x_product=np.apply_along_axis(lambda x:np.exp(-1*gamma*(np.linalg.norm(x))**2),0,temp.ravel()).reshape(temp.shape)
    
    P=cvxopt.matrix(x_product*y_product)
    q=cvxopt.matrix(-1*np.ones(len(x_train)))
    G=cvxopt.matrix(np.diag(-1 * np.ones(len(x_train))))
    h=cvxopt.matrix(np.zeros(len(x_train)))
    A=cvxopt.matrix(y_train,(1,len(x_train)))
    b=cvxopt.matrix(0.0)

    solution=cvxopt.solvers.qp(P, q, G, h, A, b)
    alpha=pd.Series(solution['x'])
    w=np.sum(alpha*y_train*x_train)

    b=-(min(x_train[y_train==1.0]*w)+max(x_train[y_train==-1.0]*w))/2

    return w,b    

In [3]:
#first, one vs one multiclass svm
#given n classes, we do n*(n-1)/2 times binary classification as one vs one
#we would obtain w and b for each binary classification
#when we make a prediction, we use each w and b to get the classification
#now that we have a classification list of n*(n-1)/2
#we just select the value with the most frequency in the list
#that would be our prediction, voila!
def get_accuracy_ovo(train,test,**kwargs):
    
    #calculate w and b for each binary classification
    multiclass=train['y'].drop_duplicates().tolist()
    multiclass_params={}
    for i in range(len(multiclass)):
        for j in range(i+1,len(multiclass)):
            data=copy.deepcopy(train)
            temp=np.select([data['y']==multiclass[i],data['y']==multiclass[j]], \
                            [-1.0,1.0],default=0.0)
            data['y']=temp
            data=data[data['y']!=0.0]
            multiclass_params['{},{}'.format(multiclass[i], \
                                             multiclass[j])]=binary_svm(data['x'], \
                                                                        data['y'], \
                                                                        **kwargs)
            
    result=[]
    
    #store all the predictions in one list
    #and select the value with the most frequency in this list
    predict=[]
    for i in train['x']:
        temp=[]
        for j in multiclass_params:
            w=multiclass_params[j][0]
            b=multiclass_params[j][1]
            value=np.sign(np.multiply(w,i)+b)
            temp.append(j.split(',')[0] if value==-1.0 else j.split(',')[1])
        
        predict.append(max(set(temp), key=temp.count))
        
    predict=pd.Series(predict).apply(int)
    result.append('train accuracy: %.2f'%(
        len(predict[predict==train['y']])/len(predict)*100)+'%')
    
    
    #kinda the same as training sample prediction
    predict=[]
    for i in test['x']:
        temp=[]
        for j in multiclass_params:
            w=multiclass_params[j][0]
            b=multiclass_params[j][1]
            value=np.sign(np.multiply(w,i)+b)
            temp.append(j.split(',')[0] if value==-1 else j.split(',')[1])
            
        predict.append(max(set(temp), key=temp.count))

    predict=pd.Series(predict).apply(int)
    
    result.append('test accuracy: %.2f'%(
        len(predict[predict==test['y']])/len(predict)*100)+'%')
    
    return result

In [4]:
#alternatively, one vs rest multiclass svm
#given n classes, we do n times binary classification as one vs rest
#we would obtain w and b for each binary classification
#when we make a prediction, we use each w and b to get the decision function value
#we select the classifier with the maximum decision function value
#that classifier would return +1.0 and we would take it as the result
def get_accuracy_ovr(train,test,**kwargs):
    
    multiclass=train['y'].drop_duplicates()
    multiclass_params={}
    
    #calculate w and b for each binary classification
    for i in multiclass:
        data=copy.deepcopy(train)
        data['y']=np.where(data['y']==i,1.0,-1.0)
        multiclass_params[i]=binary_svm(data['x'],data['y'],**kwargs)

    result=[]
        
    #store all the decision function values in one list
    #and select the classifier which gives the largest value
    predict=[]
    for i in train['x']:
        max_value=float('-inf')
        idx=0
        for j in multiclass_params:
            w=multiclass_params[j][0]
            b=multiclass_params[j][1]
            value=np.multiply(w,i)+b
            if value>max_value:
                max_value=value
                idx=j
    
        predict.append(idx)
    
    predict=pd.Series(predict).apply(int)
    result.append('train accuracy: %.2f'%(
        len(predict[predict==train['y']])/len(predict)*100)+'%')
    
    #kinda the same as training sample prediction
    predict=[]
    for i in test['x']:
        max_value=float('-inf')
        idx=0
        for j in multiclass_params:
            w=multiclass_params[j][0]
            b=multiclass_params[j][1]
            value=np.multiply(w,i)+b
            if value>max_value:
                max_value=value
                idx=j
    
        predict.append(idx)

    predict=pd.Series(predict).apply(int)
    result.append('test accuracy: %.2f'%(
        len(predict[predict==test['y']])/len(predict)*100)+'%')

    return result

In [5]:
#using official sklearn package with the same parameters
def skl_multiclass_svm(x_train,x_test,y_train,y_test,**kwargs):
    
    m=SVC(**kwargs).fit(np.array(x_train).reshape(-1, 1), \
                        np.array(y_train).ravel())
    
    train=m.score(np.array(x_train).reshape(-1, 1), \
                  np.array(y_train).ravel())*100
    test=m.score(np.array(x_test).reshape(-1, 1), \
                 np.array(y_test).ravel())*100
    
    print('\ntrain accuracy: %s'%(train)+'%')
    print('\ntest accuracy: %s'%(test)+'%')

In [6]:
df=pd.read_csv('iris.csv')

In [7]:
df['y']=np.select([df['type']=='Iris-setosa', \
                   df['type']=='Iris-versicolor', \
                   df['type']=='Iris-virginica'],[1,2,3])

In [8]:
#for the simplicity, let us reduce the dimension of x to 1
temp=pd.concat([df[i] for i in df.columns if 'length' in i or 'width' in i],axis=1)
x=PCA(n_components=1).fit_transform(temp)

In [9]:
x=pd.Series([x[i].item() for i in range(len(x))])
y=df['y']

In [10]:
#train test split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)

In [11]:
#crucial!!!!
#or we would get errors in the next step
x_test.reset_index(inplace=True,drop=True)
y_test.reset_index(inplace=True,drop=True)
x_train.reset_index(inplace=True,drop=True)
y_train.reset_index(inplace=True,drop=True)

In [12]:
train=pd.DataFrame({'x':x_train,'y':y_train})
test=pd.DataFrame({'x':x_test,'y':y_test})

In [13]:
ovr=get_accuracy_ovr(train,test)

     pcost       dcost       gap    pres   dres
 0: -8.6660e+01 -2.0944e+02  1e+02  6e-15  2e+00
 1: -1.8258e+02 -1.8562e+02  3e+00  1e-14  1e+00
 2: -1.1211e+04 -1.1213e+04  2e+00  2e-12  1e+00
 3: -6.4122e+07 -6.4122e+07  1e+02  3e-09  1e+00
 4: -3.9742e+10 -3.9742e+10  7e+04  4e-06  1e+00
Terminated (singular KKT matrix).
     pcost       dcost       gap    pres   dres
 0: -1.4006e+01 -2.7977e+01  4e+02  2e+01  2e+00
 1: -1.7856e+01 -1.2674e+01  1e+02  7e+00  6e-01
 2: -9.4617e+00 -4.3958e+00  8e+01  3e+00  3e-01
 3: -2.7741e-01 -1.5962e+00  3e+00  7e-02  7e-03
 4: -4.7994e-01 -9.1325e-01  6e-01  9e-03  9e-04
 5: -5.8552e-01 -9.7564e-01  5e-01  5e-03  5e-04
 6: -8.8473e-01 -8.9714e-01  1e-02  3e-05  3e-06
 7: -8.9440e-01 -8.9453e-01  1e-04  3e-07  3e-08
 8: -8.9450e-01 -8.9450e-01  1e-06  3e-09  3e-10
 9: -8.9450e-01 -8.9450e-01  1e-08  3e-11  3e-12
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0: -4.4180e+01 -9.5985e+01  3e+02  1e+01  2e+00
 1: -1.0652e+0

In [14]:
ovo=get_accuracy_ovo(train,test)

     pcost       dcost       gap    pres   dres
 0: -4.6082e+00 -8.8512e+00  2e+02  1e+01  2e+00
 1: -4.8162e+00 -2.9306e+00  3e+01  2e+00  3e-01
 2: -1.2799e-01 -1.5401e+00  1e+00  4e-15  3e-15
 3: -5.2294e-01 -9.0210e-01  4e-01  7e-16  1e-15
 4: -6.5269e-01 -9.6697e-01  3e-01  7e-16  8e-16
 5: -8.8851e-01 -8.9732e-01  9e-03  5e-17  1e-15
 6: -8.9444e-01 -8.9453e-01  9e-05  4e-16  1e-15
 7: -8.9450e-01 -8.9450e-01  9e-07  4e-16  1e-15
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0: -2.8624e+01 -7.6697e+01  3e+02  2e+01  3e+00
 1: -8.8730e+01 -1.7757e+02  3e+02  1e+01  2e+00
 2: -5.2039e+02 -6.7558e+02  2e+02  8e+00  1e+00
 3: -1.4043e+03 -1.6678e+03  3e+02  7e+00  1e+00
 4: -2.2960e+03 -2.6846e+03  4e+02  7e+00  1e+00
 5: -5.2025e+03 -5.9217e+03  7e+02  7e+00  1e+00
 6: -5.4043e+03 -6.1478e+03  8e+02  7e+00  1e+00
 7: -2.6778e+04 -2.8690e+04  2e+03  6e+00  1e+00
 8: -2.5261e+05 -2.6048e+05  8e+03  6e+00  1e+00
 9: -7.1411e+06 -7.1801e+06  4e+04  6e+00  1e+0

In [15]:
print('one vs rest self implementation')
for i in ovr:
    print('\n',i)

one vs rest self implementation

 train accuracy: 60.00%

 test accuracy: 44.44%


In [16]:
#normally ovo should work better than ovr 
#as time complexity of ovo is higher
#n*(n-1)/2>n
print('one vs one self implementation')
for i in ovo:
    print('\n',i)

one vs one self implementation

 train accuracy: 90.48%

 test accuracy: 95.56%


In [17]:
print('one vs rest sklearn')
skl_multiclass_svm(x_train,x_test,y_train,y_test,kernel='linear',decision_function_shape='ovr')

one vs rest sklearn

train accuracy: 95.23809523809523%

test accuracy: 88.88888888888889%


In [18]:
print('one vs one sklearn')
skl_multiclass_svm(x_train,x_test,y_train,y_test,kernel='linear',decision_function_shape='ovo')

one vs one sklearn

train accuracy: 95.23809523809523%

test accuracy: 88.88888888888889%
