In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import cv2
import random
#from keras.preprocessing import image

## Image Processing Pipeline

In [2]:
def getImageData(path):
    """Load every ``*.jpg`` found in the class folders directly under ``path``.

    Each image is read with OpenCV, converted from BGR to RGB, resized to
    40x40 and finally flattened to one row per image. The rows and their
    labels are shuffled together with the ``random`` module, so calling
    ``random.seed`` beforehand makes the order reproducible.

    A folder is mapped to a class when its name is a key of ``label_dict``
    either as-is or after dropping its last character (``cats`` -> ``cat``).
    Plain files, folders that match no class (for example
    ``.ipynb_checkpoints``) and images OpenCV cannot decode are skipped.

    Parameters
    ----------
    path : str or Path
        Directory that contains one sub-folder per class.

    Returns
    -------
    image_data : ndarray, shape (n_images, 40*40*3)
    labels : ndarray, shape (n_images,)
    label_dict : dict mapping class name -> integer label

    Raises
    ------
    ValueError
        If no readable image was found under ``path``.
    """
    label_dict={'cat':0,'dog':1,'horse':2,'human':3}
    labels=[]
    image_data=[]

    # sorted() removes the dependence on filesystem listing order, so a seeded
    # shuffle below always produces the same result.
    for folder in sorted(Path(path).glob('*')):
        if not folder.is_dir():
            continue

        # folder.name works with any path separator, unlike splitting on '/'.
        name=folder.name
        label=name if name in label_dict else name[:-1]
        if label not in label_dict:
            continue

        for img_path in sorted(folder.glob('*.jpg')):
            img=cv2.imread(str(img_path))
            if img is None:
                # imread signals an unreadable/corrupt file by returning None.
                continue
            img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
            img=cv2.resize(img,(40,40))

            image_data.append(img)
            labels.append(label_dict[label])

    if not image_data:
        raise ValueError("no readable .jpg images found under {!r}".format(str(path)))

    image_data=np.array(image_data)
    labels=np.array(labels)

    # Shuffle images and labels with one shared permutation.
    order=list(range(len(labels)))
    random.shuffle(order)
    image_data=image_data[order]
    labels=labels[order]

    return image_data.reshape(image_data.shape[0],-1),labels,label_dict

def generateClasswiseData(X,Y):
    """Group the rows of ``X`` by their label in ``Y``.

    Returns a dict with one key per integer in ``range(k)``, where ``k`` is
    the number of distinct values in ``Y``; each value is an array holding
    the rows of ``X`` that carry that label. Labels are therefore expected
    to be exactly 0..k-1.
    """
    class_count=len(np.unique(Y))
    buckets={label:[] for label in range(class_count)}

    for idx in range(X.shape[0]):
        buckets[Y[idx]].append(X[idx])

    return {label:np.array(rows) for label,rows in buckets.items()}


## one-vs-one
def getPairData(d1,d2):
    """Stack two class matrices into one binary training set.

    Rows of ``d1`` come first and are labelled +1, rows of ``d2`` follow and
    are labelled -1. Both returned arrays are float arrays.
    """
    n_pos,n_neg=d1.shape[0],d2.shape[0]
    n_features=d1.shape[1]

    data=np.zeros((n_pos+n_neg,n_features))
    data[:n_pos]=d1
    data[n_pos:]=d2

    labels=np.concatenate((np.ones(n_pos),-np.ones(n_neg)))

    return data,labels

def drawImg(img):
    """Display one flattened image as a 40x40 RGB picture without axes."""
    picture=img.reshape(40,40,3)
    plt.imshow(picture)
    plt.axis('off')
    plt.show()

## SVM Classifier

In [4]:
class SVM:
    """Linear soft-margin SVM trained with mini-batch sub-gradient descent.

    The objective is ``0.5*||W||^2 + C * sum_i max(0, 1 - y_i*(W.x_i + b))``
    with labels ``y_i`` in {-1, +1}.
    """

    def __init__(self,C=1.0):
        self.C=C     # weight of the hinge (margin violation) term
        self.W_=0    # weights of the last fit, shape (1, n_features) once fitted
        self.b_=0    # bias of the last fit

    def hingeLoss(self,X,Y,W,b):
        """Return the objective value for weights ``W`` (shape (1, n)) and bias ``b``.

        ``X`` has shape (m, n); ``Y`` holds the m labels.
        """
        X=np.asarray(X,dtype=float)
        # Flatten so a column-vector Y cannot broadcast into an (m, m) matrix.
        Y=np.asarray(Y,dtype=float).reshape(-1)

        margins=Y*(np.dot(X,W.T).reshape(-1)+b)

        loss=0.5*np.dot(W,W.T)[0][0]
        loss+=self.C*np.sum(np.maximum(0,1-margins))
        return loss

    def fit(self,X,Y,batch_size=120,learning_rate=0.001,max_itr=400):
        """Train on ``X`` (m samples x n features) with labels ``Y``.

        Every iteration records the full-data loss, reshuffles the sample
        order with ``np.random.shuffle`` and then performs one update per
        mini-batch of at most ``batch_size`` samples.

        Returns ``(W, b, losses)``: the weights of shape (1, n), the bias and
        the list of ``max_itr`` loss values (each measured before that
        iteration's updates). ``W`` and ``b`` are also stored on ``self.W_``
        and ``self.b_``.
        """
        X=np.asarray(X,dtype=float)
        Y=np.asarray(Y,dtype=float).reshape(-1)
        m,n=X.shape    # m = no. of samples, n = no. of features

        W=np.zeros((1,n))
        b=0

        losses=[]

        for _ in range(max_itr):
            losses.append(self.hingeLoss(X,Y,W,b))

            # visit the samples in a fresh random order each iteration
            ids=np.arange(m)
            np.random.shuffle(ids)

            for batch_start in range(0,m,batch_size):
                # slicing clips at m, so the last batch may be shorter
                batch=ids[batch_start:batch_start+batch_size]
                xb,yb=X[batch],Y[batch]

                # W and b are fixed within a batch, so all margins can be
                # computed at once.
                margins=yb*(np.dot(xb,W.T).reshape(-1)+b)
                # Samples with margin > 1 contribute nothing to the gradient.
                active=~(margins>1)

                gradw=self.C*np.dot(yb[active],xb[active])
                gradb=self.C*np.sum(yb[active])

                W=W-learning_rate*(W-gradw)
                b=b+learning_rate*gradb

        self.W_=W
        self.b_=b

        return self.W_,self.b_,losses

## Training Data

In [5]:
def train(X,Y,learning_rate=0.0000001,max_itr=1000):
    """Fit one linear SVM for every unordered pair of classes (one-vs-one).

    Parameters
    ----------
    X, Y : feature rows and their integer labels, passed to
        ``generateClasswiseData``.
    learning_rate, max_itr : forwarded to ``SVM.fit`` for every pair.

    Returns
    -------
    dict
        ``classifiers[i][j] == (W, b)`` for every ``i < j``; in that model
        class ``i`` is the +1 side and class ``j`` the -1 side. Every class
        index is present as an outer key, the last one with an empty dict.
    """
    data=generateClasswiseData(X,Y)
    # Both loops are bounded by the classes actually present in the data,
    # not by a global label table.
    n_classes=len(data)

    svc=SVM()
    svm_classifiers={}
    for i in range(n_classes):
        svm_classifiers[i]={}
        for j in range(i+1,n_classes):
            x,y=getPairData(data[i],data[j])
            wts,b,_=svc.fit(x,y,learning_rate=learning_rate,max_itr=max_itr)
            svm_classifiers[i][j]=(wts,b)

    return svm_classifiers

def predict(X,svm_classifiers):
    """Classify one sample by majority vote over the pairwise classifiers.

    For each pair ``i < j`` the stored ``(W, b)`` is evaluated on the sample;
    a non-negative value is a vote for ``i``, otherwise for ``j``. The index
    with the most votes is returned (lowest index wins ties).
    """
    column=X.reshape(-1,1)
    n_classes=len(svm_classifiers)
    votes=np.zeros(n_classes,)

    for first in range(n_classes):
        for second in range(first+1,n_classes):
            weights,bias=svm_classifiers[first][second][0],svm_classifiers[first][second][1]
            decision=np.dot(weights,column)+bias
            winner=first if decision>=0 else second
            votes[winner]+=1

    return np.argmax(votes)

def score(X,Y,classifiers=None):
    """Return the fraction of rows in ``X`` whose prediction equals ``Y``.

    Parameters
    ----------
    X, Y : samples (one per row) and their true labels.
    classifiers : optional pairwise-classifier dict to evaluate. When
        omitted, the module-level ``svm_classifiers`` is used, which keeps
        existing ``score(X, Y)`` calls working.

    Raises
    ------
    ZeroDivisionError
        If ``X`` has no rows.
    """
    if classifiers is None:
        # Backward-compatible fallback to the notebook-level variable.
        classifiers=svm_classifiers

    correct=0
    for i in range(X.shape[0]):
        if Y[i]==predict(X[i],classifiers):
            correct+=1

    return correct/X.shape[0]
# svc=svm.SVC(kernel='poly',gamma='auto')
# svc.fit(image_data,image_labels)
# svc.score(image_data,image_labels)

In [7]:
# Load the flattened images and their integer labels from the local 'images'
# folder, then fit the pairwise SVMs on all of them.
# score() looks up `svm_classifiers` as a global, so this cell must run first.
image_data,image_labels,label_dict= getImageData('images')
svm_classifiers= train(image_data,image_labels)

In [145]:
# Accuracy on the same images the classifiers were trained on (no held-out split).
print(score(image_data,image_labels))

0.6534653465346535


## Scikit-learn

In [6]:
# scikit-learn baseline: SVC estimator and grid search over its hyper-parameters.
from sklearn import svm
from sklearn.model_selection import GridSearchCV

In [12]:
# params=[
#     {
#         'kernel': ['linear','rbf','poly','sigmoid'],
#         'C':[0.1,1.0,2.0,5.0]
#     }
# ]
# gs=GridSearchCV(estimator=svm.SVC(gamma='auto'),param_grid=params,cv=5,n_jobs=4,scoring='accuracy')
# gs.fit(image_data,image_labels)
# print(gs.best_estimator_)
# print(gs.best_score_)

SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
0.349009900990099


In [15]:
# Baseline: scikit-learn linear-kernel SVC, fitted and scored on the same data
# (training accuracy, no held-out split).
svc=svm.SVC(kernel='linear',C=1.0)
svc.fit(image_data,image_labels)
print(svc.score(image_data,image_labels))

0.6757425742574258


In [8]:
# OneVsOneSVM comes from the local module `multiclassSVM`, which is not shown
# in this notebook — presumably a packaged version of the one-vs-one
# classifier built above; confirm against that module.
# Fitted and scored on the same data (training accuracy).
from multiclassSVM import OneVsOneSVM
svc=OneVsOneSVM()
svc.fit(image_data,image_labels)
svc.score(image_data,image_labels)

0.6150990099009901