In [2]:
import cvxopt
import numpy as np
import os
from PIL import Image
import time
from sklearn.svm import SVC as svm
C = 1

In [3]:
def resize(img) :
    img = np.array(img.resize((16,16)))
    return img.reshape(img.shape[0]*img.shape[1]*img.shape[2])/255

def loadClass(path):
    images = []
    for i in os.listdir(path):
        images.append(resize(Image.open(os.path.join(path,i))))
    return np.array(images)

def loadSVMData(c1,c2):
    class0 = loadClass(c1)
    label0 = (-1)*np.ones((class0.shape[0],1))
    class1 = loadClass(c2)
    label1 = np.ones((class1.shape[0],1))
    return np.concatenate([class0,class1]) ,np.concatenate([label0,label1])

In [4]:
# loading data
# entry number : 2021MT10236. Required classes : 0 and 1
# assigning label -1 to class 0 and label 1 to class 1
c1 = 0; c2 = 1
trainX, trainY = loadSVMData('data/svm/train/'+str(c1),'data/svm/train/'+str(c2))
testX, testY = loadSVMData('data/svm/val/'+str(c1),'data/svm/val/'+str(c2))

FileNotFoundError: [Errno 2] No such file or directory: 'data/svm/train/0'

In [239]:
def getSupport(arr,tol = 1e-3, C = 1):
    supportAlpha = []; indices = []
    for i in range(len(arr)):
        if arr[i] > tol :
            supportAlpha.append(arr[i])
            indices.append(i)
    return supportAlpha,indices

def linearKernel(X1,X2):
    return np.matmul(X1,X2.T)

def gaussKernel(X1: np.ndarray, X2: np.ndarray, gamma: float = 0.001):
    prod = np.reshape(np.einsum('ij,ij->i', X1, X1), (X1.shape[0], 1)) + \
           np.reshape(np.einsum('ij,ij->i', X2, X2), (X2.shape[0], 1)).T \
             - 2 * np.matmul(X1, X2.T)
    return np.exp(-gamma * prod)

def SVM(X, Y, kernel = 'gaussian' , gamma = 0.1, C = 1, tol = 1e-4,showProg = False):
    if kernel == 'linear':
        kernelMatrix = linearKernel(trainX,trainX)
    elif kernel == 'gaussian': 
        kernelMatrix = gaussKernel(trainX,trainX,gamma = gamma)
    print("kernel computed")

    P = cvxopt.matrix((kernelMatrix * np.matmul(Y,Y.T)))
    q = cvxopt.matrix(-np.ones(X.shape[0]))
    c = 0
    G = cvxopt.matrix(np.concatenate([np.eye(X.shape[0]),(-1)*np.eye(X.shape[0])]))
    h = cvxopt.matrix(np.concatenate([C*np.ones((X.shape[0],1)),np.zeros((X.shape[0],1))]))
    A = cvxopt.matrix(Y.T ,tc = 'd')
    b = cvxopt.matrix(0.0)

    sol = cvxopt.solvers.qp(P, q, G, h, A, b, options={'show_progress': showProg})
    alphaRaw = np.array(sol['x'])
    supportAlpha, supportIndices = getSupport(alphaRaw,tol, C)
    supportAlpha = np.array(supportAlpha)
    ySupport = Y[supportIndices]; xSupport = X[supportIndices]

    w = np.sum(supportAlpha * ySupport * xSupport , axis = 0)

    wXt = np.sum(alphaRaw * Y * kernelMatrix,axis = 0)

    M = max(supportIndices, key=lambda i: -float("inf") if Y[i] == 1  or C - alphaRaw[i] <= tol else wXt[i])
    m = min(supportIndices, key=lambda i: float("inf") if Y[i] == -1  or C - alphaRaw[i] <= tol else wXt[i])
    intercept = -(wXt[M] + wXt[m]) / 2
    return w,intercept,alphaRaw, supportIndices



# Part - A

In [240]:
t = time.time()
wLinear, bLinear ,alphaRawLinear ,supportLinear = SVM(trainX,trainY, kernel = 'linear',showProg = False, tol=1e-3, C = 1.0)
print(time.time() -t)

kernel computed
66.93194317817688


In [246]:
nSV = len(supportLinear)
print(f'Number of support vectors = {nSV}')
print(f'Number of support vectors make up {nSV*100/trainX.shape[0] : .3f}% of the training set')
print(f'Intercept term : {bLinear}')
wLinear = wLinear.reshape((wLinear.shape[0],1))
pred = np.matmul(wLinear.T,testX.T) + bLinear
pred = np.where(pred[0] >= 0, 1, -1)
testYtemp = testY.reshape(testY.shape[0])
accLinear = np.where((testYtemp == pred) == True)[0].shape[0]/testYtemp.shape[0]
print(f'Validation set accuracy = {accLinear*100 : .3f}%')

Number of support vectors = 1379
Number of support vectors make up  28.971% of the training set
Intercept term : 2.4334437936343076
Validation set accuracy =  86.250%


In [247]:
predTrain = np.matmul(wLinear.T,trainX.T) + bLinear
predTrain = np.where(predTrain[0] >= 0, 1, -1)
trainYtemp = trainY.reshape(trainY.shape[0])
accLinearTrain = np.where((trainYtemp == predTrain) == True)[0].shape[0]/trainYtemp.shape[0]
print(f'Validation set accuracy = {accLinearTrain*100 : .3f}%')

Validation set accuracy =  91.176%


In [248]:
vecs = [(255*trainX[i]).reshape(16,16,3).astype(np.uint8) for i in np.argsort(alphaRawLinear[supportLinear].flatten())[:6]]
for i in range(6):
    img = Image.fromarray(vecs[i])
    img.save('images/Q2/supportVectorLinear'+str(i)+'.png')
wtemp = 255*wLinear.reshape((16,16,3)).astype(np.uint8)
img  = Image.fromarray(wtemp)
img.save('images/Q2/wLinear.png')


# Part - B

In [249]:
#gaussian prediction
t = time.time()
_ , bGaussian ,alphaRawGaussian ,supportGaussian = SVM(trainX,trainY, kernel = 'gaussian',gamma = 0.001,showProg = False, tol=1e-4, C = 1.0)
print(time.time()-t)

kernel computed
65.10463786125183


In [250]:
nSVg = len(supportGaussian)
print(f'Number of support vectors = {nSVg}')
print(f'Number of support vectors make up {nSVg*100/trainX.shape[0] : .3f}% of the training set')
print(f'Intercept term : {bGaussian}')
pred = np.sum(alphaRawGaussian[supportGaussian] * trainY[supportGaussian] * gaussKernel(trainX[supportGaussian], testX, 0.001), 0) + bGaussian
pred = np.where(pred>=0, 1, -1)
accGauss = np.where((testYtemp == pred) == True)[0].shape[0]/testYtemp.shape[0]
print(f'Validation set accuracy = {accGauss*100 : .3f}%')


Number of support vectors = 1919
Number of support vectors make up  40.315% of the training set
Intercept term : -8.15582215973239
Validation set accuracy =  84.500%


In [1]:
matchingSupport = np.where((supportLinear == supportGaussian)==True)[0]
print(f"Number of matching support vectors = {matchingSupport}")

NameError: name 'np' is not defined

In [262]:
predTrainG = np.sum(alphaRawGaussian[supportGaussian] * trainY[supportGaussian] * gaussKernel(trainX[supportGaussian], trainX, 0.001), 0) + bGaussian
predTrainG = np.where(predTrainG[0] >= 0, 1, -1)
accGaussTrain = np.where((trainY.flatten() == predTrainG) == True)[0].shape[0]/trainYtempG.shape[0]
print(f'Validation set accuracy = {accGaussTrain*100 : .3f}%')

Validation set accuracy =  50.000%


In [253]:
vecs = [(255*trainX[i]).reshape(16,16,3).astype(np.uint8) for i in np.argsort(alphaRawGaussian[supportLinear].flatten())[:6]]
for i in range(6):
    img = Image.fromarray(vecs[i])
    img.save('images/Q2/supportVectorGauss'+str(i)+'.png')

# Part C

In [5]:
import time
linSVM = svm(kernel='linear', C = C)
t = time.time()
linSVM.fit(trainX,trainY.flatten())
print(f'Time taken to train Sklearn Linear SVM = {time.time() - t : .3f}s')


gaussSVM = svm(kernel='rbf', C = C, gamma = 0.001)
t = time.time()
gaussSVM.fit(trainX,trainY.flatten())
print(f'Time taken to train Sklearn Gaussian SVM = {time.time() - t : .3f}s')


NameError: name 'trainX' is not defined

In [261]:
print(f'Using Sci-Kit Learn\nnSV for linear = {linSVM.support_vectors_.shape[0]}\nnSV for Gaussian = {gaussSVM.support_vectors_.shape[0]}')
print(f'bias for linear :{linSVM.intercept_[0]: .4f}')
print(f'norm of difference between wLinear from cvxopt and sklearn : {np.linalg.norm(wLinear.flatten() - linSVM.coef_) : .4f}')
predLinearSK = linSVM.predict(testX)
predLinearSK = np.where(predLinearSK>=0, 1, -1)
accLinearSK = np.where((testY.flatten() == predLinearSK) == True)[0].shape[0]/testY.shape[0]
print(f'Accuracy on validation set using sklearn Linear SVM = {accLinearSK}')
predGaussSK = gaussSVM.predict(testX)
predGaussSK = np.where(predGaussSK>=0, 1, -1)
accGaussSK = np.where((testY.flatten() == predGaussSK) == True)[0].shape[0]/testY.shape[0]
print(f'Accuracy on validation set using sklearn Gaussian SVM = {accGaussSK}')

Using Sci-Kit Learn
nSV for linear = 1379
nSV for Gaussian = 1916
bias for linear : 2.4337
norm of difference between wLinear from cvxopt and sklearn :  0.0208
Accuracy on validation set using sklearn Linear SVM = 0.85
Accuracy on validation set using sklearn Gaussian SVM = 0.845


In [263]:
print(f'Using Sci-Kit Learn\nnSV for linear = {linSVM.support_vectors_.shape[0]}\nnSV for Gaussian = {gaussSVM.support_vectors_.shape[0]}')
print(f'bias for linear :{linSVM.intercept_[0]: .4f}')
print(f'norm of difference between wLinear from cvxopt and sklearn : {np.linalg.norm(wLinear.flatten() - linSVM.coef_) : .4f}')
predLinearSK = linSVM.predict(trainX)
predLinearSK = np.where(predLinearSK>=0, 1, -1)
accLinearSK = np.where((trainY.flatten() == predLinearSK) == True)[0].shape[0]/trainY.shape[0]
print(f'Accuracy on validation set using sklearn Linear SVM = {accLinearSK}')
predGaussSK = gaussSVM.predict(trainX)
predGaussSK = np.where(predGaussSK>=0, 1, -1)
accGaussSK = np.where((trainY.flatten() == predGaussSK) == True)[0].shape[0]/trainY.shape[0]
print(f'Accuracy on validation set using sklearn Gaussian SVM = {accGaussSK}')

Using Sci-Kit Learn
nSV for linear = 1379
nSV for Gaussian = 1916
bias for linear : 2.4337
norm of difference between wLinear from cvxopt and sklearn :  0.0208
Accuracy on validation set using sklearn Linear SVM = 0.9088235294117647
Accuracy on validation set using sklearn Gaussian SVM = 0.8861344537815126


In [264]:
2.4337 - 2.4334437936343076

0.00025620636569234634

# Part - D

In [218]:
def resize(img) :
    img = np.array(img.resize((32,32)))
    return img.reshape(img.shape[0]*img.shape[1]*img.shape[2])/255

def loadClass(path):
    images = []
    for i in os.listdir(path):
        images.append(resize(Image.open(os.path.join(path,i))))
    return np.array(images)

def loadSVMData(c1,c2):
    class0 = loadClass(c1)
    label0 = (-1)*np.ones((class0.shape[0],1))
    class1 = loadClass(c2)
    label1 = np.ones((class1.shape[0],1))
    return np.concatenate([class0,class1]) ,np.concatenate([label0,label1])

# loading data
# entry number : 2021MT10236. Required classes : 0 and 1
# assigning label -1 to class 0 and label 1 to class 1
c1 = 0; c2 = 1
trainX, trainY = loadSVMData('data/svm/train/'+str(c1),'data/svm/train/'+str(c2))
testX, testY = loadSVMData('data/svm/val/'+str(c1),'data/svm/val/'+str(c2))

In [222]:
import time
linSVM = svm(kernel='linear', C = C)
t = time.time()
linSVM.fit(trainX,trainY.flatten())
print(f'Time taken to train Sklearn Linear SVM = {time.time() - t : .3f}s')


gaussSVM = svm(kernel='rbf', C = C, gamma = 0.001)
t = time.time()
gaussSVM.fit(trainX,trainY.flatten())
print(f'Time taken to train Sklearn Gaussian SVM = {time.time() - t : .3f}s')

Time taken to train Sklearn Linear SVM =  24.757s
Time taken to train Sklearn Gaussian SVM =  14.879s


In [232]:
print(f'Using Sci-Kit Learn\nnSV for linear = {linSVM.support_vectors_.shape[0]}\nnSV for Gaussian = {gaussSVM.support_vectors_.shape[0]}')
print(f'bias for linear :{linSVM.intercept_[0]: .4f}')
predLinearSK = linSVM.predict(testX)
accLinearSK = np.where((testY.flatten() == predLinearSK) == True)[0].shape[0]/testX.shape[0]
print(f'Accuracy on validation set using sklearn Linear SVM = {accLinearSK}')
predGaussSK = gaussSVM.predict(testX)
accGaussSK = np.where((testY.flatten() == predGaussSK) == True)[0].shape[0]/testX.shape[0]
print(f'Accuracy on validation set using sklearn Gaussian SVM = {accGaussSK}')

Using Sci-Kit Learn
nSV for linear = 1403
nSV for Gaussian = 1591
bias for linear : 2.6825
Accuracy on validation set using sklearn Linear SVM = 0.79
Accuracy on validation set using sklearn Gaussian SVM = 0.89
