In [1]:
from keras.datasets import cifar10
from sklearn.preprocessing import StandardScaler
from keras.applications.vgg16 import VGG16
from keras.models import Model
import numpy as np
from skimage import feature, color
from sklearn import preprocessing
from cvxopt import matrix, solvers
from matplotlib import pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

Extract features

In [2]:
# load cifar10 data
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# normalize data
y_train = y_train.ravel()
y_test = y_test.ravel()

# extract features
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(32, 32, 3))
model = Model(inputs=base_model.input, outputs=base_model.output)
features_train = model.predict(x_train)
features_test = model.predict(x_test)

# reshape features
features_train = features_train.reshape(features_train.shape[0], -1)
features_test = features_test.reshape(features_test.shape[0], -1)

# standardize features
scaler = StandardScaler()
features_train = scaler.fit_transform(features_train)
features_test = scaler.transform(features_test)

print("Training features shape:", features_train.shape)
print("Testing features shape:", features_test.shape)

[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 23ms/step
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 24ms/step
Training features shape: (50000, 512)
Testing features shape: (10000, 512)


Create Model

In [3]:
# create loss functions and accuracy function
def hinge_loss(y_true, y_pred):
    return np.maximum(0, 1 - y_true * y_pred).mean()

def accuracy(y_true, y_pred):
    return (y_true == y_pred).mean()

In [4]:
class KernelSVM:
    def __init__(self, kernel='linear', C=1.0, degree=3, coef0=1, sigma=1.0):
        self.kernel = kernel
        self.C = C
        self.degree = degree
        self.coef0 = coef0
        self.sigma = sigma
        self.alpha = None
        self.support_vectors = None
        self.support_vector_labels = None
        self.b = 0

    def get_params(self, deep=True):
        # return the parameters
        return {'kernel': self.kernel, 'C': self.C}
    
    def set_params(self, **parameters):
        # set the parameters
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def kernel_function(self, x, y):
        if self.kernel == 'linear':
            return np.dot(x, y)
        elif self.kernel == 'polynomial':
            return (self.coef0 + np.dot(x, y)) ** self.degree
        elif self.kernel == 'rbf':
            return np.exp(-np.linalg.norm(x-y)**2 / (2 * (self.sigma ** 2)))
        else:
            raise ValueError("Unknown kernel function.")

    def fit(self, X, y):
        n_samples, n_features = X.shape
        y = y.reshape(-1, 1) * 1.0
        
        # innitialize H matrix
        H = np.zeros((n_samples, n_samples))
        # Calculate the kernel matrix
        for i in range(n_samples):
            for j in range(n_samples):
                H[i, j] = self.kernel_function(X[i], X[j]) * y[i] * y[j]

        # Convert into cvxopt format
        P = matrix(H)
        q = matrix(-np.ones((n_samples, 1)))
        G = matrix(np.vstack((np.eye(n_samples)*-1,np.eye(n_samples))))
        h = matrix(np.hstack((np.zeros(n_samples), np.ones(n_samples) * self.C)))
        A = matrix(y.reshape(1, -1))
        b = matrix(np.zeros(1))

        # Solve QP problem
        solution = solvers.qp(P, q, G, h, A, b)

        # Extract support vectors
        alphas = np.array(solution['x'])
        self.alpha = alphas[alphas > 1e-4].flatten()
        sv = alphas > 1e-4
        ind = np.where(sv)[0]
        self.support_vectors = X[ind]
        self.support_vector_labels = y[ind]
        self.b = np.mean(y[ind] - np.dot(self.support_vectors, self.support_vectors.T).dot(alphas[sv] * y[sv]))
        
        # Calculate the predictions on the training set
        y_pred = self.predict(X)
        # Convert predictions back to original class labels
        y_pred = np.where(y_pred < 0, -1, 1)
        y = np.where(y <= 0, -1, 1)
        
        # Calculate the loss and accuracy
        loss = hinge_loss(y, y_pred)
        acc = accuracy(y, y_pred)
        
        return loss, acc

    def predict(self, X):
        y_predict = np.zeros(len(X))
        for i in range(len(X)):
            prediction = 0
            for alpha, sv_y, sv in zip(self.alpha, self.support_vector_labels, self.support_vectors):
                prediction += alpha * sv_y * self.kernel_function(X[i], sv)
            prediction += self.b
            y_predict[i] = prediction  # return the prediction
        return y_predict

Sub-sample

In [5]:
# sub-sample the training set
indices = np.random.choice(range(features_train.shape[0]), size=1000, replace=False)
sub_features_train = features_train[indices]
sub_y_train = y_train[indices]

Training with different kernel function

In [6]:
# initialize the parameter grid
param_grid = {
    'kernel': ['linear'],
    'C': [0.001, 0.01, 0.1, 1, 10]
}


# initialize the SVM model
svm_model = KernelSVM(kernel='linear')

# create the GridSearchCV object
grid_search = GridSearchCV(svm_model, param_grid, cv=5, scoring='accuracy')

# perform the grid search
grid_search.fit(sub_features_train, sub_y_train)

# print the best parameters found
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))

     pcost       dcost       gap    pres   dres
 0: -6.7693e+01 -1.3608e+00  4e+03  6e+01  1e-12
 1: -1.3653e+00 -1.3523e+00  6e+01  1e+00  1e-12
 2: -2.1878e-01 -1.2282e+00  4e+00  5e-02  7e-14
 3: -1.5131e-01 -7.5372e-01  1e+00  1e-02  2e-14
 4: -1.2477e-01 -4.1068e-01  5e-01  5e-03  7e-15
 5: -1.0879e-01 -2.5542e-01  3e-01  2e-03  3e-15
 6: -9.8264e-02 -1.6985e-01  1e-01  1e-03  2e-15
 7: -8.9354e-02 -1.3111e-01  8e-02  4e-04  2e-15
 8: -8.5929e-02 -1.0747e-01  4e-02  2e-04  2e-15
 9: -8.4221e-02 -8.6520e-02  4e-03  2e-05  2e-15
10: -8.4003e-02 -8.4034e-02  5e-05  2e-07  2e-15
11: -8.4000e-02 -8.4000e-02  5e-07  2e-09  2e-15
12: -8.4000e-02 -8.4000e-02  5e-09  2e-11  2e-15
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0: -6.8144e+01 -1.4091e+00  4e+03  6e+01  1e-12
 1: -1.9146e+00 -1.3894e+00  9e+01  2e+00  1e-12
 2: -2.3955e-01 -1.2835e+00  5e+00  7e-02  7e-14
 3: -1.5653e-01 -8.0009e-01  1e+00  1e-02  2e-14
 4: -1.2673e-01 -4.2957e-01  5e-01  5e-03  6e-1

In [7]:
svm_models = []
num_classes = 10
for class_idx in range(num_classes):
    print(f"Training model for class {class_idx}...")
    
    # Convert the labels to binary
    binary_y_train = np.where(sub_y_train == class_idx, 1, -1)
    
    # Train the model
    svm_model = KernelSVM(kernel='linear', C=0.001)
    loss, acc = svm_model.fit(sub_features_train, binary_y_train)
    
    # Store the model
    svm_models.append(svm_model)
    
    print(f"Training Loss: {loss:.4f}, Training Accuracy: {acc:.4f}")

# initialize decision function values
decision_function_values = np.zeros((features_test.shape[0], num_classes))

# use each SVM model to predict the decision function values
for class_idx, svm_model in enumerate(svm_models):
    print(f"Predicting with model for class {class_idx}...")
    decision_values = svm_model.predict(features_test)
    decision_function_values[:, class_idx] = decision_values

# select the class with the maximum decision function value
y_pred = np.argmax(decision_function_values, axis=1)

# calculate the test accuracy
test_accuracy = accuracy(y_test, y_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

Training model for class 0...
     pcost       dcost       gap    pres   dres
 0: -6.0458e+01 -2.3071e+00  7e+03  8e+01  1e-13
 1: -2.4732e+00 -2.2325e+00  2e+02  3e+00  1e-13
 2: -4.1559e-01 -1.9921e+00  2e+01  2e-01  1e-14
 3: -1.6072e-01 -1.4864e+00  2e+00  9e-03  1e-15
 4: -1.3494e-01 -3.5790e-01  2e-01  4e-04  1e-15
 5: -1.4840e-01 -2.1912e-01  7e-02  9e-05  6e-16
 6: -1.5377e-01 -1.9735e-01  4e-02  5e-05  4e-16
 7: -1.5828e-01 -1.8050e-01  2e-02  2e-05  4e-16
 8: -1.6059e-01 -1.7314e-01  1e-02  8e-06  4e-16
 9: -1.6242e-01 -1.6721e-01  5e-03  6e-18  4e-16
10: -1.6381e-01 -1.6469e-01  9e-04  8e-18  5e-16
11: -1.6410e-01 -1.6425e-01  1e-04  1e-17  5e-16
12: -1.6416e-01 -1.6416e-01  4e-06  7e-18  5e-16
13: -1.6416e-01 -1.6416e-01  7e-08  6e-18  5e-16
Optimal solution found.
Training Loss: 0.2914, Training Accuracy: 0.8543
Training model for class 1...
     pcost       dcost       gap    pres   dres
 0: -4.5086e+01 -2.3051e+00  6e+03  8e+01  1e-13
 1: -1.9543e+00 -2.2183e+00  2e+02  

In [8]:
# initialize the parameter grid
param_grid = {
    'kernel': ['polynomial'],
    'C': [0.1, 1, 10, 100],
    'degree': [2, 3, 4],
    'coef0': [0, 1, 2],
}

# initialize the SVM model
svm_model = KernelSVM(kernel='polynomial')

# create the GridSearchCV object
grid_search = GridSearchCV(svm_model, param_grid, cv=5, scoring='accuracy')

# perform the grid search
grid_search.fit(sub_features_train, sub_y_train)

# print the best parameters found
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))

     pcost       dcost       gap    pres   dres
 0: -4.6200e+01 -1.2062e+02  4e+03  2e+01  1e-15
 1: -7.1411e+00 -1.0714e+02  1e+02  2e-01  2e-15
 2: -7.7432e+00 -1.3837e+01  7e+00  9e-03  3e-15
 3: -8.3766e+00 -8.5985e+00  2e-01  3e-04  3e-15
 4: -8.3933e+00 -8.4599e+00  7e-02  8e-05  2e-15
 5: -8.3980e+00 -8.4203e+00  3e-02  2e-05  2e-15
 6: -8.3993e+00 -8.4080e+00  1e-02  7e-06  4e-15
 7: -8.3998e+00 -8.4023e+00  3e-03  2e-06  3e-15
 8: -8.3999e+00 -8.4009e+00  1e-03  7e-07  4e-15
 9: -8.4000e+00 -8.4002e+00  2e-04  1e-07  3e-15
10: -8.4000e+00 -8.4000e+00  3e-05  1e-08  3e-15
11: -8.4000e+00 -8.4000e+00  3e-07  1e-10  2e-15
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0: -4.5650e+01 -1.2057e+02  4e+03  2e+01  2e-15
 1: -7.0561e+00 -1.0707e+02  1e+02  2e-01  2e-15
 2: -7.6510e+00 -1.3744e+01  7e+00  9e-03  3e-15
 3: -8.2739e+00 -8.5230e+00  3e-01  4e-04  3e-15
 4: -8.2917e+00 -8.3742e+00  9e-02  8e-05  3e-15
 5: -8.2976e+00 -8.3236e+00  3e-02  2e-05  4e-1

In [9]:
svm_models = []
num_classes = 10
for class_idx in range(num_classes):
    print(f"Training model for class {class_idx}...")
    
    # Convert the labels to binary
    binary_y_train = np.where(sub_y_train == class_idx, 1, -1)
    
    # Train the model
    svm_model = KernelSVM(kernel='polynomial', C=0.1, degree=2, coef0=0) # use the best parameters found
    loss, acc = svm_model.fit(sub_features_train, binary_y_train)
    
    # Store the model
    svm_models.append(svm_model)
    
    print(f"Training Loss: {loss:.4f}, Training Accuracy: {acc:.4f}")

# initialize decision function values
decision_function_values = np.zeros((features_test.shape[0], num_classes))

# use each SVM model to predict the decision function values
for class_idx, svm_model in enumerate(svm_models):
    print(f"Predicting with model for class {class_idx}...")
    decision_values = svm_model.predict(features_test)
    decision_function_values[:, class_idx] = decision_values

# select the class with the maximum decision function value
y_pred = np.argmax(decision_function_values, axis=1)

# calculate the test accuracy
test_accuracy = accuracy(y_test, y_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

Training model for class 0...
     pcost       dcost       gap    pres   dres
 0: -2.5453e-03 -1.0001e+02  2e+03  1e+01  1e-15
 1: -2.3815e-03 -8.2834e+01  1e+02  1e-01  1e-15
 2:  2.9489e-04 -1.9660e+00  2e+00  2e-03  2e-15
 3: -1.2926e-03 -1.4425e-01  2e-01  1e-04  2e-15
 4: -1.1962e-03 -4.3440e-02  5e-02  3e-05  2e-15
 5: -2.0065e-03 -8.3620e-03  7e-03  3e-06  2e-15
 6: -2.3152e-03 -4.0723e-03  2e-03  6e-07  1e-15
 7: -2.4331e-03 -2.7953e-03  4e-04  7e-08  1e-15
 8: -2.4731e-03 -2.5226e-03  5e-05  2e-16  1e-15
 9: -2.4819e-03 -2.4898e-03  8e-06  2e-16  1e-15
10: -2.4838e-03 -2.4844e-03  6e-07  2e-16  1e-15
11: -2.4840e-03 -2.4840e-03  2e-08  2e-16  1e-15
Optimal solution found.
Training Loss: 1.7980, Training Accuracy: 0.1010
Training model for class 1...
     pcost       dcost       gap    pres   dres
 0: -2.7871e-03 -1.0001e+02  2e+03  1e+01  1e-15
 1: -2.6079e-03 -8.2835e+01  1e+02  1e-01  1e-15
 2:  3.0048e-04 -2.0069e+00  2e+00  2e-03  2e-15
 3: -1.4457e-03 -1.3389e-01  1e-01  

In [10]:
# initialize the parameter grid
param_grid_rbf = {
    'kernel': ['rbf'],
    'C': [0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 1, 10]
}

# initialize the SVM model
svm_model = KernelSVM(kernel='rbf')

# create the GridSearchCV object
grid_search = GridSearchCV(svm_model, param_grid_rbf, cv=5, scoring='accuracy')

# perform the grid search
grid_search.fit(sub_features_train, sub_y_train)

# print the best parameters found
print("Best parameters found: ", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))

     pcost       dcost       gap    pres   dres
 0: -6.4981e+01 -1.3052e+02  4e+03  2e+01  2e-16
 1: -1.7235e+01 -1.1564e+02  2e+02  4e-01  3e-16
 2: -1.5421e+01 -4.5391e+01  5e+01  1e-01  4e-16
 3: -1.2491e+01 -2.3685e+01  2e+01  4e-02  6e-15
 4: -8.2921e+00 -1.3770e+01  9e+00  1e-02  3e-15
 5: -8.3988e+00 -8.4625e+00  1e-01  2e-04  1e-15
 6: -8.4000e+00 -8.4006e+00  1e-03  2e-06  1e-15
 7: -8.4000e+00 -8.4000e+00  1e-05  2e-08  2e-15
 8: -8.4000e+00 -8.4000e+00  1e-07  2e-10  1e-15
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0: -6.4459e+01 -1.3032e+02  4e+03  2e+01  2e-16
 1: -1.7000e+01 -1.1537e+02  2e+02  4e-01  4e-16
 2: -1.5339e+01 -4.4967e+01  4e+01  1e-01  1e-15
 3: -1.2491e+01 -2.3964e+01  2e+01  4e-02  1e-14
 4: -8.1892e+00 -1.3518e+01  9e+00  1e-02  4e-15
 5: -8.2982e+00 -8.3609e+00  1e-01  2e-04  1e-15
 6: -8.3000e+00 -8.3006e+00  1e-03  2e-06  1e-15
 7: -8.3000e+00 -8.3000e+00  1e-05  2e-08  1e-15
 8: -8.3000e+00 -8.3000e+00  1e-07  2e-10  2e-1

In [11]:
svm_models = []
num_classes = 10
for class_idx in range(num_classes):
    print(f"Training model for class {class_idx}...")
    
    # Convert the labels to binary
    binary_y_train = np.where(sub_y_train == class_idx, 1, -1)
    
    # Train the model
    svm_model = KernelSVM(kernel='rbf', C=0.1, sigma=0.001) # use the best parameters found
    loss, acc = svm_model.fit(sub_features_train, binary_y_train)
    
    # Store the model
    svm_models.append(svm_model)
    
    print(f"Training Loss: {loss:.4f}, Training Accuracy: {acc:.4f}")

# initialize decision function values
decision_function_values = np.zeros((features_test.shape[0], num_classes))

# use each SVM model to predict the decision function values
for class_idx, svm_model in enumerate(svm_models):
    print(f"Predicting with model for class {class_idx}...")
    decision_values = svm_model.predict(features_test)
    decision_function_values[:, class_idx] = decision_values

# select the class with the maximum decision function value
y_pred = np.argmax(decision_function_values, axis=1)

# calculate the test accuracy
test_accuracy = accuracy(y_test, y_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

Training model for class 0...
     pcost       dcost       gap    pres   dres
 0: -1.0876e+02 -1.9366e+02  5e+03  2e+01  1e-16
 1: -3.2039e+01 -1.6305e+02  4e+02  1e+00  3e-16
 2: -1.9385e+01 -5.9714e+01  4e+01  6e-14  7e-16
 3: -1.9595e+01 -2.0126e+01  5e-01  2e-15  2e-16
 4: -1.9638e+01 -1.9643e+01  5e-03  4e-14  2e-17
 5: -1.9638e+01 -1.9638e+01  5e-05  3e-14  8e-17
 6: -1.9638e+01 -1.9638e+01  5e-07  1e-15  1e-16
Optimal solution found.
Training Loss: 0.2020, Training Accuracy: 0.8990
Training model for class 1...
     pcost       dcost       gap    pres   dres
 0: -1.0876e+02 -1.9366e+02  5e+03  2e+01  1e-16
 1: -3.2039e+01 -1.6305e+02  4e+02  1e+00  4e-16
 2: -1.9385e+01 -5.9714e+01  4e+01  7e-14  2e-15
 3: -1.9595e+01 -2.0126e+01  5e-01  2e-14  2e-16
 4: -1.9638e+01 -1.9643e+01  5e-03  3e-14  5e-17
 5: -1.9638e+01 -1.9638e+01  5e-05  1e-14  4e-17
 6: -1.9638e+01 -1.9638e+01  5e-07  3e-15  3e-16
Optimal solution found.
Training Loss: 1.7980, Training Accuracy: 0.1010
Training mod

Train with SVM in SKlearn

In [12]:
# train a linear SVM model using the sklearn library
svm = SVC(kernel='linear', C=1.0)
svm.fit(sub_features_train, sub_y_train)
# print the training accuracy
y_pred = svm.predict(sub_features_train)
train_accuracy = accuracy(sub_y_train, y_pred)
print(f"Training Accuracy: {train_accuracy:.4f}")
# print the test accuracy
y_pred = svm.predict(features_test)
test_accuracy = accuracy(y_test, y_pred)
print(f"Test Accuracy: {test_accuracy:.4f}")

Training Accuracy: 1.0000
Test Accuracy: 0.4441
