In [1]:
import numpy as np
from cvxopt import solvers, matrix, spmatrix, spdiag, sparse
import matplotlib.pyplot as plt
from softsvm import softsvm, fix_small_eigvals, error, get_random_sample
import softsvm
# unlimited np output
# np.set_printoptions(threshold=np.inf)
data = np.load(r'G:\My Drive\uni\Machine Learning intro\Introduction-To-Machine-Learning\ex2\ex2q4_data.npz')
trainX, testX = data['Xtrain'], data['Xtest']
trainY, testY = data['Ytrain'], data['Ytest']

In [2]:
def K(dot_res, k):
    """
    :param dot_res: dot product of two np.arrays
    :param k: the degree of the poly kernel
    :return: (1 + dot_res)^k
    """
    return np.power(float(1) + dot_res, k)

In [3]:
def get_gram_matrix(X, k):
    print(X.shape)
    G = np.dot(X, X.T)
    # apply k to every item in G
    G = np.vectorize(lambda sample: K(sample, k))(G)
    return G
    

In [4]:
def softsvmpoly(l: float, k: float, trainX: np.array, trainy: np.array):
    """
    :param l: the parameter lambda of the soft SVM algorithm
    :param sigma: the bandwidth parameter sigma of the RBF kernel.
    :param trainX: numpy array of size (m, d) containing the training sample
    :param trainy: numpy array of size (m, 1) containing the labels of the training sample
    :return: numpy array of size (m, 1) which describes the coefficients found by the algorithm
    """
    m, d = trainX.shape
    
    G = get_gram_matrix(trainX, k)
    # G = fix_small_eigvals(G)

    H = np.pad(float(2 * l) * G, [(0, m), (0, m)])
    H = fix_small_eigvals(H)

    A = np.block([[np.zeros((m, m)), np.identity(m)],
                  [G * trainy.reshape(-1, 1), np.identity(m)]])
    # A = fix_small_eigvals(A)

    u = np.hstack((np.full(m, float(0)), np.full(m, 1/m)))

    v = np.hstack((np.zeros(m), np.ones(m)))
  
    z = solvers.qp(matrix(H), matrix(u), -matrix(A), -matrix(v))
    alphas = np.array(z["x"])[:m]
    return alphas

In [5]:
def cartesian_product(set_a: np.array, set_b: np.array):
    return [(ai, bi) for ai in set_a for bi in set_b]

In [6]:
def predict_single_sample(alphas: np.array, k: int, sample: np.array, trainX: np.array):
    train_kernels = np.array([K(np.dot(sample, xi), k) for xi in trainX])
    return np.sign(np.dot(train_kernels, alphas))


In [7]:
def predict(alphas: np.array, k: int, testX: np.array, trainX: np.array):
    """
    :param alphas: numpy array of size (m, 1) containing the coefficients of the soft SVM algorithm
    :param testX: numpy array of size (m, d) containing the test sample
    :param k: int, the degree of the poly kernel
    :return: numpy array of size (m, 1) containing the predicted labels of the test sample
    """
    
    return np.array([predict_single_sample(alphas, k, sample, trainX) for sample in testX])

In [8]:
def split_data(folds: int):
    X_chunks = np.array(np.split(trainX, folds))
    Y_chunks = np.array(np.split(trainY, folds))
    splitted = []
    shp = X_chunks.shape
    for i in range(folds):
        test, test_labales = X_chunks[i], Y_chunks[i]
        # current test set is the the entire train besides the current chunk used for training
        train = np.concatenate(np.delete(X_chunks, i, axis=0))
        train_labales = np.concatenate(np.delete(Y_chunks, i, axis=0))
        splitted.append({"train": train,
                        "train_labales": train_labales,
                        "test": test,
                        "test_labales": test_labales})
    return np.array(splitted)

In [14]:
def poly_cross_validation(lambdas: np.array, ks, folds: int):
    """
    find pair (lambda, k) with the lowest validation error, and get classifier based on that pair
    :param lambdas: the lambda parameters to use
    :param ks: the k parameters to use in the kernel function
    :param folds: number of chunks we split the data to
    :return: prediction of test set of classifier trained on the entire train set using best lambda and k found
    """
    # poly softSVM
    errors = {(l, k): 0 for l, k in cartesian_product(lambdas, ks)}
    a = split_data(folds)
    for fold in split_data(folds):
        for l, k in cartesian_product(lambdas, ks):
            print(l,k)
            alphas = softsvmpoly(l, k, fold["train"], fold["train_labales"])
            predicted = predict(alphas, k, fold["test"], fold["train"])
            # continuesly calculate the avg error
            errors[(l, k)] += error(fold["test_labales"], predicted) / folds
    
    # get the pair with lowest avg error
    best_lambda, best_k = min(errors.items(), key=lambda x: x[1])[0]
    print(f"best_lambda={best_lambda}, best_k={best_k}")
    alphas = softsvmpoly(best_lambda, best_k, trainX, trainY)
    return predict(alphas, best_k, testX, trainX)

In [10]:
def softsvm_cross_validation(lambdas, folds):
    errors = {l: 0 for l in lambdas}
    for fold in split_data(folds):
        for l in lambdas:
            w = softsvm.softsvm(l, fold["train"], fold["train_labales"])
            errors[l] += error(fold["test_labales"], softsvm.predict(w, fold["test"])) / folds
    
    # get the l with lowest avg error
    best_lambda = min(errors.items(), key=lambda x: x[1])[0]
    print(f"best_lambda={best_lambda}")
    w = softsvm.softsvm(best_lambda, trainX, trainY)
    return softsvm.predict(w, testX)

In [11]:
def cross_validation_error():
    """
    returns the error on test sample of
    clasiisfier from poly_cross_validation with 5 folds
    """
    lambdas = np.array([1.0, 10.0, 100.0])
    ks = np.array([2.0, 5.0, 8.0])

    # polynomial kernel
    poly_predicions = poly_cross_validation(lambdas, ks, 5)
    poly_error = error(testY, poly_predicions)

    # linear softsvm
    soft_svm_predictions = softsvm_cross_validation(lambdas, 5)
    soft_svm_error =  error(testY, soft_svm_predictions)
    print(f"soft_svm_error={soft_svm_error}")
    print(f"poly_error={poly_error}")


In [15]:
cross_validation_error()

1.0 2.0
(800, 2)
     pcost       dcost       gap    pres   dres
 0:  1.2266e+00  1.3519e+02  5e+03  3e+00  2e+04
 1:  4.6404e+00 -5.8985e+01  6e+01  3e-02  2e+02
 2:  3.8243e+00 -1.0594e-01  4e+00  1e-04  1e+00
 3:  1.3577e+00  9.8524e-01  4e-01  6e-16  5e-14
 4:  1.1262e+00  1.1129e+00  1e-02  5e-16  3e-14
 5:  1.1214e+00  1.1212e+00  2e-04  5e-16  2e-14
 6:  1.1214e+00  1.1214e+00  2e-06  4e-16  4e-14
 7:  1.1214e+00  1.1214e+00  2e-08  5e-16  2e-14
Optimal solution found.
1.0 5.0
(800, 2)
     pcost       dcost       gap    pres   dres
 0:  4.4015e+00  6.8830e+01  4e+03  2e+00  1e+05
 1:  4.5468e+00 -1.4816e+02  2e+02  9e-02  5e+03
 2:  3.3195e+00 -1.2136e+01  2e+01  7e-03  4e+02
 3:  2.4258e+00 -9.9854e-01  3e+00  9e-04  5e+01
 4:  1.0805e+00  6.5499e-01  4e-01  1e-15  8e-14
 5:  8.1490e-01  7.5846e-01  6e-02  9e-16  4e-14
 6:  7.9093e-01  7.7898e-01  1e-02  8e-16  6e-14
 7:  7.8605e-01  7.8346e-01  3e-03  8e-16  6e-14
 8:  7.8491e-01  7.8453e-01  4e-04  8e-16  2e-13
 9:  7.8474e-

TypeError: 'module' object is not callable