In [1]:
import os
import numpy as np
import pandas as pd
import cvxpy as cp
import cplex
import osqp

In [2]:
def read_data_mat100(dataset="tr0",
                     folder="kernel-methods-for-machine-learning-2018-2019"):
    """Load a ``mat100`` feature matrix and, for training splits, its labels.

    Parameters
    ----------
    dataset : str
        Split identifier such as ``"tr0"`` or ``"te2"``. A name containing
        ``"te"`` is handled as a test split (this is checked first); a name
        containing ``"tr"`` is handled as a training split.
    folder : str
        Directory that holds ``X<dataset>_mat100.csv`` and ``Y<dataset>.csv``.

    Returns
    -------
    numpy.ndarray
        For a test split: the feature matrix only.
    tuple of (numpy.ndarray, numpy.ndarray)
        For a training split: ``(X, y)`` where ``y`` is the first label
        column transformed by ``2 * y - 1`` (a 0/1 label becomes -1/+1).

    Raises
    ------
    ValueError
        If ``dataset`` contains neither ``"te"`` nor ``"tr"``.
    """
    is_test = "te" in dataset
    is_train = "tr" in dataset
    # Fail loudly before touching the disk instead of silently returning None.
    if not (is_test or is_train):
        raise ValueError(
            "dataset must contain 'tr' or 'te', got {!r}".format(dataset)
        )

    features_file = "X" + dataset + "_mat100.csv"
    # Feature files are space-separated and have no header row.
    X = pd.read_csv(
        os.path.join(folder, features_file),
        sep=" ",
        header=None
    )
    features = np.array(X)
    if is_test:
        return features

    labels_file = "Y" + dataset + ".csv"
    # Label files are comma-separated; the first column is used as the index.
    Y = pd.read_csv(
        os.path.join(folder, labels_file),
        sep=",",
        index_col=0,
    )
    return features, 2 * np.array(Y.iloc[:, 0]) - 1

In [3]:
def kernel(x1, x2):
    """Linear kernel: the dot product of ``x1`` with ``x2``.

    Delegates to ``x1.dot(x2)``, so whatever shape combinations that method
    accepts (vector/vector, matrix/vector, matrix/matrix) are accepted here.
    """
    inner_product = x1.dot(x2)
    return inner_product

# Regularization strength handed to compute_predictor; there it sets the
# upper bound 1 / (2 * lambd * n) placed on each Ytr[i] * alpha[i].
lambd = 1

In [4]:
def compute_predictor(Xtr, Ytr, kernel, lambd):
    """Fit a kernel classifier by solving a box-constrained QP and return it.

    The training features are standardized column-wise, the Gram matrix
    ``K = kernel(Xc, Xc.T)`` is built, and the following problem is solved
    with OSQP through cvxpy::

        minimize    alpha' K alpha - 2 * Ytr' alpha
        subject to  0 <= Ytr[i] * alpha[i] <= 1 / (2 * lambd * n)

    which is the standard dual form of a kernel SVM without intercept.

    Parameters
    ----------
    Xtr : numpy.ndarray
        Training features, one sample per row.
    Ytr : numpy.ndarray
        Training labels, one per row of ``Xtr``; the constraints and the
        final ``np.sign`` assume signed (-1/+1) labels.
    kernel : callable
        ``kernel(A, B)`` must accept ``(Xc, Xc.T)`` to produce the n x n Gram
        matrix and ``(Xc, x)`` to produce the n kernel evaluations against a
        single standardized sample ``x``.
    lambd : float
        Regularization strength; must be non-zero.

    Returns
    -------
    callable
        ``predict(x_new)`` returning ``np.sign`` of the decision value for a
        single raw (unstandardized) sample.

    Raises
    ------
    RuntimeError
        If the solver does not produce a solution for ``alpha``.
    """
    m = Xtr.mean(axis=0)
    s = Xtr.std(axis=0)
    # A constant feature has zero spread; dividing by it would inject NaN/inf
    # into the Gram matrix. Leave such (already centred) columns unscaled.
    s = np.where(s == 0, 1.0, s)
    Xc = (Xtr - m) / s

    n = len(Xc)

    gram_matrix = kernel(Xc, Xc.T)
    # Tiny ridge so the quadratic form stays numerically positive definite.
    K = gram_matrix + 1e-9 * np.eye(n)

    alpha = cp.Variable(n)

    signed_alpha = cp.multiply(Ytr, alpha)
    box_upper = 1.0 / (2 * lambd * n)
    constraints = [
        signed_alpha >= 0,
        signed_alpha <= box_upper,
    ]

    # `@` is the explicit inner product; `*` between a vector and a cvxpy
    # expression is deprecated as matrix multiplication.
    objective = cp.Minimize(
        cp.quad_form(alpha, K)
        - 2 * (Ytr @ alpha)
    )

    prob = cp.Problem(objective, constraints)
    prob.solve(solver=cp.OSQP, verbose=False)

    # Without this check a failed solve only surfaces later, as an
    # AttributeError on None inside the predictor.
    if alpha.value is None:
        raise RuntimeError(
            "QP solve did not return a solution (status: {})".format(prob.status)
        )

    # Freeze the coefficients once instead of reading alpha.value per call.
    coef = np.asarray(alpha.value).ravel()

    def predict(x_new):
        """Return the sign of the decision function at one raw sample."""
        return np.sign(coef.dot(kernel(Xc, (x_new - m) / s)))

    return predict

In [6]:
# Train one classifier per dataset and pool every test prediction, in dataset
# order, into a single submission file.
Ypred = []

for k in range(3):
    print("\nDATASET {}\n".format(k))

    Xtr, Ytr = read_data_mat100("tr{}".format(k))
    Xte = read_data_mat100("te{}".format(k))

    f = compute_predictor(Xtr, Ytr, kernel, lambd)
    # The predictor handles one sample at a time, so score row by row.
    Yte = np.array([f(row) for row in Xte])

    # (sign + 1) / 2 maps -1/+1 to 0/1; a sign of 0 gives 0.5, which the
    # integer cast truncates to 0.
    Ypred.extend(((Yte + 1) / 2).astype(int))

# Submission layout: an "Id" index running 0..N-1 and a "Bound" column.
Ypred = pd.Series(
    Ypred,
    index=pd.Index(np.arange(len(Ypred)), name="Id"),
    name="Bound",
)
Ypred.to_csv("Ypred.csv", header=True)


DATASET 0


DATASET 1


DATASET 2

