In [1]:
import numpy as np
import pandas as pd
import time

# Load the data

For $k = 0, 1, 2$ we have the following files:
* Xtrk.csv - the training sequences.
* Xtek.csv - the test sequences.
* Ytrk.csv - labels for the training sequences

In [2]:
Xtr0_mat100 = np.genfromtxt("data/Xtr0_mat100.csv", delimiter='')
Ytr0 = np.genfromtxt("data/Ytr0.csv", delimiter=',', skip_header=1)

Xtr1_mat100 = np.genfromtxt("data/Xtr1_mat100.csv", delimiter='')
Ytr1 = np.genfromtxt("data/Ytr1.csv", delimiter=',', skip_header=1)

Xtr2_mat100 = np.genfromtxt("data/Xtr2_mat100.csv", delimiter='')
Ytr2 = np.genfromtxt("data/Ytr2.csv", delimiter=',', skip_header=1)


In [3]:
def accuracy(y_true,y_pred, mode='SVM'):
    n = y_true.shape[0]
    if mode == 'SVM':
        predictions = np.ones(n)
        predictions[y_pred < 0] = 0
    else:
        predictions = np.zeros(n)
        predictions[y_pred >= 0.5] = 1
    
    return np.sum(y_true == predictions) / n

# Implementing some kernels

## Linear Kernel

In [257]:
def linear_kernel(X_train, X_valid, scale=True, mode="train"):
    
    if scale:
        X_tr = (X_train-X_train.mean(axis=0)) / X_train.std(axis=0)
        X_va = (X_valid-X_train.mean(axis=0)) / X_train.std(axis=0)
        
        K_va = X_va @ X_tr.T
        
        if mode == "test":
            return(K_va)
        
        K_tr = X_tr @ X_tr.T
        
    else:
        K_va = X_valid @ X_train.T
        
        if mode == "test":
            return(K_va)
        
        K_tr = X_train @ X_train.T
        
    return(K_tr, K_va)

## Gaussian Kernel

In [255]:
# Idea : efficient computation of the pairwise distances
def gaussian_kernel(X_train, X_valid, sigma=None, scale=True, scale_sigma=True, mode="train"):
    
    n, p = X_train.shape
    
    if scale:
        X_tr = (X_train-X_train.mean(axis=0)) / X_train.std(axis=0)
        X_va = (X_valid-X_train.mean(axis=0)) / X_train.std(axis=0)
    if scale_sigma:
        sigma = p
        
    K_va = np.linalg.norm(X_va[:, None, ...] - X_tr[None, ...], axis=-1)**2
    K_va = np.exp((-K_va)/(sigma))
    
    if mode=="test":
        return(K_va)
    
    else:
        
        K_tr = ((X_tr[:, :, None] - X_tr[:, :, None].T) ** 2).sum(1)
        K_tr = np.exp((-K_tr)/(sigma))
        return(K_tr, K_va)


## Polynomial Kernel

In [270]:
def polynomial_kernel(X_train, X_valid, d=3, c=1, scale=True, mode="train"):
    
    # k(x,y) = (<x,y> + c)**d
    if scale:
        
        X_tr = (X_train-X_train.mean(axis=0)) / X_train.std(axis=0)
        X_va = (X_valid-X_train.mean(axis=0)) / X_train.std(axis=0)
        
        K_va = X_va @ X_tr.T + c
        K_va = K_va**d
        
        if mode=="test":
            return(K_va)
        
        K_tr = X_tr @ X_tr.T + c
        K_tr = K_tr**d
    
        
    else:
        K_va = X_valid @ X_train.T + c
        K_va = K_va**d
        
        if mode=="test":
            return(K_va)
        
        K_tr = X_train @ X_train.T + c
        K_tr = K_tr**d
        
    return(K_tr, K_va)



## Kernel Ridge Regression

* Consider RKHS $\mathcal H$, associated to a p.d. kernel K on $\mathcal X$
* Let $y = (y_1, \dots, y_n)^T \in \mathbb R ^n$
* Let $\alpha = (\alpha_1, \dots, \alpha_n)^T \in \mathbb R ^n$
* Let $K$ be the $n\times n$ Gram Matrix such that $K_{i,j} = K(x_i, x_j)$
* We can then write
$$
(\hat f(x_1), \dots, \hat f(x_n))^T = K\alpha
$$
* The norm is $||\hat f||^2_{\mathcal H} = \alpha^T K \alpha$
* KRR $\leftrightarrow \text{argmin}_{\alpha \in \mathbb R^n} \frac{1}{n} (K\alpha - y)^T(K\alpha - y) + \lambda \alpha^T K \alpha$
* Solution for $\lambda > 0$:
$$
\alpha = (K+\lambda nI)^{-1}y
$$


In [207]:
def KRR(K, y, Kval, yval, lambd):
    """
    takes the kernel matrix as an input and computes the MSE and the predictions for each value in lambd (list)
    """
    assert K.shape[0] == y.shape[0]
    assert len(lambd) > 0
    n = K.shape[0]
    
    loss = []
    acc = []
    
    loss_val = []
    acc_val = []
    alphas = []
    
    for l in lambd:
        
        assert l >= 0
        # find the parameter alpha
        alpha = np.linalg.solve((K + l*n*np.eye(n)), y)
        # predict
        
        loss_lambda = MSE(K, y, l, alpha)
        acc_lambda = accuracy(y,K@alpha, mode="KRR")
        
        loss_lambdaval = MSE(Kval, yval, l, alpha)
        acc_lambdaval = accuracy(yval,Kval@alpha, mode="KRR")

        print(f"***********lambda = {l}***********")
        print(f"Training: loss = {loss_lambda:.4f}, accuracy = {acc_lambda:.6f}")
        print(f"Validation: loss = {loss_lambdaval:.4f}, accuracy = {acc_lambdaval:.6f}")
        
        loss += [loss_lambda]
        acc += [acc_lambda]
        
        loss_val += [loss_lambdaval]
        acc_val += [acc_lambdaval]
        
        
        alphas +=[alpha]
        
    return(alphas, loss, acc, loss_val, acc_val)
    

In [224]:
def MSE(K, y, lambd, alpha):
    n = y.shape[0]
    data_term = (np.linalg.norm(np.dot(K, alpha.reshape(-1,1)) - y)**2)/n
    reg_term = alpha @ K @ alpha
    return(data_term + lambd * reg_term)

## Kernel Logistic Regression

- Binary Classificaiton setup: $\mathcal Y = \{-1, 1\}$
- $\mathcal l_{\text{logistic}}(f(x),y) = -\log p(y|f(x)) = \log(1 + e^{-yf(x)})$ where $p(y|f(x)) = \sigma(y(f(x))$

Objective:
\begin{align*}
\hat f &= \text{argmin}_{f\in \mathcal H} \frac{1}{n} \sum_{i=1}^n \log(1+e^{-y_if(x_i)}) + \frac{\lambda}{2}||f||^2_{\mathcal H}\\
\alpha &= \text{argmin}_{\alpha \in \mathbb R^n} \frac{1}{n} \sum_{i=1}^n \log(1+e^{-y_i[K\alpha]_i}) + \frac{\lambda}{2} \alpha^T K \alpha
\end{align*}

We define the following fonctions and vectors:
* $\mathcal l _\text{logistic}(u) = \log(1+e^{-u})$
* $\mathcal l' _\text{logistic}(u) = -\sigma(-u)$
* $\mathcal l'' _\text{logistic}(u) = \sigma(u)\sigma(-u)$

* for $i = 1, \dots, n$, $P_i(\alpha) = \mathcal l' _\text{logistic}(y_i[K\alpha]_i)$
* for $i = 1, \dots, n$, $W_i(\alpha) = \mathcal l'' _\text{logistic}(y_i[K\alpha]_i)$




\begin{align*}
J(\alpha) &= \frac{1}{n} \sum_{i=1}^n \log(1+e^{-y_i[K\alpha]_i}) + \frac{\lambda}{2} \alpha^T K \alpha\\
\nabla J(\alpha) &= \frac{1}{n} KP(\alpha) y + \lambda K \alpha \quad \text{where } P(\alpha) = \text{diag}(P_1(\alpha), \dots, P_n(\alpha))\\
\nabla^2 J(\alpha) &= \frac{1}{n}KW(\alpha)K+\lambda K \quad \text{where } W(\alpha) = \text{diag}(W_1(\alpha), \dots, W_n(\alpha))
\end{align*}

We are interested in the quadratic approximation of $J$ near a point $\alpha_0$:
\begin{align*}
J_q(\alpha) &= J(\alpha_0) + (\alpha - \alpha_0)^T \nabla J(\alpha_0) + \frac{1}{2} (\alpha - \alpha_0)^T \nabla^2 J(\alpha_0)(\alpha - \alpha_0)\\
2J_q(\alpha) &= -\frac{2}{n} \alpha^T KW(K\alpha_0-W^{-1}Py)+\frac{1}{n}\alpha^TKWK\alpha+ \lambda\alpha^TK\alpha +C\\
&= \frac{1}{n} (K\alpha - z)^TW(K\alpha - z) + \lambda\alpha^TK\alpha + C \quad \text{where} z = K\alpha_0 - W^{-1} P y
\end{align*}

The WKRR problem is presented as:
$$
\text{argmin}_{\alpha \in \mathbb R^n} \frac{1}{n}(K\alpha - y)^TW(K\alpha - y) + \lambda \alpha^TK\alpha
$$
and has as solution:
$$
\alpha = W^{1/2} (W^{1/2}KW^{1/2}+n\lambda I)^{-1} W^{1/2}y
$$

So, in order to solve KRL, we use IRLS on a WKRR problem until convergence:
$$\alpha^{t+1} \gets \text{solveWKRR}(K, W^t, z^t)$$
With the updates for $W^t$ and $z^t$ from $\alpha^t$ are:
- $m_i \gets [K\alpha^t]_i$
- $P_i^t \gets -\sigma(-y_im_i)$
- $W_i^t \gets \sigma(m_i)\sigma(-m_i)$
- $z_i^t \gets m_i + y_i / \sigma(-y_im_i)$

In [23]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

def logistic_loss(y_true, y_pred):
    n = y_true.shape[0]
    log_term = np.log(sigmoid(y_true*y_pred))
    return(-np.sum(log_term)/n)
    

In [24]:
def KLR(K, y, Kval, yval, lambd, maxIter = 100, tresh = 1e-8):
    
    # initialize the values
    assert K.shape[0] == y.shape[0]
    n = K.shape[0]
    
    y_ = np.ones(n)
    yval_ = np.ones(n)
    
    y_[y == 0] = -1
    yval_[yval == 0] = -1
    
    
    loss = []
    acc = []
    
    loss_val = []
    acc_val = []
    
    
    alphas = []
    
    for l in lambd :
        cnt = 0
        
        P_t, W_t = np.eye(n), np.eye(n)
        z_t = K@ np.ones(n) - y_
        alpha_t = np.ones(n)
        diff_alpha = np.inf


        while (diff_alpha > tresh) and (cnt < maxIter):

            old_alpha = alpha_t
            alpha_t = solveWKRR(K, W_t, z_t, y_, l)

            m_t = K@alpha_t
            sigma_m = sigmoid(m_t)
            sigma_my = sigmoid(-y_*m_t)

            P_t = - np.diag(sigma_my)
            W_t = np.diag(sigma_m * (1-sigma_m))

            z_t = m_t - (P_t@y_)/(sigma_m * (1-sigma_m))

            diff_alpha = np.linalg.norm(alpha_t - old_alpha)
            cnt+=1
            if cnt % 10 == 0:
                print(l, cnt)
        
        loss_lambda = logistic_loss(y_, K@alpha_t)
        acc_lambda = accuracy(y,K@alpha_t, mode="SVM")
        
        loss_lambdaval = logistic_loss(yval_, Kval@alpha_t)
        acc_lambdaval = accuracy(yval,Kval@alpha_t, mode="SVM")

        
        print(f"***********lambda = {l}***********")
        print(f"Training: loss = {loss_lambda:.4f}, accuracy = {acc_lambda:.6f}")
        print(f"Validation: loss = {loss_lambdaval:.4f}, accuracy = {acc_lambdaval:.6f}")
        
        
        loss += [loss_lambda]
        acc += [acc_lambda]
        
        loss_val += [loss_lambdaval]
        acc_val += [acc_lambdaval]
        
        alphas +=[alpha_t]
        
    return(alphas, loss, acc, loss_val, acc_val)
        

## Support Vector Machine approach (SVM)

- Binary Classificaiton setup: $\mathcal Y = \{-1, 1\}$
- $\mathcal l_{\text{hinge}}(f(x),y) = \max(1- y f(x), 0)$

Objective:
\begin{align*}
\hat f &= \text{argmin}_{f\in \mathcal H} \frac{1}{n} \sum_{i=1}^n \max(1- y_i f(x_i), 0) + \lambda||f||^2_{\mathcal H}\\
\alpha &= \text{argmin}_{\alpha \in \mathbb R^n} \frac{1}{n} \sum_{i=1}^n \max(y_i[K\alpha]_i, 0) + \lambda \alpha^T K \alpha
\end{align*}

It is a convex optimization problem but the objective is not smooth.

By introducing additional slack variables $\xi_i$, the problem's objective becomes smooth but it is not the case for the constraints anymore. Let us solve the dual formulation instead (which is sparse, leading to faster algorithms). 

The dual can be rewritten as a quadratic minimization under box constraints : 

\begin{align*}
\min_{\alpha \in \mathbb R^n} \frac{1}{2} \alpha^TK\alpha - \alpha^T y\\
\text{s.t. }  0\leq y_i\alpha_i\leq \frac{1}{2\lambda n}
\end{align*}


We will solve it using CVXOpt tools

In [25]:
def hinge_loss(y_true, y_pred):
    n = y_true.shape[0]
    term = np.maximum(1-y_true*y_pred, 0)
    return(np.sum(term)/n)

In [26]:
## Je pense qu'une plus belle façon de faire serait de créer des fonctions de 
## kernel(X, sigma) et de les appeler avec en paramètres (X_train ou X_val) selon si on 
## fait le training ou la validation, pour pas avoir à garder en mémoire les kernels train/val
## comme on le fait jusqu'à maintenant
## Mais bon, là il est 2h47 du matin, j'ai un peu la flemme et j'imagine que toi aussi,
## ça marche déjà bien comme ça ^^

def _gaussian_kernel(sigma=1):
    """
    Prepares a Gaussian RBF kernel using the provided sigma.

    Returns:
    --------
    kernel_function: A callable to the Gaussian RBF kernel function.

    """
    gamma = -1 / (2 * sigma ** 2)
    kernel_function = lambda X, y: np.exp(gamma * np.square(X[:, np.newaxis] - y).sum(axis=2))
    return kernel_function

In [27]:
from cvxopt import matrix, solvers

def SVM(K, y, K_val, y_val, lambd):
    # takes y with values in 0, 1 which need to be turnt into -1,1
    # initialize the values
    assert K.shape[0] == y.shape[0]
    n = K.shape[0]
    
    y_ = np.ones(n)
    yval_ = np.ones(n)
    
    y_[y == 0] = -1
    yval_[y_val == 0] = -1
    
    y_preds, y_preds_val = [], []
    losses, losses_val = [], []
    accuracies, accuracies_val = [], []
    alphas = []
    
    
    for l in lambd :

        ## Solving dual using CVXOpt
        P = matrix(K)
        q = matrix(-y_)
        D = np.diag(-y_)
        G = matrix(np.vstack((D,-D)))
        h = matrix(np.concatenate((np.zeros(n), 1/(2*l*n) * np.ones(n)), axis=0))
        solvers.options['show_progress'] = False
        sol=solvers.qp(P, q, G, h)
        alpha = sol['x']
        alpha = np.reshape(alpha,-1)               

        ## predictions
        # training
        pred_l = K @ alpha
        y_preds += [pred_l]
        loss_l = hinge_loss(y_, pred_l)
        acc_l = accuracy(y, pred_l, mode="SVM")

        
        # validation
        pred_l_val = K_val@alpha
        y_preds_val += [pred_l_val]
        loss_l_val = hinge_loss(yval_, pred_l_val)
        acc_l_val = accuracy(y_val,pred_l_val, mode="SVM")
        

        print(15*"-", f" lambda = {l} ", 15*"-")
        print(f"Training: loss = {loss_l:.6f}, accuracy = {acc_l:.6f}")
        print(f"Validation: loss = {loss_l_val:.6f}, accuracy = {acc_l_val:.6f}")
        
        losses += [loss_l]
        accuracies += [acc_l]
        
        losses_val += [loss_l_val]
        accuracies_val += [acc_l_val]
    
        alphas +=[alpha] 
        
    return(alphas, losses, accuracies, losses_val, accuracies_val)
            

# Testing the accuracy

## Splitting data

In [230]:
from sklearn.model_selection import train_test_split

Xtr0, Xval0, ytr0, yval0 = train_test_split(Xtr0_mat100, Ytr0, test_size=0.5, random_state=42)
Xtr1, Xval1, ytr1, yval1 = train_test_split(Xtr1_mat100, Ytr1, test_size=0.5, random_state=42)
Xtr2, Xval2, ytr2, yval2 = train_test_split(Xtr2_mat100, Ytr2, test_size=0.5, random_state=42)

## Create the kernel matrices

In [231]:
K_tr0_ln, K_val0_ln = linear_kernel(Xtr0, Xval0)
K_tr1_ln, K_val1_ln = linear_kernel(Xtr1, Xval1)
K_tr2_ln, K_val2_ln = linear_kernel(Xtr2, Xval2)

In [232]:
K_tr0, K_val0 = gaussian_kernel(Xtr0, Xval0)
K_tr1, K_val1 = gaussian_kernel(Xtr1, Xval1)
K_tr2, K_val2 = gaussian_kernel(Xtr2, Xval2)

In [271]:
K_tr0_poly, K_val0_poly = polynomial_kernel(Xtr0, Xval0, d=3, c=1)
K_tr1_poly, K_val1_poly = polynomial_kernel(Xtr1, Xval1, d=3, c=1)
K_tr2_poly, K_val2_poly = polynomial_kernel(Xtr2, Xval2, d=3, c=1)

## Testing KRR

### Gaussian Kernel

In [233]:
lambdas = [0] + [10**i for i in range(-10,2)]
print("************* KRR for dataset 0*************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = KRR(K_tr0, ytr0[:,1], K_val0, yval0[:,1], lambdas)
print("************* KRR for dataset 1*************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = KRR(K_tr1, ytr1[:,1], K_val1, yval1[:,1],lambdas)
print("************* KRR for dataset 2*************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = KRR(K_tr2, ytr2[:,1], K_val2, yval2[:,1],lambdas)

************* KRR for dataset 0*************

***********lambda = 0***********
Training: loss = 497.5500, accuracy = 1.000000
Validation: loss = 310.6197, accuracy = 0.570000
***********lambda = 1e-10***********
Training: loss = 497.5499, accuracy = 1.000000
Validation: loss = 310.6197, accuracy = 0.570000
***********lambda = 1e-09***********
Training: loss = 497.5491, accuracy = 1.000000
Validation: loss = 310.6196, accuracy = 0.570000
***********lambda = 1e-08***********
Training: loss = 497.5411, accuracy = 1.000000
Validation: loss = 310.6186, accuracy = 0.570000
***********lambda = 1e-07***********
Training: loss = 497.4610, accuracy = 1.000000
Validation: loss = 310.6092, accuracy = 0.570000
***********lambda = 1e-06***********
Training: loss = 496.6629, accuracy = 1.000000
Validation: loss = 310.5155, accuracy = 0.570000
***********lambda = 1e-05***********
Training: loss = 488.9667, accuracy = 1.000000
Validation: loss = 309.6093, accuracy = 0.573000
***********lambda = 0.0001*

### Linear Kernel

In [234]:
lambdas = [0] + [10**i for i in range(-10,2)]
print("************* KRR for dataset 0*************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = KRR(K_tr0_ln, ytr0[:,1], K_val0_ln, yval0[:,1], lambdas)
print("************* KRR for dataset 1*************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = KRR(K_tr1_ln, ytr1[:,1], K_val1_ln, yval1[:,1],lambdas)
print("************* KRR for dataset 2*************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = KRR(K_tr2_ln, ytr2[:,1], K_val2_ln, yval2[:,1],lambdas)

************* KRR for dataset 0*************

***********lambda = 0***********
Training: loss = 2797814.3402, accuracy = 0.479000
Validation: loss = 3080449.7376, accuracy = 0.501000
***********lambda = 1e-10***********
Training: loss = 507.2335, accuracy = 0.545000
Validation: loss = 535.5736, accuracy = 0.506000
***********lambda = 1e-09***********
Training: loss = 507.2331, accuracy = 0.545000
Validation: loss = 535.5728, accuracy = 0.506000
***********lambda = 1e-08***********
Training: loss = 507.2330, accuracy = 0.545000
Validation: loss = 535.5728, accuracy = 0.506000
***********lambda = 1e-07***********
Training: loss = 507.2330, accuracy = 0.545000
Validation: loss = 535.5728, accuracy = 0.506000
***********lambda = 1e-06***********
Training: loss = 507.2329, accuracy = 0.545000
Validation: loss = 535.5726, accuracy = 0.506000
***********lambda = 1e-05***********
Training: loss = 507.2317, accuracy = 0.545000
Validation: loss = 535.5713, accuracy = 0.506000
***********lambda =

### Polynomial Kernel

In [272]:
lambdas = [0] + [10**i for i in range(-10,2)]
print("************* KRR for dataset 0*************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = KRR(K_tr0_poly, ytr0[:,1], K_val0_poly, yval0[:,1], lambdas)
print("************* KRR for dataset 1*************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = KRR(K_tr1_poly, ytr1[:,1], K_val1_poly, yval1[:,1],lambdas)
print("************* KRR for dataset 2*************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = KRR(K_tr2_poly, ytr2[:,1], K_val2_poly, yval2[:,1],lambdas)

************* KRR for dataset 0*************

***********lambda = 0***********
Training: loss = 497.5500, accuracy = 1.000000
Validation: loss = 406.5435, accuracy = 0.500000
***********lambda = 1e-10***********
Training: loss = 497.5500, accuracy = 1.000000
Validation: loss = 406.5435, accuracy = 0.500000
***********lambda = 1e-09***********
Training: loss = 497.5500, accuracy = 1.000000
Validation: loss = 406.5435, accuracy = 0.500000
***********lambda = 1e-08***********
Training: loss = 497.5500, accuracy = 1.000000
Validation: loss = 406.5435, accuracy = 0.500000
***********lambda = 1e-07***********
Training: loss = 497.5500, accuracy = 1.000000
Validation: loss = 406.5435, accuracy = 0.500000
***********lambda = 1e-06***********
Training: loss = 497.5500, accuracy = 1.000000
Validation: loss = 406.5435, accuracy = 0.500000
***********lambda = 1e-05***********
Training: loss = 497.5500, accuracy = 1.000000
Validation: loss = 406.5435, accuracy = 0.500000
***********lambda = 0.0001*

### Conclusion : gaussian is better

## Testing KLR

### Gaussian Kernel

In [235]:
lambdas = [0.001, 0.01, 0.1]

print("*************KLR for dataset 0*************\n")
alphas_tr0_klr, loss_tr0_klr, acc_0_klr, loss_val0_klr, acc_val0_klr = KLR(K_tr0, ytr0[:,1], K_val0, yval0[:,1], lambdas, tresh=1e-5)
print("*************KLR for dataset 1*************\n")
alphas_tr1_klr, loss_tr1_klr, acc_1_klr, loss_val1_klr, acc_val1_klr = KLR(K_tr1, ytr1[:,1], K_val1, yval1[:,1], lambdas, tresh=1e-5)
print("*************KLR for dataset 2*************\n")
alphas_tr2_klr, loss_tr2_klr, acc_2_klr, loss_val2_klr, acc_val2_klr = KLR(K_tr2, ytr2[:,1], K_val2, yval2[:,1], lambdas, tresh=1e-5)


*************KLR for dataset 0*************

***********lambda = 0.001***********
Training: loss = 0.6039, accuracy = 0.891000
Validation: loss = 0.6708, accuracy = 0.568000
***********lambda = 0.01***********
Training: loss = 0.6752, accuracy = 0.691000
Validation: loss = 0.6861, accuracy = 0.543000
***********lambda = 0.1***********
Training: loss = 0.6906, accuracy = 0.540000
Validation: loss = 0.6922, accuracy = 0.503000
*************KLR for dataset 1*************

***********lambda = 0.001***********
Training: loss = 0.6160, accuracy = 0.891000
Validation: loss = 0.6812, accuracy = 0.568000
***********lambda = 0.01***********
Training: loss = 0.6810, accuracy = 0.769000
Validation: loss = 0.6907, accuracy = 0.543000
***********lambda = 0.1***********
Training: loss = 0.6918, accuracy = 0.718000
Validation: loss = 0.6929, accuracy = 0.536000
*************KLR for dataset 2*************

***********lambda = 0.001***********
Training: loss = 0.5748, accuracy = 0.866000
Validation: los

### Linear Kernel

In [236]:
lambdas = [0.001, 0.01, 0.1]

print("*************KLR for dataset 0*************\n")
alphas_tr0_klr, loss_tr0_klr, acc_0_klr, loss_val0_klr, acc_val0_klr = KLR(K_tr0_ln, ytr0[:,1], K_val0_ln, yval0[:,1], lambdas, tresh=1e-5)
print("*************KLR for dataset 1*************\n")
alphas_tr1_klr, loss_tr1_klr, acc_1_klr, loss_val1_klr, acc_val1_klr = KLR(K_tr1_ln, ytr1[:,1], K_val1_ln, yval1[:,1], lambdas, tresh=1e-5)
print("*************KLR for dataset 2*************\n")
alphas_tr2_klr, loss_tr2_klr, acc_2_klr, loss_val2_klr, acc_val2_klr = KLR(K_tr2_ln, ytr2[:,1], K_val2_ln, yval2[:,1], lambdas, tresh=1e-5)


*************KLR for dataset 0*************

***********lambda = 0.001***********
Training: loss = 0.6290, accuracy = 0.677000
Validation: loss = 0.6792, accuracy = 0.560000
***********lambda = 0.01***********
Training: loss = 0.6316, accuracy = 0.678000
Validation: loss = 0.6777, accuracy = 0.561000
***********lambda = 0.1***********
Training: loss = 0.6469, accuracy = 0.674000
Validation: loss = 0.6747, accuracy = 0.571000
*************KLR for dataset 1*************

***********lambda = 0.001***********
Training: loss = 0.6389, accuracy = 0.658000
Validation: loss = 0.6862, accuracy = 0.559000
***********lambda = 0.01***********
Training: loss = 0.6409, accuracy = 0.663000
Validation: loss = 0.6849, accuracy = 0.560000
***********lambda = 0.1***********
Training: loss = 0.6532, accuracy = 0.652000
Validation: loss = 0.6830, accuracy = 0.559000
*************KLR for dataset 2*************

***********lambda = 0.001***********
Training: loss = 0.5810, accuracy = 0.740000
Validation: los

### Polynomial kernel

In [273]:
lambdas = [0.001, 0.01, 0.1]

print("*************KLR for dataset 0*************\n")
alphas_tr0_klr, loss_tr0_klr, acc_0_klr, loss_val0_klr, acc_val0_klr = KLR(K_tr0_poly, ytr0[:,1], K_val0_poly, yval0[:,1], lambdas, tresh=1e-5)
print("*************KLR for dataset 1*************\n")
alphas_tr1_klr, loss_tr1_klr, acc_1_klr, loss_val1_klr, acc_val1_klr = KLR(K_tr1_poly, ytr1[:,1], K_val1_poly, yval1[:,1], lambdas, tresh=1e-5)
print("*************KLR for dataset 2*************\n")
alphas_tr2_klr, loss_tr2_klr, acc_2_klr, loss_val2_klr, acc_val2_klr = KLR(K_tr2_poly, ytr2[:,1], K_val2_poly, yval2[:,1], lambdas, tresh=1e-5)


*************KLR for dataset 0*************

***********lambda = 0.001***********
Training: loss = 0.3133, accuracy = 1.000000
Validation: loss = 0.6800, accuracy = 0.571000
***********lambda = 0.01***********
Training: loss = 0.3133, accuracy = 1.000000
Validation: loss = 0.6800, accuracy = 0.571000
***********lambda = 0.1***********
Training: loss = 0.3135, accuracy = 1.000000
Validation: loss = 0.6800, accuracy = 0.571000
*************KLR for dataset 1*************

***********lambda = 0.001***********
Training: loss = 0.3133, accuracy = 1.000000
Validation: loss = 0.6816, accuracy = 0.556000
***********lambda = 0.01***********
Training: loss = 0.3133, accuracy = 1.000000
Validation: loss = 0.6816, accuracy = 0.556000
***********lambda = 0.1***********
Training: loss = 0.3135, accuracy = 1.000000
Validation: loss = 0.6816, accuracy = 0.556000
*************KLR for dataset 2*************

***********lambda = 0.001***********
Training: loss = 0.3133, accuracy = 1.000000
Validation: los

## Testing SVM

### Gaussian Kernel

In [237]:
lambdas = [10**i for i in range(-10, 0)]
print("************* SVM for dataset 0 *************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = SVM(K_tr0, ytr0[:,1], K_val0, yval0[:,1], lambdas)

print("")
print("")
print("************* SVM for dataset 1 *************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = SVM(K_tr1, ytr1[:,1], K_val1, yval1[:,1],lambdas)

print("")
print("")
print("************* SVM for dataset 2 *************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = SVM(K_tr2, ytr2[:,1], K_val2, yval2[:,1],lambdas)

************* SVM for dataset 0 *************

---------------  lambda = 1e-10  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.907278, accuracy = 0.558000
---------------  lambda = 1e-09  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.907282, accuracy = 0.558000
---------------  lambda = 1e-08  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.907276, accuracy = 0.558000
---------------  lambda = 1e-07  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.907280, accuracy = 0.558000
---------------  lambda = 1e-06  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.907283, accuracy = 0.558000
---------------  lambda = 1e-05  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.907277, accuracy = 0.558000
---------------  lambda = 0.0001  ---------------
Training: loss = 0.002915, accura

### Linear Kernel

In [238]:
lambdas = [10**i for i in range(-10, 0)]
print("************* SVM for dataset 0 *************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = SVM(K_tr0_ln, ytr0[:,1], K_val0_ln, yval0[:,1], lambdas)

print("")
print("")
print("************* SVM for dataset 1 *************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = SVM(K_tr1_ln, ytr1[:,1], K_val1_ln, yval1[:,1],lambdas)

print("")
print("")
print("************* SVM for dataset 2 *************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = SVM(K_tr2_ln, ytr2[:,1], K_val2_ln, yval2[:,1],lambdas)

************* SVM for dataset 0 *************

---------------  lambda = 1e-10  ---------------
Training: loss = 0.716006, accuracy = 0.691000
Validation: loss = 0.944198, accuracy = 0.567000
---------------  lambda = 1e-09  ---------------
Training: loss = 0.716006, accuracy = 0.691000
Validation: loss = 0.944198, accuracy = 0.567000
---------------  lambda = 1e-08  ---------------
Training: loss = 0.716006, accuracy = 0.691000
Validation: loss = 0.944198, accuracy = 0.567000
---------------  lambda = 1e-07  ---------------
Training: loss = 0.716006, accuracy = 0.691000
Validation: loss = 0.944198, accuracy = 0.567000
---------------  lambda = 1e-06  ---------------
Training: loss = 0.716006, accuracy = 0.691000
Validation: loss = 0.944198, accuracy = 0.567000
---------------  lambda = 1e-05  ---------------
Training: loss = 0.716006, accuracy = 0.691000
Validation: loss = 0.944198, accuracy = 0.567000
---------------  lambda = 0.0001  ---------------
Training: loss = 0.716007, accura

### Polynomial Kernel

In [274]:
lambdas = [10**i for i in range(-10, 0)]
print("************* SVM for dataset 0 *************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = SVM(K_tr0_poly, ytr0[:,1], K_val0_poly, yval0[:,1], lambdas)

print("")
print("")
print("************* SVM for dataset 1 *************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = SVM(K_tr1_poly, ytr1[:,1], K_val1_poly, yval1[:,1],lambdas)

print("")
print("")
print("************* SVM for dataset 2 *************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = SVM(K_tr2_poly, ytr2[:,1], K_val2_poly, yval2[:,1],lambdas)

************* SVM for dataset 0 *************

---------------  lambda = 1e-10  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934925, accuracy = 0.569000
---------------  lambda = 1e-09  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934940, accuracy = 0.569000
---------------  lambda = 1e-08  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934915, accuracy = 0.569000
---------------  lambda = 1e-07  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934917, accuracy = 0.569000
---------------  lambda = 1e-06  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934881, accuracy = 0.569000
---------------  lambda = 1e-05  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934884, accuracy = 0.569000
---------------  lambda = 0.0001  ---------------
Training: loss = 0.000000, accura

## Making predictions

### First create the kernels for each testing set with the chosen parameters

In [241]:
Xte0 = np.genfromtxt("data/Xte0_mat100.csv", delimiter='')
Xte1 = np.genfromtxt("data/Xte1_mat100.csv", delimiter='')
Xte2 = np.genfromtxt("data/Xte2_mat100.csv", delimiter='')

Please make sure to use the same parameters as those that were used to create the initial kernel.

In [256]:
K_te0 = gaussian_kernel(Xtr0, Xte0, mode="test")
K_te1 = gaussian_kernel(Xtr1, Xte1, mode="test")
K_te2 = gaussian_kernel(Xtr2, Xte2, mode="test")

In [275]:
K_te0_ln = linear_kernel(Xtr0, Xte0, scale=True, mode="test")
K_te1_ln = linear_kernel(Xtr1, Xte1, scale=True, mode="test")
K_te2_ln = linear_kernel(Xtr2, Xte2, scale=True, mode="test")

In [276]:
K_te0_poly = polynomial_kernel(Xtr0, Xte0, d=3, c=1, mode="test")
K_te1_poly = polynomial_kernel(Xtr1, Xte1, d=3, c=1, mode="test")
K_te2_poly = polynomial_kernel(Xtr2, Xte2, d=3, c=1, mode="test")

In [259]:
def write_predictions_csv(test_kernels, test_alphas, path):
    
    predictions = np.zeros(3000, dtype=int)
    
    for i in range(3):
        y_pred = test_kernels[i] @ test_alphas[i]
        y_pred[y_pred >= 0.5] = 1
        y_pred[y_pred < 0.5] = 0
        
        predictions[1000*i:1000*(i+1)] = y_pred
    
    #predictions = predictions.astype(int)
    pred = pd.DataFrame({"Bound" : predictions})
    pred.to_csv(path, index=True,index_label="Id")
    print("saving predictions")
    #np.savetxt("data/Ytest_KRR.csv", predictions, header = "Id, Bound", delimiter =",")
    print("saved predictions")
    return(predictions)
        

Example

In [260]:
test_kernels = [K_te0, K_te1, K_te2]
#test_alphas = [alphas_tr0[-4], alphas_tr1[-4], alphas_tr2[-3]] # il faut choisir l'alpha associé à un bon lambda!
test_alphas = [alphas_tr0_klr[0], alphas_tr1_klr[0], alphas_tr2_klr[0]]
write_predictions_csv(test_kernels, test_alphas, path ="data/Ytest_KLR.csv")

saving predictions
saved predictions


array([0, 0, 0, ..., 0, 0, 0])