In [3]:
import numpy as np
import pandas as pd
import time

# Load the data

For $k = 0, 1, 2$ we have the following files:
* Xtrk.csv - the training sequences.
* Xtek.csv - the test sequences.
* Ytrk.csv - labels for the training sequences

In [4]:
Xtr0_mat100 = np.genfromtxt("data/Xtr0_mat100.csv", delimiter='')
Ytr0 = np.genfromtxt("data/Ytr0.csv", delimiter=',', skip_header=1)

Xtr1_mat100 = np.genfromtxt("data/Xtr1_mat100.csv", delimiter='')
Ytr1 = np.genfromtxt("data/Ytr1.csv", delimiter=',', skip_header=1)

Xtr2_mat100 = np.genfromtxt("data/Xtr2_mat100.csv", delimiter='')
Ytr2 = np.genfromtxt("data/Ytr2.csv", delimiter=',', skip_header=1)


In [5]:
def accuracy(y_true,y_pred, mode='SVM'):
    n = y_true.shape[0]
    if mode == 'SVM':
        predictions = np.ones(n)
        predictions[y_pred < 0] = 0
    else:
        predictions = np.zeros(n)
        predictions[y_pred >= 0.5] = 1
    
    return np.sum(y_true == predictions) / n

# Implementing some kernels

## Linear Kernel

In [6]:
def linear_kernel(X_train, X_valid, scale=True, mode="train"):
    
    if scale:
        X_tr = (X_train-X_train.mean(axis=0)) / X_train.std(axis=0)
        X_va = (X_valid-X_train.mean(axis=0)) / X_train.std(axis=0)
        
        K_va = X_va @ X_tr.T
        
        if mode == "test":
            return(K_va)
        
        K_tr = X_tr @ X_tr.T
        
    else:
        K_va = X_valid @ X_train.T
        
        if mode == "test":
            return(K_va)
        
        K_tr = X_train @ X_train.T
        
    return(K_tr, K_va)

## Gaussian Kernel

In [20]:
# Idea : efficient computation of the pairwise distances
def gaussian_kernel(X_train, X_valid, sigma=None, scale=True, mode="train"):
    
    n, p = X_train.shape
    
    if scale:
        X_tr = (X_train-X_train.mean(axis=0)) / X_train.std(axis=0)
        X_va = (X_valid-X_train.mean(axis=0)) / X_train.std(axis=0)
    else:
        X_tr = np.copy(X_train)
        X_va = np.copy(X_valid)
    if sigma is None:
        sigma = p
        
    K_va = np.linalg.norm(X_va[:, None, ...] - X_tr[None, ...], axis=-1)**2
    K_va = np.exp((-K_va)/(sigma))
    
    if mode=="test":
        return(K_va)
    
    else:
        
        K_tr = ((X_tr[:, :, None] - X_tr[:, :, None].T) ** 2).sum(1)
        K_tr = np.exp((-K_tr)/(sigma))
        return(K_tr, K_va)


## Polynomial Kernel

In [8]:
def polynomial_kernel(X_train, X_valid, d=3, c=1, scale=True, mode="train"):
    
    # k(x,y) = (<x,y> + c)**d
    if scale:
        
        X_tr = (X_train-X_train.mean(axis=0)) / X_train.std(axis=0)
        X_va = (X_valid-X_train.mean(axis=0)) / X_train.std(axis=0)
        
        K_va = X_va @ X_tr.T + c
        K_va = K_va**d
        
        if mode=="test":
            return(K_va)
        
        K_tr = X_tr @ X_tr.T + c
        K_tr = K_tr**d
    
        
    else:
        K_va = X_valid @ X_train.T + c
        K_va = K_va**d
        
        if mode=="test":
            return(K_va)
        
        K_tr = X_train @ X_train.T + c
        K_tr = K_tr**d
        
    return(K_tr, K_va)



## Kernel Ridge Regression

* Consider RKHS $\mathcal H$, associated to a p.d. kernel K on $\mathcal X$
* Let $y = (y_1, \dots, y_n)^T \in \mathbb R ^n$
* Let $\alpha = (\alpha_1, \dots, \alpha_n)^T \in \mathbb R ^n$
* Let $K$ be the $n\times n$ Gram Matrix such that $K_{i,j} = K(x_i, x_j)$
* We can then write
$$
(\hat f(x_1), \dots, \hat f(x_n))^T = K\alpha
$$
* The norm is $||\hat f||^2_{\mathcal H} = \alpha^T K \alpha$
* KRR $\leftrightarrow \text{argmin}_{\alpha \in \mathbb R^n} \frac{1}{n} (K\alpha - y)^T(K\alpha - y) + \lambda \alpha^T K \alpha$
* Solution for $\lambda > 0$:
$$
\alpha = (K+\lambda nI)^{-1}y
$$


In [9]:
def KRR(K, y, Kval, yval, lambd):
    """
    takes the kernel matrix as an input and computes the MSE and the predictions for each value in lambd (list)
    """
    assert K.shape[0] == y.shape[0]
    assert len(lambd) > 0
    n = K.shape[0]
    
    loss = []
    acc = []
    
    loss_val = []
    acc_val = []
    alphas = []
    
    for l in lambd:
        
        assert l >= 0
        # find the parameter alpha
        alpha = np.linalg.solve((K + l*n*np.eye(n)), y)
        # predict
        
        loss_lambda = MSE(K, y, l, alpha)
        acc_lambda = accuracy(y,K@alpha, mode="KRR")
        
        loss_lambdaval = MSE(Kval, yval, l, alpha, valid=True)
        acc_lambdaval = accuracy(yval,Kval@alpha, mode="KRR")

        print(f"***********lambda = {l}***********")
        print(f"Training: loss = {loss_lambda:.4f}, accuracy = {acc_lambda:.6f}")
        print(f"Validation: loss = {loss_lambdaval:.4f}, accuracy = {acc_lambdaval:.6f}")
        
        loss += [loss_lambda]
        acc += [acc_lambda]
        
        loss_val += [loss_lambdaval]
        acc_val += [acc_lambdaval]
        
        
        alphas +=[alpha]
        
    return(alphas, loss, acc, loss_val, acc_val)
    

In [10]:
def MSE(K, y, lambd, alpha, valid=False):
    n = y.shape[0]
    data_term = (np.linalg.norm(np.dot(K, alpha.reshape(-1,1)) - y)**2)/n
    if not valid:
        data_term += alpha @ K @ alpha
    return(data_term)

## Kernel Logistic Regression

- Binary Classificaiton setup: $\mathcal Y = \{-1, 1\}$
- $\mathcal l_{\text{logistic}}(f(x),y) = -\log p(y|f(x)) = \log(1 + e^{-yf(x)})$ where $p(y|f(x)) = \sigma(y(f(x))$

Objective:
\begin{align*}
\hat f &= \text{argmin}_{f\in \mathcal H} \frac{1}{n} \sum_{i=1}^n \log(1+e^{-y_if(x_i)}) + \frac{\lambda}{2}||f||^2_{\mathcal H}\\
\alpha &= \text{argmin}_{\alpha \in \mathbb R^n} \frac{1}{n} \sum_{i=1}^n \log(1+e^{-y_i[K\alpha]_i}) + \frac{\lambda}{2} \alpha^T K \alpha
\end{align*}

We define the following fonctions and vectors:
* $\mathcal l _\text{logistic}(u) = \log(1+e^{-u})$
* $\mathcal l' _\text{logistic}(u) = -\sigma(-u)$
* $\mathcal l'' _\text{logistic}(u) = \sigma(u)\sigma(-u)$

* for $i = 1, \dots, n$, $P_i(\alpha) = \mathcal l' _\text{logistic}(y_i[K\alpha]_i)$
* for $i = 1, \dots, n$, $W_i(\alpha) = \mathcal l'' _\text{logistic}(y_i[K\alpha]_i)$




\begin{align*}
J(\alpha) &= \frac{1}{n} \sum_{i=1}^n \log(1+e^{-y_i[K\alpha]_i}) + \frac{\lambda}{2} \alpha^T K \alpha\\
\nabla J(\alpha) &= \frac{1}{n} KP(\alpha) y + \lambda K \alpha \quad \text{where } P(\alpha) = \text{diag}(P_1(\alpha), \dots, P_n(\alpha))\\
\nabla^2 J(\alpha) &= \frac{1}{n}KW(\alpha)K+\lambda K \quad \text{where } W(\alpha) = \text{diag}(W_1(\alpha), \dots, W_n(\alpha))
\end{align*}

We are interested in the quadratic approximation of $J$ near a point $\alpha_0$:
\begin{align*}
J_q(\alpha) &= J(\alpha_0) + (\alpha - \alpha_0)^T \nabla J(\alpha_0) + \frac{1}{2} (\alpha - \alpha_0)^T \nabla^2 J(\alpha_0)(\alpha - \alpha_0)\\
2J_q(\alpha) &= -\frac{2}{n} \alpha^T KW(K\alpha_0-W^{-1}Py)+\frac{1}{n}\alpha^TKWK\alpha+ \lambda\alpha^TK\alpha +C\\
&= \frac{1}{n} (K\alpha - z)^TW(K\alpha - z) + \lambda\alpha^TK\alpha + C \quad \text{where} z = K\alpha_0 - W^{-1} P y
\end{align*}

The WKRR problem is presented as:
$$
\text{argmin}_{\alpha \in \mathbb R^n} \frac{1}{n}(K\alpha - y)^TW(K\alpha - y) + \lambda \alpha^TK\alpha
$$
and has as solution:
$$
\alpha = W^{1/2} (W^{1/2}KW^{1/2}+n\lambda I)^{-1} W^{1/2}y
$$

So, in order to solve KRL, we use IRLS on a WKRR problem until convergence:
$$\alpha^{t+1} \gets \text{solveWKRR}(K, W^t, z^t)$$
With the updates for $W^t$ and $z^t$ from $\alpha^t$ are:
- $m_i \gets [K\alpha^t]_i$
- $P_i^t \gets -\sigma(-y_im_i)$
- $W_i^t \gets \sigma(m_i)\sigma(-m_i)$
- $z_i^t \gets m_i + y_i / \sigma(-y_im_i)$

We can rewrite the WKRR problem as:

$$
\text{argmin}_{\alpha \in \mathbb R^n} = \frac{1}{2}\alpha^T(\frac{2}{n} KWK + 2\lambda K)\alpha + (-\frac{2}{n}KWy)^T\alpha
$$
which is a quadratic program with no constraints that can be solved using cvxopt tools

In [11]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

def logistic_loss(y_true, y_pred):
    n = y_true.shape[0]
    log_term = np.log(sigmoid(y_true*y_pred))
    return(-np.sum(log_term)/n)
    

In [12]:
def solveWKRR(K, W_t, z_t, y_, l):
    n = K.shape[0]
    W_sq = np.sqrt(W_t)
    sol = np.linalg.solve(W_sq @ K @ W_sq + n* l * np.eye(n), W_sq@y_)
    return(W_sq @ sol)

In [13]:
def KLR(K, y, Kval, yval, lambd, maxIter = 100, tresh = 1e-8):
    
    # initialize the values
    assert K.shape[0] == y.shape[0]
    n = K.shape[0]
    n_val = Kval.shape[0]
    
    y_ = np.ones(n)
    yval_ = np.ones(n_val)
    
    y_[y == 0] = -1
    yval_[yval == 0] = -1
    
    
    loss = []
    acc = []
    
    loss_val = []
    acc_val = []
    
    
    alphas = []
    
    for l in lambd :
        cnt = 0
        
        P_t, W_t = np.eye(n), np.eye(n)
        z_t = K@ np.ones(n) - y_
        alpha_t = np.ones(n)
        diff_alpha = np.inf


        while (diff_alpha > tresh) and (cnt < maxIter):

            old_alpha = alpha_t
            
            ## Solving dual using CVXOpt
            #P = matrix(2*((K @ W_t @ K)/n + l*K))
            #q = matrix((-2*K@W_t@y_)/n)
            #solvers.options['show_progress'] = False
            #sol=solvers.qp(P, q)
            #alpha_t = sol['x']
            #alpha_t = np.reshape(alpha_t,-1)  
            
            alpha_t = solveWKRR(K, W_t, z_t, y_, l)

            m_t = K@alpha_t
            sigma_m = sigmoid(m_t)
            sigma_my = sigmoid(-y_*m_t)

            P_t = - np.diag(sigma_my)
            W_t = np.diag(sigma_m * (1-sigma_m))

            z_t = m_t - (P_t@y_)/(sigma_m * (1-sigma_m))

            diff_alpha = np.linalg.norm(alpha_t - old_alpha)
            cnt+=1
            if cnt % 10 == 0:
                print(l, cnt)
        
        loss_lambda = logistic_loss(y_, K@alpha_t)
        acc_lambda = accuracy(y,K@alpha_t, mode="SVM")
        
        loss_lambdaval = logistic_loss(yval_, Kval@alpha_t)
        acc_lambdaval = accuracy(yval,Kval@alpha_t, mode="SVM")

        
        print(f"***********lambda = {l}***********")
        print(f"Training: loss = {loss_lambda:.4f}, accuracy = {acc_lambda:.6f}")
        print(f"Validation: loss = {loss_lambdaval:.4f}, accuracy = {acc_lambdaval:.6f}")
        
        
        loss += [loss_lambda]
        acc += [acc_lambda]
        
        loss_val += [loss_lambdaval]
        acc_val += [acc_lambdaval]
        
        alphas +=[alpha_t]
        
    return(alphas, loss, acc, loss_val, acc_val)
        

## Support Vector Machine approach (SVM)

- Binary Classificaiton setup: $\mathcal Y = \{-1, 1\}$
- $\mathcal l_{\text{hinge}}(f(x),y) = \max(1- y f(x), 0)$

Objective:
\begin{align*}
\hat f &= \text{argmin}_{f\in \mathcal H} \frac{1}{n} \sum_{i=1}^n \max(1- y_i f(x_i), 0) + \lambda||f||^2_{\mathcal H}\\
\alpha &= \text{argmin}_{\alpha \in \mathbb R^n} \frac{1}{n} \sum_{i=1}^n \max(y_i[K\alpha]_i, 0) + \lambda \alpha^T K \alpha
\end{align*}

It is a convex optimization problem but the objective is not smooth.

By introducing additional slack variables $\xi_i$, the problem's objective becomes smooth but it is not the case for the constraints anymore. Let us solve the dual formulation instead (which is sparse, leading to faster algorithms). 

The dual can be rewritten as a quadratic minimization under box constraints : 

\begin{align*}
\min_{\alpha \in \mathbb R^n} \frac{1}{2} \alpha^TK\alpha - \alpha^T y\\
\text{s.t. }  0\leq y_i\alpha_i\leq \frac{1}{2\lambda n}
\end{align*}


We will solve it using CVXOpt tools

In [14]:
def hinge_loss(y_true, y_pred):
    n = y_true.shape[0]
    term = np.maximum(1-y_true*y_pred, 0)
    return(np.sum(term)/n)

In [15]:
## Je pense qu'une plus belle façon de faire serait de créer des fonctions de 
## kernel(X, sigma) et de les appeler avec en paramètres (X_train ou X_val) selon si on 
## fait le training ou la validation, pour pas avoir à garder en mémoire les kernels train/val
## comme on le fait jusqu'à maintenant
## Mais bon, là il est 2h47 du matin, j'ai un peu la flemme et j'imagine que toi aussi,
## ça marche déjà bien comme ça ^^

def _gaussian_kernel(sigma=1):
    """
    Prepares a Gaussian RBF kernel using the provided sigma.

    Returns:
    --------
    kernel_function: A callable to the Gaussian RBF kernel function.

    """
    gamma = -1 / (2 * sigma ** 2)
    kernel_function = lambda X, y: np.exp(gamma * np.square(X[:, np.newaxis] - y).sum(axis=2))
    return kernel_function

In [16]:
from cvxopt import matrix, solvers

def SVM(K, y, K_val, y_val, lambd):
    # takes y with values in 0, 1 which need to be turnt into -1,1
    # initialize the values
    assert K.shape[0] == y.shape[0]
    n = K.shape[0]
    n_val = K_val.shape[0]
    
    y_ = np.ones(n)
    yval_ = np.ones(n_val)
    
    y_[y == 0] = -1
    yval_[y_val == 0] = -1
    
    y_preds, y_preds_val = [], []
    losses, losses_val = [], []
    accuracies, accuracies_val = [], []
    alphas = []
    
    
    for l in lambd :

        ## Solving dual using CVXOpt
        P = matrix(K)
        q = matrix(-y_)
        D = np.diag(-y_)
        G = matrix(np.vstack((D,-D)))
        h = matrix(np.concatenate((np.zeros(n), 1/(2*l*n) * np.ones(n)), axis=0))
        solvers.options['show_progress'] = False
        sol=solvers.qp(P, q, G, h)
        alpha = sol['x']
        alpha = np.reshape(alpha,-1)               

        ## predictions
        # training
        pred_l = K @ alpha
        y_preds += [pred_l]
        loss_l = hinge_loss(y_, pred_l)
        acc_l = accuracy(y, pred_l, mode="SVM")

        
        # validation
        pred_l_val = K_val@alpha
        y_preds_val += [pred_l_val]
        loss_l_val = hinge_loss(yval_, pred_l_val)
        acc_l_val = accuracy(y_val,pred_l_val, mode="SVM")
        

        print(15*"-", f" lambda = {l} ", 15*"-")
        print(f"Training: loss = {loss_l:.6f}, accuracy = {acc_l:.6f}")
        print(f"Validation: loss = {loss_l_val:.6f}, accuracy = {acc_l_val:.6f}")
        
        losses += [loss_l]
        accuracies += [acc_l]
        
        losses_val += [loss_l_val]
        accuracies_val += [acc_l_val]
    
        alphas +=[alpha] 
        
    return(alphas, losses, accuracies, losses_val, accuracies_val)
            

# Testing the accuracy on mat100

## Splitting data

In [17]:
from sklearn.model_selection import train_test_split

Xtr0, Xval0, ytr0, yval0 = train_test_split(Xtr0_mat100, Ytr0, test_size=0.2, random_state=42)
Xtr1, Xval1, ytr1, yval1 = train_test_split(Xtr1_mat100, Ytr1, test_size=0.2, random_state=42)
Xtr2, Xval2, ytr2, yval2 = train_test_split(Xtr2_mat100, Ytr2, test_size=0.2, random_state=42)

## Create the kernel matrices

In [18]:
K_tr0_ln, K_val0_ln = linear_kernel(Xtr0, Xval0)
K_tr1_ln, K_val1_ln = linear_kernel(Xtr1, Xval1)
K_tr2_ln, K_val2_ln = linear_kernel(Xtr2, Xval2)

In [21]:
K_tr0, K_val0 = gaussian_kernel(Xtr0, Xval0, sigma= 0.01, scale = False )
K_tr1, K_val1 = gaussian_kernel(Xtr1, Xval1, sigma= 0.01, scale = False)
K_tr2, K_val2 = gaussian_kernel(Xtr2, Xval2, sigma= 0.01, scale = False)

In [76]:
K_tr0_poly, K_val0_poly = polynomial_kernel(Xtr0, Xval0, d=3, c=1)
K_tr1_poly, K_val1_poly = polynomial_kernel(Xtr1, Xval1, d=3, c=1)
K_tr2_poly, K_val2_poly = polynomial_kernel(Xtr2, Xval2, d=3, c=1)

## Testing KRR

### Gaussian Kernel

In [22]:
lambdas = [0] + [10**i for i in range(-10,2)]
print("************* KRR for dataset 0*************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = KRR(K_tr0, ytr0[:,1], K_val0, yval0[:,1], lambdas)
print("************* KRR for dataset 1*************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = KRR(K_tr1, ytr1[:,1], K_val1, yval1[:,1],lambdas)
print("************* KRR for dataset 2*************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = KRR(K_tr2, ytr2[:,1], K_val2, yval2[:,1],lambdas)

************* KRR for dataset 0*************

***********lambda = 0***********
Training: loss = 1393.3497, accuracy = 1.000000
Validation: loss = 123.8880, accuracy = 0.605000
***********lambda = 1e-10***********
Training: loss = 1393.3492, accuracy = 1.000000
Validation: loss = 123.8880, accuracy = 0.605000
***********lambda = 1e-09***********
Training: loss = 1393.3442, accuracy = 1.000000
Validation: loss = 123.8880, accuracy = 0.605000
***********lambda = 1e-08***********
Training: loss = 1393.2947, accuracy = 1.000000
Validation: loss = 123.8875, accuracy = 0.605000
***********lambda = 1e-07***********
Training: loss = 1392.7998, accuracy = 1.000000
Validation: loss = 123.8833, accuracy = 0.605000
***********lambda = 1e-06***********
Training: loss = 1387.8773, accuracy = 1.000000
Validation: loss = 123.8409, accuracy = 0.602500
***********lambda = 1e-05***********
Training: loss = 1341.1438, accuracy = 1.000000
Validation: loss = 123.4334, accuracy = 0.600000
***********lambda = 

### Linear Kernel

In [78]:
lambdas = [0] + [10**i for i in range(-10,2)]
print("************* KRR for dataset 0*************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = KRR(K_tr0_ln, ytr0[:,1], K_val0_ln, yval0[:,1], lambdas)
print("************* KRR for dataset 1*************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = KRR(K_tr1_ln, ytr1[:,1], K_val1_ln, yval1[:,1],lambdas)
print("************* KRR for dataset 2*************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = KRR(K_tr2_ln, ytr2[:,1], K_val2_ln, yval2[:,1],lambdas)

************* KRR for dataset 0*************

***********lambda = 0***********
Training: loss = -263107891951975328.0000, accuracy = 0.502500
Validation: loss = 4536425.7539, accuracy = 0.491875
***********lambda = 1e-10***********
Training: loss = 8732.8964, accuracy = 0.562500
Validation: loss = 959.2869, accuracy = 0.520625
***********lambda = 1e-09***********
Training: loss = 303.9139, accuracy = 0.562500
Validation: loss = 959.2904, accuracy = 0.520625
***********lambda = 1e-08***********
Training: loss = 218.5145, accuracy = 0.562500
Validation: loss = 959.2907, accuracy = 0.520625
***********lambda = 1e-07***********
Training: loss = 217.6592, accuracy = 0.562500
Validation: loss = 959.2907, accuracy = 0.520625
***********lambda = 1e-06***********
Training: loss = 217.6509, accuracy = 0.562500
Validation: loss = 959.2900, accuracy = 0.520625
***********lambda = 1e-05***********
Training: loss = 217.6499, accuracy = 0.562500
Validation: loss = 959.2826, accuracy = 0.520625
******

### Polynomial Kernel

In [79]:
lambdas = [0] + [10**i for i in range(-10,2)]
print("************* KRR for dataset 0*************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = KRR(K_tr0_poly, ytr0[:,1], K_val0_poly, yval0[:,1], lambdas)
print("************* KRR for dataset 1*************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = KRR(K_tr1_poly, ytr1[:,1], K_val1_poly, yval1[:,1],lambdas)
print("************* KRR for dataset 2*************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = KRR(K_tr2_poly, ytr2[:,1], K_val2_poly, yval2[:,1],lambdas)

************* KRR for dataset 0*************

***********lambda = 0***********
Training: loss = 199.1552, accuracy = 1.000000
Validation: loss = 642.6999, accuracy = 0.500625
***********lambda = 1e-10***********
Training: loss = 199.1552, accuracy = 1.000000
Validation: loss = 642.6999, accuracy = 0.500625
***********lambda = 1e-09***********
Training: loss = 199.1552, accuracy = 1.000000
Validation: loss = 642.6999, accuracy = 0.500625
***********lambda = 1e-08***********
Training: loss = 199.1552, accuracy = 1.000000
Validation: loss = 642.6999, accuracy = 0.500625
***********lambda = 1e-07***********
Training: loss = 199.1552, accuracy = 1.000000
Validation: loss = 642.6999, accuracy = 0.500625
***********lambda = 1e-06***********
Training: loss = 199.1552, accuracy = 1.000000
Validation: loss = 642.6999, accuracy = 0.500625
***********lambda = 1e-05***********
Training: loss = 199.1552, accuracy = 1.000000
Validation: loss = 642.6999, accuracy = 0.500625
***********lambda = 0.0001*

### Conclusion : gaussian is better

## Testing KLR

### Gaussian Kernel

In [23]:
lambdas = [10**i for i in range(-4,1)]

print("*************KLR for dataset 0*************\n")
alphas_tr0_klr, loss_tr0_klr, acc_0_klr, loss_val0_klr, acc_val0_klr = KLR(K_tr0, ytr0[:,1], K_val0, yval0[:,1], lambdas, tresh=1e-8)
print("*************KLR for dataset 1*************\n")
alphas_tr1_klr, loss_tr1_klr, acc_1_klr, loss_val1_klr, acc_val1_klr = KLR(K_tr1, ytr1[:,1], K_val1, yval1[:,1], lambdas, tresh=1e-8)
print("*************KLR for dataset 2*************\n")
alphas_tr2_klr, loss_tr2_klr, acc_2_klr, loss_val2_klr, acc_val2_klr = KLR(K_tr2, ytr2[:,1], K_val2, yval2[:,1], lambdas, tresh=1e-8)


*************KLR for dataset 0*************

0.0001 10
***********lambda = 0.0001***********
Training: loss = 0.4589, accuracy = 0.995625
Validation: loss = 0.6579, accuracy = 0.585000
***********lambda = 0.001***********
Training: loss = 0.6184, accuracy = 0.878125
Validation: loss = 0.6681, accuracy = 0.590000
***********lambda = 0.01***********
Training: loss = 0.6791, accuracy = 0.726875
Validation: loss = 0.6865, accuracy = 0.575000
***********lambda = 0.1***********
Training: loss = 0.6914, accuracy = 0.618750
Validation: loss = 0.6923, accuracy = 0.550000
***********lambda = 1***********
Training: loss = 0.6930, accuracy = 0.584375
Validation: loss = 0.6931, accuracy = 0.532500
*************KLR for dataset 1*************

0.0001 10
***********lambda = 0.0001***********
Training: loss = 0.4651, accuracy = 0.998750
Validation: loss = 0.6645, accuracy = 0.620000
***********lambda = 0.001***********
Training: loss = 0.6312, accuracy = 0.895625
Validation: loss = 0.6788, accuracy = 0

### Linear Kernel

In [89]:
lambdas = [10**i for i in range(-4,1)]

print("*************KLR for dataset 0*************\n")
alphas_tr0_klr, loss_tr0_klr, acc_0_klr, loss_val0_klr, acc_val0_klr = KLR(K_tr0_ln, ytr0[:,1], K_val0_ln, yval0[:,1], lambdas, tresh=1e-5)
print("*************KLR for dataset 1*************\n")
alphas_tr1_klr, loss_tr1_klr, acc_1_klr, loss_val1_klr, acc_val1_klr = KLR(K_tr1_ln, ytr1[:,1], K_val1_ln, yval1[:,1], lambdas, tresh=1e-5)
print("*************KLR for dataset 2*************\n")
alphas_tr2_klr, loss_tr2_klr, acc_2_klr, loss_val2_klr, acc_val2_klr = KLR(K_tr2_ln, ytr2[:,1], K_val2_ln, yval2[:,1], lambdas, tresh=1e-5)


*************KLR for dataset 0*************

0.0001 10
***********lambda = 0.0001***********
Training: loss = 0.5748, accuracy = 0.742500
Validation: loss = 0.7005, accuracy = 0.556250
0.001 10
***********lambda = 0.001***********
Training: loss = 0.5755, accuracy = 0.742500
Validation: loss = 0.6997, accuracy = 0.555625
***********lambda = 0.01***********
Training: loss = 0.5813, accuracy = 0.745000
Validation: loss = 0.6938, accuracy = 0.563125
***********lambda = 0.1***********
Training: loss = 0.6102, accuracy = 0.730000
Validation: loss = 0.6806, accuracy = 0.566875
***********lambda = 1***********
Training: loss = 0.6607, accuracy = 0.680000
Validation: loss = 0.6834, accuracy = 0.558125
*************KLR for dataset 1*************

0.0001 10
***********lambda = 0.0001***********
Training: loss = 0.5937, accuracy = 0.720000
Validation: loss = 0.7062, accuracy = 0.543125
***********lambda = 0.001***********
Training: loss = 0.5945, accuracy = 0.720000
Validation: loss = 0.7049, acc

### Polynomial kernel

In [90]:
lambdas = [10**i for i in range(-4,1)]

print("*************KLR for dataset 0*************\n")
alphas_tr0_klr, loss_tr0_klr, acc_0_klr, loss_val0_klr, acc_val0_klr = KLR(K_tr0_poly, ytr0[:,1], K_val0_poly, yval0[:,1], lambdas, tresh=1e-5)
print("*************KLR for dataset 1*************\n")
alphas_tr1_klr, loss_tr1_klr, acc_1_klr, loss_val1_klr, acc_val1_klr = KLR(K_tr1_poly, ytr1[:,1], K_val1_poly, yval1[:,1], lambdas, tresh=1e-5)
print("*************KLR for dataset 2*************\n")
alphas_tr2_klr, loss_tr2_klr, acc_2_klr, loss_val2_klr, acc_val2_klr = KLR(K_tr2_poly, ytr2[:,1], K_val2_poly, yval2[:,1], lambdas, tresh=1e-5)


*************KLR for dataset 0*************

***********lambda = 0.0001***********
Training: loss = 0.3133, accuracy = 1.000000
Validation: loss = 0.6741, accuracy = 0.567500
***********lambda = 0.001***********
Training: loss = 0.3133, accuracy = 1.000000
Validation: loss = 0.6741, accuracy = 0.567500
***********lambda = 0.01***********
Training: loss = 0.3133, accuracy = 1.000000
Validation: loss = 0.6741, accuracy = 0.567500
***********lambda = 0.1***********
Training: loss = 0.3133, accuracy = 1.000000
Validation: loss = 0.6741, accuracy = 0.567500
***********lambda = 1***********
Training: loss = 0.3140, accuracy = 1.000000
Validation: loss = 0.6741, accuracy = 0.567500
*************KLR for dataset 1*************

***********lambda = 0.0001***********
Training: loss = 0.3133, accuracy = 1.000000
Validation: loss = 0.6826, accuracy = 0.523125
***********lambda = 0.001***********
Training: loss = 0.3133, accuracy = 1.000000
Validation: loss = 0.6826, accuracy = 0.523125
***********l

## Testing SVM

### Gaussian Kernel

In [91]:
lambdas = [10**i for i in range(-10, 0)]
print("************* SVM for dataset 0 *************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = SVM(K_tr0, ytr0[:,1], K_val0, yval0[:,1], lambdas)

print("")
print("")
print("************* SVM for dataset 1 *************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = SVM(K_tr1, ytr1[:,1], K_val1, yval1[:,1],lambdas)

print("")
print("")
print("************* SVM for dataset 2 *************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = SVM(K_tr2, ytr2[:,1], K_val2, yval2[:,1],lambdas)

************* SVM for dataset 0 *************

---------------  lambda = 1e-10  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.905722, accuracy = 0.572500
---------------  lambda = 1e-09  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.905723, accuracy = 0.572500
---------------  lambda = 1e-08  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.905724, accuracy = 0.572500
---------------  lambda = 1e-07  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.905722, accuracy = 0.572500
---------------  lambda = 1e-06  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.905723, accuracy = 0.572500
---------------  lambda = 1e-05  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.905724, accuracy = 0.572500
---------------  lambda = 0.0001  ---------------
Training: loss = 0.000000, accura

### Linear Kernel

In [92]:
lambdas = [10**i for i in range(-10, 0)]
print("************* SVM for dataset 0 *************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = SVM(K_tr0_ln, ytr0[:,1], K_val0_ln, yval0[:,1], lambdas)

print("")
print("")
print("************* SVM for dataset 1 *************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = SVM(K_tr1_ln, ytr1[:,1], K_val1_ln, yval1[:,1],lambdas)

print("")
print("")
print("************* SVM for dataset 2 *************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = SVM(K_tr2_ln, ytr2[:,1], K_val2_ln, yval2[:,1],lambdas)

************* SVM for dataset 0 *************

---------------  lambda = 1e-10  ---------------
Training: loss = 0.554536, accuracy = 0.775000
Validation: loss = 1.192354, accuracy = 0.556875
---------------  lambda = 1e-09  ---------------
Training: loss = 0.554536, accuracy = 0.775000
Validation: loss = 1.192354, accuracy = 0.556875
---------------  lambda = 1e-08  ---------------
Training: loss = 0.554536, accuracy = 0.775000
Validation: loss = 1.192353, accuracy = 0.556875
---------------  lambda = 1e-07  ---------------
Training: loss = 0.554536, accuracy = 0.775000
Validation: loss = 1.192353, accuracy = 0.556875
---------------  lambda = 1e-06  ---------------
Training: loss = 0.554536, accuracy = 0.775000
Validation: loss = 1.192354, accuracy = 0.556875
---------------  lambda = 1e-05  ---------------
Training: loss = 0.554536, accuracy = 0.775000
Validation: loss = 1.192354, accuracy = 0.556875
---------------  lambda = 0.0001  ---------------
Training: loss = 0.554537, accura

### Polynomial Kernel

In [93]:
lambdas = [10**i for i in range(-10, 0)]
print("************* SVM for dataset 0 *************\n")
alphas_tr0, loss_tr0, acc_0, loss_val0, acc_val0 = SVM(K_tr0_poly, ytr0[:,1], K_val0_poly, yval0[:,1], lambdas)

print("")
print("")
print("************* SVM for dataset 1 *************\n")
alphas_tr1, loss_tr1, acc_1, loss_val1, acc_val1 = SVM(K_tr1_poly, ytr1[:,1], K_val1_poly, yval1[:,1],lambdas)

print("")
print("")
print("************* SVM for dataset 2 *************\n")
alphas_tr2, loss_tr2, acc_2, loss_val2, acc_val2 = SVM(K_tr2_poly, ytr2[:,1], K_val2_poly, yval2[:,1],lambdas)

************* SVM for dataset 0 *************

---------------  lambda = 1e-10  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934044, accuracy = 0.565625
---------------  lambda = 1e-09  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934020, accuracy = 0.565625
---------------  lambda = 1e-08  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934041, accuracy = 0.565625
---------------  lambda = 1e-07  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934037, accuracy = 0.565625
---------------  lambda = 1e-06  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934035, accuracy = 0.565625
---------------  lambda = 1e-05  ---------------
Training: loss = 0.000000, accuracy = 1.000000
Validation: loss = 0.934030, accuracy = 0.565625
---------------  lambda = 0.0001  ---------------
Training: loss = 0.000000, accura

# Testing the accuracy on sequences

In [94]:
from csv import reader

def features_into_array(path):
    with open(path, 'r') as read_obj:
        csv_reader = reader(read_obj)
        header = next(csv_reader)
        X = list()
        if header != None:
            for row in csv_reader:
                # row variable is a list that represents a row in csv
                X.append(np.array(row[1]))
                
    X = np.array(X) ## dtype might be changed in something more convenient. For now, dtype = "<U1"
    return X

In [95]:
Xtr0_seq = features_into_array("data/Xtr0.csv")
Ytr0 = np.genfromtxt("data/Ytr0.csv", delimiter=',', skip_header=1)

Xtr1_seq = features_into_array("data/Xtr1.csv")
Ytr1 = np.genfromtxt("data/Ytr1.csv", delimiter=',', skip_header=1)

Xtr2_seq = features_into_array("data/Xtr2.csv")
Ytr2 = np.genfromtxt("data/Ytr2.csv", delimiter=',', skip_header=1)

In [21]:
from sklearn.model_selection import train_test_split

Xtr0, Xval0, ytr0, yval0 = train_test_split(Xtr0_seq, Ytr0, test_size=0.5, random_state=42)
Xtr1, Xval1, ytr1, yval1 = train_test_split(Xtr1_seq, Ytr1, test_size=0.5, random_state=42)
Xtr2, Xval2, ytr2, yval2 = train_test_split(Xtr2_seq, Ytr2, test_size=0.5, random_state=42)

## Predictions on the testing sets

In [22]:
Xte0_seq = features_into_array("data/Xte0.csv")
Xte1_seq = features_into_array("data/Xte1.csv")
Xte2_seq = features_into_array("data/Xte2.csv")

## Making predictions

### First create the kernels for each testing set with the chosen parameters

In [241]:
Xte0 = np.genfromtxt("data/Xte0_mat100.csv", delimiter='')
Xte1 = np.genfromtxt("data/Xte1_mat100.csv", delimiter='')
Xte2 = np.genfromtxt("data/Xte2_mat100.csv", delimiter='')

Please make sure to use the same parameters as those that were used to create the initial kernel.

In [256]:
K_te0 = gaussian_kernel(Xtr0, Xte0, mode="test")
K_te1 = gaussian_kernel(Xtr1, Xte1, mode="test")
K_te2 = gaussian_kernel(Xtr2, Xte2, mode="test")

In [275]:
K_te0_ln = linear_kernel(Xtr0, Xte0, scale=True, mode="test")
K_te1_ln = linear_kernel(Xtr1, Xte1, scale=True, mode="test")
K_te2_ln = linear_kernel(Xtr2, Xte2, scale=True, mode="test")

In [276]:
K_te0_poly = polynomial_kernel(Xtr0, Xte0, d=3, c=1, mode="test")
K_te1_poly = polynomial_kernel(Xtr1, Xte1, d=3, c=1, mode="test")
K_te2_poly = polynomial_kernel(Xtr2, Xte2, d=3, c=1, mode="test")

In [259]:
def write_predictions_csv(test_kernels, test_alphas, path):
    
    predictions = np.zeros(3000, dtype=int)
    
    for i in range(3):
        y_pred = test_kernels[i] @ test_alphas[i]
        y_pred[y_pred >= 0.5] = 1
        y_pred[y_pred < 0.5] = 0
        
        predictions[1000*i:1000*(i+1)] = y_pred
    
    #predictions = predictions.astype(int)
    pred = pd.DataFrame({"Bound" : predictions})
    pred.to_csv(path, index=True,index_label="Id")
    print("saving predictions")
    #np.savetxt("data/Ytest_KRR.csv", predictions, header = "Id, Bound", delimiter =",")
    print("saved predictions")
    return(predictions)
        

Example

In [260]:
test_kernels = [K_te0, K_te1, K_te2]
#test_alphas = [alphas_tr0[-4], alphas_tr1[-4], alphas_tr2[-3]] # il faut choisir l'alpha associé à un bon lambda!
test_alphas = [alphas_tr0_klr[0], alphas_tr1_klr[0], alphas_tr2_klr[0]]
write_predictions_csv(test_kernels, test_alphas, path ="data/Ytest_KLR.csv")

saving predictions
saved predictions


array([0, 0, 0, ..., 0, 0, 0])

In [48]:
ex1 = Xtr0_mat100[:10]
ex2 = Xtr0_mat100[:5]

In [49]:
M = np.zeros((5,10))
for i in range(5):
    for j in range(10):
        M[i,j] = np.dot(ex2[i], ex1[j])

In [59]:
print(M == np.inner(ex2,ex1))
print(np.linalg.norm(M - np.inner(ex2,ex1)))

[[ True False  True False False  True  True  True  True  True]
 [False False  True False  True  True  True  True False  True]
 [ True  True False False False False False False  True  True]
 [False False False False  True False False  True False False]
 [False  True False  True  True  True  True  True False False]]
1.9081958235744878e-17


In [56]:
np.inner(ex2, ex1)

array([[0.01913989, 0.0094518 , 0.00980624, 0.01051512, 0.00756144,
        0.0114603 , 0.00968809, 0.00862476, 0.01110586, 0.01063327],
       [0.0094518 , 0.03048204, 0.0116966 , 0.01016068, 0.00791588,
        0.00815217, 0.0044896 , 0.01512287, 0.01240548, 0.00378072],
       [0.00980624, 0.0116966 , 0.03331758, 0.01772212, 0.01134216,
        0.00508034, 0.00968809, 0.01429584, 0.0137051 , 0.00531664],
       [0.01051512, 0.01016068, 0.01772212, 0.02457467, 0.01181474,
        0.0067344 , 0.01051512, 0.01488658, 0.01465028, 0.00732514],
       [0.00756144, 0.00791588, 0.01134216, 0.01181474, 0.02079395,
        0.00756144, 0.01228733, 0.01228733, 0.01075142, 0.00850662]])

In [57]:
M

array([[0.01913989, 0.0094518 , 0.00980624, 0.01051512, 0.00756144,
        0.0114603 , 0.00968809, 0.00862476, 0.01110586, 0.01063327],
       [0.0094518 , 0.03048204, 0.0116966 , 0.01016068, 0.00791588,
        0.00815217, 0.0044896 , 0.01512287, 0.01240548, 0.00378072],
       [0.00980624, 0.0116966 , 0.03331758, 0.01772212, 0.01134216,
        0.00508034, 0.00968809, 0.01429584, 0.0137051 , 0.00531664],
       [0.01051512, 0.01016068, 0.01772212, 0.02457467, 0.01181474,
        0.0067344 , 0.01051512, 0.01488658, 0.01465028, 0.00732514],
       [0.00756144, 0.00791588, 0.01134216, 0.01181474, 0.02079395,
        0.00756144, 0.01228733, 0.01228733, 0.01075142, 0.00850662]])

In [97]:
1/(2*1000)

0.0005