In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Linear regression with laplace prior
 + In general, laplace prior gives sparse result for regression
     + However, it is difficult to deal with it well due to non-differential point at the origin.
         + $\log p(w) \equiv -1/\beta \sum_j |w_j| $, $|w_j|$ is non-differential at the origin.
 + By the way, non-differential point is eliminated by integrating $|w_j|$:
     + $E[|w_j|]$ does not have non-diffenrential point when the distribution is normal distribution.
     + It is achieved when we consider about the objective function of variational Bayes.
         + $\mathcal{F} := E[\log \frac{q(w)}{p(Y|X,w}p(w)]$
         + Here, $\mathcal{F}$ has a parameter that decides the form of $q(w) = N(w|m, \Sigma)$, $(m, \Sigma)$ is the parameter and optimized by it.
 + In this notebook, the approximated posterior distribution by Variational Bayes is studied.
     + The objective function is optimized by a gradient descent method.
         + Specifically, the Natural gradient descent is efficient method when we consider about a constrained parameter like positive definite matrix, positive real value, simplex, and so on.
         + Thus, we used the natural gradient descent.

# Formulation
+ Learning Model:
    + $p(y|x,w) = N(y|x \cdot w, 1), y \in mathbb{R}, x,w \in \mathbb{R}^M$
    + $p(w) \equiv \exp(-\frac{1}{\beta} \sum_j |w_j|)$, $\beta$ is hyperparameter.
+ Approximated Variational Posterior distribution:
    + $q(w) = N(w|m, \Sigma)$
        + $m \in \mathbb{R}^M, \Sigma \in \mathbb{R}^{M \times M}$ is the parameters to be optimized.

# In this notebook
+ We compare the following average generalization error:
$$
    G(n) = \frac{1}{L} \sum_{j=1}^L \| y - X \hat{w}(x^l, y^l) \|^2,
$$
where $\hat{w}$ is estimated parameter by $(x^l, y^l)$.  
We evaluate the error among Lasso, Ridge, and VB laplace(this calculation).

# Preliminary
## Import library

In [5]:
import pandas as pd
import numpy as np
from scipy.stats import norm
from scipy.stats import invwishart

In [6]:
from sklearn.linear_model import LassoCV, Lasso, LassoLarsCV
from sklearn.linear_model import RidgeCV, Ridge
from sklearn.linear_model import ARDRegression
from sklearn.base import BaseEstimator, RegressorMixin

## Data setting

In [7]:
## data setting
n = 100 # train size
M = 200 # # of features
zero_ratio = 1
n_zero_ind = int(M*zero_ratio) # # of zero elements in the parameter
prob_seed = 20201110 # random seed

N = 10000 # test size

datasets = 100

## Problem setting

In [16]:
np.random.seed(prob_seed)
true_w = np.random.normal(scale = 3, size = M)
zero_ind = np.random.choice(M, size = n_zero_ind)
true_w[zero_ind] = 0

## Learning settings

In [17]:
ln_vb_params = {
    "pri_beta": 10,
    "pri_opt_flag": True,
    "iteration": 10000,
    "step": 0.2,
    "is_trace": False,
    "trace_step": 100
}
ln_lasso_params = {
    "fit_intercept": False,
    "cv": 5,
    "max_iter": 10000
}
ln_ridge_params = {
    "fit_intercept": False,
    "cv": 5
}
ln_ard_params = {
    "fit_intercept": False
}

# Classes

In [18]:
class VBLaplace(BaseEstimator, RegressorMixin):
    def __init__(
        self, pri_beta: float = 20, pri_opt_flag: bool = True,
        seed: int = -1, iteration: int = 1000, tol: float = 1e-8, step: float = 0.1,
        is_trace: bool = False, trace_step: int = 20
    ):
        self.pri_beta = pri_beta
        self.pri_opt_flag = pri_opt_flag
        self.seed = seed
        self.iteration = iteration
        self.tol = tol
        self.step = step
        self.is_trace = is_trace
        self.trace_step = trace_step
        pass
    
    def _initialization(self, M: int):
        seed = self.seed
        
        if seed > 0:
            np.random.seed(seed)
        
        mean = np.random.normal(size = M)
        sigma = invwishart.rvs(df = M+2, scale = np.eye(M), size = 1)
        pri_beta = np.random.gamma(shape = 3, size = 1) if self.pri_opt_flag else self.pri_beta
        
        self.mean_ = mean
        self.sigma_ = sigma
        self.pri_beta_ = pri_beta
        pass
    
    def _obj_func(self, X:np.ndarray, y:np.ndarray, pri_beta:float, mean:np.ndarray, sigma:np.ndarray) -> float:
        """
        Calculate objective function.

        + Input:
            1. X: input matrix (n, M) matrix
            2. y: output vector (n, ) matrix
            3. mean: mean parameter of vb posterior
            4. sigma: covariance matrix of vb posterior

        + Output:
            value of the objective function.

        """

        n, M = X.shape

        sq_sigma_diag = np.sqrt(np.diag(sigma))
        log_2pi = np.log(2*np.pi)

        F = 0
        # const values
        F += -M/2*log_2pi -M/2 + M*log_2pi + n*M/2*log_2pi + M*np.log(2*pri_beta)

        F += ((y - X@mean)**2).sum()/2 - np.linalg.slogdet(sigma)[1]/2 + np.trace(X.T @ X @ sigma)/2

        # term obtained from laplace prior
        F += ((mean + 2*sq_sigma_diag*norm.pdf(-mean/sq_sigma_diag)-2*mean*norm.cdf(-mean/sq_sigma_diag))/pri_beta).sum()

        return F
    
    def fit(self, train_X:np.ndarray, train_Y:np.ndarray):
        pri_beta = self.pri_beta
        iteration = self.iteration
        step = self.step
        tol = self.tol
        
        is_trace = self.is_trace
        trace_step = self.trace_step
        
        M = train_X.shape[1]
        
        if not hasattr(self, "mean_"):
            self._initialization(M)
        
        est_mean = self.mean_
        est_sigma = self.sigma_
        est_pri_beta = self.pri_beta_
        
        # transformation to natural parameter
        theta1 = np.linalg.solve(est_sigma, est_mean)
        theta2 = -np.linalg.inv(est_sigma)/2        
        
        F = []
        
        cov_X = train_X.T @ train_X
        cov_YX = train_Y @ train_X
        for ite in range(iteration):
            sq_sigma_diag = np.sqrt(np.diag(est_sigma))

            # update mean and sigma by natural gradient
            dFdnu1 = theta1 - cov_YX
            dFdnu1 += (1 - 2*est_mean/sq_sigma_diag*norm.pdf(-est_mean/sq_sigma_diag) - 2*norm.cdf(-est_mean/sq_sigma_diag)) / est_pri_beta
            dFdnu2 = theta2 + cov_X/2
            dFdnu2[np.diag_indices(M)] += 1/sq_sigma_diag*norm.pdf(-est_mean/sq_sigma_diag)/est_pri_beta

            theta1 += -step * dFdnu1
            theta2 += -step * dFdnu2
            est_sigma = -np.linalg.inv(theta2)/2
            est_mean = est_sigma @ theta1
            
            # update pri_beta by extreme value
            sq_sigma_diag = np.sqrt(np.diag(est_sigma))
            est_pri_beta = ((est_mean + 2*sq_sigma_diag*norm.pdf(-est_mean/sq_sigma_diag)-2*est_mean*norm.cdf(-est_mean/sq_sigma_diag))).mean() if self.pri_opt_flag else pri_beta
            current_F = self._obj_func(train_X, train_Y, est_pri_beta, est_mean, est_sigma)
            if is_trace and ite % trace_step == 0:
                print(current_F, (dFdnu1**2).sum(), (dFdnu2**2).sum())            
            
            if ite > 0 and np.abs(current_F - F[ite-1]) < tol:
                if is_trace:
                    print(current_F, (dFdnu1**2).sum(), (dFdnu2**2).sum())                            
                break
            else:
                F.append(current_F)
            pass
        
        
        self.F_ = F
        self.mean_ = est_mean
        self.sigma_ = est_sigma
        self.pri_beta_ = est_pri_beta
        
        return self
        pass
    
    def predict(self, test_X: np.ndarray):
        if not hasattr(self, "mean_"):
            raise ValueError("fit has not finished yet, should fit before predict.")
        return test_X @ self.mean_
        pass
        
    pass

In [19]:
class VBNormal(BaseEstimator, RegressorMixin):
    def __init__(
        self, pri_beta: float = 20, pri_opt_flag: bool = True,
        seed: int = -1, iteration: int = 1000, tol: float = 1e-8, step: float = 0.1,
        is_trace: bool = False, trace_step: int = 20
    ):
        self.pri_beta = pri_beta
        self.pri_opt_flag = pri_opt_flag
        self.seed = seed
        self.iteration = iteration
        self.tol = tol
        self.step = step
        self.is_trace = is_trace
        self.trace_step = trace_step
        pass
    
    def _initialization(self, M: int):
        seed = self.seed
        
        if seed > 0:
            np.random.seed(seed)
        
        mean = np.random.normal(size = M)
        sigma = invwishart.rvs(df = M+2, scale = np.eye(M), size = 1)
        pri_beta = np.random.gamma(shape = 3, size = 1) if self.pri_opt_flag else self.pri_beta
        
        self.mean_ = mean
        self.sigma_ = sigma
        self.pri_beta_ = pri_beta
        pass
    
    def _obj_func(self, X:np.ndarray, y:np.ndarray, pri_beta:float, mean:np.ndarray, sigma:np.ndarray) -> float:
        """
        Calculate objective function.

        + Input:
            1. X: input matrix (n, M) matrix
            2. y: output vector (n, ) matrix
            3. mean: mean parameter of vb posterior
            4. sigma: covariance matrix of vb posterior

        + Output:
            value of the objective function.

        """

        n, M = X.shape

        log_2pi = np.log(2*np.pi)

        F = 0
        # const values
        F += -M/2*log_2pi -M/2 + M*log_2pi + n*M/2*log_2pi + M*np.log(2*pri_beta)

        F += ((y - X@mean)**2).sum()/2 - np.linalg.slogdet(sigma)[1]/2 + np.trace(X.T @ X @ sigma)/2

        # term obtained from Normal prior
        F += pri_beta/2*(mean@mean + np.trace(sigma)) - M/2*np.log(pri_beta) + M/2*log_2pi
        
        return F
    
    def fit(self, train_X:np.ndarray, train_Y:np.ndarray):
        pri_beta = self.pri_beta
        iteration = self.iteration
        step = self.step
        tol = self.tol
        
        is_trace = self.is_trace
        trace_step = self.trace_step
        
        M = train_X.shape[1]
        
        if not hasattr(self, "mean_"):
            self._initialization(M)
        
        est_mean = self.mean_
        est_sigma = self.sigma_
        est_pri_beta = self.pri_beta_
                
        F = []
        XY_cov = train_Y@train_X
        X_cov = train_X.T@train_X
        
        for ite in range(iteration):
            sigma_inv = X_cov + est_pri_beta*np.eye(M)
            est_mean = np.linalg.solve(sigma_inv, XY_cov)
            est_sigma = np.linalg.inv(sigma_inv)
            
            # update pri_beta by extreme value
            est_pri_beta = M/(est_mean@est_mean + np.trace(est_sigma)) if self.pri_opt_flag else pri_beta
            current_F = self._obj_func(train_X, train_Y, est_pri_beta, est_mean, est_sigma)
            if is_trace and ite % trace_step == 0:
                print(current_F, (dFdnu1**2).sum(), (dFdnu2**2).sum())            
            
            if ite > 0 and np.abs(current_F - F[ite-1]) < tol:
                if is_trace:
                    print(current_F, (dFdnu1**2).sum(), (dFdnu2**2).sum())                            
                break
            else:
                F.append(current_F)
            pass
        
        
        self.F_ = F
        self.mean_ = est_mean
        self.sigma_ = est_sigma
        self.pri_beta_ = est_pri_beta
        
        return self
        pass
    
    def predict(self, test_X: np.ndarray):
        if not hasattr(self, "mean_"):
            raise ValueError("fit has not finished yet, should fit before predict.")
        return test_X @ self.mean_
        pass
        
    pass

In [20]:
class VBApproxLaplace(BaseEstimator, RegressorMixin):
    """
    Laplace prior is approximated by normal distribution, and approximated posterior distribution is obtained by the approximated laplace prior.
    """
    
    def __init__(
        self, pri_beta: float = 20, pri_opt_flag: bool = True,
        seed: int = -1, iteration: int = 1000, tol: float = 1e-8, step: float = 0.1,
        is_trace: bool = False, trace_step: int = 20
    ):
        self.pri_beta = pri_beta
        self.pri_opt_flag = pri_opt_flag
        self.seed = seed
        self.iteration = iteration
        self.tol = tol
        self.step = step
        self.is_trace = is_trace
        self.trace_step = trace_step
        pass
    
    def _initialization(self, M: int):
        seed = self.seed
        
        if seed > 0:
            np.random.seed(seed)
        
        mean = np.random.normal(size = M)
        sigma = invwishart.rvs(df = M+2, scale = np.eye(M), size = 1)
        pri_beta = np.random.gamma(shape = 3, size = 1) if self.pri_opt_flag else self.pri_beta
        
        self.mean_ = mean
        self.sigma_ = sigma
        self.pri_beta_ = pri_beta
        pass
    
    def _obj_func(self, y:np.ndarray, pri_beta:float, mean:np.ndarray, inv_sigma:np.ndarray, h_xi: np.ndarray, v_xi: np.ndarray) -> float:
        """
        Calculate objective function.

        + Input:
            1. X: input matrix (n, M) matrix
            2. y: output vector (n, ) matrix
            3. mean: mean parameter of vb posterior
            4. sigma: covariance matrix of vb posterior

        + Output:
            value of the objective function.

        """

        F = 0
        F += pri_beta/2*np.sqrt(h_xi).sum() + v_xi@h_xi - M*np.log(pri_beta/2)
        F += n/2*np.log(2*np.pi) + train_Y@train_Y/2 - mean @ (inv_sigma @ mean)/2 + np.linalg.slogdet(inv_sigma)[0]/2
        return F
    
    def fit(self, train_X:np.ndarray, train_Y:np.ndarray):
        iteration = self.iteration
        step = self.step
        tol = self.tol
        
        is_trace = self.is_trace
        trace_step = self.trace_step
        
        M = train_X.shape[1]
        
        if not hasattr(self, "mean_"):
            self._initialization(M)
        
        est_mean = self.mean_
        est_sigma = self.sigma_
        est_pri_beta = self.pri_beta_
                
        F = []
        X_cov = train_X.T@train_X
        XY_cov = train_X.T @ train_Y
        
        for ite in range(iteration):
            # update form of approximated laplace prior
            est_h_xi = est_mean**2 + np.diag(est_sigma)
            est_v_xi = -est_pri_beta/2/np.sqrt(est_h_xi)            
            
            # update posterior distribution
            inv_sigma = X_cov -2*np.diag(est_v_xi)
            est_mean = np.linalg.solve(inv_sigma, XY_cov)
            est_sigma = np.linalg.inv(inv_sigma)
            
            # update pri_beta by extreme value
            est_pri_beta = M/((est_mean**2 + np.diag(est_sigma))/(2*np.sqrt(est_h_xi))).sum() if self.pri_opt_flag else pri_beta
            
            current_F = self._obj_func(train_Y, est_pri_beta, est_mean, inv_sigma, est_h_xi, est_v_xi)
            if is_trace and ite % trace_step == 0:
                print(current_F)            
            
            if ite > 0 and np.abs(current_F - F[ite-1]) < tol:
                if is_trace:
                    print(current_F, (dFdnu1**2).sum(), (dFdnu2**2).sum())                            
                break
            else:
                F.append(current_F)
            pass
        
        
        self.F_ = F
        self.mean_ = est_mean
        self.sigma_ = est_sigma
        self.pri_beta_ = est_pri_beta
        
        return self
        pass
    
    def predict(self, test_X: np.ndarray):
        if not hasattr(self, "mean_"):
            raise ValueError("fit has not finished yet, should fit before predict.")
        return test_X @ self.mean_
        pass
    
    pass

# Experiment part
+ By some datasets are used for train and evaluate

In [21]:
score_func = lambda X, y, coef: 1 - ((y - X@coef)**2).sum() / ((y - y.mean())**2).sum()
score_vb_laplace_exact = np.zeros(datasets)
score_vb_laplace_approx = np.zeros(datasets)
score_vb_normal = np.zeros(datasets)
score_ard = np.zeros(datasets)
score_lasso = np.zeros(datasets)
score_ridge = np.zeros(datasets)

In [22]:
sq_error = lambda X, y, coef: ((y - X@coef)**2).mean()
sq_error_vb_laplace_exact = np.zeros(datasets)
sq_error_vb_laplace_approx = np.zeros(datasets)
sq_error_vb_normal = np.zeros(datasets)
sq_error_ard = np.zeros(datasets)
sq_error_lasso = np.zeros(datasets)
sq_error_ridge = np.zeros(datasets)

In [23]:
for dataset_ind in range(datasets):
    vb_laplace_exact_obj = VBLaplace(**ln_vb_params)
    vb_laplace_approx_obj = VBApproxLaplace(**ln_vb_params)
    vb_normal_obj = VBNormal(**ln_vb_params)
    lasso_obj = LassoCV(**ln_lasso_params)
    ridge_obj = RidgeCV(**ln_ridge_params)
    ard_obj = ARDRegression(**ln_ard_params)
    
    # data generation
    train_X = np.random.normal(size = (n, M))
    train_Y = train_X @ true_w + np.random.normal(size = n)

    lasso_obj.fit(train_X, train_Y)
    ridge_obj.fit(train_X, train_Y)
    ard_obj.fit(train_X, train_Y)
    vb_laplace_exact_obj.fit(train_X, train_Y)
    vb_normal_obj.fit(train_X, train_Y)
    vb_laplace_approx_obj.fit(train_X, train_Y)

    test_X = np.random.normal(size = (N, M))
    test_Y = test_X @ true_w + np.random.normal(size = N)
    
    ### evaluation by square error
    sq_error_lasso[dataset_ind] = sq_error(test_X, test_Y, lasso_obj.coef_)
    sq_error_ridge[dataset_ind] = sq_error(test_X, test_Y, ridge_obj.coef_)
    sq_error_ard[dataset_ind] = sq_error(test_X, test_Y, ard_obj.coef_)
    sq_error_vb_laplace_exact[dataset_ind] = sq_error(test_X, test_Y, vb_laplace_exact_obj.mean_)
    sq_error_vb_normal[dataset_ind] = sq_error(test_X, test_Y, vb_normal_obj.mean_)
    sq_error_vb_laplace_approx[dataset_ind] = sq_error(test_X, test_Y, vb_laplace_approx_obj.mean_)

    print(
        "sq_error:"
        , sq_error_lasso[dataset_ind]
        , sq_error_ridge[dataset_ind]
        , sq_error_ard[dataset_ind]
        , sq_error_vb_laplace_exact[dataset_ind]
        , sq_error_vb_normal[dataset_ind]
        , sq_error_vb_laplace_approx[dataset_ind]
    )    
    
    ### evaluation by R^2 score
    score_lasso[dataset_ind] = score_func(test_X, test_Y, lasso_obj.coef_)
    score_ridge[dataset_ind] = score_func(test_X, test_Y, ridge_obj.coef_)
    score_ard[dataset_ind] = score_func(test_X, test_Y, ard_obj.coef_)
    score_vb_laplace_exact[dataset_ind] = score_func(test_X, test_Y, vb_laplace_exact_obj.mean_)
    score_vb_normal[dataset_ind] = score_func(test_X, test_Y, vb_normal_obj.mean_)
    score_vb_laplace_approx[dataset_ind] = score_func(test_X, test_Y, vb_laplace_approx_obj.mean_)
    
    print(
        "R^2 score:"
        , score_lasso[dataset_ind]
        , score_ridge[dataset_ind]
        , score_ard[dataset_ind]
        , score_vb_laplace_exact[dataset_ind]
        , score_vb_normal[dataset_ind]
        , score_vb_laplace_approx[dataset_ind]
    )

sq_error: 211.99947382656495 262.8633089588862 244.28250191764744 213.0808993575275 262.8720650855691 184.89152016220785
R^2 score: 0.6112105411694024 0.517930484487177 0.5520061441282424 0.6092272964029909 0.5179144264767819 0.6609242807600602
sq_error: 157.20510380218326 266.92466949358436 168.06530062795468 210.27424979392399 266.8903924153819 161.33171087517715
R^2 score: 0.7063578628601705 0.5014135768513759 0.6860721893766204 0.6072304358981974 0.5014776027274757 0.698649807009929
sq_error: 128.47485340937857 201.0191447016561 136.20878445692813 131.46938256864007 195.73172187705399 97.6653494646435
R^2 score: 0.764963029453435 0.6322476380495341 0.7508142705672125 0.7594847195497569 0.6419206581753738 0.8213271526965937
sq_error: 92.92312836929125 224.46967769591458 121.5171440724955 143.2676857345334 224.45365108880972 90.91516803436357
R^2 score: 0.827647020853728 0.5836556693922094 0.7746110988105437 0.7342683906033993 0.5836853953983028 0.8313713675455113
sq_error: 163.34886

In [24]:
np.abs(vb_laplace_exact_obj.mean_ - np.sqrt(np.diag(vb_laplace_exact_obj.sigma_)))

array([8.42579850e-01, 2.58307681e+00, 3.04929324e+00, 4.69251805e-01,
       8.65503650e-01, 2.39588821e+00, 1.81665592e+00, 3.22227545e-01,
       7.81719592e-01, 8.70814160e-01, 7.59146840e-01, 1.09531229e+00,
       6.63304506e-01, 1.23803751e-01, 3.66329669e-01, 9.55048759e-01,
       6.18741613e-01, 1.36999754e+00, 1.80068829e+00, 6.30155411e+00,
       1.11670344e+00, 5.17454343e-01, 1.27908422e+00, 1.51344126e-01,
       2.13806304e+00, 2.27860240e+00, 2.41070991e-02, 4.57708486e-01,
       2.30080778e+00, 4.50777373e-01, 1.24084154e+00, 1.52522300e+00,
       2.12599545e+00, 7.01237508e-02, 8.21981334e-01, 3.46841699e-01,
       2.99670279e-01, 5.89226640e+00, 8.00567606e-01, 1.00850448e+00,
       1.13598164e+00, 1.39240392e+00, 9.79933249e-02, 1.33878390e-01,
       1.38862960e+00, 1.40470931e-01, 1.19874958e+00, 1.37112526e-01,
       1.78480001e+00, 9.66082280e-01, 1.18055679e+00, 1.92307299e+00,
       1.08985184e+00, 9.85456676e-01, 3.59484722e-01, 8.05456099e-02,
      

In [None]:
upper = vb_laplace_exact_obj.mean_ + 0.8 * np.sqrt(np.diag(vb_laplace_exact_obj.sigma_))

In [None]:
lower = vb_laplace_exact_obj.mean_ - 0.8 * np.sqrt(np.diag(vb_laplace_exact_obj.sigma_))

In [None]:
(((lower < 0) & (0 < upper)))[:100]

In [None]:
(np.abs(vb_laplace_exact_obj.mean_) < 1).sum()

In [None]:
(lasso_obj.coef_ < 0.001).sum()

In [None]:
(true_w < 0.001).sum()

In [25]:
print(
    sq_error_lasso.mean()
    , sq_error_ridge.mean()
    , sq_error_ard.mean()
    , sq_error_vb_laplace_exact.mean()
    , sq_error_vb_normal.mean()
    , sq_error_vb_laplace_approx.mean()
)

229.2178187139233 270.9310271850848 236.02793215883284 216.86059372585152 269.37613375278215 188.90862899368614


In [26]:
print(
    score_lasso.mean()
    , score_ridge.mean()
    , score_ard.mean()
    , score_vb_laplace_exact.mean()
    , score_vb_normal.mean()
    , score_vb_laplace_approx.mean()
)

0.5749632491163943 0.4975290123932801 0.5623307038489177 0.5978201577849914 0.5004077146598158 0.6496948673843996


In [None]:
import matplotlib.pyplot as plt

In [20]:
true_w

array([ 0.        ,  0.        ,  0.        ,  0.        , -0.76668197,
        0.07758316,  0.        , -0.0995576 ,  0.        , -0.90644613,
        0.        ,  0.        , -1.16645995,  1.00565264, -1.27819984,
        0.30094245, -0.627204  ,  3.38047157,  1.22344481,  0.0473449 ,
       -0.03814966,  0.        ,  0.        ,  3.52029545,  0.        ,
        4.81596502,  0.        , -3.25347577,  0.66002526,  0.        ,
        0.        ,  0.        ,  0.        ,  2.63380714,  1.33677451,
        0.        , -2.69622796,  3.58475763,  0.        ,  0.        ,
        2.71306942,  0.        ,  1.88162727, -2.59235379,  3.48865255,
        0.        ,  0.        ,  0.        ,  2.00792993, -0.4410573 ,
        0.        , -4.15620415,  3.9016001 ,  0.        ,  6.5176337 ,
       -3.96303101, -0.03691128,  1.35713552,  2.36783035,  0.        ,
        0.        ,  0.18762913, -1.26910508, -3.83069906,  0.26426108,
       -1.0842188 ,  0.        ,  0.        ,  0.        ,  0.  

# Conclusion
+ We experimented the performance of the rigorously derived variational linear regression algorithm for the Laplace prior by comparing:
    1. Ordinal optimized Lasso by cross-validation
    2. Ordinal optimized Ridge by cross-validation
    3. Variational Bayes linear regression for the normal prior
    4. Bayesian ARD
    5. Variational Bayes linear regression for the approximated Laplace prior.
+ Results are as follows:
    1. n > M with non-zero elements: ridge, vb for the normal prior gives the best performance, although vb for the Laplace prior gives better performance.
    2. n > M with zero-elements: lasso, vb for the approximated Laplace gives the best performance. although vb for the Laplace prior also gives better performance.
    3. M > n with zero-elements: results is similar with 1.
    4. M > n with zero-elements: results is similar with 2.
    5. M >> n, especially # of non-zero elements is larger than # of samples, vb for the Laplace prior gives the best performance.
+ Summary of results:
    + Derived algorithm can estimate every case, and # of features are extremely larger. 