In [1]:
# %load ../loaders/imports.py
import sys, os
import numpy as np
import matplotlib.pyplot as plt
import time
import pdb

sys.path.append('..')

from utils import gen_covariance, gen_beta2, gen_data, get_cov_list
from utils import selection_accuracy
from sklearn.linear_model import LassoLars, lasso_path, LinearRegression

from pyuoi.linear_model import UoI_Lasso
from pyuoi.linear_model.adaptive import mBIC, eBIC, mBIC2, BIC, MIC, bayesian_log_ll, bayesian_lambda_selection

In [2]:
from sklearn.linear_model import lars_path

In [3]:
from sklearn.preprocessing import StandardScaler

In [4]:
# %load ../loaders/datgen.py
# Sweep over sparsity levels. For each level, trace a sequence of candidate
# supports with LARS (lasso variant), refit every support by OLS, and score it
# with the various extensions of the BIC.
n_features = 50
n_samples = 200

sparsity = np.linspace(0.05, 1, 11)
sigma = gen_covariance(n_features, 0, n_features, 5, 1)

# Per-sparsity results of the MIC penalty sweep (filled at the end of each iteration)
oracle_penalty = np.zeros(sparsity.size)
MIC_oracle_sa = np.zeros(sparsity.size)

# Each list receives one array of per-support scores per sparsity level
bll_scores = []
BIC_scores = []
mBIC_scores = []
eBIC_scores = []
mBIC2_scores = []

# Output of selection_accuracy for every support on the path, one entry per sparsity level
sa = []

for i, s in enumerate(sparsity):

    beta = gen_beta2(n_features, n_features, sparsity=s, betawidth=0)
    X, X_test, y, y_test, ss = gen_data(n_samples, n_features, kappa=100,
                                        covariance=sigma, beta=beta)

    # Standardize the design matrix and center the response
    X = StandardScaler().fit_transform(X)
    y -= np.mean(y)

    _, _, coefs = lars_path(X, y.ravel(), method='lasso')

    # One boolean row per step of the path, marking the nonzero coefficients
    supports = (coefs.T != 0).astype(bool)

    # Optionally stick the true model in there
    # supports = np.vstack([supports, (beta.ravel() !=0).astype(bool)])

    sa.append(selection_accuracy(beta.ravel(), supports))

    # Fit OLS models to each of the supports
    models = []

    penalties = np.linspace(0, 2 * np.log(n_samples), 80)
    n_models = supports.shape[0]
    bll_scores_ = np.zeros(n_models)
    MIC_scores_ = np.zeros((n_models, penalties.size))
    BIC_scores_ = np.zeros(n_models)
    mBIC_scores_ = np.zeros(n_models)
    eBIC_scores_ = np.zeros(n_models)
    mBIC2_scores_ = np.zeros(n_models)

    for j, support in enumerate(supports):
        model_size = np.count_nonzero(support)

        if model_size == 0:
            # Empty support: nothing to fit, predict zero for the centered response
            models.append(np.nan)
            y_pred = np.zeros(y.size)
        else:
            model = LinearRegression().fit(X[:, support], y)
            models.append(model)
            y_pred = model.predict(X[:, support])

        bll_scores_[j] = bayesian_lambda_selection(y, y_pred, n_features, model_size, s, 2)

        # One MIC score per candidate penalty
        MIC_scores_[j, :] = np.array([MIC(y, y_pred, model_size, penalty)
                                      for penalty in penalties])

        BIC_scores_[j] = BIC(y, y_pred, model_size)
        mBIC_scores_[j] = mBIC(y, y_pred, model_size, s)
        eBIC_scores_[j] = eBIC(y, y_pred, X.shape[1], model_size)
        mBIC2_scores_[j] = mBIC2(y, y_pred, model_size, s)

    # For MIC scores, record the oracle selection accuracy and the oracle penalty:
    # for every penalty take the support minimizing the MIC, then keep the penalty
    # whose selected support has the highest selection accuracy.
    selected_models = np.argmin(MIC_scores_, axis=0)
    MIC_selection_accuracies = [selection_accuracy(beta.ravel(), supports[selected, :])
                                for selected in selected_models]
    oracle_penalty[i] = penalties[np.argmax(MIC_selection_accuracies)]
    MIC_oracle_sa[i] = np.max(MIC_selection_accuracies)

    bll_scores.append(bll_scores_)
    BIC_scores.append(BIC_scores_)
    mBIC_scores.append(mBIC_scores_)
    eBIC_scores.append(eBIC_scores_)
    mBIC2_scores.append(mBIC2_scores_)
    

> c:\users\akumar\nse\pyuoi\pyuoi\linear_model\adaptive.py(37)bayesian_lambda_selection()
-> return 0
(Pdb) ll
  7  	def bayesian_lambda_selection(y, y_pred, n_features, model_size, sparsity_prior, penalty):
  8  	
  9  	    y = y.ravel()
 10  	    y_pred = y_pred.ravel()
 11  	
 12  	    n_samples = y.size
 13  	
 14  	    # Log likelihood
 15  	    ll = log_likelihood_glm('normal', y, y_pred)
 16  	
 17  	    # Regularization Penalty
 18  	    p1 = 2 * penalty * model_size
 19  	
 20  	    # Normal BIC penalty
 21  	    BIC = model_size * np.log(n_samples)
 22  	
 23  	    # Second order Bayes factor approximation
 24  	    RSS = np.sum((y - y_pred)**2)
 25  	    BIC2 = n_samples**3/(2 * RSS*2)
 26  	
 27  	    # Term arising from normalization
 28  	    BIC3 = model_size * np.log(2 * np.pi)
 29  	
 30  	    # Model probability prior
 31  	    M_k = scipy.special.binom(n_features, model_size) * \
 32  	          sparsity_prior**model_size * (1 - sparsity_prior)**(n_features - model_s

BdbQuit: 

In [None]:
# Selection accuracy of the model picked by each criterion, per sparsity level.
# Every curve is labeled so the figure can be read without the code.
fig, ax = plt.subplots()
level_idx = range(len(sparsity))

# Oracle sa: best accuracy among all supports on the path
ax.plot(sparsity, [np.max(x) for x in sa], label='Oracle (best support on path)')

# MIC with the best-performing penalty
ax.plot(sparsity, MIC_oracle_sa, label='MIC (oracle penalty)')

# BIC sa
ax.plot(sparsity, [sa[i][np.argmin(BIC_scores[i])] for i in level_idx], label='BIC')
# mBIC sa
ax.plot(sparsity, [sa[i][np.argmin(mBIC_scores[i])] for i in level_idx], label='mBIC')
# eBIC sa (disabled)
# ax.plot(sparsity, [sa[i][np.argmin(eBIC_scores[i])] for i in level_idx], label='eBIC')
# mBIC2 sa
ax.plot(sparsity, [sa[i][np.argmin(mBIC2_scores[i])] for i in level_idx], label='mBIC2')

ax.set_xlabel('Sparsity')
ax.set_ylabel('Selection accuracy')
ax.legend();

In [None]:
# MIC penalty that gave the highest selection accuracy, per sparsity level
fig, ax = plt.subplots()
ax.plot(sparsity, oracle_penalty)
ax.set_xlabel('Sparsity')
ax.set_ylabel('Oracle MIC penalty');

In [None]:
# Selection accuracy of the support minimizing the bayesian_lambda_selection
# score, compared with the two oracle curves. Curves are labeled so they can be
# told apart in the figure.
fig, ax = plt.subplots()

ax.plot(sparsity, [np.max(x) for x in sa], label='Oracle (best support on path)')

ax.plot(sparsity, MIC_oracle_sa, label='MIC (oracle penalty)')

ax.plot(sparsity, [sa[i][np.argmin(bll_scores[i])] for i in range(len(sparsity))],
        label='Bayesian lambda selection')

ax.set_xlabel('Sparsity')
ax.set_ylabel('Selection accuracy')
ax.legend();

In [None]:
# High SNR with correlations --> might be redeeming for some of the Bayesian approaches

In [4]:
# Just look at the different terms in the Bayes Factor and how they scale with
# (1) sparsity, (2) model penalty, (3) number of samples.
#
# NOTE(review): the array named `BIC` below rebinds the `BIC` function imported
# from pyuoi.linear_model.adaptive; re-run the import cell before any cell that
# calls BIC(...) after this one.
# NOTE(review): this cell unpacks seven values from bayesian_lambda_selection,
# while the earlier sparsity sweep stores its return value as a single score.
# Confirm which version of the function is installed.
n_features = 50
n_samples = np.array([200, 400, 800, 1600, 5000])

# Iterate over sparsity and sample size. Trace a sequence of models using LARS
# and record every term of the Bayesian criterion for each model and penalty.

sparsity = np.linspace(0.05, 1, 11)
sigma = gen_covariance(n_features, 0, n_features, 5, 0)

oracle_penalty = np.zeros((sparsity.size, n_samples.size))
MIC_oracle_sa = np.zeros((sparsity.size, n_samples.size))

# Oracle sa
sa = np.empty((sparsity.size, n_samples.size), dtype =  np.dtype('O'))

np1 = 80    # number of MIC penalties
np2 = 120   # number of penalties passed to bayesian_lambda_selection

# All the different terms. Each cell [sparsity, n_samples, penalty] holds a 1-D
# array with one value per support on the LARS path (path lengths differ between
# runs, hence the object dtype).
ll = np.empty((sparsity.size, n_samples.size, np2), dtype =  np.dtype('O'))
p1 = np.empty((sparsity.size, n_samples.size, np2), dtype =  np.dtype('O'))
BIC = np.empty((sparsity.size, n_samples.size, np2), dtype =  np.dtype('O'))
RSS = np.empty((sparsity.size, n_samples.size, np2), dtype =  np.dtype('O'))
BIC2 = np.empty((sparsity.size, n_samples.size, np2), dtype =  np.dtype('O'))
BIC3 = np.empty((sparsity.size, n_samples.size, np2), dtype =  np.dtype('O'))
M_k = np.empty((sparsity.size, n_samples.size, np2), dtype =  np.dtype('O'))
P_M = np.empty((sparsity.size, n_samples.size, np2), dtype =  np.dtype('O'))

# Destination arrays in the order bayesian_lambda_selection returns the terms
term_stores = (ll, p1, BIC, BIC2, BIC3, M_k, P_M)

for i, s in enumerate(sparsity):

    for ii, ns in enumerate(n_samples):

        t0 = time.time()

        penalties = np.linspace(0, 2 * np.log(ns), np1)

        bpenalties = np.linspace(-np.log(ns), 2 * np.log(ns), np2)

        beta = gen_beta2(n_features, n_features, sparsity = s, betawidth = 0)
        X, X_test, y, y_test, ss = gen_data(ns, n_features, kappa = 5,
                                            covariance = sigma, beta = beta)

        # Standardize the design matrix and center the response
        X = StandardScaler().fit_transform(X)
        y -= np.mean(y)

        _, _, coefs  = lars_path(X, y.ravel(), method = 'lasso')

        # One boolean row per step of the path, marking the nonzero coefficients
        supports = (coefs.T != 0).astype(bool)

        # Stick the true model in there
        # supports = np.vstack([supports, (beta.ravel() !=0).astype(bool)])

        sa[i, ii] = selection_accuracy(beta.ravel(), supports)

        # Fit OLS models to each of the supports
        models = []

        n_models = supports.shape[0]
        MIC_scores_ = np.zeros((n_models, penalties.size))

        # term_buckets[t][i3] collects term t over all supports for penalty i3.
        # Writing straight into the result arrays inside the support loop would
        # keep only the values of the last support on the path.
        term_buckets = [[[] for _ in range(bpenalties.size)] for _ in term_stores]
        rss_values = np.zeros(n_models)

        for j in range(n_models):
            support = supports[j, :]
            model_size = np.count_nonzero(support)

            if model_size > 0:
                model = LinearRegression().fit(X[:, support] , y)
                models.append(model)
                y_pred = model.predict(X[:, support])

            else:
                # Empty support: nothing to fit, predict zero for the centered response
                models.append(np.nan)
                y_pred = np.zeros(y.size)

            # Residual sum of squares of the refit; both operands are flattened so
            # a column-vector y cannot broadcast against a 1-D prediction.
            rss_values[j] = np.sum((y.ravel() - y_pred.ravel())**2)

            MIC_scores_[j, :] =  np.array([MIC(y, y_pred, model_size, penalty)
                                           for penalty in penalties])

            for i3, penalty in enumerate(bpenalties):
                # Record all Bayesian terms for each penalty
                ll_, p1_, BIC_, BIC2_, BIC3_, M_k_, P_M_ = bayesian_lambda_selection(
                                                           y, y_pred, n_features, model_size,
                                                           s, penalty)
                for bucket, value in zip(term_buckets,
                                         (ll_, p1_, BIC_, BIC2_, BIC3_, M_k_, P_M_)):
                    bucket[i3].append(value)

        # Store one per-support array in every (sparsity, n_samples, penalty) cell
        for i3 in range(bpenalties.size):
            for store, bucket in zip(term_stores, term_buckets):
                store[i, ii, i3] = np.array(bucket[i3])
            # RSS does not depend on the penalty; the same values go in every slot
            RSS[i, ii, i3] = rss_values.copy()

        # For MIC scores, record the oracle selection accuracy and the oracle penalty
        selected_models = np.argmin(MIC_scores_, axis = 0)
        MIC_selection_accuracies = [selection_accuracy(beta.ravel(), supports[selected_models[j], :])
                                    for j in range(selected_models.size)]
        oracle_penalty[i, ii] = penalties[np.argmax(MIC_selection_accuracies)]
        MIC_oracle_sa[i, ii] = np.max(MIC_selection_accuracies)

        print(time.time() - t0)

0.2919948101043701
0.4270005226135254
0.7379999160766602
2.0329999923706055
16.360000133514404
0.285999059677124
0.41300082206726074
0.7270007133483887
1.9539985656738281
16.16600012779236
0.2929995059967041
0.4159994125366211
0.7300014495849609
1.984999656677246
16.193000316619873
0.3059995174407959
0.42599916458129883
0.7140014171600342
1.9910008907318115
16.276998043060303
0.30099987983703613
0.4120008945465088
0.8019993305206299
2.042999505996704
16.292001962661743
0.2909984588623047
0.4230005741119385
0.7310004234313965
1.9999990463256836
16.219001531600952
0.29600024223327637
0.4159996509552002
0.7360007762908936
1.9969992637634277
16.215998888015747
0.29400014877319336
0.42099833488464355
0.7090013027191162
2.0029993057250977
16.2860004901886
0.29600048065185547
0.4139993190765381
0.7149999141693115
1.9690020084381104
16.246999740600586
0.288999080657959
0.41699862480163574
0.7279999256134033
2.0570015907287598
16.304999828338623
0.2949988842010498
0.4160006046295166
0.718000650

In [None]:
# For a given number of samples, track the various terms vs. the sparsity.

# First: Terms that do not involve the penalty:
# plt.subplots takes the grid as two integers (nrows, ncols); passing a tuple
# as the first argument raises an error.
fig, ax = plt.subplots(2, 3, figsize = (15, 15))
# Flatten the 2 x 3 grid of Axes so each panel can be addressed with one index
ax = ax.ravel()

# NOTE(review): `ll` is allocated as a 3-D object array
# (sparsity x n_samples x penalty), which Axes.plot cannot draw directly.
# Select a sample-size / penalty slice here before plotting.
ax[0].plot(ll)