In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import linalg as LA
from sklearn.linear_model import Lasso, LassoCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

%matplotlib inline


# --- Simulate a sparse linear model: y = X @ beta + epsilon -----------------
# Every random draw goes through one seeded generator so that
# "Restart Kernel -> Run All" reproduces the same data set.
SEED = 42
rng = np.random.default_rng(SEED)

n = 90    # number of observations
q = 100   # number of predictors (q > n: high-dimensional setting)

# Dense coefficient vector; it is sparsified below.
beta = rng.normal(0, 1, q)

# Sparse level: probability that any given coefficient is set to zero.
sparse_level = 0.99

# 0/1 mask: 1 keeps the coefficient, 0 zeroes it out.
zero_one = rng.choice([0, 1], size=q, p=[sparse_level, 1 - sparse_level])
if not zero_one.any():
    # With sparse_level=0.99 and q=100 an all-zero mask is drawn about 37% of
    # the time. That would give sigma == 0 and y == 0, and sensitivity would
    # be undefined downstream, so force one active predictor chosen at random.
    zero_one[rng.integers(q)] = 1
beta = beta * zero_one

# Design matrix with i.i.d. standard normal entries.
X = rng.normal(0, 1, (n, q))

# Signal-to-noise ratio, expressed as a ratio of standard deviations.
SNR = 15
gamma = SNR

# Noise standard deviation chosen so that sd(signal) / sd(noise) == gamma.
sigma = np.sqrt(LA.norm(X.dot(beta))**2 / (n - 1)) / gamma
epsilon = rng.normal(0, sigma, n)

# Observed response.
y = X.dot(beta) + epsilon


In [None]:
# --- Lasso: regularisation path, cross-validated lambda, test error ---------
# The `normalize=True` argument of Lasso/LassoCV was deprecated in
# scikit-learn 1.0 and removed in 1.2, so the predictors are standardised
# explicitly instead. Coefficients are therefore on the standardised scale;
# which coefficients are exactly zero is unaffected by that rescaling.

# Split data into training and test sets. This is done first so that the
# scaler, the coefficient path and the cross-validation all use training
# data only and the test set stays untouched until the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

scaler = StandardScaler().fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

lasso = Lasso(max_iter=10000)

# Generate an array of alpha (lambda) values ranging from very big to very small.
alphas = 10**np.linspace(10, -2, 100)*0.5

# Coefficient path: one fit per alpha. Copy so each row is an independent snapshot.
coefs = []
for a in alphas:
    lasso.set_params(alpha=a)
    lasso.fit(X_train_std, y_train)
    coefs.append(lasso.coef_.copy())

# Perform 10-fold cross-validation to choose the best lambda.
# The alpha grid is left at LassoCV's default (automatic grid).
lassocv = LassoCV(cv=10, max_iter=100000)
lassocv.fit(X_train_std, y_train)

# Refit the model on the full training set at the selected lambda.
lasso.set_params(alpha=lassocv.alpha_)
lasso.fit(X_train_std, y_train)
print('The optimal lambda is:', round(lassocv.alpha_, 3))

# Compute the associated test error.
y_pred = lasso.predict(X_test_std)
meanSqError = mean_squared_error(y_test, y_pred)
print('The mean squared error is:', round(meanSqError, 3))

# Plot the path against the same alpha scale that is used for the vertical
# marker, so the dotted line really sits at the cross-validated lambda.
fig, ax = plt.subplots()
ax.plot(alphas, coefs)
ax.set_xscale('log')
ax.axis('tight')
ax.set_xlabel('lambda (log scale)')
ax.set_ylabel('Coefficient')
ax.axvline(x=lassocv.alpha_, color='red', linestyle='dotted')
ax.set_title('Lasso with optimal lambda=%s' % (round(lassocv.alpha_, 3)));

In [None]:
# --- Variable-selection quality of the fitted lasso -------------------------
# A predictor counts as "selected" when its estimated coefficient is non-zero
# and as "truly active" when its coefficient in the simulated beta is non-zero.
beta_hat = lasso.coef_

selected = np.asarray(beta_hat) != 0
active = np.asarray(beta) != 0

# Confusion-matrix counts, kept as plain ints so they print and divide cleanly.
true_positive = int(np.sum(selected & active))
false_positive = int(np.sum(selected & ~active))
true_negative = int(np.sum(~selected & ~active))
false_negative = int(np.sum(~selected & active))

print('TP=', true_positive)
print('FP=', false_positive)
print('TN=', true_negative)
print('FN=', false_negative)

n_active = true_positive + false_negative
n_inactive = true_negative + false_positive
n_total = n_active + n_inactive

# Sensitivity: share of truly active predictors that were selected.
# It is undefined (NaN) when the true model has no active predictor, which
# previously raised ZeroDivisionError.
sensitivity = true_positive / n_active if n_active else float('nan')

# Specificity: share of truly inactive predictors that were left out.
# It is undefined (NaN) when every predictor is truly active.
specificity = true_negative / n_inactive if n_inactive else float('nan')

# Accuracy: share of predictors classified correctly.
accuracy = (true_positive + true_negative) / n_total if n_total else float('nan')

print('The sensitivity for the %s sparse level is %s' % (sparse_level, round(sensitivity, 3)))
print('The specificity for %s sparse level is %s' % (sparse_level, round(specificity, 3)))
print('The accuracy for the model is', accuracy)