In [1]:
import warnings
# Suppress every warning category for the remainder of the session.
# NOTE(review): this also hides library deprecation warnings; consider a
# narrower filter so API removals are noticed before they become errors.
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np

import sklearn
from sklearn import linear_model
from sklearn.linear_model import lasso_path, enet_path
from itertools import cycle

import matplotlib.pyplot as plt

# Lasso in Sklearn

## data loading

In [3]:
# Read the space-separated return matrix. The "HDG" column is the regression
# target; every other column is used as a regressor.
retmat = pd.read_csv("HedgeFundReplication/replicatingHDG.dat", sep=" ")

# Response vector.
y = retmat["HDG"]

# Design matrix: drop() returns a new frame, so retmat itself is left untouched.
X = retmat.drop(columns="HDG")

FileNotFoundError: [Errno 2] No such file or directory: 'HedgeFundReplication/replicatingHDG.dat'

In [None]:
# Dimensions of the design matrix as (rows, columns).
X.shape

## Lasso in Sklearn with LassoCV

In [None]:
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler

n_folds = 10

# LassoCV's `normalize=True` option was deprecated in scikit-learn 1.0 and
# removed in 1.2, so the regressors are standardized explicitly instead.
# NOTE(review): the alpha grid (clf3.alphas_) is now expressed relative to the
# standardized regressors, so its values differ from those of the legacy option.
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# Cross-validated lasso over an automatically chosen grid of 100 alphas.
clf3 = LassoCV(n_alphas = 100, cv = n_folds)
clf3.fit(X_std, y)

# Map the fitted parameters back to the original units of X so that
# clf3.coef_ (and clf3.predict on raw X) keep the meaning they had before:
#   y = b0 + sum_j c_j * (x_j - m_j) / s_j
#     = (b0 - sum_j m_j * c_j / s_j) + sum_j (c_j / s_j) * x_j
# StandardScaler sets scale_ to 1 for constant columns, so the division is safe.
clf3.coef_ = clf3.coef_ / scaler.scale_
clf3.intercept_ = clf3.intercept_ - np.dot(scaler.mean_, clf3.coef_)

## Graphic visualization of CV MSE vs $\alpha$ level

In [None]:
# Mean and spread of the CV error across folds for every alpha on the path
# (the statistics are taken along axis 1 of mse_path_, one value per alpha).
alpha_range = clf3.alphas_
mse_values = clf3.mse_path_.mean(axis = 1)
sd_values = clf3.mse_path_.std(axis = 1)

# Computed once and reused by every curve below.
log_alpha = np.log(alpha_range)
upper_band = mse_values + sd_values
lower_band = mse_values - sd_values

plt.plot(log_alpha, mse_values, "k-o")
plt.plot(log_alpha, upper_band, "r--")
plt.plot(log_alpha, lower_band, "r--")
plt.fill_between(log_alpha, upper_band, lower_band, alpha = .2)

# Mark the alpha selected by cross-validation. axvline spans the full height of
# the axes, so no hard-coded y-range tied to one particular MSE scale is needed.
plt.axvline(np.log(clf3.alpha_))
plt.xlabel(r"$log(\alpha)$")
plt.ylabel("10-Fold CV MSE");

In [None]:
# Display any matplotlib figures that are currently open.
plt.show()

## Sparse $\beta$ statistics from Optimal Model
Note: Sklearn selects the model at $\alpha_{\text{min}}$, the penalty level with the lowest mean cross-validated MSE.

In [None]:
# beta values
clf3.coef_

summary_table = pd.DataFrame({
    "names": np.array(retmat.columns)[:-1],
    "beta values": clf3.coef_
}, columns = ["names","beta values"])


summary_table

In [None]:
# Keep only the regressors the lasso retained, i.e. those with a non-zero
# coefficient. The mask is computed once and applied to both columns; names
# come from X so they align with the coefficients of the fitted model.
selected = clf3.coef_ != 0

summary_table = pd.DataFrame({
    "names": X.columns.to_numpy()[selected],
    "beta values": clf3.coef_[selected]
}, columns = ["names","beta values"])


summary_table

# R 

The same analysis with R's `glmnet`, which is the better version.

### Data

In [None]:
import rpy2
# Load rpy2's IPython extension, which provides the %%R cell magic used by the
# R cells below.
%load_ext rpy2.ipython

In [None]:
%%R -i retmat
# `-i retmat` passes the Python variable retmat into the R session; it is then
# converted to a matrix, the form in which it is handed to glmnet below.
retmat = as.matrix(retmat)

### glmnet code

In [None]:
%%R
library(glmnet)

# Fit the full lasso regularization path of the HDG returns on all other columns.
# The response is selected by name instead of the hard-coded position 1962, so
# the cell keeps working if the number or order of columns changes.
stopifnot("HDG" %in% colnames(retmat))
is_response = colnames(retmat) == "HDG"

glmnetout = glmnet(retmat[, !is_response, drop = FALSE], retmat[, is_response])


In [None]:
%%R
# Use cross-validation to choose the optimal lambda. The fold assignment is
# random, so the seed is fixed to make the result reproducible.
set.seed(0)

# The response is selected by name instead of the hard-coded position 1962, so
# the cell keeps working if the number or order of columns changes.
stopifnot("HDG" %in% colnames(retmat))
is_response = colnames(retmat) == "HDG"

cvglmout = cv.glmnet(retmat[, !is_response, drop = FALSE], retmat[, is_response])


### Plots

In [None]:
%%R
# Cross-validation curve, with thick dotted vertical lines at the log of both
# reference penalties (lambda.min and lambda.1se) drawn in a single call.
plot(cvglmout, lwd = 2)
abline(v = log(c(cvglmout$lambda.min, cvglmout$lambda.1se)), lty = 3, lwd = 2)

In [None]:
%%R
# Coefficients of the lasso path evaluated at lambda.min, the penalty with the
# lowest cross-validated error. Pass s = cvglmout$lambda.1se instead to get the
# model from the one-standard-error rule.
beta_lasso = coef(glmnetout, s = cvglmout$lambda.min)


print(beta_lasso)


In [None]:
%%R

# coef() returns a one-column sparse matrix whose variable labels are stored as
# row names, not names(). Converting to a dense matrix and taking the single
# column yields a numeric vector that carries those labels, so the non-zero
# subset below is printed together with its variable names.
beta_dense = as.matrix(beta_lasso)[, 1]
beta_lasso_nz = beta_dense[beta_dense != 0]
print(beta_lasso_nz)



---
