# Dataset Loading

## Loading packages

In [None]:
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
import os
from datetime import datetime
import darts as dt
import seaborn as sns
import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots
from statsmodels.api import OLS
import sklearn.model_selection as skm
import sklearn.linear_model as skl
from sklearn.preprocessing import StandardScaler
from ISLP import load_data
from ISLP.models import ModelSpec as MS
from functools import partial
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import PLSRegression
from ISLP.models import \
(Stepwise ,
sklearn_selected ,
sklearn_selection_path)
from l0bnb import fit_path

# LASSO Model Building

## Loading Dataset

In [None]:
# Load the raw data set and count the missing values in every column.
inputData = pd.read_csv('asx99.csv')
# DataFrame.isna() works for every dtype. np.isnan() raises TypeError on
# non-numeric columns, and the "Date" column (presumably read as strings)
# is still present at this point.
inputData.isna().sum()

In [None]:
# Discard rows containing missing values, then remove the "Date" column so
# that only the remaining columns are passed to the model-building cells.
inputData = inputData.dropna().drop(columns="Date")
# Kept as the last expression of the cell so the notebook actually displays
# the shape of the cleaned frame (mid-cell it was silently discarded).
inputData.shape

In [None]:
def nCp(sigma2, estimator, X, Y):
    """Score a fitted estimator with a negated C_p-style criterion.

    The value returned is ``-(RSS + 2 * p * sigma2) / n``, where ``RSS`` is
    the residual sum of squares of ``estimator.predict(X)`` against ``Y``
    and ``(n, p) = X.shape``.

    Parameters
    ----------
    sigma2 : float
        Noise-variance estimate used in the penalty term.
    estimator : object
        Any fitted object exposing ``predict(X)``.
    X : array-like with a 2-D ``shape``
        Design matrix; its shape supplies ``n`` and ``p``.
    Y : array-like
        Observed responses aligned with the rows of ``X``.

    Returns
    -------
    float
        The negated criterion.  NOTE(review): the sign flip is presumably
        there so that larger is better when used as a scorer -- confirm
        against the caller.
    """
    n_obs, n_features = X.shape
    residuals = Y - estimator.predict(X)
    rss = np.sum(residuals ** 2)
    penalty = 2 * n_features * sigma2
    return -(rss + penalty) / n_obs

In [None]:
# Name of the column treated as the response: the next cell removes it from
# the predictor set and extracts it from inputData as Y.
focus = "ABC"

In [None]:
# Build the model specification from every column except the response.
predictor_names = inputData.columns.drop(focus)
design = MS(predictor_names).fit(inputData)

# Model matrix and response vector for the full-data OLS fit.
X = design.transform(inputData)
Y = np.array(inputData[focus])

# Scale (residual-variance estimate) of the OLS fit of Y on X.
ols_fit = OLS(Y, X).fit()
sigma2 = ols_fit.scale

In [None]:
# Shared cross-validation splitter and preprocessing step for later cells.
K = 5  # number of folds
# Shuffled folds with a fixed seed so repeated runs give the same splits.
kfold = skm.KFold(n_splits=K, shuffle=True, random_state=0)
# Centre every column and rescale it to unit variance.
scaler = StandardScaler(with_mean=True, with_std=True)

In [None]:
# Refit the model specification on inputData and build the model matrix in
# one call. The result is used as a DataFrame by later cells (its
# 'intercept' column is dropped and its column names label coefficients).
D = design.fit_transform(inputData)

In [None]:
# Remove the constant 'intercept' column from the model matrix and expose
# the remaining predictors as a plain NumPy array.
D = D.drop(columns=['intercept'])
X = np.asarray(D)

In [None]:
# Sanity check: True if any entry of the predictor matrix is NaN.
np.isnan(X).any()

In [None]:
# Per-column standard deviation of the predictor matrix.
X_scale = np.std(X, axis=0)

In [None]:
# Standardize the predictors before computing regularization paths: centre
# every column and divide by its standard deviation. Previously Xs was just
# an alias of the raw X and X_scale was computed but never applied, although
# the path plot is labelled "Standardized" and the CV pipeline does scale.
X_scale = X.std(0)
# Constant columns have zero spread; divide those by 1 to avoid NaN/inf.
safe_scale = X_scale.copy()
safe_scale[safe_scale == 0] = 1.0
Xs = (X - X.mean(0)[None, :]) / safe_scale[None, :]

# 100 penalties on a log grid from 1e5 down to 1e-5, rescaled by the
# standard deviation of the response.
lambdas = 10**np.linspace(5, -5, 100) / Y.std()
# l1_ratio=0 selects a pure L2 (ridge) penalty; element [1] of the returned
# tuple is the coefficient array along the path.
soln_array = skl.ElasticNet.path(Xs,
                                 Y,
                                 l1_ratio=0.,
                                 alphas=lambdas)[1]
soln_array.shape

In [None]:
# Cross-validated lasso (l1_ratio=1) over an automatically chosen grid of
# 100 penalties, using the shared K-fold splitter.
lassoCV = skl.ElasticNetCV(l1_ratio=1, n_alphas=100, cv=kfold)

# Scale inside the pipeline so the lasso step sees standardized predictors.
pipeCV = Pipeline([('scaler', scaler), ('lasso', lassoCV)])
pipeCV.fit(X, Y)

# Fitted lasso step; alpha_ is the penalty selected by cross-validation.
tuned_lasso = pipeCV.named_steps['lasso']
tuned_lasso.alpha_

In [None]:
# Lasso coefficient path on Xs: keep the penalty grid and the coefficient
# array (the first two items returned by the path function).
lambdas, soln_array = skl.Lasso.path(Xs, Y, n_alphas=100, l1_ratio=1)[:2]

# One row per penalty (indexed by -log(lambda)), one column per predictor.
soln_path = pd.DataFrame(data=soln_array.T,
                         index=-np.log(lambdas),
                         columns=D.columns)

In [None]:
# Plot every coefficient trajectory along the lasso path.
path_fig, ax = subplots(figsize=(8, 8))
soln_path.plot(ax=ax, legend=False)
# ax.legend(loc='upper left')
# Raw string: '\l' is not a valid Python escape sequence and triggers a
# warning on recent interpreters; mathtext needs the backslashes verbatim.
ax.set_xlabel(r'$-\log(\lambda)$', fontsize=20)
ax.set_ylabel('Standardized coefficients', fontsize=20)

In [None]:
# Smallest fold-averaged cross-validation MSE over the penalty grid.
tuned_lasso.mse_path_.mean(axis=1).min()

In [None]:
# Cross-validated MSE against -log(lambda), with error bars computed as the
# across-fold standard deviation divided by sqrt(K).
lassoCV_fig, ax = subplots(figsize=(8, 8))
ax.errorbar(-np.log(tuned_lasso.alphas_),
            tuned_lasso.mse_path_.mean(1),
            yerr=tuned_lasso.mse_path_.std(1) / np.sqrt(K))
# Dashed vertical line at the penalty selected by cross-validation.
ax.axvline(-np.log(tuned_lasso.alpha_), c='k', ls='--')
ax.set_ylim([0, 0.001])
# Raw string and no stray space: '\ lambda' rendered the literal word
# "lambda" instead of the Greek letter.
ax.set_xlabel(r'$-\log(\lambda)$', fontsize=20)
ax.set_ylabel('Cross-validated MSE', fontsize=20)

In [None]:
# NOTE: despite the name, this stores -log(alpha_) -- the same transformed
# scale used for the x-axis of the CV plot -- not the penalty itself.
tuned_lambda = -np.log(tuned_lasso.alpha_)

In [None]:
# Predictor names alongside the fitted lasso coefficients, in signed form
# and as absolute values (the latter is used for ranking).
columns = pd.Series(D.columns)
finalcoef = pd.Series(tuned_lasso.coef_)
abscoef = pd.Series(np.abs(tuned_lasso.coef_))

# Side-by-side tables: name + coefficient, and name + |coefficient|.
stockcoef = pd.concat([columns, finalcoef], axis="columns")
abstock = pd.concat([columns, abscoef], axis="columns")

In [None]:
# Give the two concatenated tables readable column labels (pd.concat left
# them with default labels because the input Series were built unnamed).
stockcoef.columns = ['Stockname','Coefficient']
abstock.columns = ['Stockname','Absolute Coefficient']

In [None]:
# Positions of the coefficients ordered by decreasing absolute value.
# Sorting the underlying array keeps the result purely positional instead
# of relying on the Series labels happening to equal positions.
abs_values = abscoef.to_numpy()
sorted_ind = np.argsort(abs_values)[::-1]
# Number of coefficients the lasso kept (non-zero). The previous
# argmax-of-(== 0) approach returned 0 when no coefficient was exactly
# zero, which made the final report empty.
maxind = int(np.count_nonzero(abs_values))

In [None]:
# Report the selected penalty and the retained coefficients, largest
# magnitude first. The label says "Lambda", so print the penalty itself
# (alpha_) rather than tuned_lambda, which holds -log(alpha_).
print("Tuned Lambda:", tuned_lasso.alpha_, "\n", pl.DataFrame(stockcoef.iloc[sorted_ind][:maxind]))