# **Stock ElasticNet Model**

In [1]:
import warnings
warnings.filterwarnings("ignore")

import math
import numpy as np
import pandas as pd
import seaborn as sns
import yfinance as yf
import scipy.stats as ss
import statsmodels as smt
import statsmodels.api as sm
from datetime import datetime
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV
from prompt_toolkit.filters.cli import emacs_insert_mode
from sklearn import linear_model, model_selection, metrics

In [2]:
stock = pd.read_csv('stockffads.csv')
stock.drop(columns='Close', inplace=True)

In [3]:
stock

Unnamed: 0,Date,Open,High,Low,Adj Close,Volume,Mkt-RF,SMB,HML,RF,...,SMA_50,EMA_12,EMA_26,MACD,RSI,ADS_Index,GM Yest_Close,TM Yest_Close,F Yest_Close,RACE Yest_Close
0,2017-01-04,14.316667,15.200000,14.287333,15.132667,168202500,0.79,0.95,-0.16,0.002,...,15.132667,15.132667,15.132667,0.000000,100.000000,0.174783,30.369259,118.550003,9.082818,55.978443
1,2017-01-05,15.094667,15.165333,14.796667,15.116667,88675500,-0.21,-0.89,-0.79,0.002,...,15.124667,15.130205,15.131481,-0.001276,0.000000,0.154241,32.045399,121.190002,9.501246,56.424824
2,2017-01-06,15.128667,15.354000,15.030000,15.267333,82918500,0.29,-0.66,-0.31,0.002,...,15.172222,15.151302,15.141545,0.009757,91.024248,0.134146,31.440609,120.440002,9.212674,56.377335
3,2017-01-09,15.264667,15.461333,15.200000,15.418667,59692500,-0.37,-0.30,-1.03,0.002,...,15.233833,15.192435,15.162072,0.030363,95.477309,0.076169,31.095015,120.129997,9.205460,55.978443
4,2017-01-10,15.466667,15.466667,15.126000,15.324667,54900000,0.16,0.89,0.43,0.002,...,15.252000,15.212778,15.174116,0.038662,71.686831,0.057208,31.112295,119.739998,9.111673,55.351604
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1648,2023-07-25,272.380005,272.899994,265.000000,265.279999,112757300,0.25,-0.04,-0.79,0.022,...,239.121800,272.029478,264.035489,7.993989,51.187379,0.310831,39.194069,164.529999,13.960000,317.160004
1649,2023-07-26,263.250000,268.040009,261.750000,264.350006,95856200,0.02,0.68,1.03,0.022,...,241.049200,270.848021,264.058787,6.789234,50.639496,0.307146,37.817787,165.559998,13.580000,316.109985
1650,2023-07-27,268.309998,269.130005,255.300003,255.710007,103697300,-0.74,-0.90,0.27,0.022,...,242.836400,268.519095,263.440359,5.078737,45.741140,0.302536,38.186794,165.699997,13.670000,317.350006
1651,2023-07-28,259.859985,267.250000,258.230011,266.440002,111446000,1.14,0.53,-0.33,0.022,...,244.834800,268.199235,263.662555,4.536680,51.956498,0.296997,38.864960,165.429993,13.730000,317.269989


In [4]:
n = stock.shape[0]
test_ratio = 0.8
stock_train = stock[:int(n*test_ratio)]
stock_test = stock[int(n*test_ratio):]

In [5]:
stock_DateIndex =  stock.copy()
stock_DateIndex.set_index('Date', inplace = True)

In [6]:
X = stock_DateIndex.drop(columns = 'Adj Close')
y = stock_DateIndex['Adj Close']

## Splitting the train and test data

In [7]:
stock_train.set_index('Date', inplace = True)
stock_test.set_index('Date', inplace = True)

X_train, X_test = stock_train.drop(columns = 'Adj Close'), stock_test.drop(columns = 'Adj Close')
y_train, y_test = stock_train['Adj Close'], stock_test['Adj Close']

In [8]:
enetcv_model = linear_model.ElasticNetCV(fit_intercept=True).fit(X_train,y_train)
enetcv_filtered = X_train.columns[np.abs(enetcv_model.coef_) != 0.0]
print('The importatnt feature as per Elastic Net with cross validation:', enetcv_filtered)
print('The optimum alpha as per elastic net with cross validataion:', enetcv_model.alpha_)

The importatnt feature as per Elastic Net with cross validation: Index(['Volume'], dtype='object')
The optimum alpha as per elastic net with cross validataion: 4136936337.6243973


In [9]:
#Grid Search
alpha_range = np.logspace(-4, 4, 100)
enet_model = linear_model.ElasticNet()

enet_grid_search = GridSearchCV(enet_model, param_grid = {'alpha': alpha_range}, scoring = 'r2', cv = 10)
enet_grid_search.fit(X_train, y_train)

enet_optimal_alpha = enet_grid_search.best_params_['alpha']
print('The optimal value of alpha as per Grid Search is:', enet_optimal_alpha)

The optimal value of alpha as per Grid Search is: 12.328467394420684


In [10]:
# Elastic net with optimal alpha value
enet_model_with_optimal_alpha = linear_model.ElasticNet(enet_optimal_alpha)
enet_model_with_optimal_alpha.fit(X_train,y_train)

enet_filtered = X_train.columns[np.abs(enet_model_with_optimal_alpha.coef_) != 0.0]
print('The selected features by elastic Net\n',list(enet_filtered))

The selected features by elastic Net
 ['Open', 'High', 'Low', 'Volume', 'RSI']


In [11]:
## Fitting the model in OLS
X_train_enet = X_train[enet_filtered]
X_test_enet = X_test[enet_filtered]

enet_OLS = sm.OLS(y_train, X_train_enet, normalize = True).fit()
print(enet_OLS.summary())

                                 OLS Regression Results                                
Dep. Variable:              Adj Close   R-squared (uncentered):                   1.000
Model:                            OLS   Adj. R-squared (uncentered):              1.000
Method:                 Least Squares   F-statistic:                          1.329e+06
Date:                Mon, 09 Oct 2023   Prob (F-statistic):                        0.00
Time:                        23:29:08   Log-Likelihood:                         -2814.5
No. Observations:                1322   AIC:                                      5639.
Df Residuals:                    1317   BIC:                                      5665.
Df Model:                           5                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------



In [12]:
enet_pval = pd.DataFrame(enet_OLS.pvalues).reset_index()

enet_pval.rename(columns = {'index': 'feature', 0: 'pValue'}, inplace = True)
enet_pval['pValue'] = enet_pval['pValue'].apply(lambda x: "{:f}".format(x))
enet_pval['pValue'] = enet_pval['pValue'].astype(float)
enet_pval[enet_pval['pValue'] <= 0.05]

Unnamed: 0,feature,pValue
0,Open,0.0
1,High,0.0
2,Low,0.0


In [13]:
y_train_pred_enet = enet_OLS.predict(X_train_enet)
train_r2_enet = round(metrics.r2_score(y_train, y_train_pred_enet), 2)
print('The R2 of training set for ElasticNet is:', round(metrics.r2_score(y_train, y_train_pred_enet), 2))
print('The MSE of training set for ElasticNet is:', (str(round(metrics.mean_squared_error(y_train, y_train_pred_enet), 2)) + '%'))

y_test_pred_enet = enet_OLS.predict(X_test_enet)
test_r2_enet = round(metrics.r2_score(y_test, y_test_pred_enet), 2)

p_enet = X_test_enet.shape[1]  # Number of columns
n_enet = len(y_test_pred_enet)  # Number of records

# Calculate the adjusted R-squared
adjusted_r2_enet = 1 - (1 - test_r2_enet) * ((n_enet - 1) / (n_enet - p_enet - 1))

print('\nThe R2 of testing set for ElasticNet is:', round(metrics.r2_score(y_test, y_test_pred_enet), 2))
print('The Adjusted R2 of testing set for ElasticNet is:', round(adjusted_r2_enet, 2))
print('The MSE of testing set for ElasticNet is:', (str(round(metrics.mean_squared_error(y_test, y_test_pred_enet), 2)) + '%'))

# Correlation between y predicted and y
corr_enet = ss.pearsonr(y_test_pred_enet, y_test)[0]
print('\nThe correlation of ElasticNet model is:', round(corr_enet, 2))

The R2 of training set for ElasticNet is: 1.0
The MSE of training set for ElasticNet is: 4.14%

The R2 of testing set for ElasticNet is: 1.0
The Adjusted R2 of testing set for ElasticNet is: 1.0
The MSE of testing set for ElasticNet is: 10.17%

The correlation of ElasticNet model is: 1.0


In [15]:
'''plt.figure(figsize = (25, 10))
plt.plot(y_test, color = 'red', label = 'actual')
plt.plot(y_test_pred_enet, color = 'blue', label = 'prediction')
plt.title("Prediction using ElasticNet", fontdict = {'fontsize': 20})
plt.show()'''

import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=y_test.index, 
    y=y_test,
    mode='lines',
    name='Actual'
))

fig.add_trace(go.Scatter(
    x=y_test.index,
    y=y_test_pred_enet,
    mode='lines',
    name='Prediction'  
))

fig.update_layout(
    title='Prediction using ElasticNet',
    xaxis_title='Index',
    yaxis_title='Value',
    legend_title='Legend',
    font=dict(size=20)
)

fig.show()