In [2]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [3]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline
import ta
# For reading stock data from yahoo
from pandas_datareader.data import DataReader

# For time stamps
from datetime import datetime

ADBE = DataReader('ADBE',  'yahoo', datetime(2020,1,2), datetime(2020,8,31))

## Technical Analysis Library in Python
It is a Technical Analysis library useful to do feature engineering from financial time series datasets (Open, Close, High, Low, Volume). It is built on Pandas and Numpy.

### The library has implemented 42 indicators:

**Volume**<br>
Money Flow Index (MFI) <br>
Accumulation/Distribution Index (ADI)<br>
On-Balance Volume (OBV)<br>
Chaikin Money Flow (CMF)<br>
Force Index (FI)<br>
Ease of Movement (EoM, EMV)<br>
Volume-price Trend (VPT)<br>
Negative Volume Index (NVI)<br>
Volume Weighted Average Price (VWAP)<br>
Volatility<br>
Average True Range (ATR)<br>
Bollinger Bands (BB)<br>
Keltner Channel (KC)<br>
Donchian Channel (DC)<br>
Ulcer Index (UI)<br>
<br>
**Trend**<br>
Simple Moving Average (SMA)<br>
Exponential Moving Average (EMA)<br>
Weighted Moving Average (WMA)<br>
Moving Average Convergence Divergence (MACD)<br>
Average Directional Movement Index (ADX)<br>
Vortex Indicator (VI)<br>
Trix (TRIX)<br>
Mass Index (MI)<br>
Commodity Channel Index (CCI)<br>
Detrended Price Oscillator (DPO)<br>
KST Oscillator (KST)<br>
Ichimoku Kinkō Hyō (Ichimoku)<br>
Parabolic Stop And Reverse (Parabolic SAR)<br>
Schaff Trend Cycle (STC)<br>
<br>
**Momentum**<br>
Relative Strength Index (RSI)<br>
Stochastic RSI (SRSI)<br>
True strength index (TSI)<br>
Ultimate Oscillator (UO)<br>
Stochastic Oscillator (SR)<br>
Williams %R (WR)<br>
Awesome Oscillator (AO)<br>
Kaufman's Adaptive Moving Average (KAMA)<br>
Rate of Change (ROC)<br>
Percentage Price Oscillator (PPO)<br>
Percentage Volume Oscillator (PVO)<br>
<br>
**Others**<br>
Daily Return (DR)<br>
Daily Log Return (DLR)<br>
Cumulative Return (CR)<br>

[Financial Indicators](https://github.com/bukosabino/ta)

In [4]:
import statsmodels.api as sm

In [5]:
ADBE

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,334.480011,329.170013,330.000000,334.429993,1990100,334.429993
2020-01-03,332.980011,328.690002,329.170013,331.809998,1577600,331.809998
2020-01-06,333.910004,328.190002,328.290009,333.709991,1874700,333.709991
2020-01-07,334.790009,332.309998,334.149994,333.390015,2500800,333.390015
2020-01-08,339.230011,333.399994,333.809998,337.869995,2248500,337.869995
...,...,...,...,...,...,...
2020-08-25,484.649994,474.429993,476.670013,484.429993,3967400,484.429993
2020-08-26,533.700012,492.230011,496.950012,528.489990,7780300,528.489990
2020-08-27,523.320007,504.459991,519.010010,510.320007,3631800,510.320007
2020-08-28,518.799988,510.940002,512.330017,516.440002,1926300,516.440002


In [6]:
adbe = ta.add_all_ta_features(ADBE, open="Open", high="High", low="Low", close="Close", volume="Volume")

  dip[i] = 100 * (self._dip[i] / self._trs[i])
  din[i] = 100 * (self._din[i] / self._trs[i])


In [7]:
adbe

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,volume_adi,volume_obv,volume_cmf,volume_fi,...,momentum_wr,momentum_ao,momentum_kama,momentum_roc,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,others_dr,others_dlr,others_cr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,334.480011,329.170013,330.000000,334.429993,1990100,334.429993,1.952608e+06,1990100,,,...,,,,,,,,-12.377870,,0.000000
2020-01-03,332.980011,328.690002,329.170013,331.809998,1577600,331.809998,2.669691e+06,412500,,,...,,,,,,,,-0.783421,-0.786506,-0.783421
2020-01-06,333.910004,328.190002,328.290009,333.709991,1874700,333.709991,4.413285e+06,2287200,,,...,,,,,,,,0.572615,0.570982,-0.215292
2020-01-07,334.790009,332.309998,334.149994,333.390015,2500800,333.390015,4.090626e+06,-213600,,,...,,,,,,,,-0.095885,-0.095931,-0.310970
2020-01-08,339.230011,333.399994,333.809998,337.869995,2248500,337.869995,5.290074e+06,2034900,,,...,,,,,,,,1.343766,1.334817,1.028617
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-25,484.649994,474.429993,476.670013,484.429993,3967400,484.429993,8.071104e+07,65873900,0.252325,9.402747e+06,...,-0.414627,24.251671,459.321939,7.754073,2.526761,-4.747149,7.273909,1.706908,1.692504,44.852436
2020-08-26,533.700012,492.230011,496.950012,528.489990,7780300,528.489990,8.653641e+07,73654200,0.352875,5.703093e+07,...,-5.102361,32.653351,480.837929,19.219919,17.461845,-0.305350,17.767195,9.095225,8.705094,58.027091
2020-08-27,523.320007,504.459991,519.010010,510.320007,3631800,510.320007,8.516149e+07,70022400,0.290687,3.945654e+07,...,-22.896877,40.016910,485.100803,17.252945,16.886664,3.133053,13.753611,-3.438094,-3.498587,52.593971
2020-08-28,518.799988,510.940002,512.330017,516.440002,1926300,516.440002,8.593103e+07,71948700,0.268005,3.550403e+07,...,-16.903346,46.486409,490.465292,15.960127,12.028531,4.912149,7.116382,1.199247,1.192113,54.423949


In [8]:
adbe.describe()

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,volume_adi,volume_obv,volume_cmf,volume_fi,...,momentum_wr,momentum_ao,momentum_kama,momentum_roc,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,others_dr,others_dlr,others_cr
count,168.0,168.0,168.0,168.0,168.0,168.0,168.0,168.0,149.0,155.0,...,155.0,135.0,159.0,156.0,143.0,135.0,135.0,168.0,167.0,168.0
mean,386.744047,374.752381,380.676132,381.672977,3191351.0,381.672977,39938380.0,26279260.0,0.155903,3269892.0,...,-29.559038,10.948251,380.109886,2.989541,-0.350072,-0.611542,0.116184,0.237232,0.256652,14.12642
std,53.388848,54.47291,53.957361,54.05774,1530022.0,54.05774,23982630.0,24877450.0,0.081662,11123370.0,...,25.586595,23.655561,42.321642,6.988756,11.328349,10.309732,4.733069,3.487733,3.347305,16.164142
min,296.869995,255.130005,288.359985,285.0,1111000.0,285.0,1952608.0,-9620300.0,-0.060763,-45904890.0,...,-99.494745,-52.041735,334.775706,-19.308102,-18.56253,-13.854006,-8.243332,-14.745157,-15.952526,-14.780371
25%,345.924995,337.959999,340.777504,343.614998,2196525.0,343.614998,17687350.0,4627250.0,0.095226,-672888.0,...,-44.310835,0.913956,345.045197,0.884888,-9.476758,-9.164493,-3.441392,-1.112252,-1.109439,2.746466
50%,373.175003,364.884995,369.940002,367.740005,2607800.0,367.740005,37335560.0,17437600.0,0.163467,3735532.0,...,-22.295032,21.186762,361.185985,4.491949,-2.164424,-3.157523,0.039143,0.547558,0.570982,9.960235
75%,438.199997,426.974998,430.450005,432.567513,3837000.0,432.567513,62095000.0,54954680.0,0.217269,7625043.0,...,-9.240489,26.587399,423.598085,6.964474,5.770214,4.712953,2.638655,1.690966,1.682053,29.344712
max,533.700012,510.940002,519.01001,528.48999,8935200.0,528.48999,86536410.0,73654200.0,0.352875,57030930.0,...,-0.414627,51.467703,493.785157,19.219919,24.995885,21.58707,17.767195,17.719298,16.313278,58.027091


In [9]:
# adbe = adbe.dropna(thresh=len(adbe) - 25, axis=1)
#threshold for NAN

In [10]:
adbe_test = adbe.dropna(axis = 1, how = 'any')

In [11]:
adbe_test

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,volume_adi,volume_obv,volume_vpt,volume_nvi,...,volatility_kcli,trend_adx,trend_adx_pos,trend_adx_neg,trend_ichimoku_b,trend_visual_ichimoku_b,trend_psar_up_indicator,trend_psar_down_indicator,others_dr,others_cr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,334.480011,329.170013,330.000000,334.429993,1990100,334.429993,1.952608e+06,1990100,-234949.320506,1000.000000,...,0.0,0.000000,0.000000,0.000000,331.825012,358.038928,0.0,0.0,-12.377870,0.000000
2020-01-03,332.980011,328.690002,329.170013,331.809998,1577600,331.809998,2.669691e+06,412500,-258691.237723,992.165789,...,0.0,0.000000,0.000000,0.000000,331.585007,358.038928,0.0,0.0,-0.783421,-0.783421
2020-01-06,333.910004,328.190002,328.290009,333.709991,1874700,333.709991,4.413285e+06,2287200,-1624.437375,992.165789,...,0.0,0.000000,0.000000,0.000000,331.335007,358.038928,0.0,1.0,0.572615,-0.215292
2020-01-07,334.790009,332.309998,334.149994,333.390015,2500800,333.390015,4.090626e+06,-213600,8336.929405,992.165789,...,0.0,0.000000,0.000000,0.000000,331.490005,358.038928,1.0,0.0,-0.095885,-0.310970
2020-01-08,339.230011,333.399994,333.809998,337.869995,2248500,337.869995,5.290074e+06,2034900,27816.686001,1005.498172,...,0.0,0.000000,0.000000,0.000000,333.710007,358.038928,0.0,0.0,1.343766,1.028617
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-25,484.649994,474.429993,476.670013,484.429993,3967400,484.429993,8.071104e+07,65873900,79698.914286,1086.430305,...,0.0,18.840886,31.387554,13.130186,440.029999,409.309998,0.0,0.0,1.706908,44.852436
2020-08-26,533.700012,492.230011,496.950012,528.489990,7780300,528.489990,8.653641e+07,73654200,775355.673820,1086.430305,...,0.0,22.173734,47.811762,9.966539,465.350006,409.309998,0.0,0.0,9.095225,58.027091
2020-08-27,523.320007,504.459991,519.010010,510.320007,3631800,510.320007,8.516149e+07,70022400,582771.087061,1049.077810,...,0.0,25.268521,42.440753,8.846932,469.590012,409.309998,0.0,0.0,-3.438094,52.593971
2020-08-28,518.799988,510.940002,512.330017,516.440002,1926300,516.440002,8.593103e+07,71948700,-101763.613289,1061.658839,...,0.0,28.142252,40.703056,8.484703,472.529999,409.309998,0.0,0.0,1.199247,54.423949


In [12]:
print(adbe_test.columns)
print(len(adbe_test.columns))

Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close', 'volume_adi',
       'volume_obv', 'volume_vpt', 'volume_nvi', 'volatility_atr',
       'volatility_bbhi', 'volatility_bbli', 'volatility_kch',
       'volatility_kcl', 'volatility_kcp', 'volatility_kchi',
       'volatility_kcli', 'trend_adx', 'trend_adx_pos', 'trend_adx_neg',
       'trend_ichimoku_b', 'trend_visual_ichimoku_b',
       'trend_psar_up_indicator', 'trend_psar_down_indicator', 'others_dr',
       'others_cr'],
      dtype='object')
27


In [13]:
X = adbe_test.drop(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], axis =1)

In [14]:
X.head()

Unnamed: 0_level_0,volume_adi,volume_obv,volume_vpt,volume_nvi,volatility_atr,volatility_bbhi,volatility_bbli,volatility_kch,volatility_kcl,volatility_kcp,...,volatility_kcli,trend_adx,trend_adx_pos,trend_adx_neg,trend_ichimoku_b,trend_visual_ichimoku_b,trend_psar_up_indicator,trend_psar_down_indicator,others_dr,others_cr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,1952608.0,1990100,-234949.320506,1000.0,0.0,0.0,0.0,338.003337,327.383341,0.663527,...,0.0,0.0,0.0,0.0,331.825012,358.038928,0.0,0.0,-12.37787,0.0
2020-01-03,2669691.0,412500,-258691.237723,992.165789,0.0,0.0,0.0,336.726674,327.126668,0.487846,...,0.0,0.0,0.0,0.0,331.585007,358.038928,0.0,0.0,-0.783421,-0.783421
2020-01-06,4413285.0,2287200,-1624.437375,992.165789,0.0,0.0,0.0,337.036672,326.823334,0.674281,...,0.0,0.0,0.0,0.0,331.335007,358.038928,0.0,1.0,0.572615,-0.215292
2020-01-07,4090626.0,-213600,8336.929405,992.165789,0.0,0.0,0.0,336.771675,327.871666,0.620039,...,0.0,0.0,0.0,0.0,331.490005,358.038928,1.0,0.0,-0.095885,-0.31097
2020-01-08,5290074.0,2034900,27816.686001,1005.498172,0.0,0.0,0.0,337.95001,328.497996,0.991535,...,0.0,0.0,0.0,0.0,333.710007,358.038928,0.0,0.0,1.343766,1.028617


In [15]:
y = adbe_test['Adj Close']

In [39]:
Y = y.values

In [16]:
X.shape

(168, 21)

In [17]:
new_y= y.values.reshape(-1,1)

In [18]:
new_y.shape

(168, 1)

## Forward Selection

We begin with the null model—a model that contains an intercept but no predictors. We then fit p simple linear regressions and add to the null model the variable that results in the lowest RSS. We then add to that model the variable that results in the lowest RSS for the new two-variable model. This approach is continued until some stopping rule is satisfied.

In [19]:
def forward_selection(data, target, significance_level=0.05):
    initial_features = data.columns.tolist()
    best_features = []
    while (len(initial_features)>0):
        remaining_features = list(set(initial_features)-set(best_features))
        new_pval = pd.Series(index=remaining_features)
        for new_column in remaining_features:
            model = sm.OLS(target, sm.add_constant(data[best_features+[new_column]])).fit()
            new_pval[new_column] = model.pvalues[new_column]
        min_p_value = new_pval.min()
        if(min_p_value<significance_level):
            best_features.append(new_pval.idxmin())
        else:
            break
    return best_features

In [20]:
forward_selection(X,new_y )

['others_cr', 'volume_vpt', 'volume_obv']

## Backward Selection

We start with all variables in the model, and backward remove the variable with the largest p-value—that is, the variable that is the least statistically significant. The new (p − 1)-variable model is fit, and the variable with the largest p-value is removed. This procedure continues until a stopping rule is reached. For instance, we may stop when all remaining variables have a p-value below some threshold.

In [21]:
def backward_elimination(data, target,significance_level = 0.05):
    features = data.columns.tolist()
    while(len(features)>0):
        features_with_constant = sm.add_constant(data[features])
        p_values = sm.OLS(target, features_with_constant).fit().pvalues[1:]
        max_p_value = p_values.max()
        if(max_p_value >= significance_level):
            excluded_feature = p_values.idxmax()
            features.remove(excluded_feature)
        else:
            break 
    return features

In [22]:
backward_elimination(X,new_y)

['volume_obv', 'volume_vpt', 'others_cr']

## Mixed Selection

This is a combination of forward and backward selection. We start with no variables in the model, and as with forward selection selection, we add the variable that provides the best fit. We continue to add variables one-by-one. Of course, as we noted with the Advertising example, the p-values for variables can become larger as new predictors are added to the model. Hence, if at any point the p-value for one of the variables in the model rises above a certain threshold, then we remove that variable from the model. We continue to perform these forward and backward steps until all variables in the model have a sufficiently low p-value, and all variables outside the model would have a large p-value if added to the model

In [76]:
# def stepwise_selection(data, target,SL_in=0.05,SL_out = 0.05):
#     initial_features = data.columns.tolist()
#     best_features = []
#     while (len(initial_features)>0):
#         remaining_features = list(set(initial_features)-set(best_features))
#         new_pval = pd.Series(index=remaining_features)
#         for new_column in remaining_features:
#             model = sm.OLS(target, sm.add_constant(data[best_features+[new_column]])).fit()
#             new_pval[new_column] = model.pvalues[new_column]
#         min_p_value = new_pval.min()
#         if(min_p_value<SL_in):
#             best_features.append(new_pval.idxmin())
#             while(len(best_features)>0):
#                 best_features_with_constant = sm.add_constant(data[best_features])
#                 p_values = sm.OLS(target, best_features_with_constant).fit().pvalues[1:]
#                 max_p_value = p_values.max()
#                 if(max_p_value >= SL_out):
#                     excluded_feature = p_values.idxmax()
#                     best_features.remove(excluded_feature)
#                 else:
#                     break 
#         else:
#             break
#     return best_features

In [24]:
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.linear_model import LinearRegression

In [26]:
sffs = SFS(LinearRegression(),
         k_features=(4,11),
         forward=True,
         floating=True,
         cv=0)
sffs.fit(X, y)
sffs.k_feature_names_

('volume_adi', 'volume_obv', 'volume_vpt', 'others_cr')

## OLS Regression

In [53]:
X = adbe_test[['volume_obv', 'volume_vpt', 'others_cr']]

In [54]:
X

Unnamed: 0_level_0,volume_obv,volume_vpt,others_cr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-02,1990100,-234949.320506,0.000000
2020-01-03,412500,-258691.237723,-0.783421
2020-01-06,2287200,-1624.437375,-0.215292
2020-01-07,-213600,8336.929405,-0.310970
2020-01-08,2034900,27816.686001,1.028617
...,...,...,...
2020-08-25,65873900,79698.914286,44.852436
2020-08-26,73654200,775355.673820,58.027091
2020-08-27,70022400,582771.087061,52.593971
2020-08-28,71948700,-101763.613289,54.423949


In [55]:
import scipy.stats as ss
Y = Y
T = Y.shape[0];
# SPY = DataReader('SPY',  'yahoo', datetime(2020,1,1), datetime(2020,8,31))
# F = np.diff(np.log(SPY['Adj Close'].values))
F = X.values
'Add Constant to X'
X = np.column_stack([np.ones((T,1)), F])
N = X.shape[1]

'REGRESSION STARTS:'       
'Linear Regression of Y: T x 1 on' 
'Regressors X: T x N'

invXX = np.linalg.inv(X.transpose()@X)
'OLS estimator beta: N x 1'
beta_hat = invXX@X.transpose()@Y
'Predictive value of Y_t using OLS'  
y_hat = X@beta_hat;       
'Residuals from OLS: Y - X*beta'        
residuals = Y - y_hat;            
'variance of Y_t or residuals'
sigma2 = (1/T)*(residuals.transpose()@residuals)
'standard deviation of Y_t or residuals'
sig = np.sqrt(sigma2) 
'variance-covariance matrix of beta_hat'
'N x N: on-diagnal variance(beta_j)'
'N x N: off-diagnal cov(beta_i, beta_j)'
varcov_beta_hat = (sigma2)*invXX
var_beta_hat = np.sqrt(T*np.diag(varcov_beta_hat))

'Calculate R-square'
R_square = 1 - residuals.transpose()@residuals/(T*np.var(Y))
adj_R_square = 1-(1-R_square)*(T-1)/(T-N)

'Test Each Coefficient: beta_i'
't-test stat: N x 1'
t_stat = beta_hat.transpose()/var_beta_hat
' t-test significance level: N x 1'
p_val_t = 1-ss.norm.cdf(t_stat)

'Test of Joint Significance of Model'
F_stat = beta_hat.transpose()@varcov_beta_hat@beta_hat/\
         (residuals.transpose()@residuals)
'size: (1 x N)*(N x N)*(N x 1)/((1 x T) * (T x 1)) = 1 x 1'

p_val_F = 1-ss.chi2.cdf(F_stat,T-N)

REPORT = np.column_stack([beta_hat, t_stat,p_val_t])
print('Regression Statistics')
print('------------------------\n')
print(' REGRESSION STATISTICS  \n') 
print('------------------------\n')
print('beta             t_stat            p_val\n')
print(REPORT)
print('\n Joint significance of all coefficients\n',[F_stat,p_val_F])
print('R-Square is       \n',R_square)
print('Adjusted R Square \n',adj_R_square)
print('Standard Error    \n',sig)
print('Observations      \n',T) 
print('-------------------------\n')

Regression Statistics
------------------------

 REGRESSION STATISTICS  

------------------------

beta             t_stat            p_val

[[3.34429993e+02 6.52681357e+13 0.00000000e+00]
 [1.40131013e-19 2.63433830e-01 3.96108109e-01]
 [2.16840434e-19 1.19352521e-02 4.95238636e-01]
 [3.34429993e+00 4.05390233e+12 0.00000000e+00]]

 Joint significance of all coefficients
 [9.889525547815479, 1.0]
R-Square is       
 1.0
Adjusted R Square 
 1.0
Standard Error    
 3.245501797216724e-12
Observations      
 168
-------------------------



## Rescaling Data

In [60]:
from sklearn import preprocessing

In [67]:
adbe_test_columns = adbe_test.columns

In [98]:
adbe_test_columns

Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close', 'volume_adi',
       'volume_obv', 'volume_vpt', 'volume_nvi', 'volatility_atr',
       'volatility_bbhi', 'volatility_bbli', 'volatility_kch',
       'volatility_kcl', 'volatility_kcp', 'volatility_kchi',
       'volatility_kcli', 'trend_adx', 'trend_adx_pos', 'trend_adx_neg',
       'trend_ichimoku_b', 'trend_visual_ichimoku_b',
       'trend_psar_up_indicator', 'trend_psar_down_indicator', 'others_dr',
       'others_cr'],
      dtype='object')

In [61]:
adbe_norm = preprocessing.normalize(adbe_test)

In [65]:
adbe_norm

array([[ 9.74168209e-05,  9.58702918e-05,  9.61120242e-05, ...,
         0.00000000e+00, -3.60503673e-06,  0.00000000e+00],
       [ 1.06079296e-04,  1.04712604e-04,  1.04865524e-04, ...,
         0.00000000e+00, -2.49578821e-07, -2.49578821e-07],
       [ 6.28534084e-05,  6.17767064e-05,  6.17955310e-05, ...,
         1.88234577e-07,  1.07785947e-07, -4.05254099e-08],
       ...,
       [ 4.74391946e-06,  4.57295256e-06,  4.70484913e-06, ...,
         0.00000000e+00, -3.11664775e-08,  4.76766721e-07],
       [ 4.62836629e-06,  4.55824506e-06,  4.57064579e-06, ...,
         0.00000000e+00,  1.06988290e-08,  4.85531955e-07],
       [ 4.65219127e-06,  4.57152323e-06,  4.62635231e-06, ...,
         0.00000000e+00, -5.31706695e-09,  4.81775496e-07]])

In [68]:
adbe_norm_df = pd.DataFrame(adbe_norm, columns=adbe_test_columns, index=adbe_test.index)

In [69]:
adbe_norm_df

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,volume_adi,volume_obv,volume_vpt,volume_nvi,...,volatility_kcli,trend_adx,trend_adx_pos,trend_adx_neg,trend_ichimoku_b,trend_visual_ichimoku_b,trend_psar_up_indicator,trend_psar_down_indicator,others_dr,others_cr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02,0.000097,0.000096,0.000096,0.000097,0.579614,0.000097,0.568694,0.579614,-0.068429,0.000291,...,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000097,0.000104,0.000000e+00,0.000000e+00,-3.605037e-06,0.000000e+00
2020-01-03,0.000106,0.000105,0.000105,0.000106,0.502585,0.000106,0.850498,0.131412,-0.082413,0.000316,...,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000106,0.000114,0.000000e+00,0.000000e+00,-2.495788e-07,-2.495788e-07
2020-01-06,0.000063,0.000062,0.000062,0.000063,0.352883,0.000063,0.830733,0.430530,-0.000306,0.000187,...,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000062,0.000067,0.000000e+00,1.882346e-07,1.077859e-07,-4.052541e-08
2020-01-07,0.000070,0.000069,0.000070,0.000069,0.521080,0.000069,0.852345,-0.044507,0.001737,0.000207,...,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000069,0.000075,2.083654e-07,0.000000e+00,-1.997905e-08,-6.479545e-08
2020-01-08,0.000056,0.000055,0.000055,0.000055,0.368744,0.000055,0.867549,0.333715,0.004562,0.000165,...,0.0,0.000000e+00,0.000000e+00,0.000000e+00,0.000055,0.000059,0.000000e+00,0.000000e+00,2.203717e-07,1.686886e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-25,0.000005,0.000005,0.000005,0.000005,0.038054,0.000005,0.774159,0.631845,0.000764,0.000010,...,0.0,1.807169e-07,3.010613e-07,1.259413e-07,0.000004,0.000004,0.000000e+00,0.000000e+00,1.637222e-08,4.302129e-07
2020-08-26,0.000005,0.000004,0.000004,0.000005,0.068304,0.000005,0.759716,0.646621,0.006807,0.000010,...,0.0,1.946666e-07,4.197468e-07,8.749777e-08,0.000004,0.000004,0.000000e+00,0.000000e+00,7.984837e-08,5.094287e-07
2020-08-27,0.000005,0.000005,0.000005,0.000005,0.032922,0.000005,0.771993,0.634756,0.005283,0.000010,...,0.0,2.290603e-07,3.847273e-07,8.019784e-08,0.000004,0.000004,0.000000e+00,0.000000e+00,-3.116648e-08,4.767667e-07
2020-08-28,0.000005,0.000005,0.000005,0.000005,0.017185,0.000005,0.766616,0.641875,-0.000908,0.000009,...,0.0,2.510653e-07,3.631239e-07,7.569451e-08,0.000004,0.000004,0.000000e+00,0.000000e+00,1.069883e-08,4.855320e-07


In [72]:
X_norm = adbe_norm_df.drop(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], axis =1)

In [75]:
y_norm = adbe_norm_df['Adj Close']

In [76]:
new_y_norm= y_norm.values.reshape(-1,1)

## Forward Selection

In [70]:
def forward_selection(data, target, significance_level=0.05):
    initial_features = data.columns.tolist()
    best_features = []
    while (len(initial_features)>0):
        remaining_features = list(set(initial_features)-set(best_features))
        new_pval = pd.Series(index=remaining_features)
        for new_column in remaining_features:
            model = sm.OLS(target, sm.add_constant(data[best_features+[new_column]])).fit()
            new_pval[new_column] = model.pvalues[new_column]
        min_p_value = new_pval.min()
        if(min_p_value<significance_level):
            best_features.append(new_pval.idxmin())
        else:
            break
    return best_features

In [77]:
forward_selection(X_norm,new_y_norm )

['volatility_kcl',
 'volatility_kcp',
 'volume_nvi',
 'volatility_bbli',
 'volatility_kch',
 'others_cr',
 'trend_adx_neg',
 'trend_adx_pos',
 'volume_adi',
 'trend_visual_ichimoku_b',
 'trend_ichimoku_b',
 'volume_vpt',
 'volume_obv',
 'trend_adx',
 'volatility_atr']

In [80]:
len(forward_selection(X_norm,new_y_norm ))

15

## Backward Selection

In [78]:
def backward_elimination(data, target,significance_level = 0.05):
    features = data.columns.tolist()
    while(len(features)>0):
        features_with_constant = sm.add_constant(data[features])
        p_values = sm.OLS(target, features_with_constant).fit().pvalues[1:]
        max_p_value = p_values.max()
        if(max_p_value >= significance_level):
            excluded_feature = p_values.idxmax()
            features.remove(excluded_feature)
        else:
            break 
    return features

In [79]:
backward_elimination(X_norm,new_y_norm)

['volume_adi',
 'volume_obv',
 'volume_vpt',
 'volume_nvi',
 'volatility_atr',
 'volatility_bbhi',
 'volatility_bbli',
 'volatility_kch',
 'volatility_kcl',
 'volatility_kcp',
 'volatility_kcli',
 'trend_adx',
 'trend_adx_pos',
 'trend_adx_neg',
 'trend_ichimoku_b',
 'trend_visual_ichimoku_b',
 'others_cr']

In [81]:
len(backward_elimination(X_norm,new_y_norm))

17

## Mixed Selection

In [82]:
# def stepwise_selection(data, target,SL_in=0.05,SL_out = 0.05):
#     initial_features = data.columns.tolist()
#     best_features = []
#     while (len(initial_features)>0):
#         remaining_features = list(set(initial_features)-set(best_features))
#         new_pval = pd.Series(index=remaining_features)
#         for new_column in remaining_features:
#             model = sm.OLS(target, sm.add_constant(data[best_features+[new_column]])).fit()
#             new_pval[new_column] = model.pvalues[new_column]
#         min_p_value = new_pval.min()
#         if(min_p_value<SL_in):
#             best_features.append(new_pval.idxmin())
#             while(len(best_features)>0):
#                 best_features_with_constant = sm.add_constant(data[best_features])
#                 p_values = sm.OLS(target, best_features_with_constant).fit().pvalues[1:]
#                 max_p_value = p_values.max()
#                 if(max_p_value >= SL_out):
#                     excluded_feature = p_values.idxmax()
#                     best_features.remove(excluded_feature)
#                 else:
#                     break 
#         else:
#             break
#     return best_features

In [90]:
sffs = SFS(LinearRegression(),
         k_features=(4,5),
         forward=True,
         floating=True,
         cv=0)
sffs.fit(X_norm, new_y_norm)
sffs.k_feature_names_

('volume_nvi',
 'volatility_bbli',
 'volatility_kch',
 'volatility_kcl',
 'volatility_kcp')

In [84]:
# adbe_norm_features = adbe_norm_df[['volume_adi',
#  'volume_nvi',
#  'volatility_bbli',
#  'volatility_kch',
#  'volatility_kcl',
#  'volatility_kcp',
#  'trend_adx_pos',
#  'trend_adx_neg',
#  'trend_ichimoku_b',
#  'trend_visual_ichimoku_b',
#  'others_cr']]

In [92]:
adbe_norm_features = adbe_norm_df[['volume_nvi',
 'volatility_bbli',
 'volatility_kch',
 'volatility_kcl',
 'volatility_kcp']]

In [93]:
adbe_norm_features

Unnamed: 0_level_0,volume_nvi,volatility_bbli,volatility_kch,volatility_kcl,volatility_kcp
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-02,0.000291,0.0,0.000098,0.000095,1.932512e-07
2020-01-03,0.000316,0.0,0.000107,0.000104,1.554160e-07
2020-01-06,0.000187,0.0,0.000063,0.000062,1.269230e-07
2020-01-07,0.000207,0.0,0.000070,0.000068,1.291946e-07
2020-01-08,0.000165,0.0,0.000055,0.000054,1.626074e-07
...,...,...,...,...,...
2020-08-25,0.000010,0.0,0.000005,0.000004,1.489971e-08
2020-08-26,0.000010,0.0,0.000004,0.000004,2.325509e-08
2020-08-27,0.000010,0.0,0.000004,0.000004,1.526243e-08
2020-08-28,0.000009,0.0,0.000004,0.000004,1.487718e-08


In [94]:
Y = adbe_norm_df["Adj Close"]

In [95]:
Y = Y
T = Y.shape[0];
# SPY = DataReader('SPY',  'yahoo', datetime(2020,1,1), datetime(2020,8,31))
# F = np.diff(np.log(SPY['Adj Close'].values))
F = adbe_norm_features.values
'Add Constant to X'
X = np.column_stack([np.ones((T,1)), F])
N = X.shape[1]

'REGRESSION STARTS:'       
'Linear Regression of Y: T x 1 on' 
'Regressors X: T x N'

invXX = np.linalg.inv(X.transpose()@X)
'OLS estimator beta: N x 1'
beta_hat = invXX@X.transpose()@Y
'Predictive value of Y_t using OLS'  
y_hat = X@beta_hat;       
'Residuals from OLS: Y - X*beta'        
residuals = Y - y_hat;            
'variance of Y_t or residuals'
sigma2 = (1/T)*(residuals.transpose()@residuals)
'standard deviation of Y_t or residuals'
sig = np.sqrt(sigma2) 
'variance-covariance matrix of beta_hat'
'N x N: on-diagnal variance(beta_j)'
'N x N: off-diagnal cov(beta_i, beta_j)'
varcov_beta_hat = (sigma2)*invXX
var_beta_hat = np.sqrt(T*np.diag(varcov_beta_hat))

'Calculate R-square'
R_square = 1 - residuals.transpose()@residuals/(T*np.var(Y))
adj_R_square = 1-(1-R_square)*(T-1)/(T-N)

'Test Each Coefficient: beta_i'
't-test stat: N x 1'
t_stat = beta_hat.transpose()/var_beta_hat
' t-test significance level: N x 1'
p_val_t = 1-ss.norm.cdf(t_stat)

'Test of Joint Significance of Model'
F_stat = beta_hat.transpose()@varcov_beta_hat@beta_hat/\
         (residuals.transpose()@residuals)
'size: (1 x N)*(N x N)*(N x 1)/((1 x T) * (T x 1)) = 1 x 1'

p_val_F = 1-ss.chi2.cdf(F_stat,T-N)

REPORT = np.column_stack([beta_hat, t_stat,p_val_t])
print('Regression Statistics')
print('------------------------\n')
print(' REGRESSION STATISTICS  \n') 
print('------------------------\n')
print('beta             t_stat            p_val\n')
print(REPORT)
print('\n Joint significance of all coefficients\n',[F_stat,p_val_F])
print('R-Square is       \n',R_square)
print('Adjusted R Square \n',adj_R_square)
print('Standard Error    \n',sig)
print('Observations      \n',T) 
print('-------------------------\n')

Regression Statistics
------------------------

 REGRESSION STATISTICS  

------------------------

beta             t_stat            p_val

[[ 2.67713508e-07  6.77877389e-01  2.48924719e-01]
 [ 6.74762837e-02  5.51861980e-01  2.90521459e-01]
 [-1.30381809e+01 -6.84564522e-01  7.53190619e-01]
 [ 2.01172469e-01  3.89289029e-01  3.48531176e-01]
 [ 5.80304577e-01  1.50393229e+00  6.62994004e-02]
 [ 1.09057174e+01  1.49430808e+00  6.75475574e-02]]

 Joint significance of all coefficients
 [66549332377115.625, 0.0]
R-Square is       
 0.9998936877666921
Adjusted R Square 
 0.9998904065249233
Standard Error    
 1.6857481552570475e-07
Observations      
 168
-------------------------



In [96]:
import decimal
tmp = decimal.Decimal('1.09057174e+01')

In [97]:
tmp

Decimal('10.9057174')