In [2]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import ta
from ta import add_all_ta_features
from ta.utils import dropna
import seaborn as sns
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from main import add_classic_indicators, get_euronext_tickers, get_stock_data, add_fin_ratios_and_commodities, generate_lagged_variables
from alpha101 import add_artificial_variables
from arch import arch_model
import itertools
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Reference: https://github.com/Toshiyuki-Tega/Machine-Learning-for-Algorithmic-Trading-Second-Edition/blob/master/22_deep_reinforcement_learning
# Fetch the ticker list, pick the entry at position 5 and load its data into `data`.
euro_tickers = get_euronext_tickers()
selected_ticker = euro_tickers[5]
data = get_stock_data(selected_ticker)
data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Returns,Log Returns
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-11-29,4.710262,4.785626,4.710262,4.77809,117575,,
2017-11-30,4.763017,4.793162,4.747944,4.793162,100750,0.003154,0.00315
2017-12-01,4.793163,4.823309,4.717799,4.747945,81065,-0.009434,-0.009479
2017-12-04,4.747945,4.785627,4.740408,4.763018,36275,0.003175,0.00317
2017-12-05,4.747944,4.785626,4.710262,4.710262,69723,-0.011076,-0.011138


In [4]:
# Preview the last rows of whatever generate_lagged_variables returns for `data`.
generate_lagged_variables(data).tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Returns,Log Returns,Returns n-1,Returns n-2,Returns n-3,Returns n-4,Returns n-5,Returns n-6,Returns n-7
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-11-23,6.52,6.54,6.5,6.51,8796,-0.004587,-0.004598,-0.007587,0.0,0.0,0.006107,-0.001524,0.009231,0.006192
2022-11-24,6.51,6.55,6.51,6.51,15929,0.0,0.0,-0.004587,-0.007587,0.0,0.0,0.006107,-0.001524,0.009231
2022-11-25,6.51,6.56,6.5,6.54,5196,0.004608,0.004598,0.0,-0.004587,-0.007587,0.0,0.0,0.006107,-0.001524
2022-11-28,6.58,6.58,6.5,6.52,12309,-0.003058,-0.003063,0.004608,0.0,-0.004587,-0.007587,0.0,0.0,0.006107
2022-11-29,6.52,6.56,6.52,6.56,9897,0.006135,0.006116,-0.003058,0.004608,0.0,-0.004587,-0.007587,0.0,0.0


In [10]:
# Display the full list returned by get_euronext_tickers().
# NOTE(review): the same call already populated `euro_tickers` in an earlier cell;
# inspecting that variable may be enough — confirm before keeping this cell.
get_euronext_tickers()

['2CRSI.PA',
 '2MX.PA',
 '2MXBS.PA',
 'ASP.PA',
 'AB.PA',
 'ABCA.PA',
 'ABEO.PA',
 'ABNX.PA',
 'ABVX.PA',
 'ACAN.PA',
 'AC.PA',
 'AAC.PA',
 'AACW.PA',
 'ACNV.PA',
 'EOS.PA',
 'ATI.PA',
 'ALDV.PA',
 'ADOC.PA',
 'ADP.PA',
 'ADVI.PA',
 'AKOM.PA',
 'AFME.PA',
 'AI.PA',
 'AIR.PA',
 'AKW.PA',
 'AAA.PA',
 'ABIO.PA',
 'ALD.PA',
 'CDANV.PA',
 'CDA.PA',
 'ALMDS.PA',
 'ALM.PA',
 'ALO.PA',
 'LTA.PA',
 'ALTA.PA',
 'AREIT.PA',
 'ATE.PA',
 'ALTUR.PA',
 'AMPLI.PA',
 'AMUN.PA',
 'ANTIN.PA',
 'APM.PA',
 'ARAMI.PA',
 'ARG.PA',
 'AKE.PA',
 'ARTE.PA',
 'PRC.PA',
 'ARTO.PA',
 'ASY.PA',
 'ATA.PA',
 'ATEME.PA',
 'ATO.PA',
 'AUB.PA',
 'AUGR.PA',
 'AURE.PA',
 'AURS.PA',
 'AVT.PA',
 'CS.PA',
 'AXW.PA',
 'BAIN.PA',
 'BALYO.PA',
 'BUI.PA',
 'BASS.PA',
 'BLC.PA',
 'FBEL.PA',
 'BLV.PA',
 'BEN.PA',
 'BB.PA',
 'BIG.PA',
 'BIM.PA',
 'BLEE.PA',
 'BNP.PA',
 'BOI.PA',
 'BOL.PA',
 'BON.PA',
 'BSD.PA',
 'EN.PA',
 'BVI.PA',
 'BUR.PA',
 'CAT31.PA',
 'CAFO.PA',
 'CBDG.PA',
 'CAPLI.PA',
 'CAP.PA',
 'CARM.PA',
 'CARP.PA',
 'CA.P

In [5]:
# NOTE(review): neither module is used in this cell (pd.read_csv reads the zip URL
# directly); kept in case other cells rely on them.
import urllib.request
import zipfile

# Monthly factor file (Europe, 5 factors) from the Ken French data library.
url = 'http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/Europe_5_Factors_CSV.zip'

# Read everything as text first: the file mixes a monthly and an annual section,
# so the values are cleaned as strings before being converted to numbers.
factors = pd.read_csv(url, skiprows=3).astype(str)
factors = factors.rename(columns={factors.columns[0]: 'date'})

# Remove stray whitespace around every value.
for col in factors.columns:
    factors[col] = factors[col].str.strip()

# Drop the annual section that follows the monthly rows at the end of the file.
annual_factors_index_cut = factors[factors['date'] == 'Annual Factors: January-December'].index[0]
factors = factors.drop(factors.index[annual_factors_index_cut:])

# Dates come as YYYYMM; move them to the month end so they line up with the
# month-end stock returns built below.
factors['date'] = pd.to_datetime(factors['date'], format='%Y%m') + pd.offsets.MonthEnd()

# Convert by column label rather than `factors.iloc[:, 1:] = ...`: since pandas 2.0
# an iloc assignment writes into the existing object-dtype columns, so the values
# would stay objects and the later OLS fit would reject them.
factor_cols = factors.columns[1:]
factors[factor_cols] = factors[factor_cols].astype(float) / 100  # percent -> decimal
factors = factors[factors['date'] > "2016-01-01"]

# Append the stock's monthly return (last close of each month, percentage change).
montly_returns = data.Close.resample('M').last().pct_change().dropna()
montly_returns.name = "Month_Rtn"
factors = factors.merge(montly_returns, on='date')
factors.tail()

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA,RF,Month_Rtn
54,2022-06-30,-0.1032,-0.0103,-0.0246,0.0172,0.0021,0.0006,-0.029046
55,2022-07-31,0.0526,0.0017,-0.0601,0.0335,-0.0393,0.0008,0.011519
56,2022-08-31,-0.0668,-0.005,0.0541,-0.0321,0.0217,0.0019,0.004286
57,2022-09-30,-0.0984,-0.0351,0.0125,-0.002,0.0275,0.0019,-0.089616
58,2022-10-31,0.0666,-0.0067,0.0244,0.0117,0.0013,0.0023,0.0375


In [6]:
# Original note: rolling regression with daily data => problem, one month is missing.
# Regress the stock's monthly excess return on the five factors. No constant is
# added to X, so the fit goes through the origin (the summary reports the
# uncentered R-squared).
factor_names = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
X = factors[factor_names]
y = factors['Month_Rtn'] - factors['RF']
ff_model = sm.OLS(y, X).fit()
print(ff_model.summary())
b1, b2, b3, b4, b5 = ff_model.params

# Sample means of the risk-free rate and of each factor premium.
rf = factors['RF'].mean()
market_premium, size_premium, value_premium, rmw_premium, cma_premium = (
    factors[name].mean() for name in factor_names
)

# Expected monthly return = risk-free rate + sum of (factor loading * mean premium),
# accumulated in the same left-to-right order as the factor list.
expected_monthly_return = rf
for beta, premium in zip(
    (b1, b2, b3, b4, b5),
    (market_premium, size_premium, value_premium, rmw_premium, cma_premium),
):
    expected_monthly_return = expected_monthly_return + beta * premium
expected_monthly_return

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.176
Model:                            OLS   Adj. R-squared (uncentered):              0.100
Method:                 Least Squares   F-statistic:                              2.312
Date:                Tue, 29 Nov 2022   Prob (F-statistic):                      0.0563
Time:                        11:14:38   Log-Likelihood:                          114.26
No. Observations:                  59   AIC:                                     -218.5
Df Residuals:                      54   BIC:                                     -208.1
Df Model:                           5                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

0.0017984801406501836

In [None]:
# Display the current `data` frame.
data

In [25]:
# Display the 'Close' column of `data`.
data['Close']

2017-11-29     40.289631
2017-11-30     40.853031
2017-12-01     40.662846
2017-12-04     40.365688
2017-12-05     40.327660
                 ...    
2022-11-21    148.009995
2022-11-22    150.179993
2022-11-23    151.070007
2022-11-25    148.110001
2022-11-28    144.220001
Name: Close, Length: 1258, dtype: float64

In [85]:
# Call add_fin_ratios_and_commodities on `data` and display what it returns.
# NOTE(review): the return value is not assigned — confirm whether `data` is meant
# to be updated with these extra columns.
add_fin_ratios_and_commodities(data)

  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,Open,High,Low,Close,Volume,Returns,Log Returns,Gold Close,WTI Oil Close,5Y TY ^FVX,CAC 40
2017-11-28,41.435456,41.570957,40.855406,41.143055,105715200,,,1294.699951,57.990002,2.068,5390.479980
2017-11-29,41.038463,41.107402,39.738107,40.289627,166665600,-0.020743,-0.020961,1282.099976,57.299999,2.093,5398.049805
2017-11-30,40.515459,40.921970,40.042388,40.853031,166108800,0.013984,0.013887,1273.199951,57.400002,2.144,5372.790039
2017-12-01,40.401354,40.810242,40.056654,40.662853,159037200,-0.004655,-0.004666,1278.800049,58.360001,2.118,5316.890137
2017-12-04,41.002793,41.036075,40.325279,40.365692,130169600,-0.007308,-0.007335,1274.300049,57.470001,2.148,5389.290039
...,...,...,...,...,...,...,...,...,...,...,...
2022-11-21,150.160004,150.369995,147.720001,148.009995,58724100,-0.021680,-0.021919,1737.400024,79.730003,4.008,6634.450195
2022-11-22,148.130005,150.419998,146.929993,150.179993,51804100,0.014661,0.014555,1738.300049,80.949997,3.938,6657.529785
2022-11-23,149.449997,151.830002,149.339996,151.070007,58301400,0.005926,0.005909,1744.900024,77.940002,3.892,6679.089844
2022-11-25,148.309998,148.880005,147.119995,148.110001,35195900,-0.019594,-0.019788,1753.300049,76.279999,3.870,6712.479980


In [7]:
# Rebind `data` to the frame returned by add_classic_indicators and preview its last rows.
# NOTE(review): `data` is both input and output, so re-running this cell passes the
# already-augmented frame back in — confirm that is safe.
data = add_classic_indicators(data)
data.tail()

Unnamed: 0,Open,High,Low,Close,Volume,Returns,Log Returns,Returns n-1,Returns n-2,Returns n-3,Returns n-4,Returns n-5,Returns n-6,Returns n-7,adx 4,ema7,ema30,ema50,ema100,ema150,ema200,sma7,sma30,sma50,sma100,sma150,sma200,macd_24_52,macd_12_26,adx5,rsi3,rsi5,rsi7,rsi10,rsi14,rsi17,rsi20,rsi25,rsi30,stochrsi4,...,trend_trix,trend_mass_index,trend_dpo,trend_kst,trend_kst_sig,trend_kst_diff,trend_ichimoku_conv,trend_ichimoku_base,trend_ichimoku_a,trend_ichimoku_b,trend_stc,trend_adx_pos,trend_adx_neg,trend_visual_ichimoku_a,trend_visual_ichimoku_b,trend_aroon_up,trend_aroon_down,trend_aroon_ind,trend_psar_up,trend_psar_down,trend_psar_up_indicator,trend_psar_down_indicator,momentum_stoch_rsi_k,momentum_stoch_rsi_d,momentum_tsi,momentum_uo,momentum_stoch,momentum_stoch_signal,momentum_ao,momentum_roc,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,momentum_pvo,momentum_pvo_signal,momentum_pvo_hist,momentum_kama,others_dr,others_dlr,others_cr
2022-11-23,6.52,6.54,6.5,6.51,8796.0,-0.004587,-0.004598,-0.007587,0.0,0.0,0.006107,-0.001524,0.009231,0.006192,42.352308,6.538374,6.511817,6.546388,6.645881,6.705872,6.733689,6.561429,6.504,6.48173,6.691995,6.781785,6.848256,0.016079,0.013105,15.741737,21.426297,40.525344,47.25244,49.914895,50.050558,49.604163,49.096382,48.342453,47.767582,0.0,...,0.003886,23.645846,-0.1225,10.555402,-2.026704,12.582106,6.545,6.5,6.5225,6.530151,99.721737,22.945827,18.608265,6.503667,6.618795,4.0,44.0,-40.0,6.386121,,0.0,0.0,0.741863,0.881591,4.312198,41.907664,52.777844,62.963007,0.063715,1.401872,0.217929,0.016168,0.201762,-11.735842,-9.686775,-2.049067,6.529721,-0.458712,-0.459767,36.246915
2022-11-24,6.51,6.55,6.51,6.51,15929.0,0.0,0.0,-0.004587,-0.007587,0.0,0.0,0.006107,-0.001524,0.009231,35.239917,6.531281,6.5117,6.544961,6.64319,6.703277,6.731463,6.554286,6.505,6.476008,6.68815,6.77735,6.846691,0.014188,0.008803,15.490242,21.426297,40.525344,47.25244,49.914895,50.050558,49.604163,49.096382,48.342453,47.767582,0.0,...,0.009684,23.565801,-0.047,12.758645,0.869779,11.888866,6.575,6.5,6.5375,6.530151,99.860869,23.012914,18.000717,6.53814,6.618795,80.0,40.0,40.0,6.403754,,0.0,0.0,0.580813,0.741863,3.847763,35.329421,52.777844,55.5556,0.054285,2.358492,0.185648,0.050064,0.135584,-12.228166,-10.195053,-2.033113,6.528968,0.0,0.0,36.246915
2022-11-25,6.51,6.56,6.5,6.54,5196.0,0.004608,0.004598,0.0,-0.004587,-0.007587,0.0,0.0,0.006107,-0.001524,26.926235,6.533461,6.513526,6.544766,6.641147,6.701115,6.729558,6.552857,6.501667,6.470887,6.684407,6.773697,6.845276,0.013636,0.00753,15.088515,56.50457,54.959472,55.351277,54.558035,52.978516,51.913119,51.016452,49.858526,49.029813,1.0,...,0.01471,23.4049,-0.0265,14.081938,3.764151,10.317787,6.575,6.5,6.5375,6.530151,99.930434,21.85999,17.933902,6.540075,6.618795,76.0,36.0,40.0,6.420329,,0.0,0.0,0.576309,0.632995,4.186282,37.581537,61.111111,55.5556,0.043882,1.552794,0.19494,0.079039,0.115901,-17.324305,-11.620903,-5.703402,6.529657,0.460825,0.459767,36.874776
2022-11-28,6.58,6.58,6.5,6.52,12309.0,-0.003058,-0.003063,0.004608,0.0,-0.004587,-0.007587,0.0,0.0,0.006107,26.036261,6.530095,6.513943,6.543795,6.638748,6.698716,6.727473,6.542857,6.498667,6.465759,6.681154,6.769263,6.843858,0.012347,0.005014,14.993863,39.064542,45.713983,49.446362,51.052231,50.83894,50.281312,49.700861,48.862652,48.226072,0.434049,...,0.018369,23.395348,-0.0255,13.756643,6.347762,7.408882,6.59,6.5,6.545,6.505527,99.965217,22.070714,16.73037,6.527764,6.618795,72.0,32.0,40.0,6.435909,,0.0,0.0,0.509403,0.555508,3.927815,34.210008,55.555556,56.481504,0.026,0.928792,0.17553,0.098337,0.077193,-18.216518,-12.940026,-5.276492,6.529529,-0.30581,-0.306279,36.456199
2022-11-29,6.52,6.56,6.52,6.56,9897.0,0.006135,0.006116,-0.003058,0.004608,0.0,-0.004587,-0.007587,0.0,0.0,25.36878,6.537572,6.516915,6.544431,6.637189,6.696878,6.725807,6.538571,6.494667,6.46675,6.678005,6.765355,6.842352,0.01259,0.005668,14.905972,68.360737,61.785465,59.522172,57.168415,54.772995,53.394382,52.290967,50.905593,49.924452,1.0,...,0.022125,23.40245,0.009,14.217309,8.631172,5.586138,6.59,6.5,6.545,6.485828,99.982609,21.300986,16.146889,6.517914,6.618795,68.0,28.0,40.0,6.450555,,0.0,0.0,0.55214,0.545951,4.737915,35.583176,55.555595,57.40742,0.014794,1.547986,0.207225,0.120115,0.08711,-20.03275,-14.358571,-5.674179,6.529656,0.613496,0.611622,37.293352


In [9]:
# List every column name currently in `data` as an array.
data.columns.values

array(['Open', 'High', 'Low', 'Close', 'Volume', 'Returns', 'Log Returns',
       'Returns n-1', 'Returns n-2', 'Returns n-3', 'Returns n-4',
       'Returns n-5', 'Returns n-6', 'Returns n-7', 'adx 4', 'ema7',
       'ema30', 'ema50', 'ema100', 'ema150', 'ema200', 'sma7', 'sma30',
       'sma50', 'sma100', 'sma150', 'sma200', 'macd_24_52', 'macd_12_26',
       'adx5', 'rsi3', 'rsi5', 'rsi7', 'rsi10', 'rsi14', 'rsi17', 'rsi20',
       'rsi25', 'rsi30', 'stochrsi4', 'stochrsi7', 'stochrsi10',
       'stochrsi14', 'stochrsi20', 'stochrsi25', 'willr7', 'willr14',
       'willr21', 'willr60', 'CCI5', 'volume_adi', 'volume_obv',
       'volume_cmf', 'volume_fi', 'volume_em', 'volume_sma_em',
       'volume_vpt', 'volume_vwap', 'volume_mfi', 'volume_nvi',
       'volatility_bbm', 'volatility_bbh', 'volatility_bbl',
       'volatility_bbw', 'volatility_bbp', 'volatility_bbhi',
       'volatility_bbli', 'volatility_kcc', 'volatility_kch',
       'volatility_kcl', 'volatility_kcw', 'volatility_

In [41]:
# Rebind `data` to the frame returned by add_artificial_variables and preview its last rows.
# NOTE(review): `data` is both input and output, so re-running this cell passes the
# already-augmented frame back in — confirm that is safe.
data = add_artificial_variables(data)
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Returns,Log Returns,adx 4,ema7,ema30,ema50,ema100,ema150,ema200,sma7,sma30,sma50,sma100,sma150,sma200,macd_24_52,macd_12_26,adx5,rsi3,rsi5,rsi7,rsi10,rsi14,rsi17,rsi20,rsi25,rsi30,stochrsi4,stochrsi7,stochrsi10,stochrsi14,stochrsi20,stochrsi25,willr7,willr14,...,alpha33,alpha34,alpha35,alpha36,alpha37,alpha38,alpha40,alpha41,alpha42,alpha43,alpha44,alpha45,alpha47,alpha49,alpha50,alpha51,alpha52,alpha53,alpha54,alpha55,alpha60,alpha61,alpha62,alpha64,alpha65,alpha68,alpha71,alpha74,alpha75,alpha78,alpha81,alpha83,alpha84,alpha85,alpha86,alpha94,alpha95,alpha96,alpha99,alpha101
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
2022-11-21 00:00:00+01:00,2.66,2.66,2.56,0.026391,5488.0,-0.001919,-0.001921,48.819038,2.599915,2.534516,2.596493,2.873408,3.125877,3.322068,2.646429,2.482833,2.4888,2.8128,3.310733,3.512875,0.032487,0.01929,24.97847,43.71208,53.760992,56.93923,57.442378,55.741702,54.11185,52.553414,50.33674,48.624364,0.261964,0.095668,0.298942,0.607749,0.677744,0.715105,-56.000042,-31.818202,...,0.636559,0.786177,676.0,3.401855,0.979391,-0.243599,-0.08455,-0.010479,14.117647,120.0,-0.722237,0.2205795,0.033552,2.578609,-0.960428,2.578609,0.126286,15.854965,263549800000.0,-0.415832,-0.001034,0,-1,0,0,0,0.673426,-1,0,0.458311,-1,0.504918,0.151214,0.48396,0,-0.494585,1,-0.673426,-1,-26.0753
2022-11-22 00:00:00+01:00,2.6,2.625,2.56,2.625,2857.0,0.009615,0.009569,42.632956,2.606186,2.540354,2.597611,2.868489,3.119242,3.315132,2.637143,2.492833,2.4837,2.80745,3.296767,3.504775,0.031253,0.016542,25.339055,59.94682,60.333671,60.960819,60.036059,57.552475,55.603883,53.825628,51.356975,49.471402,1.0,0.250463,0.418349,0.681385,0.749166,0.785874,-63.88887,-26.136362,...,0.235484,0.352052,528.0,3.437827,0.765412,-0.394732,-0.094985,-0.010204,0.488864,15.0,-0.643791,0.007814986,-0.063941,-2.598609,-0.960428,-2.598609,0.031543,-14.788607,-0.9532792,0.036937,-0.00232,0,-1,-1,0,0,0.765591,0,0,0.346348,-1,0.302732,0.253863,0.422042,0,-0.571582,1,-0.765591,-1,0.378789
2022-11-23 00:00:00+01:00,2.56,2.6,2.56,0.026527,4544.0,-0.022857,-0.023122,37.993395,2.59589,2.541944,2.596332,2.862479,3.111901,3.307668,2.617857,2.502333,2.4781,2.8019,3.2828,3.4969,0.02779,0.009907,25.673884,29.409939,42.296818,48.324071,51.643048,52.04856,51.346644,50.410834,48.848256,47.525999,0.0,0.0,0.0,0.457565,0.557461,0.611857,-97.22216,-41.666648,...,0.626882,0.448164,27.0,3.718397,1.018011,-0.135595,-0.042998,0.008672,15.419355,96.0,-0.009386,0.07617995,0.069773,2.598473,-0.960428,2.598473,0.109714,10.489925,530151200000.0,0.793243,-0.001122,0,-1,-1,0,0,0.438634,0,0,0.44043,-1,0.712568,0.162252,0.871872,0,-0.517761,1,-0.438634,-1,-61.792076
2022-11-24 00:00:00+01:00,2.565,2.635,2.56,2.565,3833.0,0.0,0.0,42.498742,2.588167,2.543431,2.595104,2.856589,3.104658,3.300279,2.598571,2.512833,2.4735,2.79775,3.2681,3.488975,0.024851,0.004991,26.321463,29.409939,42.296818,48.324071,51.643048,52.04856,51.346644,50.410834,48.848256,47.525999,0.0,0.0,0.0,0.210801,0.557461,0.611857,-97.22216,-46.052605,...,0.372581,0.160907,1050.0,3.386441,0.855914,-0.227745,-0.016231,0.015979,0.901602,40.0,-0.051845,0.1240256,-0.04573,-2.538473,-0.960428,-2.538473,0.0303,-181.008591,-0.06666815,0.753115,-0.002218,0,-1,-1,0,0,0.628495,0,0,0.27529,-1,0.896175,0.824503,0.846382,0,-0.233594,1,-0.628495,-1,0.0
2022-11-25 00:00:00+01:00,2.565,2.565,2.565,2.565,0.0,0.0,0.0,45.877753,2.582376,2.544823,2.593923,2.850815,3.09751,3.292962,2.586429,2.518,2.4686,2.79085,3.253867,3.480625,0.02234,0.001398,26.922785,29.409939,42.296818,48.324071,51.643048,52.04856,51.346644,50.410834,48.848256,47.525999,0.0,0.0,0.0,0.210801,0.557461,0.611857,-94.999893,-50.0,...,0.372581,0.942225,116.0,3.62663,0.872581,-0.200375,-0.061469,0.0,0.777136,2.0,-0.768021,2.002301e-08,-0.0,-0.0,-0.935829,-0.0,-0.002143,-999984.615635,0.0,0.795204,-0.002699,0,-1,-1,0,0,0.628495,-1,0,0.102549,-1,0.999454,0.804636,0.777823,0,-0.012412,1,-0.628495,-1,0.0


In [None]:
# TODO: to be determined
# rolling beta over several time horizons (dynamic beta)
#https://goldinlocks.github.io/ARCH_GARCH-Volatility-Forecasting/
#https://github.com/Taaniya/Stock-Price-Returns-Prediction/blob/master/Prediction_AXISBANK.ipynb

In [None]:
#https://pub.towardsai.net/statistical-forecasting-for-time-series-data-part-5-arma-garch-model-for-time-series-forecasting-98beeedcfba8
#https://github.com/yashveersinghsohi/Statistical_Modeling_for_Time_Series_Forecasting/tree/master/Returns%20Models
#https://pub.towardsai.net/statistical-forecasting-for-time-series-data-part-6-forecasting-non-stationary-time-series-using-9acc28c39db9
#https://stackoverflow.com/questions/55882111/arima-model-for-certain-lags
# fit a GARCH on the residuals of the ARIMA
# test dropping some lags and add parameter optimisation
#https://ionides.github.io/531w18/midterm_project/project38/Midterm_proj.html

In [None]:
# Stationary => no seasonality, random but with a constant mean, does not depend on time.
# Stationarity test: if the series is non-stationary it has a trend and seasonality
# and depends on time (p > 0.05).
price_series = data['Close']
AdfResult1 = adfuller(price_series)
print("Original P value:", AdfResult1[1])

# Original note: the price series is non-stationary, so it must be differenced
# before ARIMA can be used. Here: first difference of the log close.
data['log_diff_data'] = np.log(price_series).diff(1)
log_returns = data['log_diff_data'].dropna()
AdfResult2 = adfuller(log_returns)
print("Differenced p value:", AdfResult2[1])

# Correlograms of the squared log differences: ACF first, then PACF.
squared_log_returns = log_returns**2
for draw_correlogram in (plot_acf, plot_pacf):
    draw_correlogram(squared_log_returns, lags=150, zero=False)
    plt.show()

In [None]:
def _lag_significance(auto_corr, conf_int):
    """Derive the confidence band and the significant lags of a correlogram.

    Parameters
    ----------
    auto_corr : 1-D array with one (partial) autocorrelation per lag.
    conf_int : 2-column array of lower/upper confidence bounds, one row per lag.

    Returns
    -------
    tuple of (blue_area, neg_band, pos_band, sig_lags, usable_lags_binary):
    the band centred on zero, its lower and upper halves, the indices of the
    lags whose absolute correlation exceeds the threshold, and a 0/1 mask
    (length max(sig_lags) + 1) with ones at those indices.
    """
    # Centre the interval on zero so only its half-widths remain.
    blue_area = conf_int - np.array([auto_corr, auto_corr]).T
    neg_band, pos_band = blue_area.T
    # The threshold is twice the band half-width taken at lag 5, applied to every lag.
    threshold = 2 * np.abs(pos_band[5])
    sig_lags = np.where(np.abs(auto_corr) > threshold)[0]
    # Guard the empty case: np.max raises on an empty array.
    usable_lags_binary = np.zeros(sig_lags.max() + 1 if sig_lags.size else 0)
    usable_lags_binary[sig_lags] = 1
    return blue_area, neg_band, pos_band, sig_lags, usable_lags_binary


def _plot_correlogram(neg_band, auto_corr, pos_band):
    """Plot the lower band, the correlations and the upper band on one figure."""
    plt.plot(neg_band)
    plt.plot(auto_corr)
    plt.plot(pos_band)
    plt.show()


# Both correlograms are computed on the squared log differences.
squared_log_returns = data['log_diff_data'].dropna()**2

# Partial autocorrelation. The mask is indexed with pacf_sig_lags; the previous
# `significant_index` was never defined and raised a NameError on a fresh kernel.
pacf_auto_corr, pacf_conf_int = pacf(squared_log_returns, alpha=.05, nlags=365)
(pacf_blue_area, neg_pacf_blue_area, pos_pacf_blue_area,
 pacf_sig_lags, pacf_usable_lags_binary) = _lag_significance(pacf_auto_corr, pacf_conf_int)
print(pacf_sig_lags)
_plot_correlogram(neg_pacf_blue_area, pacf_auto_corr, pos_pacf_blue_area)

# Autocorrelation, same treatment with acf_sig_lags.
acf_auto_corr, acf_conf_int = acf(squared_log_returns, alpha=.05, nlags=365, fft=True)
(acf_blue_area, neg_acf_blue_area, pos_acf_blue_area,
 acf_sig_lags, acf_usable_lags_binary) = _lag_significance(acf_auto_corr, acf_conf_int)
print(acf_sig_lags)
_plot_correlogram(neg_acf_blue_area, acf_auto_corr, pos_acf_blue_area)

In [None]:
# Display the significant PACF lag indices. The former name `significant_index`
# is not assigned in any visible cell and fails on a fresh kernel.
pacf_sig_lags

In [None]:
# Display the 0/1 mask of usable PACF lags. The former name `usable_lags_binary`
# is not assigned in any visible cell and fails on a fresh kernel.
pacf_usable_lags_binary

In [None]:
# Stack the fit diagnostics into one Series: p-values, t-values, then BIC and AIC.
# NOTE(review): `model_fit` is not assigned in any visible cell — confirm where it comes from.
stat_parts = [
    model_fit.pvalues,
    model_fit.tvalues,
    pd.Series({'bic' : model_fit.bic}),
    pd.Series({'aic' : model_fit.aic}),
]
stats = pd.concat(stat_parts)

In [None]:
# Display the (BIC, AIC) pair of `model_fit`.
# NOTE(review): `model_fit` is not assigned in any visible cell — confirm where it comes from.
model_fit.bic, model_fit.aic

In [None]:
# TODO: difference the series until it is stationary

In [None]:
# Chronological split of the log-differenced series:
# train up to 2022-05-31 inclusive, test from 2022-06-01 onwards.
log_returns = data['log_diff_data']
train_df = log_returns.loc[:"2022-05-31"]
test_df = log_returns.loc["2022-06-01":]
test_df.shape, train_df.shape

In [None]:
# Mean of the 'log_diff_data' column (first difference of the log close).
data['log_diff_data'].mean()

In [None]:
# TODO: feature: GARCH n+1

In [None]:
# Decompose the close price with a 365-observation cycle.
# `period=` replaces the deprecated `freq=` keyword, which recent statsmodels
# releases no longer accept.
# NOTE(review): 365 is counted in rows of `data`; confirm it matches the intended
# yearly cycle for this series.
res = sm.tsa.seasonal_decompose(data['Close'], period=365)
fig = res.plot()
fig.set_figheight(8)
fig.set_figwidth(15)
plt.show()

In [None]:
# Flag features that are highly correlated with an earlier column.
corr_matrix = data.corr()
# Keep only the strict upper triangle so each pair is looked at once.
# The builtin `bool` replaces the `np.bool` alias, which NumPy 1.24 removed.
upperMatrix = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
# A column is flagged when its correlation with any earlier column exceeds 0.90
# (positive correlations only; the sign is not dropped).
corrFeatures = upperMatrix.columns[(upperMatrix > 0.90).any()].tolist()
corrFeatures

In [None]:
# Largest per-column count of missing values in `data`.
data.isna().sum().max()

In [None]:
# TODO: add lagged series (e.g. price at day-1, price at day-2)
# https://github.com/GoldinLocks/Cryptocurrency-Research-/blob/master/predicting-crypto-prices-with-deep-learning.ipynb
# https://www.srose.biz/wp-content/uploads/2020/08/Deep-Learning-Performance-Part-3-Batch-Normalization-Dropout-Noise.html
# garch s


In [None]:
# TODO: calmar (presumably the Calmar ratio)

In [None]:
# TODO: estimate the risk-adjusted return with the CAPM (MEDAF) + volatility