In [1]:
import pandas as pd 
import numpy as np
import os 
from math import sqrt 
import yfinance as yf
from datetime import date, timedelta



In [2]:
from stock_prediction.data_prep_api import Data_Prep_Api
from stock_prediction.features_exo_api import exo_selection_api


In [11]:
# Instantiate the data-preparation helper for the 'vinci' stock.
# NOTE(review): the second argument (252) is presumably the look-back period
# in trading days (about one year) used by the Period_* / Annual_Vol features
# -- confirm against Data_Prep_Api.
prep_class = Data_Prep_Api('vinci', 252)


In [12]:
# Build the base feature DataFrame for the stock. It is ready to use as-is
# when no further feature selection is needed.
# The call prints two download progress bars (presumably one for the stock and
# one for its reference index -- confirm in Data_Prep_Api).
data = prep_class.data_prep_api()


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [13]:
# Sanity check: (rows, columns) of the prepared frame.
data.shape

(1023, 13)

In [5]:
# Preview the first rows to inspect the generated feature columns.
data.head()

Unnamed: 0,Date,Return_DG.PA,Log_Return_DG.PA,High-Low_DG.PA,High-Close_DG.PA,Low-Close_DG.PA,Volume-Change_DG.PA,Period_Volum_DG.PA,Annual_Vol_DG.PA,Period_Vol_DG.PA,Return_stoxx_50,Period_Volum_stoxx_50,DG.PA_relatif
0,2017-06-13,0.00116,0.001159,0.00581,0.002703,0.003099,-0.200583,-0.248449,0.198194,0.198194,0.004512,0.0,-0.003351
1,2017-06-14,0.01107,0.011009,0.020957,0.010948,0.0099,0.452843,0.093502,0.198444,0.198444,-0.000996,0.0,0.012065
2,2017-06-15,-0.001146,-0.001147,0.013297,0.000382,0.01291,0.061252,0.161533,0.198371,0.198371,-0.00224,0.0,0.001094
3,2017-06-16,0.014402,0.0143,0.013885,0.0,0.013885,0.596913,0.856392,0.198809,0.198809,0.000284,0.0,0.014119
4,2017-06-19,0.002638,0.002635,0.011448,0.007519,0.0039,-0.51742,-0.091332,0.184772,0.184772,0.008347,-0.098257,-0.005709


In [6]:
# Drop the features we do not want to use for modelling: each keyword flag
# keeps (True) or removes (False) the matching feature column(s).
# Log_Return is False by default.
#
# *******************
# If period < 252, don't use Annual_Vol.
# *******************
#
# The returned frame is assigned back to `data` so that the following cells
# explicitly work on the selected features, instead of relying on the call
# having modified `data` in place (or on leftover kernel state).
data = prep_class.select_features_api(
    data,
    Return=True,
    Log_Return=False,
    High_Low=False,
    High_Close=True,
    Low_Close=True,
    Volume_Change=True,
    Period_Volum=True,
    Annual_Vol=True,
    Period_Vol=True,
    Return_Index=True,
    Volum_Index=True,
    Relative_Return=True,
)
# Display the selected-feature frame as the cell output.
data

Unnamed: 0,Date,Return_DG.PA,High-Close_DG.PA,Low-Close_DG.PA,Volume-Change_DG.PA,Period_Volum_DG.PA,Annual_Vol_DG.PA,Period_Vol_DG.PA,Return_stoxx_50,Period_Volum_stoxx_50,DG.PA_relatif
0,2017-06-13,0.001160,0.002703,0.003099,-0.200583,-0.248449,0.198194,0.198194,0.004512,0.000000,-0.003351
1,2017-06-14,0.011070,0.010948,0.009900,0.452843,0.093502,0.198444,0.198444,-0.000996,0.000000,0.012065
2,2017-06-15,-0.001146,0.000382,0.012910,0.061252,0.161533,0.198371,0.198371,-0.002240,0.000000,0.001094
3,2017-06-16,0.014402,0.000000,0.013885,0.596913,0.856392,0.198809,0.198809,0.000284,0.000000,0.014119
4,2017-06-19,0.002638,0.007519,0.003900,-0.517420,-0.091332,0.184772,0.184772,0.008347,-0.098257,-0.005709
...,...,...,...,...,...,...,...,...,...,...,...
1018,2021-06-07,0.004922,0.003940,0.009568,0.762784,-0.306718,0.329186,0.329186,-0.000797,-0.169992,0.005719
1019,2021-06-08,0.002769,0.002442,0.005338,-0.215297,-0.454690,0.329146,0.329146,0.000175,-0.143250,0.002594
1020,2021-06-09,0.015610,0.002928,0.019942,0.576838,-0.138483,0.328310,0.328310,-0.001824,-0.150341,0.017434
1021,2021-06-10,-0.016102,0.017747,0.000744,-0.282664,-0.380286,0.328743,0.328743,0.004652,-0.235479,-0.020754


In [7]:
# Add the selected exogenous market features to the frame and keep the result
# as the frame used for modelling.
# Judging by the preview that follows, each series is added as a
# `Return_<name>` column, except VIX, which is added as `Vix_No_Rebase`
# (it is kept unchanged and is not meant to be rebased later).
# NOTE(review): an earlier version of this comment said the function also
# returns the list of columns to rebase; only a single value is assigned here
# -- confirm the actual return signature of exo_selection_api.
data = exo_selection_api(data, ["sp500","gold", "eurusd", "nasdaq", "crude", "vix"])



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [8]:
# Preview the frame again to check the newly added exogenous columns.
data.head()

Unnamed: 0,Date,Return_DG.PA,High-Close_DG.PA,Low-Close_DG.PA,Volume-Change_DG.PA,Period_Volum_DG.PA,Annual_Vol_DG.PA,Period_Vol_DG.PA,Return_stoxx_50,Period_Volum_stoxx_50,DG.PA_relatif,Return_sp500,Return_gold,Return_eurusd,Return_nasdaq,Return_crude,Vix_No_Rebase
0,2017-06-13,0.00116,0.002703,0.003099,-0.200583,-0.248449,0.198194,0.198194,0.004512,0.0,-0.003351,0.004512,-0.000237,-0.000515,0.007272,0.008246,0.1042
1,2017-06-14,0.01107,0.010948,0.0099,0.452843,0.093502,0.198444,0.198444,-0.000996,0.0,0.012065,-0.000996,0.00553,0.001132,-0.004096,-0.037236,0.1064
2,2017-06-15,-0.001146,0.000382,0.01291,0.061252,0.161533,0.198371,0.198371,-0.00224,0.0,0.001094,-0.00224,-0.016185,0.000617,-0.004744,-0.006036,0.109
3,2017-06-16,0.014402,0.0,0.013885,0.596913,0.856392,0.198809,0.198809,0.000284,0.0,0.014119,0.000284,0.001438,-0.006142,-0.002229,0.006298,0.1038
4,2017-06-19,0.002638,0.007519,0.0039,-0.51742,-0.091332,0.184772,0.184772,0.008347,-0.098257,-0.005709,0.008347,-0.007815,0.005503,0.014183,-0.01207,0.1037


In [9]:
# Before modelling, you may want to rebase the return series to 100 so that
# the different assets/indexes can be compared on a common scale.
# Pass the frame you want to model; the returned frame no longer contains the
# Return_* columns but base-100 index columns instead (suffix `_R`), while
# Vix_No_Rebase is left as-is.

# The result is only displayed here; `data` is not reassigned by this cell.
prep_class.Price_Rebase_api(data)

Unnamed: 0,Date,High-Close_DG.PA,Low-Close_DG.PA,Volume-Change_DG.PA,Period_Volum_DG.PA,Annual_Vol_DG.PA,Period_Vol_DG.PA,Period_Volum_stoxx_50,DG.PA_relatif,Vix_No_Rebase,Return_DG.PA_R,Return_stoxx_50_R,Return_sp500_R,Return_gold_R,Return_eurusd_R,Return_nasdaq_R,Return_crude_R
0,2017-06-13,0.002703,0.003099,-0.200583,-0.248449,0.198194,0.198194,0.000000,-0.003351,0.1042,100.000000,100.000000,100.000000,100.000000,100.000000,100.000000,100.000000
1,2017-06-14,0.010948,0.009900,0.452843,0.093502,0.198444,0.198444,0.000000,0.012065,0.1064,101.106965,99.900417,99.900417,100.553010,100.113213,99.590378,96.276368
2,2017-06-15,0.000382,0.012910,0.061252,0.161533,0.198371,0.198371,0.000000,0.001094,0.1090,100.991098,99.676680,99.676680,98.925573,100.174967,99.117896,95.695222
3,2017-06-16,0.000000,0.013885,0.596913,0.856392,0.198809,0.198809,0.000000,0.014119,0.1038,102.445606,99.704952,99.704952,99.067779,99.559702,98.897005,96.297896
4,2017-06-19,0.007519,0.003900,-0.517420,-0.091332,0.184772,0.184772,-0.098257,-0.005709,0.1037,102.715902,100.537212,100.537212,98.293562,100.107602,100.299655,95.135604
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
958,2021-06-07,0.003940,0.009568,0.762784,-0.306718,0.329186,0.329186,-0.169992,0.005719,0.1642,129.794030,178.566648,178.566648,140.067058,109.932836,221.883015,163.381751
959,2021-06-08,0.002442,0.005338,-0.215297,-0.454690,0.329146,0.329146,-0.143250,0.002594,0.1707,130.153371,178.597902,178.597902,139.727369,110.160712,222.573363,165.316937
960,2021-06-09,0.002928,0.019942,0.576838,-0.138483,0.328310,0.328310,-0.150341,0.017434,0.1789,132.185076,178.272163,178.272163,139.801213,109.972975,222.363014,165.104529
961,2021-06-10,0.017747,0.000744,-0.282664,-0.380286,0.328743,0.328743,-0.235479,-0.020754,0.1610,130.056624,179.101528,179.101528,139.875057,110.055643,224.098539,165.883329


In [14]:
# Build the base feature frame again, this time with max=True.
# NOTE(review): max=True presumably requests the full available price history
# instead of the default window (the resulting frame starts in 2000 and is
# much longer than `data`) -- confirm in Data_Prep_Api.data_prep_api.
data2 = prep_class.data_prep_api(max=True)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [15]:
# Sanity check: (rows, columns) of the max-history frame.
data2.shape

(5261, 13)

In [16]:
# Preview the first rows of the max-history frame.
data2.head()

Unnamed: 0,Date,Return_DG.PA,Log_Return_DG.PA,High-Low_DG.PA,High-Close_DG.PA,Low-Close_DG.PA,Volume-Change_DG.PA,Period_Volum_DG.PA,Annual_Vol_DG.PA,Period_Vol_DG.PA,Return_stoxx_50,Period_Volum_stoxx_50,DG.PA_relatif
0,2000-12-20,0.00157,0.001569,0.032566,0.018809,0.013503,-0.485185,0.020124,0.388965,0.388965,-0.031296,0.389577,0.032866
1,2000-12-21,0.018809,0.018634,0.039683,0.007692,0.031746,1.61239,1.639689,0.384691,0.384691,0.008002,0.413285,0.010807
2,2000-12-22,0.012308,0.012232,0.028125,0.0,0.028125,-0.133866,1.273276,0.384268,0.384268,0.024387,0.058155,-0.012079
3,2000-12-25,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,0.382978,0.382978,0.0,0.0,0.0
4,2000-12-26,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,0.38176,0.38176,0.007075,-0.215432,-0.007075


In [18]:
# Add the same exogenous features to the max-history frame (max=True is passed
# here as well). No feature selection was applied to `data2` beforehand, so
# all base feature columns are still present.
# NOTE(review): the frame gets shorter after this step (fewer rows, later start
# date), presumably because it is aligned on the dates available for every
# exogenous series -- confirm in exo_selection_api.
data2 = exo_selection_api(data2, ["sp500","gold", "eurusd", "nasdaq", "crude", "vix"],max=True)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [19]:
# Preview the max-history frame with the exogenous columns added.
data2.head()

Unnamed: 0,Date,Return_DG.PA,Log_Return_DG.PA,High-Low_DG.PA,High-Close_DG.PA,Low-Close_DG.PA,Volume-Change_DG.PA,Period_Volum_DG.PA,Annual_Vol_DG.PA,Period_Vol_DG.PA,Return_stoxx_50,Period_Volum_stoxx_50,DG.PA_relatif,Return_sp500,Return_gold,Return_eurusd,Return_nasdaq,Return_crude,Vix_No_Rebase
0,2003-12-01,0.008989,0.008948,0.020393,0.00297,0.017372,-0.707995,-0.209671,0.236081,0.236081,0.011264,-0.005485,-0.002276,0.011264,0.014869,0.0,0.01508,-0.015127,0.1677
1,2003-12-02,0.001485,0.001483,0.005212,0.000741,0.004468,0.175607,-0.068599,0.234251,0.234251,-0.003271,0.001103,0.004755,-0.003271,0.002483,0.01036,-0.0049,0.027713,0.1627
2,2003-12-03,0.004448,0.004438,0.009637,0.005166,0.004448,0.085104,0.012923,0.233776,0.233776,-0.001772,0.043583,0.00622,-0.001772,0.000495,0.002813,-0.01001,0.010396,0.1663
3,2003-12-04,0.000738,0.000738,0.016369,0.007375,0.008929,-0.352954,-0.342288,0.23363,0.23363,0.004687,0.059456,-0.003949,0.004687,-0.001486,-0.003467,0.004362,0.005145,0.163
4,2003-12-05,-0.006637,-0.006659,0.017203,0.009651,0.007479,0.046479,-0.308838,0.231953,0.231953,-0.007684,-0.083381,0.001047,-0.007684,0.007687,0.008775,-0.015736,-0.016955,0.1709


In [20]:
# Final (rows, columns) check after adding the exogenous features.
data2.shape

(4285, 19)