In [1]:
import yfinance as yf
import pandas as pd
import random
import numpy as np

from datetime import datetime as dt
from workflow import get_dji, Indicators

In [2]:
# Ticker list from the project-local `workflow.get_dji` helper
# (presumably the Dow Jones Industrial Average constituents — confirm in `workflow`).
stocks = get_dji()

# Sanity check: how many tickers were returned.
len(stocks)

30

Feature Space:
- $M_{t}$, Moving-Average Convergence Divergence (MACD) Signal
- $H_{t}$, MACD Histogram
- $R_{t}$, Relative Strength Index (RSI)
- $C_{t}$, Commodity Channel Index (CCI)
- $X_{t}$, Average Directional Index (ADX)
- $V_{t}$, Turbulence Index

In [3]:
start = "2006-01-01"
end = dt.now()  # open-ended window: the downloaded range grows on every re-run

# Ask for unadjusted prices plus a separate "Adj Close" column explicitly.
# Newer yfinance releases default to auto_adjust=True, which omits "Adj Close"
# and would make the lookup below fail with a KeyError.
# dropna(axis=1) then discards every column containing at least one missing
# value, which is why fewer tickers may remain than were requested.
stock_data = yf.download(
    stocks,
    start=start,
    end=end,
    auto_adjust=False,
).dropna(axis=1)
stock_prices = stock_data["Adj Close"]

# Summary: number of surviving tickers and the covered time span in (365-day) years.
f"{len(stock_prices.columns)} stocks", \
    f"{round((stock_prices.index.max() - stock_prices.index.min()).days / 365, 2)}Y"

[*********************100%%**********************]  30 of 30 completed


('28 stocks', '17.85Y')

In [4]:
# Build the indicator features from the downloaded market data using the
# project-local `Indicators` helper (its implementation lives in `workflow`
# and is not shown in this notebook).
indicators = Indicators(stock_data)
features = indicators.get()

# Preview the most recent rows of the resulting features.
features.tail()

Unnamed: 0_level_0,AAPL_M,AMGN_M,AXP_M,BA_M,CAT_M,CRM_M,CSCO_M,CVX_M,DIS_M,GS_M,...,MRK_X,MSFT_X,NKE_X,PG_X,TRV_X,UNH_X,VZ_X,WBA_X,WMT_X,TURB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-10-31,-1.96261,-0.815389,-2.670639,-4.94866,-9.330452,-2.737921,-0.616842,-4.426072,-0.598167,-6.4421,...,32.733184,35.358365,31.453403,60.863781,47.509693,14.908372,49.358314,36.610887,52.595523,52.054674
2023-11-01,-1.669119,-1.438272,-2.407413,-4.105077,-9.824445,-2.336055,-0.593429,-4.995898,-0.611446,-5.516476,...,32.887948,34.649925,33.054525,56.432277,46.749347,16.355819,43.112164,40.271502,48.340371,20.933244
2023-11-02,-1.132974,-1.451206,-1.822749,-3.18757,-9.517979,-1.659538,-0.52365,-5.004099,-0.437788,-4.201909,...,34.559868,34.041115,33.160732,51.155518,47.741517,18.481867,37.132327,42.503843,44.93287,20.280829
2023-11-03,-0.773398,-1.183948,-1.191543,-2.189891,-9.039375,-1.161645,-0.442272,-5.045235,-0.154748,-2.018453,...,34.345963,32.462046,34.309158,45.415519,47.099735,20.722401,31.165254,44.31215,43.629272,22.590744
2023-11-06,-0.277053,-0.724142,-0.665082,-1.5508,-8.758424,-0.76231,-0.351138,-5.068629,-0.01499,-0.58072,...,34.395949,30.858344,35.986404,39.758295,46.00386,22.710005,25.064783,46.19656,42.67469,9.328554


In [5]:
# Place prices and indicator features side by side on their shared index,
# then keep only the rows in which every column has a value.
s = pd.concat(
    objs=[stock_prices, features],
    axis="columns",
).dropna()

# Show the resulting dimensions together with the first few rows.
(s.shape, s.head())

((4466, 169),
                 AAPL       AMGN        AXP         BA        CAT     CRM  \
 Date                                                                       
 2006-02-09  1.968905  51.850811  40.273148  51.452534  42.888489  9.8425   
 2006-02-10  2.040447  52.172092  40.242798  51.824066  43.076466  8.7750   
 2006-02-13  1.961629  51.945736  40.257980  51.523975  42.995014  8.5650   
 2006-02-14  2.050450  52.522610  40.630039  51.952675  43.997524  8.6625   
 2006-02-15  2.098346  53.281986  41.146355  51.766891  44.862179  8.5250   
 
                  CSCO        CVX        DIS          GS  ...      MRK_X  \
 Date                                                     ...              
 2006-02-09  13.470140  29.117903  21.925201  108.286316  ...  25.816695   
 2006-02-10  13.614833  28.969492  21.892380  108.255531  ...  22.367933   
 2006-02-13  13.470140  28.974619  21.670830  107.363716  ...  19.156019   
 2006-02-14  13.573496  28.742500  21.949820  109.001266  ...  17

In [6]:
# Persist the combined prices-and-features table as CSV; the path is relative,
# so the file lands in the notebook's current working directory.
s.to_csv("prices_and_features.csv")