In [1]:
from datetime import datetime as dt
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf

from snp500 import SNP500

In [2]:
# Yahoo Finance symbols and the asset-class labels used as column names downstream.
tickers = ["VTI", "AGG", "DBC", "^VIX"]
indices = ["STOCKS", "BONDS", "COMMODITIES", "VOLATILITY"]

start = "2006-01-01"
end = dt.now()  # NOTE: moving end date, so a later re-run downloads more rows

# auto_adjust=False is passed explicitly: newer yfinance releases default to
# auto_adjust=True, which removes the "Adj Close" column selected below.
# dropna() keeps only the dates on which every ticker has a price.
prices = yf.download(
    tickers,
    start=start,
    end=end,
    auto_adjust=False,
)["Adj Close"].dropna()

# Replace ticker symbols with the readable asset-class labels.
prices = prices.rename(columns=dict(zip(tickers, indices)))

prices.head()

[*********************100%%**********************]  4 of 4 completed


Unnamed: 0_level_0,BONDS,COMMODITIES,STOCKS,VOLATILITY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2006-02-06,58.557709,21.916016,45.302422,13.04
2006-02-07,58.516781,21.282082,44.861328,13.59
2006-02-08,58.487572,21.191517,45.18409,12.83
2006-02-09,58.522655,21.390755,45.098019,13.12
2006-02-10,58.399929,21.001337,45.191257,12.87


In [3]:
# Daily log returns; differencing leaves the first row empty, so it is skipped.
log_rets = np.log(prices).diff().iloc[1:]

# Window length (in rows) shared by the rolling volatility and momentum features.
lookback = 50
std = log_rets.rolling(lookback).std()
momentum = prices.pct_change(lookback)

# Suffix each feature family so the columns stay unique after concatenation.
log_rets.columns = [f"{col}_R" for col in log_rets]
std.columns = [f"{col}_STD" for col in std]
momentum.columns = [f"{col}_MOM" for col in momentum]

# Column-wise concat aligns the three frames on their shared index.
features = pd.concat([log_rets, std, momentum], axis=1)

# Build every lagged copy from a fixed snapshot of the base columns and attach
# them in a single concat, instead of inserting columns into the frame while
# iterating over it. Column order matches one-by-one insertion:
# for each base column, its 1-, 30- and 60-row lags in that order.
lag_windows = [1, 30, 60]
base_columns = list(features.columns)
lagged_columns = [
    features[col].shift(w).rename(f"{col}_{w}D_LAG")
    for col in base_columns
    for w in lag_windows
]
features = pd.concat([features, *lagged_columns], axis=1)

# Drop the warm-up rows that lack a full rolling window or a full lag history.
features = features.dropna()

features.head(), features.shape

(             BONDS_R  COMMODITIES_R  STOCKS_R  VOLATILITY_R  BONDS_STD  \
 Date                                                                     
 2006-07-14  0.001330       0.004539 -0.005871      0.014509   0.002549   
 2006-07-17 -0.002149      -0.022902 -0.001719      0.032164   0.002568   
 2006-07-18 -0.000716      -0.006197  0.003353     -0.049488   0.002559   
 2006-07-19  0.003886       0.003490  0.016358     -0.131761   0.002614   
 2006-07-20  0.001021      -0.010508 -0.010253      0.041568   0.002615   
 
             COMMODITIES_STD  STOCKS_STD  VOLATILITY_STD  BONDS_MOM  \
 Date                                                                 
 2006-07-14         0.013597    0.009581        0.098703   0.008557   
 2006-07-17         0.013886    0.009543        0.098721   0.006906   
 2006-07-18         0.013831    0.009462        0.098983   0.004336   
 2006-07-19         0.013833    0.009788        0.100878   0.007527   
 2006-07-20         0.013733    0.009870       

In [13]:
# Persist the feature matrix. The target folder is created first so the write
# does not fail when workflow/data/ is missing (e.g. on a fresh checkout).
features_path = Path("workflow/data/features.csv")
features_path.parent.mkdir(parents=True, exist_ok=True)
features.to_csv(features_path)