In [156]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk

Decomposing BTC into its factors. Using factors: equities, gold, interest rates, bond yields, momentum, inflation, liquidity. Using methods: PCA, Ridge, Lasso, ElasticNet, Best Subset

In [157]:
# gold data 
iau = yf.download("IAU", start="2005-01-01", end="2023-01-01", interval="1mo")
iau["open_ret"] = iau["Open"].pct_change()
iau = iau.dropna()
iau.head(1)

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,open_ret
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2005-03-01,8.674,8.938,8.482,8.576,8.576,8064500,0.029922


In [158]:
# sp500 data
spy = yf.download('SPY', start='2005-01-01', end='2023-01-01', interval="1mo")
spy["open_ret"] = spy["Open"].pct_change()
spy = spy.dropna()
spy.head(1)

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,open_ret
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2005-02-01,118.25,121.669998,118.099998,120.629997,83.324211,1025608400,-0.027229


In [118]:
# btc data
btc = yf.download("BTC-USD", start="2005-01-01", end="2023-01-01", interval="1mo")
btc["open_ret"] = btc["Open"].pct_change()
btc = btc.dropna()
btc.head(1)

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,open_ret
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-11-01,338.649994,457.092987,320.626007,378.046997,378.046997,659733360,-0.1259


In [134]:
# interest rates
rf = pd.read_csv("Data/InterestRates.csv", index_col="DATE",  parse_dates=True, dtype=float).iloc[:, 0]
rf.head(1)

DATE
1950-01-01    1.5
Name: INTDSRUSM193N, dtype: float64

In [137]:
# bond yields
by = pd.read_csv("Data/BondYields.csv", index_col="DATE",  parse_dates=True, dtype=float).pct_change().dropna().iloc[:, 0]
by.head(1)

DATE
1953-05-01    0.077739
Name: IRLTLT01USM156N, dtype: float64

In [138]:
# cpi 
cpi = pd.read_csv("Data/CPI.csv", index_col="DATE",  parse_dates=True, dtype=float).iloc[:, 0]
cpi.head(1)

DATE
1955-02-01    0.0
Name: CPALTT01USM657N, dtype: float64

In [139]:
# ppi
ppi = pd.read_csv("Data/PPI.csv", index_col="DATE",  parse_dates=True, dtype=float).iloc[:, 0]
ppi.head(1)

DATE
1986-01-01    101.3
Name: PCUOMFGOMFG, dtype: float64

In [140]:
# liquidity from m1
m1 = pd.read_csv("Data/M1.csv", index_col="DATE",  parse_dates=True, dtype=float).pct_change().dropna().iloc[:, 0]
m1 = m1.resample("ME").apply(lambda x: (1+x).prod()-1)
m1.index = m1.index + pd.offsets.MonthBegin(1)
m1.head(1)

DATE
1975-02-01   -0.05445
Name: WM1NS, dtype: float64

In [147]:
# combining everything
df = pd.DataFrame({"iau": iau["open_ret"], "btc": btc["open_ret"], "rf": rf, "by": by, "cpi": cpi, "m1": m1}).dropna()
df.head()
df.plot

<pandas.plotting._core.PlotAccessor object at 0x00000231BDC813D0>

In [155]:
# corr matrix to check if high correlations between factors
df.corr()

Unnamed: 0,iau,btc,rf,by,cpi,m1
iau,1.0,0.116881,-0.28558,0.061729,0.055912,-0.013175
btc,0.116881,1.0,0.081036,0.179109,-0.017996,-0.00467
rf,-0.28558,0.081036,1.0,-0.143044,-0.048822,-0.194111
by,0.061729,0.179109,-0.143044,1.0,0.233836,0.086623
cpi,0.055912,-0.017996,-0.048822,0.233836,1.0,0.114404
m1,-0.013175,-0.00467,-0.194111,0.086623,0.114404,1.0


In [None]:
# applying ridge regression
from sk.linear_model import Ridge
