In [None]:
# Initial Imports:

import pandas as pd
from pathlib import Path
import pandas_datareader.data as reader
import datetime as dt
import statsmodels.api as sm
import getFamaFrenchFactors as gff

from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# For visualizations:
import seaborn as sns

# PART I: DATA CAPTURE AND ORGANIZATION

In [None]:
# Analysis window (first quarter of 2022) and the asset under investigation: Bitcoin in USD.

ticker = ["BTC-USD"]
start = dt.date(year=2022, month=1, day=1)
end = dt.date(year=2022, month=3, day=31)

In [None]:
# Get BTC closing prices

btc_dreturns = reader.get_data_yahoo(ticker, start, end)["Close"]
display(btc_dreturns.head())
btc_dreturns.tail()

In [None]:
# Get daily returns of BTC

btc_dreturns = btc_dreturns.pct_change().dropna() #convert to daily returns and drop NaN values
display(btc_dreturns.head())
btc_dreturns.tail()

In [None]:
# Download the daily Fama-French research factors for the analysis window.
# Element 0 of the reader's result is the table used throughout this notebook.

ff3_daily = reader.DataReader(
    name="F-F_Research_Data_Factors_daily",
    data_source="famafrench",
    start=start,
    end=end,
)[0]
ff3_daily.head()

In [None]:
# Convert the factor table from percent to decimal returns.
# Every column in the Fama-French file -- RF included -- is quoted in percent, so RF is
# rescaled together with Mkt-RF, SMB and HML. Leaving RF unscaled would make the risk-free
# rate subtracted from BTC's decimal returns further down 100x too large.
# NOTE: this rescales ff3_daily itself, so run the cell only once per download.

percent_columns = ["Mkt-RF", "SMB", "HML", "RF"]
ff3_daily[percent_columns] = ff3_daily[percent_columns] / 100
ff3_daily.head()

In [None]:
# Load the daily Momentum factor from the CSV shipped in this repo's Resources folder.
# Momentum is optional in this notebook: the four-factor variants of the relevant cells are
# kept as commented-out lines, so the factor can be switched on or off by (un)commenting them.
# Across a number of time periods, we have not found it to be a significant factor with BTC.

momentum_csv = Path("./Resources/F-F_Momentum_Factor_daily.csv")
# header=None: no row of the file is used as column labels.
momentum_factor = pd.read_csv(momentum_csv, header=None)

momentum_factor

In [None]:
# Assign column names: first column is the date, second the momentum factor value.

momentum_factor = momentum_factor.set_axis(["Date", "Mom"], axis="columns")
momentum_factor.columns

In [None]:
# Choose select dates
# The bounds come from the start/end dates configured at the top of the notebook, so the
# momentum window always matches the window used for the BTC and factor downloads.
# Dates are compared as YYYYMMDD text; casting to str first keeps the comparison valid
# even if the CSV column was parsed as integers.
# .copy() makes the selection an independent frame, so the column assignments further down
# do not trigger pandas' SettingWithCopyWarning.

date_text = momentum_factor["Date"].astype(str).str.strip()
momentum_factor = momentum_factor.loc[
    date_text.between(start.strftime("%Y%m%d"), end.strftime("%Y%m%d"))
].copy()

In [None]:
# Preview the first and last five rows of the filtered momentum data.
display(momentum_factor.head(5))
momentum_factor.tail(5)

In [None]:
# Set index
# NOTE: set_index returns a new DataFrame and the result is not assigned here, so this cell
# only displays a Date-indexed view. momentum_factor itself keeps "Date" as a regular column,
# which the later "Drop extra column" cell relies on.

momentum_factor.set_index("Date")

In [None]:
# Remove every row that contains a missing value, then report the remaining (rows, columns).

momentum_factor = momentum_factor.dropna(axis="index", how="any")
momentum_factor.shape

In [None]:
# The Mom column is supplied in percent; rescale it to decimal returns.

momentum_factor["Mom"] = momentum_factor["Mom"].div(100)
momentum_factor.head()

In [None]:
# Index the momentum rows by their own dates instead of copying ff3_daily's index.
# A positional copy raises when the two tables have different lengths and silently mislabels
# rows when they cover different trading days; parsing the table's own Date column cannot.
# Assumes Date holds YYYYMMDD values, as the date filter earlier in this notebook implies.
# The index is named "Date" so the later merge on "Date" can match it.

momentum_dates = pd.to_datetime(
    momentum_factor["Date"].astype(str).str.strip(),
    format="%Y%m%d",
)
momentum_factor.index = pd.DatetimeIndex(momentum_dates, name="Date")

In [None]:
# Drop extra column
# The dates now live in the index, so the Date column is redundant. Rebinding the result
# (rather than inplace=True) avoids mutating a frame that earlier cells already displayed.

momentum_factor = momentum_factor.drop(columns=["Date"])
momentum_factor

In [None]:
# Combine the FF3 daily factor data and the BTC daily returns into one dataframe,
# matching rows on "Date" (default inner join: only dates present in both are kept).

regress_data = pd.merge(ff3_daily, btc_dreturns, on="Date")
display(regress_data.head())
regress_data.tail()

In [None]:
# Merging combined FF3 daily factor and BTC daily returns data with Momentum daily factor data

#regress_data = regress_data.merge(momentum_factor, on="Date")  
#regress_data = regress_data[["Mkt-RF", "SMB", "HML", "Mom", "RF", "BTC-USD"]] #ordering columns
#display(regress_data.head())
#regress_data.tail()

In [None]:
# BTC's excess return: its daily return minus the risk-free rate, stored as a new column.

regress_data["BTC-RF"] = regress_data["BTC-USD"].sub(regress_data["RF"])
regress_data.head()

In [None]:
# Visual check of the relationship between the two excess-return series: a seaborn regplot
# (scatter plus fitted line) of market excess return (x) against BTC excess return (y).

sns.regplot(data=regress_data, x="Mkt-RF", y="BTC-RF")

In [None]:
# Regress BTC's excess return on the factor columns with ordinary least squares.
factor_columns = ["Mkt-RF", "SMB", "HML"]
# factor_columns = ["Mkt-RF", "SMB", "HML", "Mom"]  # four-factor variant
X = regress_data[factor_columns]  # independent variables
y = regress_data["BTC-RF"]  # dependent variable

# statsmodels does not add an intercept on its own; add the constant column explicitly.
X1 = sm.add_constant(X)
model = sm.OLS(endog=y, exog=X1)

results = model.fit()
results.summary()

In [None]:
#Note: The Prob (F-statistic) is the p-value for the regression as a whole: the probability of seeing an F-statistic at least 
#this large if every factor coefficient were truly zero. Our Prob (F-statistic) of effectively 0 implies that overall the 
#regression is significant.

#Last thing are the coefficients and the p-values for the X variables. The coefficients tell you the size of the effect that 
#the variable is having on the dependent variable when all other independent variables are held constant. Here, only the Mkt-RF
#coefficient has a statistically significant p-value; the others are not statistically significant.

In [None]:
# Pull the fitted coefficients out by label. The constant is the intercept; each factor's
# slope is BTC's exposure to that factor -- market, size and value here, plus momentum when
# the four-factor variant is enabled.
# Note on Fama French Factors: there is some systematic risk due to the size, value (book
# equity to market equity), and momentum of stocks. Stock risks are multidimensional!

coefficients = results.params
intercept = coefficients["const"]
beta_m = coefficients["Mkt-RF"]
beta_s = coefficients["SMB"]
beta_v = coefficients["HML"]
#beta_mom = coefficients["Mom"]   #for 4 factors
print(beta_m, beta_s, beta_v)
# print(beta_m, beta_s, beta_v, beta_mom)   #for 4 factors

In [None]:
# Average risk-free rate over the sample (mean of the RF column).

risk_free = regress_data.loc[:, "RF"].mean()
risk_free

In [None]:
# Factor risk premiums, each estimated as the sample mean of the corresponding factor column.
# Everything here is based on daily data, so these are average DAILY premiums:
#   market_premium - average excess return of the market over the risk-free rate
#   size_premium   - average return attributed to size (SMB)
#   value_premium  - average return attributed to value (HML)
#   momentum_premium (four-factor variant only) - average return attributed to momentum

market_premium, size_premium, value_premium = regress_data[["Mkt-RF", "SMB", "HML"]].mean()
#momentum_premium = regress_data["Mom"].mean()  #use for 4 factors

print(market_premium, size_premium, value_premium)  #daily premiums for market, size and value
#print(market_premium, size_premium, value_premium, momentum_premium) #use for 4 factors

In [None]:
#Compute expected return of BTC
# Expected daily return = intercept + average risk-free rate + sum of (beta * average factor premium).

btc_exp_return = intercept + risk_free + (beta_m * market_premium) + (beta_s * size_premium) + (beta_v * value_premium) 
# NOTE(review): the four-factor variant below omits the intercept, unlike the active formula -- confirm which is intended.
#btc_exp_return = risk_free + (beta_m * market_premium) + (beta_s * size_premium) + (beta_v * value_premium) + (beta_mom * momentum_premium) #use for 4 factors

# Annualise the daily figure using 252 periods per year.
btc_exp_yr_return = btc_exp_return * 252

# Label the model by the number of factors actually fitted (the fitted params hold the constant
# plus one slope per factor), so the message stays correct when Momentum is toggled on or off.
factor_count = len(results.params) - 1
model_label = {3: "Three", 4: "Four"}.get(factor_count, str(factor_count))

print(f"Using the Fama French {model_label} Factor model, the expected daily return of Bitcoin is: %{btc_exp_return*100}.")
print(f"Using the Fama French {model_label} Factor model, the expected annual return of Bitcoin is: %{btc_exp_yr_return*100}.")

# PART III: TRAINING AND TESTING THE DATA

In [None]:
# Hold out 20% of the observations for testing and train on the remaining 80%.
# random_state is fixed so the split is reproducible.

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Create an unfitted scikit-learn linear regression estimator.

model = linear_model.LinearRegression()

In [None]:
# Fit the estimator on the training portion only.

model.fit(X=X_train, y=y_train)

In [None]:
# Predict BTC excess returns for the held-out test rows.

y_pred = model.predict(X=X_test)

In [None]:
# Model evaluation
# r2_score compares the held-out targets with the predictions. R-squared is the share of
# variance explained, not a hit rate, and it can be negative on test data when the model
# does worse than predicting the mean.

score = r2_score(y_test, y_pred)
percent_score = round((score * 100), 2)

print(f"Out-of-sample R-squared of the factor model: {percent_score}%.")
print(f"Or in other words, {percent_score}% of the variance in BTC's held-out excess returns is explained by these factors.")