In [12]:
# This project shows how to calculate betas, use them to make a strategy, and test a strategy for statistical significance. 
import pandas as pd
btc_data = pd.read_csv("historic_data\Bittrex_BTCUSD_d.csv")
eth_data = pd.read_csv("historic_data\Bittrex_ETHUSD_d.csv")
btc_data = btc_data[0:len(eth_data)] # Only consider BTC Data starting from when we have ETH data

In [13]:
# Here's what our data looks like:
print(btc_data[:5])

   Unix Timestamp        Date  Symbol      Open      High       Low     Close  \
0      1605830400  11/20/2020  BTCUSD  17824.13  18234.99  17773.37  18113.58   
1      1605744000  11/19/2020  BTCUSD  17801.21  18177.93  17367.00  17824.13   
2      1605657600  11/18/2020  BTCUSD  17685.78  18475.39  17002.00  17801.21   
3      1605571200  11/17/2020  BTCUSD  16722.99  17871.94  16578.05  17685.78   
4      1605484800  11/16/2020  BTCUSD  15975.90  16890.00  15884.05  16722.99   

   Volume BTC   Volume USD  
0      173.44   3124710.12  
1      648.25  11571312.72  
2     1593.92  28459068.29  
3      977.37  16938093.61  
4      563.25   9313382.63  


In [14]:
# Create a dataframe with the info we want 
df = pd.DataFrame(columns = ["Date", "BTC_Open", "ETH_Open"],
                  data = {"Date" : btc_data['Date'], "BTC_Open" : btc_data['Open'], "ETH_Open" : eth_data['Open']})
# Calculate returns -- return is % price diff from yesterday to today
x = (df['BTC_Open'][0:1297].values / df['BTC_Open'][1:1298].values - 1) * 100
y = (df['ETH_Open'][0:1297].values / df['ETH_Open'][1:1298].values - 1) * 100
df = df[:1297]
df['BTC_Return'] = x
df['ETH_Return'] = y
# Let's save 300 datapoints for testing
testing = df[0:300]
df = df[300:]
# Here's what our data looks like:
print(df[:5])

          Date  BTC_Open  ETH_Open  BTC_Return  ETH_Return
300  1/25/2020   8441.08    158.95    0.523154   -2.316863
301  1/24/2020   8397.15    162.72   -3.124712   -2.469432
302  1/23/2020   8668.00    166.84   -0.639060   -1.435576
303  1/22/2020   8723.75    169.27    1.158767    2.345970
304  1/21/2020   8623.82    165.39   -0.899895   -0.898796


In [15]:
# Beta from A to B: Cov(A_returns, B_returns) / variance(B_returns)
# 1. Beta tells you how much you expect A to move, given how much B moved. 
# A_return = beta * B_return, which is the MSE linreg with intercept=0
# 2. Beta tells you much A you should buy for every B in order to maximially hedge
# We want the beta from ETH to BTC
eth_avg = df['ETH_Return'].mean()
btc_avg = df['BTC_Return'].mean()
cov = ((df['BTC_Return'] - btc_avg) * (df['ETH_Return'] - eth_avg)).sum() / (len(df) - 1)
v = ((df['BTC_Return'] - btc_avg) ** 2).sum() / (len(df) - 1)
beta = cov / v 
print("Beta from ETH to BTC: " + str(beta))

Beta from ETH to BTC: 0.8547960997797627


In [16]:
# Now we have our first model of eth return and eth price at open
# ETH_Return = beta * BTC_Return
# ETH_Open = ETH_Open(yesterday) + beta * BTC_Return * ETH_Open(yesterday)
df['Predicted'] = df['BTC_Return'] * beta 
# Let's compare this to the naive model, which models ETH_Return as the avg eth_return over the period
df['Naive'] = eth_avg
# Predicted will necessarily do better on a MSE (Mean Square Error) comparison on this data set, because beta is the coefficient for a lin reg that minimizes MSE
# Let's test on mean absolute error instead
pred_error = (abs(df['Predicted'] - df['ETH_Return'])).sum() / len(df)
naive_error = (abs(df['Naive'] - df['ETH_Return'])).sum() / len(df)
print("Mean error on training data using beta prediction: " + str(pred_error))
print("Mean error using naive prediction: " + str(naive_error))
print("(Error is the difference between [predicted % return] and [real % return])")

Mean error on training data using beta prediction: 2.80530653555051
Mean error using naive prediction: 4.047695373573443
(Error is the difference between [predicted % return] and [real % return])


In [17]:
# Now let's try this on the testing data
testing['Predicted'] = testing['BTC_Return'] * beta 
testing['Naive'] = eth_avg
pred_error = (abs(testing['Predicted'] - testing['ETH_Return'])).sum() / len(testing)
naive_error = (abs(testing['Naive'] - testing['ETH_Return'])).sum() / len(testing)
print("Mean error on testing data using beta prediction: " + str(pred_error))
print("Mean error on using naive prediction: " + str(naive_error))

Mean error on testing data using beta prediction: 2.0556073514943343
Mean error on using naive prediction: 3.449547461142158


In [18]:
# Trading strategy sim: At each open, buy Eth if its return is < our model and sell when > model
# Let's correct for avg price change during this time -- otherwise we are biased towards buying, since Eth went up during this period
# One way to do this is to mark our profit for each "buy today sell tomorrow" as tomorrows return - the avg return
normalized_next_day_returns =  df[:len(df) - 1]['ETH_Return'] - df[:len(df) - 1]['ETH_Return'].mean()
buy = (df[1:]['ETH_Return'] < (df[1:]['BTC_Return'] * beta)) * 2 - 1 # 1 = buy, -1 = sell 
# Note that buy and normalized_next_day_returns are offset by 1 day 
profit = normalized_next_day_returns.values * buy.values
print("Return using strategy: " + str(profit.sum()))

Return using strategy: 241.74135480841272


In [19]:
# It seems like we would have made 240% over ~3 years with this strategy.
# Sanity check: do we get the opposite result with the opposite strategy? Do returns sum to 0?
sell = (df[1:]['ETH_Return'] >= (df[1:]['BTC_Return'] * beta)) * 2 - 1 # 1 = buy, -1 = sell 
negProfit = normalized_next_day_returns.values * sell.values
alwaysBuyProfit = normalized_next_day_returns.values
print("Return using the opposite strategy: " + str(negProfit.sum()))
print("Sum of returns: " + str(normalized_next_day_returns.values.sum()))

Return using the opposite strategy: -241.74135480841272
Sum of returns: -1.1368683772161603e-13


In [20]:
# How does this do on the testing data?
normalized_next_day_returns =  testing[:len(testing) - 1]['ETH_Return'] - testing[:len(testing) - 1]['ETH_Return'].mean()
buy = (testing[1:]['ETH_Return'] < (testing[1:]['BTC_Return'] * beta)) * 2 - 1 # 1 = buy, -1 = sell 
profit = normalized_next_day_returns.values * buy.values
print("Profit on testing data using beta strategy: " +str(profit.sum())) 
# Let's compare this with the "naive" trading strategy -- buy if the return is less than the historical avg 
buy = (testing[1:]['ETH_Return'] < eth_avg) * 2 - 1
profit = normalized_next_day_returns.values * buy.values
print("Profit using naive strategy: " + str(profit.sum())) 

Profit on testing data using beta strategy: 10.996635468694299
Profit using naive strategy: 117.56756066134182


In [21]:
# So the naive did extremely well here -- 117% returns over 300 days. 
# Let's test this naive strategy for stastical significance.
# Null hypothesis: This strategy is no better than a random strategy -- each trade has EV 0

# Using a 1 sample t-test
t = profit.mean() / (profit.std() / (len(profit) ** (1/2)))
print("t-score (my calculation): " + str(t))
from scipy.stats import ttest_1samp
print("scipy.stats's calcuation: " + str(ttest_1samp(profit, 0))) # Our values essentially agree
print("We should consider pvalue/2; scipy uses a two-tailed test, but we want a one-tailed test")

t-score (my calculation): 1.2774008720162717
scipy.stats's calcuation: Ttest_1sampResult(statistic=1.2752629611086745, pvalue=0.20320946844784624)
We should consider pvalue/2; scipy uses a two-tailed test, but we want a one-tailed test


In [22]:
# Another way of testing is by using a monte carlo simulation.
# Create 1000 random strategies, and see what % of them perform >= the naive strategy
import random
profits_above_naive = 0
n = 1000
for i in range(0, n):
    buy = [] 
    for j in range(0, len(normalized_next_day_returns)):
        r = random.randint(0, 1)
        buy.append(r * 2 - 1)
    rand_profit = (normalized_next_day_returns.values * buy)
    if rand_profit.sum() > profit.sum():
        profits_above_naive += 1
print("% of random strategies that do >= than naive strategy: " + str(profits_above_naive / n))     

% of random strategies that do >= than naive strategy: 0.101


In [23]:
# Conclusion: We made a model predicting ETH returns using only historical long-term beta from ETH to BTC. 
# This model predicted ETH's price better than a naive model, unsuprisingly.
# The strategy of buy (or sell) ETH at the open if its return is < (or >) its predicted return is probably not profitable.
# We also looked at a strategy that traded based on ETH's return compared to its historical returns. 
# This made money in our test data, but was profitable with p-value .1. 
# 1. This is not a very impressive p-value. 2. I tested two strategies, so we should discount the p-value a bit. 3. This is a silly strategy, and my prior was heavily against it being truly profitable.
# This strategy is probably not profitable either.