## Make dataset file of BTC-USD from yfinance

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd

asset = "BTC-USD"

# Period of test data (TODO: Change the period)
start_date = "2024-01-01"
end_date = "2025-01-01"

# Daily data of BTC-USD is used 

# auto_adjust=True is applied by default. 
# So,'Close' price is used instead of 'Adj Close'
df = yf.download(asset, start=start_date, end=end_date, interval='1d')['Close']
df.columns = ['Close']

# Features (TODO: Change the features)
df["Ret"] = df["Close"].pct_change()
df["Ret_5"] = df['Close'].pct_change(periods=5)    
df["Ret_10"] = df['Close'].pct_change(periods=10)    
df["Ret_15"] = df['Close'].pct_change(periods=15)
df["Ret_20"] = df['Close'].pct_change(periods=20)
df["Ret_25"] = df['Close'].pct_change(periods=25)
df["Ret_30"] = df['Close'].pct_change(periods=30)   
df["Ret_40"] = df['Close'].pct_change(periods=40)
df["Ret_50"] = df['Close'].pct_change(periods=50)    

# Target variable (TODO: Change the target variable)
df["OT"] = df["Ret_25"].shift(-25)  

del df["Close"]
df = df.dropna()

df.reset_index(inplace=True)
df['Date'] = df['Date'].dt.strftime('%Y-%m-%d %H:%M:%S')
df.rename(columns={'Date': 'date'}, inplace=True)

# Save the dataset as csv file
import os 
pathdir = "dataset/btc/"
if not os.path.exists(pathdir):
    os.makedirs(pathdir)
dataset_path = pathdir + '/dataset_BTC_r25.csv'
df.to_csv(path_or_buf=dataset_path, index=False)

df.head()


## Dataset to predict daily return!  

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd

pd.options.display.precision = 3  # Set precision to 3 decimal places

asset = "BTC-USD"

# Period of test data (TODO: Change the period)
start_date = "2024-1-01"
end_date = "2025-01-01"


# auto_adjust=True is applied by default. 
# So,'Close' price is used instead of 'Adj Close'
df = yf.download(asset, start=start_date, end=end_date, interval='1d')['Close']
df.columns = ['Close']

# Target : Tomorrow's return ---------
df["OT"] = df["Close"].pct_change().shift(-1)

# Features ---------

# 0. Daily Return
df["Ret"] = df["Close"].pct_change()
for i in [3, 5, 7]:
    df["Ret"+ str(i)] = df["Ret"].shift(-i)

# 1. SMA: Simple Moving Average over a window
for i in [3, 5, 7]:
    df["SMA" + str(i)] = df["Ret"].rolling(window=i).mean()

# 2. EMA: Exponential Moving Average
for i in [3, 5, 7]:
    df["EMA" + str(i)] = df["Ret"].ewm(span=i, adjust=False).mean()

# 3. MACD: Usually MACD = EMA(12) - EMA(26), plus a 9-day signal line
short_span = 12
long_span = 26
signal_span = 9
df["EMA_short"] = df["Ret"].ewm(span=short_span, adjust=False).mean()
df["EMA_long"] = df["Ret"].ewm(span=long_span, adjust=False).mean()
df["MACD"] = df["EMA_short"] - df["EMA_long"]  # MACD line
df["MACD_Signal"] = df["MACD"].ewm(span=signal_span, adjust=False).mean()  # Signal line
del df["EMA_short"], df["EMA_long"]

# 4. RSI: Relative Strength Index (14-day typical)
rsi_period = 14
delta = df["Close"].diff()
gain = np.where(delta > 0, delta, 0.0)
loss = np.where(delta < 0, -delta, 0.0)
# Exponential moving average of gains and losses
avg_gain = pd.Series(gain).ewm(span=rsi_period, adjust=False).mean()
avg_loss = pd.Series(loss).ewm(span=rsi_period, adjust=False).mean()
# Calculate RS and then RSI
rs = avg_gain / avg_loss
df["RSI"] = 100 - (100 / (1 + rs.values))

# # 5. Momentum: close[t] - close[t-n]
# for i in [3, 5, 10]:
#     df["Momentum" + str(i)] = df["Close"].diff(periods=i)

# # 6. ROC (Rate of Change): ((close[t] - close[t-n]) / close[t-n]) * 100
# for i in [3, 5, 7]:
#     df["ROC" + str(i)] = df["Close"].diff(periods=i) / df["Close"].shift(i) * 100

# 7. ETS
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import warnings
warnings.filterwarnings('ignore') 
df = df.dropna() # drop nan for calculation
endog = df["Ret"].values
lookback_win = 20
ets_pred = np.empty(len(df))
for t in range(len(df)):
    if t < lookback_win:
        ets_pred[t] = np.nan
    else:
        ets_pred[t] = ExponentialSmoothing(endog[t-lookback_win:t], trend='add', damped_trend=True).fit().forecast(steps=1)
df['ETS'] = ets_pred

del df["Close"]
df = df.dropna()

df.reset_index(inplace=True)
df['Date'] = df['Date'].dt.strftime('%Y-%m-%d %H:%M:%S')
df.rename(columns={'Date': 'date'}, inplace=True)

# Save the dataset as csv file
import os 
pathdir = "dataset/btc/"
if not os.path.exists(pathdir):
    os.makedirs(pathdir)
dataset_path = pathdir + 'BTC_ret_1d.csv'
df.to_csv(path_or_buf=dataset_path, index=False)

df.head()


[*********************100%***********************]  1 of 1 completed


Unnamed: 0,date,OT,Ret,Ret3,Ret5,Ret7,SMA3,SMA5,SMA7,EMA3,EMA5,EMA7,MACD,MACD_Signal,RSI,ETS
0,2024-01-28 00:00:00,0.03,-0.002,-0.009,0.003,-0.01,0.017,0.011,0.002,0.006,0.007,0.005,0.000576,-0.002236,54.366,-0.006
1,2024-01-29 00:00:00,-0.008,0.03,0.012,-0.004,0.002,0.012,0.016,0.013,0.018,0.014,0.011,0.0027,-0.001249,65.002,0.009
2,2024-01-30 00:00:00,-0.009,-0.008,0.003,-0.01,0.01,0.007,0.015,0.011,0.005,0.007,0.007,0.001337,-0.0007318,60.633,0.015
3,2024-01-31 00:00:00,0.012,-0.009,-0.004,0.002,0.029,0.004,0.004,0.009,-0.002,0.002,0.003,0.0001854,-0.0005484,55.857,0.011
4,2024-02-01 00:00:00,0.003,0.012,-0.01,0.01,0.022,-0.002,0.005,0.011,0.005,0.005,0.005,0.0008921,-0.0002603,60.627,0.007


In [71]:
corr = df[-100:].drop(columns=['date']).corr()
corr.style.format("{:.3f}")

Unnamed: 0,OT,Ret,Ret3,Ret5,Ret7,SMA3,SMA5,SMA7,EMA3,EMA5,EMA7,MACD,MACD_Signal,RSI,ETS
OT,1.0,0.061,0.01,0.038,-0.182,-0.043,0.01,-0.066,0.029,0.006,-0.006,0.02,-0.008,-0.075,0.02
Ret,0.061,1.0,-0.151,0.07,0.011,0.6,0.42,0.327,0.867,0.745,0.661,0.615,0.176,0.477,0.05
Ret3,0.01,-0.151,1.0,0.005,0.045,-0.023,-0.092,0.04,-0.112,-0.089,-0.074,-0.05,0.019,-0.085,0.036
Ret5,0.038,0.07,0.005,1.0,0.003,-0.053,0.096,0.038,0.006,0.011,0.016,0.027,0.04,-0.036,-0.071
Ret7,-0.182,0.011,0.045,0.003,1.0,0.181,0.098,0.078,0.091,0.109,0.108,0.114,0.064,0.07,0.039
SMA3,-0.043,0.6,-0.023,-0.053,0.181,1.0,0.762,0.634,0.891,0.921,0.896,0.847,0.471,0.77,0.544
SMA5,0.01,0.42,-0.092,0.096,0.098,0.762,1.0,0.83,0.723,0.849,0.889,0.853,0.685,0.855,0.744
SMA7,-0.066,0.327,0.04,0.038,0.078,0.634,0.83,1.0,0.613,0.766,0.842,0.8,0.8,0.833,0.828
EMA3,0.029,0.867,-0.112,0.006,0.091,0.891,0.723,0.613,1.0,0.969,0.919,0.864,0.429,0.751,0.416
EMA5,0.006,0.745,-0.089,0.011,0.109,0.921,0.849,0.766,0.969,1.0,0.987,0.929,0.59,0.861,0.608


In [69]:
corr = df[-16:].drop(columns=['date']).corr(method='spearman')
corr.style.format("{:.3f}")

Unnamed: 0,OT,Ret,Ret2,Ret3,Ret5,Ret7,Ret10,SMA3,SMA5,SMA10,EMA3,EMA5,EMA10,MACD,MACD_Signal,RSI,ROC1,ROC3,ROC5,ROC10,ETS
OT,1.0,-0.038,-0.003,-0.279,0.159,-0.562,0.268,-0.118,0.076,0.088,-0.035,0.012,-0.044,-0.05,-0.044,-0.05,-0.038,-0.118,0.085,0.091,0.026
Ret,-0.038,1.0,-0.044,-0.191,0.138,-0.115,0.029,0.579,0.574,0.771,0.897,0.829,0.709,0.735,0.135,0.641,1.0,0.579,0.576,0.744,-0.132
Ret2,-0.003,-0.044,1.0,-0.009,-0.05,-0.015,0.332,-0.012,-0.206,-0.109,-0.068,-0.038,-0.144,-0.047,-0.1,-0.235,-0.044,-0.012,-0.191,-0.168,0.035
Ret3,-0.279,-0.191,-0.009,1.0,-0.321,0.244,-0.132,0.147,-0.168,-0.226,-0.109,-0.232,-0.206,-0.174,-0.15,-0.203,-0.191,0.147,-0.174,-0.226,-0.141
Ret5,0.159,0.138,-0.05,-0.321,1.0,-0.265,-0.138,-0.318,-0.085,0.094,-0.003,-0.029,-0.082,-0.106,-0.382,-0.097,0.138,-0.318,-0.082,0.103,-0.382
Ret7,-0.562,-0.115,-0.015,0.244,-0.265,1.0,-0.018,0.344,0.129,0.047,-0.029,0.038,0.182,0.106,0.053,0.2,-0.115,0.344,0.138,0.082,0.071
Ret10,0.268,0.029,0.332,-0.132,-0.138,-0.018,1.0,-0.224,-0.076,-0.106,-0.053,-0.115,-0.162,-0.147,-0.132,-0.218,0.029,-0.224,-0.109,-0.124,-0.071
SMA3,-0.118,0.579,-0.012,0.147,-0.318,0.344,-0.224,1.0,0.785,0.818,0.747,0.794,0.847,0.829,0.582,0.835,0.579,1.0,0.794,0.821,0.45
SMA5,0.076,0.574,-0.206,-0.168,-0.085,0.129,-0.076,0.785,1.0,0.806,0.776,0.835,0.929,0.897,0.721,0.956,0.574,0.785,0.997,0.832,0.509
SMA10,0.088,0.771,-0.109,-0.226,0.094,0.047,-0.106,0.818,0.806,1.0,0.815,0.856,0.856,0.838,0.494,0.853,0.771,0.818,0.809,0.994,0.356


In [67]:
corr = df[-32:].drop(columns=['date']).corr(method='kendall')
corr.style.format("{:.3f}")

Unnamed: 0,OT,Ret,Ret2,Ret3,Ret5,Ret7,Ret10,SMA3,SMA5,SMA10,EMA3,EMA5,EMA10,MACD,MACD_Signal,RSI,ROC1,ROC3,ROC5,ROC10,ETS
OT,1.0,-0.06,-0.081,-0.036,-0.028,-0.27,0.226,-0.028,-0.081,0.028,-0.044,-0.081,-0.109,-0.069,-0.06,-0.149,-0.06,-0.024,-0.073,0.02,-0.048
Ret,-0.06,1.0,0.044,-0.073,0.048,0.25,0.044,0.395,0.278,0.387,0.694,0.601,0.5,0.484,0.113,0.444,1.0,0.399,0.27,0.387,-0.004
Ret2,-0.081,0.044,1.0,-0.117,-0.06,0.141,-0.04,-0.077,-0.202,-0.101,0.02,-0.048,-0.109,-0.101,-0.044,-0.117,0.044,-0.081,-0.194,-0.101,-0.032
Ret3,-0.036,-0.073,-0.117,1.0,-0.065,0.065,0.254,-0.089,-0.06,-0.194,-0.177,-0.198,-0.185,-0.177,-0.169,-0.153,-0.073,-0.093,-0.06,-0.185,-0.173
Ret5,-0.028,0.048,-0.06,-0.065,1.0,-0.121,0.044,-0.065,0.028,-0.008,-0.048,-0.052,-0.032,-0.089,-0.105,-0.04,0.048,-0.06,0.028,-0.008,-0.181
Ret7,-0.27,0.25,0.141,0.065,-0.121,1.0,-0.052,0.242,0.101,0.137,0.218,0.238,0.234,0.169,0.113,0.266,0.25,0.246,0.109,0.153,0.069
Ret10,0.226,0.044,-0.04,0.254,0.044,-0.052,1.0,-0.109,0.032,-0.036,-0.069,-0.105,-0.044,-0.077,-0.109,-0.044,0.044,-0.113,0.04,-0.044,-0.105
SMA3,-0.028,0.395,-0.077,-0.089,-0.065,0.242,-0.109,1.0,0.504,0.492,0.669,0.698,0.661,0.694,0.323,0.645,0.395,0.996,0.504,0.5,0.383
SMA5,-0.081,0.278,-0.202,-0.06,0.028,0.101,0.032,0.504,1.0,0.44,0.464,0.581,0.681,0.649,0.399,0.706,0.278,0.508,0.984,0.456,0.5
SMA10,0.028,0.387,-0.101,-0.194,-0.008,0.137,-0.036,0.492,0.44,1.0,0.516,0.569,0.653,0.548,0.54,0.637,0.387,0.496,0.431,0.984,0.375


In [51]:
corr = df[-50:].drop(columns=['date']).corr()
# corr.round(5) 
corr.style.format("{:.3f}")

Unnamed: 0,Ret,Ret2,Ret3,Ret5,Ret7,Ret10,SMA3,SMA5,SMA10,EMA3,EMA5,EMA10,MACD,MACD_Signal,RSI,ROC1,ROC3,ROC5,ROC10,OT
Ret,1.0,0.028,-0.155,0.236,0.043,0.096,0.589,0.427,0.386,0.866,0.75,0.594,0.638,0.233,0.523,1.0,0.588,0.429,0.38,0.025
Ret2,0.028,1.0,0.013,-0.119,0.247,0.053,-0.026,-0.024,0.003,-0.011,-0.009,-0.007,0.042,0.081,-0.039,0.028,-0.026,-0.02,0.003,0.029
Ret3,-0.155,0.013,1.0,0.014,0.111,0.153,0.029,-0.088,-0.078,-0.104,-0.085,-0.075,-0.01,0.058,-0.069,-0.155,0.028,-0.086,-0.079,0.0
Ret5,0.236,-0.119,0.014,1.0,0.002,0.009,0.061,0.137,0.112,0.146,0.123,0.098,0.135,0.093,0.048,0.236,0.059,0.134,0.11,0.101
Ret7,0.043,0.247,0.111,0.002,1.0,-0.142,0.156,0.05,0.079,0.075,0.086,0.077,0.116,0.099,0.05,0.043,0.156,0.057,0.084,-0.164
Ret10,0.096,0.053,0.153,0.009,-0.142,1.0,0.012,0.044,-0.026,0.066,0.05,0.037,0.04,0.03,0.039,0.096,0.014,0.049,-0.028,0.344
SMA3,0.589,-0.026,0.029,0.061,0.156,0.012,1.0,0.769,0.57,0.884,0.919,0.853,0.873,0.532,0.815,0.589,1.0,0.77,0.563,-0.04
SMA5,0.427,-0.024,-0.088,0.137,0.05,0.044,0.769,1.0,0.714,0.727,0.847,0.891,0.88,0.737,0.88,0.427,0.768,1.0,0.713,0.072
SMA10,0.386,0.003,-0.078,0.112,0.079,-0.026,0.57,0.714,1.0,0.589,0.712,0.856,0.776,0.914,0.774,0.386,0.569,0.714,0.999,0.061
EMA3,0.866,-0.011,-0.104,0.146,0.075,0.066,0.884,0.727,0.589,1.0,0.971,0.862,0.895,0.494,0.798,0.866,0.884,0.728,0.582,0.018


In [None]:


def simulate_trading(true_rets, pred_rets, buy_threshold=0.005, fee_rate=0.001):
    assert len(true_rets) == len(pred_rets)
    balance = 1.0
    trade_count = 0
    successful_trades = 0
    for t in range(len(pred_rets)):
        if pred_rets[t] > buy_threshold:
            trade_count += 1
            if true_rets[t] > fee_rate:
                successful_trades += 1            
            balance *= 1.0 + (true_rets[t] - fee_rate)
    return balance - 1.0, trade_count, successful_trades # accumulated_return

true_rets = df["OT"].values
pred_rets = df["EMA3"].values

ac_ret, trade_count, successful_trades = simulate_trading(true_rets, pred_rets, buy_threshold=0.02, fee_rate=0.01)
print(ac_ret, trade_count, successful_trades)

-0.21851920108516543 38 11
