In [53]:
import os
import pickle

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score

In [54]:
# Load the minute-level trade history. Fail fast when the file is missing:
# every later cell needs `bitcoinData`, and silently skipping the load would
# either raise a confusing NameError or reuse a stale frame left in the kernel.
if not os.path.exists("bitcoin_trades.csv"):
    raise FileNotFoundError(
        "bitcoin_trades.csv not found in the working directory; "
        "it is required to build bitcoinData"
    )

bitcoinData = pd.read_csv("bitcoin_trades.csv", index_col=0)
# The first CSV column becomes the index and is parsed as Unix epoch seconds.
bitcoinData.index = pd.to_datetime(bitcoinData.index, unit='s')

# Rows with missing values are kept here; they are forward-filled after the
# 2020/2021 split instead of being dropped.
# bitcoinData = bitcoinData.dropna()

In [55]:
# bitcoinData.plot.line(y="Close", use_index=True)

In [56]:
# Label each row with whether the *next* row closes higher than this one.
# Any gaps in "Close" are forward-filled before comparing: a comparison
# against NaN is always False, so otherwise every row adjacent to a gap would
# be labelled 0 no matter what the price actually did.
close_filled = bitcoinData["Close"].ffill()
bitcoinData["next_minute"] = close_filled.shift(-1)
# NOTE: the final row has no following row, so its next_minute is NaN and its
# target is 0 by construction.
bitcoinData["target"] = (bitcoinData["next_minute"] > close_filled).astype(int)

In [57]:
# Split by calendar year first, then forward-fill each part on its own so no
# value is carried across the 2020/2021 boundary.
# `.ffill()` replaces `fillna(method="ffill")`, which is deprecated in pandas 2.x.
# NOTE: this cell rebinds `bitcoinData` to the 2021+ slice, so re-running it
# without reloading the CSV would leave `bitcoin2020` empty.
bitcoin2020 = bitcoinData.loc["2020-01-01":"2020-12-31"].ffill()
bitcoinData = bitcoinData.loc["2021-01-01":].ffill()

In [58]:
# Rolling-window features over several look-back horizons (measured in rows).
horizons = [5, 30, 90, 120]
new_predictors = []

# Only these two columns feed the features, so roll over them directly instead
# of rolling/shifting the whole frame (which also grows by two columns on every
# iteration). The resulting values are the same.
close = bitcoinData["Close"]
# shift(1) so the trend at a row only counts labels of the rows before it.
previous_targets = bitcoinData["target"].shift(1)

for horizon in horizons:
    # Ratio of the current close to its trailing mean over `horizon` rows.
    ratio_column = f"Close_Ratio_{horizon}"
    rolling_close_mean = close.rolling(horizon).mean().ffill()
    bitcoinData[ratio_column] = close / rolling_close_mean

    # Number of "up" labels (target == 1) among the previous `horizon` rows.
    trend_column = f"Trend_{horizon}"
    bitcoinData[trend_column] = previous_targets.rolling(horizon).sum()

    new_predictors += [ratio_column, trend_column]

In [59]:
# Drop the warm-up rows whose rolling features are still NaN. Selecting by NaN
# instead of a hard-coded `iloc[120:]` keeps this cell idempotent (re-running it
# drops nothing further) and stays correct if `horizons` is changed.
bitcoinData = bitcoinData.dropna(subset=new_predictors)
bitcoinData

Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,next_minute,target,Close_Ratio_5,Trend_5,Close_Ratio_30,Trend_30,Close_Ratio_90,Trend_90,Close_Ratio_120,Trend_120
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-01-01 02:00:00,29484.39,29535.95,29484.39,29518.58,14.232027,420039.533740,29513.682449,29527.72,1,1.001281,3.0,1.002408,17.0,1.009196,51.0,1.011994,64.0
2021-01-01 02:01:00,29526.11,29530.41,29506.27,29527.72,14.658453,432733.302960,29521.075132,29464.99,0,1.000934,4.0,1.002575,18.0,1.009280,52.0,1.012179,64.0
2021-01-01 02:02:00,29526.21,29527.03,29464.54,29464.99,8.033546,237094.194690,29513.020155,29456.74,0,0.999034,3.0,1.000412,17.0,1.006938,51.0,1.009905,64.0
2021-01-01 02:03:00,29464.99,29474.24,29433.10,29456.74,6.981526,205641.452150,29455.087712,29454.80,0,0.998891,3.0,1.000085,17.0,1.006472,50.0,1.009504,63.0
2021-01-01 02:04:00,29461.05,29465.69,29444.49,29454.80,3.924225,115610.085300,29460.618045,29439.16,0,0.998990,2.0,0.999997,16.0,1.006214,50.0,1.009320,63.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-03-30 23:56:00,58714.31,58714.31,58686.00,58686.00,1.384487,81259.372187,58692.753339,58685.81,0,0.999630,1.0,1.000154,12.0,0.999865,40.0,0.999263,54.0
2021-03-30 23:57:00,58683.97,58693.43,58683.97,58685.81,7.294848,428158.146640,58693.226508,58723.84,1,0.999675,1.0,1.000176,11.0,0.999891,40.0,0.999270,54.0
2021-03-30 23:58:00,58693.43,58723.84,58693.43,58723.84,1.705682,100117.070370,58696.198496,58760.59,1,1.000236,2.0,1.000823,12.0,1.000560,41.0,0.999923,54.0
2021-03-30 23:59:00,58742.18,58770.38,58742.18,58760.59,0.720415,42332.958633,58761.866202,58778.18,1,1.000792,2.0,1.001443,12.0,1.001198,42.0,1.000549,55.0


In [60]:
# --- One-off model training, kept for reference (currently disabled) ---
# model = RandomForestClassifier(n_estimators=100, min_samples_split=100, random_state=1)
# train = bitcoinData.iloc[:-100]
# test = bitcoinData.iloc[-100:]
# model.fit(train[predictors], train["target"])

# Raw price/volume columns used as model inputs. The engineered columns
# collected in `new_predictors` are not part of this list.
predictors = [
    "Close",
    "Volume_(BTC)",
    "Open",
    "High",
    "Low",
]

In [61]:
# import pickle


# filename = 'first_model.sav'
# pickle.dump(model, open(filename, 'wb'))

In [62]:
# from sklearn.metrics import precision_score

# preds = model.predict(test[predictors])
# preds = pd.Series(preds, index=test.index)
# precision_score(test["target"], preds)

In [63]:
# combined = pd.concat([test["target"], preds], axis=1)
# combined.plot()

In [64]:
def predict(train, test, predictors, model):
    """Fit ``model`` on ``train`` and return its predictions for ``test``.

    The model is fitted in place on ``train[predictors]`` against
    ``train["target"]``. The result is a two-column DataFrame indexed like
    ``test``: the true ``"target"`` values next to the model output, which is
    stored under the column name ``"Predictions"``.
    """
    train_features = train[predictors]
    train_labels = train["target"]
    model.fit(train_features, train_labels)

    predicted = pd.Series(
        model.predict(test[predictors]),
        index=test.index,
        name="Predictions",
    )
    return pd.concat([test["target"], predicted], axis=1)

In [65]:
def backtest(data, model, predictors, start=2500, step=250, train_window=2500, max_folds=10):
    """Walk-forward evaluation of ``model`` over ``data``.

    Beginning at row ``start``, the data is cut into consecutive test folds of
    ``step`` rows. For each fold the model is refitted (through ``predict``) on
    the rows immediately preceding the fold and then used to predict the fold.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the ``predictors`` columns and a ``"target"`` column.
        Rows are assumed to be in chronological order.
    model
        Estimator handed to ``predict``; it is refitted for every fold.
    predictors : list of str
        Feature column names.
    start : int
        Row position of the first test fold.
    step : int
        Number of rows per test fold.
    train_window : int
        Number of rows before each fold used for training. The default of 2500
        matches the value that used to be hard-coded.
    max_folds : int or None
        Stop after this many folds (default 10, the previous hard-coded cap).
        ``None`` evaluates every fold up to the end of ``data``.

    Returns
    -------
    pandas.DataFrame
        The per-fold results of ``predict`` concatenated in fold order.

    Raises
    ------
    ValueError
        If an argument is out of range or no fold can be built from ``data``.
    """
    if start < 1 or train_window < 1:
        raise ValueError("start and train_window must be at least 1")
    if max_folds is not None and max_folds < 1:
        raise ValueError("max_folds must be at least 1 (or None for no limit)")

    all_predictions = []

    for fold_number, i in enumerate(range(start, data.shape[0], step), start=1):
        # Clamp at 0: a negative slice start would wrap around to the end of
        # the frame and produce an empty or wrong training set.
        train = data.iloc[max(0, i - train_window):i].copy()
        test = data.iloc[i:(i + step)].copy()
        all_predictions.append(predict(train, test, predictors, model))
        print(i)  # progress: starting row of the fold just evaluated
        if max_folds is not None and fold_number >= max_folds:
            break

    if not all_predictions:
        raise ValueError(
            f"no test fold could be built: start={start}, step={step}, "
            f"rows={data.shape[0]}"
        )

    return pd.concat(all_predictions)

In [66]:
# Display the 2020 slice. It was split off before the rolling features were
# added, so it carries only the raw columns plus next_minute/target.
bitcoin2020


Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,next_minute,target
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-01 00:00:00,7160.69,7160.69,7159.64,7159.64,5.501691,39392.484606,7160.068520,7161.20,1
2020-01-01 00:01:00,7161.51,7161.51,7155.09,7161.20,3.776924,27047.304680,7161.198209,7158.82,0
2020-01-01 00:02:00,7158.82,7158.82,7158.82,7158.82,0.029278,209.595359,7158.820000,7156.90,0
2020-01-01 00:03:00,7158.82,7158.82,7156.90,7156.90,0.065819,471.156094,7158.321885,7157.20,1
2020-01-01 00:04:00,7158.50,7158.50,7154.97,7157.20,0.971387,6950.500833,7155.235666,7158.50,1
...,...,...,...,...,...,...,...,...,...
2020-12-31 23:55:00,29036.10,29052.02,29025.78,29052.02,7.631439,221592.488210,29036.791263,29039.53,0
2020-12-31 23:56:00,29052.02,29059.42,29039.28,29039.53,2.707583,78635.927205,29042.854499,29044.79,1
2020-12-31 23:57:00,29039.53,29048.63,29023.26,29044.79,14.503728,421181.023630,29039.501852,29000.12,0
2020-12-31 23:58:00,29044.79,29048.62,29000.12,29000.12,20.362289,590960.202150,29022.286679,28992.79,0


In [72]:
# Load the previously pickled classifier and walk it forward over the 2020 data.
# NOTE(security): pickle.load can execute arbitrary code; only load model files
# that you created yourself.
filename = 'first_model.sav'
# The context manager closes the file handle, which a bare open() call leaks.
with open(filename, 'rb') as model_file:
    model_load = pickle.load(model_file)
# 250000 is the row position in bitcoin2020 where the first test fold starts.
predictions = backtest(bitcoin2020, model_load, predictors, 250000)


250000
250250
250500
250750
251000
251250
251500
251750
252000
252250


In [73]:
# Count how many backtest rows were predicted as each class (0 or 1).
predictions["Predictions"].value_counts()

Predictions
0    1548
1     952
Name: count, dtype: int64

In [74]:
# Precision for class 1: of the rows predicted 1, the share whose target is 1.
# `precision_score` is imported with the other dependencies at the top of the
# notebook rather than in the middle of the analysis.
precision_score(predictions["target"], predictions["Predictions"])

0.49159663865546216