In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score

In [2]:
# Load the modelling dataset: the first CSV column becomes the index and is parsed as dates
btc = pd.read_csv(
    "btc_data_for_modelling.csv",
    parse_dates=True,
    index_col=0,
)

In [3]:
# Display the loaded frame to eyeball the columns and the date range of the index
btc

Unnamed: 0,open,high,low,close,volume,edit_count,sentiment,neg_sentiment,tomorrow,target,...,fib78.6%,fib100.0%,Tenkan-sen,Kijun-sen,Senkou Span A,Senkou Span B,Middle Band,Upper Band,Lower Band,OBV
2014-12-03,381.721985,383.026001,374.346008,375.010010,13340100,13.833333,-0.442062,0.718486,369.604004,0,...,376.203527,374.346008,376.577988,399.623489,351.479004,378.735001,374.500948,397.136992,351.864904,-7.333132e+07
2014-12-04,375.717987,378.654999,367.759003,369.604004,14529600,13.566667,-0.421014,0.704597,376.854004,1,...,370.090746,367.759003,373.028000,400.602493,351.479004,373.077988,373.090248,392.954783,353.225713,-8.786092e+07
2014-12-05,369.441986,379.191986,365.756012,376.854004,15181800,13.266667,-0.409911,0.682375,374.785004,0,...,368.631310,365.756012,373.028000,400.602493,351.776253,365.426498,373.126299,393.016684,353.235913,-7.267912e+07
2014-12-06,376.756989,378.447998,370.945007,374.785004,7009320,12.633333,-0.435429,0.694496,375.095001,1,...,372.550647,370.945007,373.028000,400.602493,353.718758,365.426498,372.471449,391.141294,353.801605,-7.968844e+07
2014-12-07,374.835999,376.291992,373.274994,375.095001,6491650,12.066667,-0.444384,0.700052,361.908997,0,...,373.920632,373.274994,376.678513,400.602493,353.919250,365.426498,371.855800,389.218052,354.493548,-7.319679e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-18,61275.316406,64125.687500,60833.480469,63512.753906,36006307335,0.200000,-0.091111,0.116667,63843.570312,1,...,61538.012773,60833.480469,65512.410156,66241.972656,65349.796875,57814.630859,67345.824414,73400.736239,61290.912589,1.699138e+12
2024-04-19,63510.750000,65481.597656,59651.390625,63843.570312,49920425401,0.200000,-0.091111,0.116667,64994.441406,1,...,60899.054930,59651.390625,65453.812500,66183.375000,65335.638672,58007.443359,67055.737695,73202.015386,60909.460004,1.749059e+12
2024-04-20,63851.101562,65442.457031,63172.402344,64994.441406,23097485495,0.233333,-0.122229,0.150000,64926.644531,0,...,63658.194047,63172.402344,65437.066406,66183.375000,66261.089844,58007.443359,66738.777344,72603.544754,60874.009933,1.772156e+12
2024-04-21,64992.816406,65723.242188,64277.722656,64926.644531,20506644853,0.300000,-0.155494,0.183333,66837.679688,1,...,64587.063836,64277.722656,63791.410156,66183.375000,66354.376953,58007.443359,66500.002148,72244.392454,60755.611843,1.751649e+12


In [4]:
# Summarise the frame: index range, column names, non-null counts and dtypes
btc.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3429 entries, 2014-12-03 to 2024-04-22
Data columns (total 34 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   open            3429 non-null   float64
 1   high            3429 non-null   float64
 2   low             3429 non-null   float64
 3   close           3429 non-null   float64
 4   volume          3429 non-null   int64  
 5   edit_count      3429 non-null   float64
 6   sentiment       3429 non-null   float64
 7   neg_sentiment   3429 non-null   float64
 8   tomorrow        3429 non-null   float64
 9   target          3429 non-null   int64  
 10  RSI             3429 non-null   float64
 11  Stoch_RSI       3429 non-null   float64
 12  SMA(9)          3429 non-null   float64
 13  SMA(14)         3429 non-null   float64
 14  EMA(7)          3429 non-null   float64
 15  EMA(25)         3429 non-null   float64
 16  EMA(99)         3429 non-null   float64
 17  MACD           

### Random Forest

In [5]:
# Baseline random forest.
# Split by position, without shuffling: the last 200 rows are held out as the
# test set and every earlier row is used for training.
train = btc.iloc[:-200]
test = btc.iloc[-200:]  # explicit positional slice, the exact complement of `train`

# Feature columns for the baseline model; the label column is "target".
# The order of this list is kept fixed because it is the column order passed to fit().
predictors = [
    "close", "volume", "open", "high", "low",
    "sentiment", "neg_sentiment",
    "RSI", "EMA(7)", "EMA(25)", "EMA(99)", "MACD",
    "fib23.6%", "fib38.2%", "fib50.0%", "fib61.8%", "fib78.6%", "fib100.0%",
]

# random_state is fixed so that re-running the cell rebuilds the same forest.
model = RandomForestClassifier(n_estimators=100, min_samples_split=50, random_state=1)
model.fit(train[predictors], train["target"])

In [6]:
# Evaluate the first model on the held-out test rows.
# Predictions are wrapped in a Series that shares the test frame's index.
preds_test = pd.Series(model.predict(test[predictors]), index=test.index)

precision_test = precision_score(y_true=test["target"], y_pred=preds_test)
accuracy_test = accuracy_score(y_true=test["target"], y_pred=preds_test)

print("Precision:", precision_test)
print("Accuracy:", accuracy_test)

Precision: 0.5714285714285714
Accuracy: 0.485


In [7]:
# Evaluate the first model on the rows it was trained on (for comparison with the test scores).
# Predictions are wrapped in a Series that shares the train frame's index.
preds_train = pd.Series(model.predict(train[predictors]), index=train.index)

precision_train = precision_score(y_true=train["target"], y_pred=preds_train)
accuracy_train = accuracy_score(y_true=train["target"], y_pred=preds_train)

print("Precision:", precision_train)
print("Accuracy:", accuracy_train)

Precision: 0.804004214963119
Accuracy: 0.8278104676370394


In [12]:
# Second random forest: same hyperparameters as `model`, trained on a wider set
# of indicator columns.
# Split by position, without shuffling: the last 200 rows are held out as the
# test set and every earlier row is used for training.
train1 = btc.iloc[:-200]
test1 = btc.iloc[-200:]  # explicit positional slice, the exact complement of `train1`

# NOTE: despite its name this list is not every column of `btc` (for example
# "edit_count" and "OBV" are not in it). An earlier variant of the list also
# contained "SMA(9)" and "SMA(14)"; those two columns are left out here.
# The order of this list is kept fixed because it is the column order passed to fit().
predictors_allFeatures = [
    # price and volume
    "close", "volume", "open", "high", "low",
    # sentiment scores
    "sentiment", "neg_sentiment",
    # RSI, EMA and MACD columns
    "RSI", "Stoch_RSI", "EMA(7)", "EMA(25)", "EMA(99)",
    "MACD", "Signal Line", "MACD Histogram",
    # fib* level columns
    "fib23.6%", "fib38.2%", "fib50.0%", "fib61.8%", "fib78.6%", "fib100.0%",
    # Tenkan-sen / Kijun-sen / Senkou Span columns
    "Tenkan-sen", "Kijun-sen", "Senkou Span A", "Senkou Span B",
    # band columns
    "Middle Band", "Upper Band", "Lower Band",
]

# random_state is fixed so that re-running the cell rebuilds the same forest.
model1 = RandomForestClassifier(n_estimators=100, min_samples_split=50, random_state=1)
model1.fit(train1[predictors_allFeatures], train1["target"])

In [13]:
# Evaluate the second model on the held-out test rows.
# Predictions are wrapped in a Series that shares the test frame's index.
preds_test1 = pd.Series(model1.predict(test1[predictors_allFeatures]), index=test1.index)

precision_test1 = precision_score(y_true=test1["target"], y_pred=preds_test1)
accuracy_test1 = accuracy_score(y_true=test1["target"], y_pred=preds_test1)

print("Precision:", precision_test1)
print("Accuracy:", accuracy_test1)

Precision: 0.6181818181818182
Accuracy: 0.505
