In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score

In [2]:
# Load the modelling datasets. For each CSV, column 0 is used as the index
# and pandas is asked to parse that index as dates.
btc = pd.read_csv(
    "btc_data_for_modelling.csv",
    index_col=0,
    parse_dates=True,
)
btc_FGI = pd.read_csv(
    "btc_data_withFGI.csv",
    index_col=0,
    parse_dates=True,
)

In [3]:
# Preview the FGI dataset: a bare last expression, so the notebook renders the frame.
btc_FGI

Unnamed: 0,open,high,low,close,volume,edit_count,sentiment,neg_sentiment,tomorrow,target,...,Tenkan-sen,Kijun-sen,Senkou Span A,Senkou Span B,Middle Band,Upper Band,Lower Band,OBV,FGI_value,value_classification
2018-02-01,10237.299805,10288.799805,8812.280273,9170.540039,9959400448,6.233333,-0.382591,0.590601,8830.750000,0,...,10426.290039,13195.939941,15496.125000,13632.790039,11591.202100,14132.400116,9050.004084,1.237985e+11,30,Fear
2018-02-02,9142.280273,9142.280273,7796.490234,8830.750000,12726899712,5.566667,-0.406722,0.601712,9174.910156,1,...,9918.395020,12167.195312,15496.125000,13776.219971,11314.729590,13789.784903,8839.674277,1.110716e+11,15,Extreme Fear
2018-02-03,8852.120117,9430.750000,8251.629883,9174.910156,7263790080,5.266667,-0.397372,0.597167,8277.009766,0,...,9918.395020,11646.995117,15597.500000,13776.219971,11084.875098,13450.506510,8719.243685,1.183354e+11,40,Fear
2018-02-04,9175.700195,9334.870117,8031.220215,8277.009766,7073549824,4.733333,-0.385527,0.592116,6955.270020,0,...,9918.395020,11407.645020,15697.275146,13891.550049,10807.735596,13122.456240,8493.014951,1.112618e+11,24,Extreme Fear
2018-02-05,8270.540039,8364.839844,6756.680176,6955.270020,9285289984,4.700000,-0.385761,0.592116,7754.000000,1,...,9398.489990,10887.739990,15699.500000,13925.854980,10580.974097,13438.911654,7723.036539,1.019765e+11,11,Extreme Fear
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-04-17,63831.847656,64486.363281,59768.585938,61276.691406,41915247049,0.200000,-0.091111,0.116667,63512.753906,1,...,65755.546875,66241.972656,64809.739258,57814.630859,67664.828125,73539.014793,61790.641457,1.663132e+12,67,Greed
2024-04-18,61275.316406,64125.687500,60833.480469,63512.753906,36006307335,0.200000,-0.091111,0.116667,63843.570312,1,...,65512.410156,66241.972656,65349.796875,57814.630859,67345.824414,73400.736239,61290.912589,1.699138e+12,57,Greed
2024-04-19,63510.750000,65481.597656,59651.390625,63843.570312,49920425401,0.200000,-0.091111,0.116667,64994.441406,1,...,65453.812500,66183.375000,65335.638672,58007.443359,67055.737695,73202.015386,60909.460004,1.749059e+12,66,Greed
2024-04-20,63851.101562,65442.457031,63172.402344,64994.441406,23097485495,0.233333,-0.122229,0.150000,64926.644531,0,...,65437.066406,66183.375000,66261.089844,58007.443359,66738.777344,72603.544754,60874.009933,1.772156e+12,66,Greed


In [4]:
# Summarise the frame: index range, per-column non-null counts and dtypes.
btc_FGI.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2269 entries, 2018-02-01 to 2024-04-21
Data columns (total 36 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   open                  2269 non-null   float64
 1   high                  2269 non-null   float64
 2   low                   2269 non-null   float64
 3   close                 2269 non-null   float64
 4   volume                2269 non-null   int64  
 5   edit_count            2269 non-null   float64
 6   sentiment             2269 non-null   float64
 7   neg_sentiment         2269 non-null   float64
 8   tomorrow              2269 non-null   float64
 9   target                2269 non-null   int64  
 10  RSI                   2269 non-null   float64
 11  Stoch_RSI             2269 non-null   float64
 12  SMA(9)                2269 non-null   float64
 13  SMA(14)               2269 non-null   float64
 14  EMA(7)                2269 non-null   float64
 15  EMA

## Random Forest

In [5]:
# First model: random forest on prices, sentiment, a subset of the technical
# indicator columns and FGI_value. The column order is kept exactly as before.
predictors = [
    "close", "volume", "open", "high", "low",
    "sentiment", "neg_sentiment",
    "RSI", "EMA(7)", "EMA(25)", "EMA(99)", "MACD",
    "fib23.6%", "fib38.2%", "fib50.0%", "fib61.8%", "fib78.6%", "fib100.0%",
    "FGI_value",
]

# Hold out the final 200 rows for testing; every earlier row is training data.
train = btc_FGI.iloc[:-200]
test = btc_FGI.iloc[-200:]

model = RandomForestClassifier(
    n_estimators=100,
    min_samples_split=50,
    random_state=1,  # fixed seed so repeated fits give the same forest
)
model.fit(train[predictors], train["target"])

In [8]:
# Score the first model on the held-out test rows (precision and accuracy).
# Predictions are wrapped in a Series that shares the test index.
preds_test = pd.Series(model.predict(test[predictors]), index=test.index)
precision_test = precision_score(y_true=test["target"], y_pred=preds_test)
accuracy_test = accuracy_score(y_true=test["target"], y_pred=preds_test)
print("Precision:", precision_test)
print("Accuracy:", accuracy_test)

Precision: 0.5434782608695652
Accuracy: 0.455


In [10]:
# Score the first model on the rows it was fitted on (precision and accuracy),
# for comparison against the test-set figures.
preds_train = pd.Series(model.predict(train[predictors]), index=train.index)
precision_train = precision_score(y_true=train["target"], y_pred=preds_train)
accuracy_train = accuracy_score(y_true=train["target"], y_pred=preds_train)
print("Precision:", precision_train)
print("Accuracy:", accuracy_train)

Precision: 0.8105726872246696
Accuracy: 0.8303528274528758


In [11]:
# Second model: same forest settings and split, fitted on the broader
# predictors_allFeatures list. Note that edit_count and OBV, which appear in
# the frame preview, are not part of this list.
predictors_allFeatures = [
    "close", "volume", "open", "high", "low",
    "sentiment", "neg_sentiment",
    "RSI", "Stoch_RSI",
    "SMA(9)", "SMA(14)",
    "EMA(7)", "EMA(25)", "EMA(99)",
    "MACD", "Signal Line", "MACD Histogram",
    "fib23.6%", "fib38.2%", "fib50.0%", "fib61.8%", "fib78.6%", "fib100.0%",
    "Tenkan-sen", "Kijun-sen", "Senkou Span A", "Senkou Span B",
    "Middle Band", "Upper Band", "Lower Band",
    "FGI_value",
]

# Hold out the final 200 rows for testing; every earlier row is training data.
train1 = btc_FGI.iloc[:-200]
test1 = btc_FGI.iloc[-200:]

model1 = RandomForestClassifier(
    n_estimators=100,
    min_samples_split=50,
    random_state=1,  # fixed seed so repeated fits give the same forest
)
model1.fit(train1[predictors_allFeatures], train1["target"])

In [12]:
# Score the second model on the held-out test rows (precision and accuracy).
preds_test1 = pd.Series(
    model1.predict(test1[predictors_allFeatures]),
    index=test1.index,
)
precision_test1 = precision_score(y_true=test1["target"], y_pred=preds_test1)
accuracy_test1 = accuracy_score(y_true=test1["target"], y_pred=preds_test1)
print("Precision:", precision_test1)
print("Accuracy:", accuracy_test1)

Precision: 0.6056338028169014
Accuracy: 0.51


In [14]:
# Score the second model (model1) on the rows it was fitted on
# (precision and accuracy), for comparison against its test-set figures.
preds_train1 = pd.Series(
    model1.predict(train1[predictors_allFeatures]),
    index=train1.index,
)
precision_train1 = precision_score(y_true=train1["target"], y_pred=preds_train1)
accuracy_train1 = accuracy_score(y_true=train1["target"], y_pred=preds_train1)
print("Precision:", precision_train1)
print("Accuracy:", accuracy_train1)

Precision: 0.8113879003558719
Accuracy: 0.8279362010633156


In [15]:
# Third model: same forest settings and split, fitted on a reduced list that
# omits Signal Line, MACD Histogram, the Tenkan-sen/Kijun-sen/Senkou Span
# columns and the Middle/Upper/Lower Band columns.
predictors_notallFeatures = [
    "close", "volume", "open", "high", "low",
    "sentiment", "neg_sentiment",
    "RSI", "Stoch_RSI",
    "SMA(9)", "SMA(14)",
    "EMA(7)", "EMA(25)", "EMA(99)",
    "MACD",
    "fib23.6%", "fib38.2%", "fib50.0%", "fib61.8%", "fib78.6%", "fib100.0%",
    "FGI_value",
]

# Hold out the final 200 rows for testing; every earlier row is training data.
train2 = btc_FGI.iloc[:-200]
test2 = btc_FGI.iloc[-200:]

model2 = RandomForestClassifier(
    n_estimators=100,
    min_samples_split=50,
    random_state=1,  # fixed seed so repeated fits give the same forest
)
model2.fit(train2[predictors_notallFeatures], train2["target"])

In [18]:
# Score the third model on the held-out test rows (precision and accuracy).
preds_test2 = pd.Series(
    model2.predict(test2[predictors_notallFeatures]),
    index=test2.index,
)
precision_test2 = precision_score(y_true=test2["target"], y_pred=preds_test2)
accuracy_test2 = accuracy_score(y_true=test2["target"], y_pred=preds_test2)
print("Precision:", precision_test2)
print("Accuracy:", accuracy_test2)

Precision: 0.65
Accuracy: 0.495


In [19]:
# Score the third model (model2) on the rows it was fitted on, i.e. the
# train set (precision and accuracy), for comparison against its test figures.
preds_train2 = pd.Series(
    model2.predict(train2[predictors_notallFeatures]),
    index=train2.index,
)
precision_train2 = precision_score(y_true=train2["target"], y_pred=preds_train2)
accuracy_train2 = accuracy_score(y_true=train2["target"], y_pred=preds_train2)
print("Precision:", precision_train2)
print("Accuracy:", accuracy_train2)

Precision: 0.8295964125560538
Accuracy: 0.8448525857902368


In [20]:
# Fourth model: same forest settings and split, fitted only on the price,
# volume and sentiment columns (none of the indicator columns, no FGI_value).
predictors_withoutAddedFeatures = [
    "close", "volume", "open", "high", "low",
    "sentiment", "neg_sentiment",
]

# Hold out the final 200 rows for testing; every earlier row is training data.
train3 = btc_FGI.iloc[:-200]
test3 = btc_FGI.iloc[-200:]

model3 = RandomForestClassifier(
    n_estimators=100,
    min_samples_split=50,
    random_state=1,  # fixed seed so repeated fits give the same forest
)
model3.fit(train3[predictors_withoutAddedFeatures], train3["target"])

In [21]:
# Score the fourth model on the held-out test rows (precision and accuracy).
preds_test3 = pd.Series(
    model3.predict(test3[predictors_withoutAddedFeatures]),
    index=test3.index,
)
precision_test3 = precision_score(y_true=test3["target"], y_pred=preds_test3)
accuracy_test3 = accuracy_score(y_true=test3["target"], y_pred=preds_test3)
print("Precision:", precision_test3)
print("Accuracy:", accuracy_test3)

Precision: 0.47368421052631576
Accuracy: 0.425
