In [1]:
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, mean_squared_error

In [2]:
features_df = pd.read_csv("bitcoin_train.csv")
features_df.head()

Unnamed: 0.1,Unnamed: 0,Date,Open,High,Low,Close,Volume,Daily_Change,Daily_Change_Ind,MACD,...,Open_Gold,Daily_Change_Gold,Daily_Change_Perc_Gold,Increased_Gold,Close/Last_SP500,Open_SP500,Daily_Change_SP500,Daily_Change_Perc_SP500,Increased_SP500,label
0,708,2017-08-28,4384.450195,4403.930176,4224.640137,4382.660156,1959330048,-0.219727,0.0,318.734756,...,1297.6,17.7,0.013641,1,2444.24,2447.35,-3.11,-0.001271,0,1.0
1,563,2017-01-31,920.958984,972.018982,920.958984,970.403015,164582000,50.020996,1.0,12.846431,...,1197.7,13.7,0.011439,1,2278.87,2274.02,4.85,0.002133,1,1.0
2,1390,2020-05-14,9271.329102,9793.268555,9255.035156,9733.72168,56426907637,463.734375,1.0,491.284984,...,1723.1,17.8,0.01033,1,2852.5,2794.54,57.96,0.02074,1,0.0
3,19,2014-11-10,362.265015,374.81601,357.561005,366.924011,30450100,3.660003,1.0,-5.684871,...,1176.8,-17.0,-0.014446,0,2038.26,2032.01,6.25,0.003076,1,1.0
4,944,2018-08-06,7062.939941,7166.549805,6890.540039,6951.799805,3925900000,-116.680175,0.0,55.940525,...,1214.4,-5.8,-0.004776,0,2850.4,2840.29,10.11,0.003559,1,0.0


In [3]:
features_df.isnull().sum()

Unnamed: 0                    0
Date                          0
Open                          0
High                          0
Low                           0
Close                         0
Volume                        0
Daily_Change                  0
Daily_Change_Ind              0
MACD                          0
PROC_3                        0
PROC_5                        0
PROC_10                       0
wpr                           0
sto_os                        0
goog_trend_score              0
count                         0
compound                      0
retweets_count                0
likes_count                   0
replies_count                 0
compound_weighted_replies     0
compound_weighted_likes       0
compound_weighted_retweets    0
Daily_Change_Perc             0
Weekly_Change                 0
Weekly_Change_Perc            0
Close/Last_Gold               0
Open_Gold                     0
Daily_Change_Gold             0
Daily_Change_Perc_Gold        0
Increase

In [4]:
features_df.columns

Index(['Unnamed: 0', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume',
       'Daily_Change', 'Daily_Change_Ind', 'MACD', 'PROC_3', 'PROC_5',
       'PROC_10', 'wpr', 'sto_os', 'goog_trend_score', 'count', 'compound',
       'retweets_count', 'likes_count', 'replies_count',
       'compound_weighted_replies', 'compound_weighted_likes',
       'compound_weighted_retweets', 'Daily_Change_Perc', 'Weekly_Change',
       'Weekly_Change_Perc', 'Close/Last_Gold', 'Open_Gold',
       'Daily_Change_Gold', 'Daily_Change_Perc_Gold', 'Increased_Gold',
       'Close/Last_SP500', 'Open_SP500', 'Daily_Change_SP500',
       'Daily_Change_Perc_SP500', 'Increased_SP500', 'label'],
      dtype='object')

In [7]:
# Creating X and y 

features_lst = ['Daily_Change', 'Daily_Change_Ind', 'MACD', 'PROC_3', 'PROC_5','PROC_10', 'wpr',\
                'sto_os', 'goog_trend_score', 'count', 'compound', 'retweets_count', 'likes_count', 'replies_count',\
                'compound_weighted_replies', 'compound_weighted_likes','compound_weighted_retweets',\
                'Daily_Change_Perc', 'Daily_Change_Gold', 'Daily_Change_Perc_Gold', 'Increased_Gold', \
                'Daily_Change_SP500', 'Daily_Change_Perc_SP500', 'Increased_SP500', 'Weekly_Change', 'Weekly_Change_Perc' ]

y = features_df['label']


In [8]:
def predict_btc(rf_model, features):
    '''
    Fits a Random Forest model to predict whether the price of bitcoin will go up or down
    Inputs:
        rf_model: the random forest model
        features: list of features to use
    Returns: 
         yhat: predictions  
    '''
    
    X = features_df[features_lst]
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state = 42)
    
    rf.fit(X_train, y_train)

    yhat = rf.predict(X_valid)
    acc_score  = accuracy_score(y_valid, yhat)
    mse = mean_squared_error(y_valid, yhat)
    
    print('Fraction of correctly classified samples: ', acc_score)
    
    return (yhat, rf.feature_importances_)


In [9]:
rf = RandomForestClassifier()
yhat1, feature_import = predict_btc(rf, features_lst)

Fraction of correctly classified samples:  0.4728434504792332


In [10]:
for i in range(len(features_lst)):
    print(f"{features_lst[i]}: {feature_import[i]}")

Daily_Change: 0.041641370663031356
Daily_Change_Ind: 0.003937620602276846
MACD: 0.05010979988525663
PROC_3: 0.04411324186879252
PROC_5: 0.04423464044544665
PROC_10: 0.04400933232117042
wpr: 0.03632939327766383
sto_os: 0.03801772270315417
goog_trend_score: 0.04573770021318294
count: 0.03657653365635353
compound: 0.045909180642653945
retweets_count: 0.040554225757980546
likes_count: 0.039600293643680666
replies_count: 0.036221179557046075
compound_weighted_replies: 0.04114134163463002
compound_weighted_likes: 0.04488825252761517
compound_weighted_retweets: 0.04951573436011645
Daily_Change_Perc: 0.04761246764234312
Daily_Change_Gold: 0.04634566650051084
Daily_Change_Perc_Gold: 0.04552286285886493
Increased_Gold: 0.0036127411676297326
Daily_Change_SP500: 0.046002293553132526
Daily_Change_Perc_SP500: 0.04630741331629196
Increased_SP500: 0.004871959850077091
Weekly_Change: 0.04021804890855459
Weekly_Change_Perc: 0.03696898244254349


In [11]:
param_grid = [{'n_estimators': [5, 10, 15, 20, 25, 30]}, {'max_features': ['sqrt', 'log2', None]},
              {'oob_score': [True, False]}]


X = features_df[features_lst]
grid_search_rf = GridSearchCV(rf, param_grid, cv = 5, scoring = 'accuracy')
grid_search_rf.fit(X, y)


GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid=[{'n_estimators': [5, 10, 15, 20, 25, 30]},
                         {'max_features': ['sqrt', 'log2', None]},
                         {'oob_score': [True, False]}],
             scoring='accuracy')

In [12]:
best_hp = grid_search_rf.best_params_
best_hp

{'oob_score': False}

In [14]:
rf_log2 = RandomForestClassifier(oob_score=False)
yhat_log2 = predict_btc(rf_log2, features_lst)

Fraction of correctly classified samples:  0.5079872204472844


In [15]:
dogecoin = pd.read_csv("dogecoin_train.csv")
dogecoin = dogecoin.rename(columns={'goog_trend': 'goog_trend_score'})

In [16]:
y2 = dogecoin['label']

In [17]:
def predict_doge(rf_model, features):
    '''
    Fits a Random Forest model to predict whether the price of bitcoin will go up or down
    Inputs:
        rf_model: the random forest model
        features: list of features to use
    Returns: 
         yhat: predictions  
    '''
    
    X = dogecoin[features_lst]
    X_train, X_valid, y_train, y_valid = train_test_split(X, y2, random_state = 42)
    
    rf.fit(X_train, y_train)

    yhat = rf.predict(X_valid)
    acc_score  = accuracy_score(y_valid, yhat)
    mse = mean_squared_error(y_valid, yhat)
    
    print('Fraction of correctly classified samples: ', acc_score)
    
    return (yhat, rf.feature_importances_)

In [18]:
rf2 = RandomForestClassifier()
yhat1, feature_import = predict_doge(rf2, features_lst)

Fraction of correctly classified samples:  0.8079268292682927


In [19]:
for i in range(len(features_lst)):
    print(f"{features_lst[i]}: {feature_import[i]}")

Daily_Change: 0.03894316667250456
Daily_Change_Ind: 0.00725748378833058
MACD: 0.09832821182218617
PROC_3: 0.053616107079894344
PROC_5: 0.05594103921630059
PROC_10: 0.05045930959143881
wpr: 0.04997609868563159
sto_os: 0.055277626538312356
goog_trend_score: 0.060513122111475616
count: 0.012755522586048897
compound: 0.018661528115088033
retweets_count: 0.019593000429074175
likes_count: 0.02124871241515087
replies_count: 0.020903241754300028
compound_weighted_replies: 0.01575789499682435
compound_weighted_likes: 0.018420166897283423
compound_weighted_retweets: 0.018111997426373316
Daily_Change_Perc: 0.04909888138471762
Daily_Change_Gold: 0.050507396476791
Daily_Change_Perc_Gold: 0.055429656935759866
Increased_Gold: 0.005253633998469346
Daily_Change_SP500: 0.058284363191551176
Daily_Change_Perc_SP500: 0.053376248073163955
Increased_SP500: 0.006247399048119839
Weekly_Change: 0.051315844519146246
Weekly_Change_Perc: 0.0547223462460631
