In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt

%matplotlib inline

import statsmodels.api as sm
import statsmodels.formula.api as smf

from statsmodels.stats.outliers_influence import variance_inflation_factor

In [2]:
def OSR2(model, df_train, df_test, dependent_var):
    """Return the out-of-sample R² of ``model`` on ``df_test``.

    The score is ``1 - SSE / SST``, where SSE is the sum of squared prediction
    errors on the test rows and SST is the sum of squared deviations of the
    test targets from the *training-set* mean of ``dependent_var``.

    Parameters
    ----------
    model : object exposing ``predict(df)``
    df_train : DataFrame supplying the baseline mean of ``dependent_var``
    df_test : DataFrame the model is scored on
    dependent_var : name of the target column in both frames
    """
    actual = df_test[dependent_var]
    predicted = model.predict(df_test)
    # Baseline predictor: the mean of the target over the training rows.
    baseline = np.mean(df_train[dependent_var])
    residual_ss = np.sum((actual - predicted) ** 2)
    total_ss = np.sum((actual - baseline) ** 2)
    return 1 - residual_ss / total_ss

In [3]:
def VIF(df, columns):
    """Compute the variance inflation factor (VIF) of each listed predictor.

    An intercept column is prepended to the selected columns before the VIFs
    are computed; the intercept's own VIF is not reported.

    Parameters
    ----------
    df : pandas.DataFrame containing the predictor columns
    columns : list of column names to evaluate

    Returns
    -------
    pandas.Series of VIF values indexed by ``columns``.
    """
    # has_constant='add' guarantees the intercept sits at position 0 even if a
    # selected column is itself constant; the default ('skip') would omit it
    # and leave every position below off by one.
    design = sm.add_constant(df[columns], has_constant='add').values
    # Position 0 is the intercept, so start at 1 instead of computing a value
    # only to discard it.
    vif = [variance_inflation_factor(design, i) for i in range(1, design.shape[1])]
    return pd.Series(vif, index=columns)

In [4]:
# Load the master dataset; the relative path is resolved against the current working directory.
DoD = pd.read_csv('Data_Master_2.csv')

In [5]:
# Column names, non-null counts and dtypes of the loaded frame.
DoD.info()
# First two rows, as a quick look at the parsed values.
DoD.head(2)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 507 entries, 0 to 506
Data columns (total 31 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   Dates                              507 non-null    object 
 1   Date                               507 non-null    int64  
 2   Bitcoin_Volume                     507 non-null    float64
 3   Bitcoin_Price                      507 non-null    float64
 4   Bitcoin_Price_Close                507 non-null    float64
 5   Bitcoin_Price_Previous_Day_Open    507 non-null    float64
 6   Bitcoin_Price_One_Week_Prior_Open  507 non-null    float64
 7   Ethereum_Price                     507 non-null    float64
 8   Doge_Price                         507 non-null    float64
 9   Litecoin_Price                     507 non-null    float64
 10  SP500                              507 non-null    float64
 11  UST_1Y_Maturity                    507 non-null    float64

Unnamed: 0,Dates,Date,Bitcoin_Volume,Bitcoin_Price,Bitcoin_Price_Close,Bitcoin_Price_Previous_Day_Open,Bitcoin_Price_One_Week_Prior_Open,Ethereum_Price,Doge_Price,Litecoin_Price,...,Negative_Tweet,Neutral_Tweets,Tweet_Average_Sentiment,News_Article_Count,News_Sentiment,Crypto_Global_Ranking,BTC_Price_Movement_DOD_UD,BTC_Price_Movement_DOD_10,BTC_Price_Movement_ID_UD,BTC_Price_Movement_ID_10
0,11-10-2021,1,48730830000.0,54734.125,57484.78906,54952.82031,48208.90625,4636.174316,0.255709,261.26297,...,220,933,0.075143,12,0.058515,56.0,Up,1,Up,1
1,12-10-2021,2,30966010000.0,57526.83203,56041.05859,54734.125,49174.96094,3908.496094,0.164422,148.598297,...,162,657,0.077401,9,0.116162,56.0,Up,1,Down,0


In [6]:
# Select the numeric (and boolean) columns explicitly before correlating:
# the frame also holds string columns such as 'Dates', and pandas >= 2.0
# raises on them instead of dropping them silently.
DoD.select_dtypes(include=['number', 'bool']).corr()

Unnamed: 0,Date,Bitcoin_Volume,Bitcoin_Price,Bitcoin_Price_Close,Bitcoin_Price_Previous_Day_Open,Bitcoin_Price_One_Week_Prior_Open,Ethereum_Price,Doge_Price,Litecoin_Price,SP500,...,All_Twitter_Posts,Positive_Tweets,Negative_Tweet,Neutral_Tweets,Tweet_Average_Sentiment,News_Article_Count,News_Sentiment,Crypto_Global_Ranking,BTC_Price_Movement_DOD_10,BTC_Price_Movement_ID_10
Date,1.0,-0.116204,-0.899014,-0.897526,-0.900416,-0.907818,-0.747528,-0.724678,-0.642903,-0.792102,...,-0.738314,-0.75604,-0.650482,-0.722833,-0.067121,0.422904,-0.091727,-0.791673,0.055865,-0.007339
Bitcoin_Volume,-0.116204,1.0,0.089039,0.08415,0.089478,0.095841,0.096813,0.123685,0.06488,0.00712,...,0.177285,0.180533,0.203216,0.164599,-0.052245,0.169132,-0.025556,0.119622,0.044363,-0.006284
Bitcoin_Price,-0.899014,0.089039,1.0,0.996753,0.996748,0.978568,0.831093,0.84014,0.806627,0.882743,...,0.690087,0.730281,0.558523,0.671817,0.17171,-0.493507,0.148491,0.805052,0.00031,-0.02413
Bitcoin_Price_Close,-0.897526,0.08415,0.996753,1.0,0.993515,0.975817,0.834162,0.843944,0.810633,0.884519,...,0.688048,0.730726,0.549789,0.669755,0.181311,-0.494795,0.150321,0.804744,0.002828,0.027576
Bitcoin_Price_Previous_Day_Open,-0.900416,0.089478,0.996748,0.993515,1.0,0.981838,0.827261,0.838087,0.804157,0.880326,...,0.690869,0.729239,0.565154,0.672465,0.164868,-0.494108,0.149525,0.805362,-0.037546,-0.019915
Bitcoin_Price_One_Week_Prior_Open,-0.907818,0.095841,0.978568,0.975817,0.981838,1.0,0.80681,0.819745,0.7827,0.870621,...,0.706688,0.740848,0.59558,0.687309,0.147999,-0.486302,0.162304,0.81174,-0.063527,-0.002707
Ethereum_Price,-0.747528,0.096813,0.831093,0.834162,0.827261,0.80681,1.0,0.930252,0.930939,0.751162,...,0.472966,0.508788,0.357691,0.460762,0.153115,-0.513105,0.141201,0.797146,-0.01858,0.025547
Doge_Price,-0.724678,0.123685,0.84014,0.843944,0.838087,0.819745,0.930252,1.0,0.952772,0.72643,...,0.477942,0.534061,0.371096,0.452046,0.196249,-0.554124,0.175562,0.83078,-0.021312,0.02027
Litecoin_Price,-0.642903,0.06488,0.806627,0.810633,0.804157,0.7827,0.930939,0.952772,1.0,0.711483,...,0.425854,0.479145,0.319251,0.403177,0.197209,-0.547948,0.156214,0.782314,-0.004101,0.037448
SP500,-0.792102,0.00712,0.882743,0.884519,0.880326,0.870621,0.751162,0.72643,0.711483,1.0,...,0.579748,0.61064,0.444752,0.571089,0.178137,-0.426185,0.106677,0.730843,0.011631,0.030452


In [7]:
# Hold-out split on the integer 'Date' column: rows up to 405 are used for
# fitting, the remaining rows for out-of-sample evaluation.
DoD_train = DoD.loc[DoD['Date'] <= 405]
DoD_test = DoD.loc[DoD['Date'] > 405]

(len(DoD_train), len(DoD_test))

(405, 102)

### Model1 - All candidate predictor variables from the DataFrame

In [8]:
# Baseline model: OLS of Bitcoin_Price on every candidate predictor, fitted on
# the training rows only. The formula is split into thematic groups; the stray
# doubled '+' before the last term has been removed (same model either way).
model1 = smf.ols(
    formula=(
        'Bitcoin_Price ~ Bitcoin_Price_Previous_Day_Open + Bitcoin_Price_One_Week_Prior_Open'
        ' + Ethereum_Price + Doge_Price + Litecoin_Price'
        ' + SP500 + UST_1Y_Maturity + UST_10Y_Maturity'
        ' + Crude_Oil + Gold + Copper + Corn'
        ' + Singapore_Dollar + Euro'
        ' + All_Twitter_Posts + Positive_Tweets + Negative_Tweet + Neutral_Tweets'
        ' + Tweet_Average_Sentiment + News_Article_Count + News_Sentiment'
        ' + Crypto_Global_Ranking + BTC_Price_Movement_ID_10'
    ),
    data=DoD_train,
).fit()
print(model1.summary())

                            OLS Regression Results                            
Dep. Variable:          Bitcoin_Price   R-squared:                       0.993
Model:                            OLS   Adj. R-squared:                  0.993
Method:                 Least Squares   F-statistic:                     2530.
Date:                Fri, 21 Apr 2023   Prob (F-statistic):               0.00
Time:                        15:28:30   Log-Likelihood:                -3427.7
No. Observations:                 405   AIC:                             6903.
Df Residuals:                     381   BIC:                             6999.
Df Model:                          23                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [9]:
# Out-of-sample R² of model1 on the held-out test rows.
OSR2(model1, DoD_train, DoD_test, 'Bitcoin_Price')

0.9940605811282505

In [10]:
# Predictors of model1, checked for multicollinearity on the training rows.
cols1 = [
    'Bitcoin_Price_Previous_Day_Open',
    'Bitcoin_Price_One_Week_Prior_Open',
    'Ethereum_Price',
    'Doge_Price',
    'Litecoin_Price',
    'SP500',
    'UST_1Y_Maturity',
    'UST_10Y_Maturity',
    'Crude_Oil',
    'Gold',
    'Copper',
    'Corn',
    'Singapore_Dollar',
    'Euro',
    'All_Twitter_Posts',
    'Positive_Tweets',
    'Negative_Tweet',
    'Neutral_Tweets',
    'Tweet_Average_Sentiment',
    'News_Article_Count',
    'News_Sentiment',
    'Crypto_Global_Ranking',
    'BTC_Price_Movement_ID_10',
]
VIF(DoD_train, cols1)

Bitcoin_Price_Previous_Day_Open          36.215249
Bitcoin_Price_One_Week_Prior_Open        38.471346
Ethereum_Price                           22.418116
Doge_Price                               19.655883
Litecoin_Price                           28.652092
SP500                                    11.270754
UST_1Y_Maturity                          62.728909
UST_10Y_Maturity                         49.896973
Crude_Oil                                 5.625838
Gold                                      9.149555
Copper                                    7.001230
Corn                                      6.848355
Singapore_Dollar                         10.893340
Euro                                     25.619791
All_Twitter_Posts                    543438.253540
Positive_Tweets                       61270.908387
Negative_Tweet                         7326.738367
Neutral_Tweets                       176505.632923
Tweet_Average_Sentiment                   2.393596
News_Article_Count             

# Model2 - Remove the All_Twitter_Posts, Positive_Tweets, Negative_Tweet and Neutral_Tweets variables (extremely high VIF)

In [11]:
# Model 2: model1's predictors without the four tweet-count columns.
# The stray doubled '+' before the last term has been removed (same model either way).
model2 = smf.ols(
    formula=(
        'Bitcoin_Price ~ Bitcoin_Price_Previous_Day_Open + Bitcoin_Price_One_Week_Prior_Open'
        ' + Ethereum_Price + Doge_Price + Litecoin_Price'
        ' + SP500 + UST_1Y_Maturity + UST_10Y_Maturity'
        ' + Crude_Oil + Gold + Copper + Corn'
        ' + Singapore_Dollar + Euro'
        ' + Tweet_Average_Sentiment + News_Article_Count + News_Sentiment'
        ' + Crypto_Global_Ranking + BTC_Price_Movement_ID_10'
    ),
    data=DoD_train,
).fit()
print(model2.summary())

                            OLS Regression Results                            
Dep. Variable:          Bitcoin_Price   R-squared:                       0.993
Model:                            OLS   Adj. R-squared:                  0.992
Method:                 Least Squares   F-statistic:                     2778.
Date:                Fri, 21 Apr 2023   Prob (F-statistic):               0.00
Time:                        15:28:30   Log-Likelihood:                -3449.4
No. Observations:                 405   AIC:                             6939.
Df Residuals:                     385   BIC:                             7019.
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [12]:
# Out-of-sample R² of model2 on the held-out test rows.
OSR2(model2, DoD_train, DoD_test, 'Bitcoin_Price')

0.9941540314229251

In [13]:
# Predictors of model2, checked for multicollinearity on the training rows.
cols2 = [
    'Bitcoin_Price_Previous_Day_Open',
    'Bitcoin_Price_One_Week_Prior_Open',
    'Ethereum_Price',
    'Doge_Price',
    'Litecoin_Price',
    'SP500',
    'UST_1Y_Maturity',
    'UST_10Y_Maturity',
    'Crude_Oil',
    'Gold',
    'Copper',
    'Corn',
    'Singapore_Dollar',
    'Euro',
    'Tweet_Average_Sentiment',
    'News_Article_Count',
    'News_Sentiment',
    'Crypto_Global_Ranking',
    'BTC_Price_Movement_ID_10',
]
VIF(DoD_train, cols2)

Bitcoin_Price_Previous_Day_Open      33.274298
Bitcoin_Price_One_Week_Prior_Open    37.241637
Ethereum_Price                       21.167725
Doge_Price                           19.508081
Litecoin_Price                       28.504078
SP500                                10.963073
UST_1Y_Maturity                      59.647002
UST_10Y_Maturity                     48.760012
Crude_Oil                             5.584900
Gold                                  9.039114
Copper                                6.837374
Corn                                  6.440558
Singapore_Dollar                     10.776485
Euro                                 25.288473
Tweet_Average_Sentiment               1.226606
News_Article_Count                    1.749926
News_Sentiment                        1.089209
Crypto_Global_Ranking                 4.537515
BTC_Price_Movement_ID_10              1.069765
dtype: float64

# Model3 - Remove UST_1Y_Maturity

In [14]:
# Model 3: model2's predictors without UST_1Y_Maturity.
# The stray doubled '+' before the last term has been removed (same model either way).
model3 = smf.ols(
    formula=(
        'Bitcoin_Price ~ Bitcoin_Price_Previous_Day_Open + Bitcoin_Price_One_Week_Prior_Open'
        ' + Ethereum_Price + Doge_Price + Litecoin_Price'
        ' + SP500 + UST_10Y_Maturity'
        ' + Crude_Oil + Gold + Copper + Corn'
        ' + Singapore_Dollar + Euro'
        ' + Tweet_Average_Sentiment + News_Article_Count + News_Sentiment'
        ' + Crypto_Global_Ranking + BTC_Price_Movement_ID_10'
    ),
    data=DoD_train,
).fit()
print(model3.summary())

                            OLS Regression Results                            
Dep. Variable:          Bitcoin_Price   R-squared:                       0.993
Model:                            OLS   Adj. R-squared:                  0.992
Method:                 Least Squares   F-statistic:                     2924.
Date:                Fri, 21 Apr 2023   Prob (F-statistic):               0.00
Time:                        15:28:30   Log-Likelihood:                -3450.5
No. Observations:                 405   AIC:                             6939.
Df Residuals:                     386   BIC:                             7015.
Df Model:                          18                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [15]:
# Out-of-sample R² of model3 on the held-out test rows.
OSR2(model3, DoD_train, DoD_test, 'Bitcoin_Price')

0.9945964408414276

In [16]:
# Predictors of model3, checked for multicollinearity on the training rows.
cols3 = [
    'Bitcoin_Price_Previous_Day_Open',
    'Bitcoin_Price_One_Week_Prior_Open',
    'Ethereum_Price',
    'Doge_Price',
    'Litecoin_Price',
    'SP500',
    'UST_10Y_Maturity',
    'Crude_Oil',
    'Gold',
    'Copper',
    'Corn',
    'Singapore_Dollar',
    'Euro',
    'Tweet_Average_Sentiment',
    'News_Article_Count',
    'News_Sentiment',
    'Crypto_Global_Ranking',
    'BTC_Price_Movement_ID_10',
]
VIF(DoD_train, cols3)

Bitcoin_Price_Previous_Day_Open      32.928171
Bitcoin_Price_One_Week_Prior_Open    30.713864
Ethereum_Price                       20.988672
Doge_Price                           19.455791
Litecoin_Price                       28.388028
SP500                                10.961722
UST_10Y_Maturity                     11.597570
Crude_Oil                             4.633561
Gold                                  8.143054
Copper                                6.793672
Corn                                  5.967620
Singapore_Dollar                     10.282301
Euro                                 23.553003
Tweet_Average_Sentiment               1.215497
News_Article_Count                    1.749773
News_Sentiment                        1.088868
Crypto_Global_Ranking                 4.503241
BTC_Price_Movement_ID_10              1.061543
dtype: float64

# Model4 - Remove Bitcoin_Price_Previous_Day_Open

In [17]:
# Model 4: model3's predictors without Bitcoin_Price_Previous_Day_Open.
# The stray doubled '+' before the last term has been removed (same model either way).
model4 = smf.ols(
    formula=(
        'Bitcoin_Price ~ Bitcoin_Price_One_Week_Prior_Open'
        ' + Ethereum_Price + Doge_Price + Litecoin_Price'
        ' + SP500 + UST_10Y_Maturity'
        ' + Crude_Oil + Gold + Copper + Corn'
        ' + Singapore_Dollar + Euro'
        ' + Tweet_Average_Sentiment + News_Article_Count + News_Sentiment'
        ' + Crypto_Global_Ranking + BTC_Price_Movement_ID_10'
    ),
    data=DoD_train,
).fit()
print(model4.summary())

                            OLS Regression Results                            
Dep. Variable:          Bitcoin_Price   R-squared:                       0.967
Model:                            OLS   Adj. R-squared:                  0.965
Method:                 Least Squares   F-statistic:                     658.7
Date:                Fri, 21 Apr 2023   Prob (F-statistic):          2.08e-273
Time:                        15:28:30   Log-Likelihood:                -3759.0
No. Observations:                 405   AIC:                             7554.
Df Residuals:                     387   BIC:                             7626.
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [18]:
# Out-of-sample R² of model4 on the held-out test rows.
OSR2(model4, DoD_train, DoD_test, 'Bitcoin_Price')

0.9152728246047328

In [19]:
# Predictors of model4, checked for multicollinearity on the training rows.
cols4 = [
    'Bitcoin_Price_One_Week_Prior_Open',
    'Ethereum_Price',
    'Doge_Price',
    'Litecoin_Price',
    'SP500',
    'UST_10Y_Maturity',
    'Crude_Oil',
    'Gold',
    'Copper',
    'Corn',
    'Singapore_Dollar',
    'Euro',
    'Tweet_Average_Sentiment',
    'News_Article_Count',
    'News_Sentiment',
    'Crypto_Global_Ranking',
    'BTC_Price_Movement_ID_10',
]
VIF(DoD_train, cols4)

Bitcoin_Price_One_Week_Prior_Open    12.836652
Ethereum_Price                       20.976053
Doge_Price                           19.153658
Litecoin_Price                       28.087016
SP500                                 9.931438
UST_10Y_Maturity                     11.577800
Crude_Oil                             4.378915
Gold                                  8.124081
Copper                                6.793363
Corn                                  5.952207
Singapore_Dollar                      9.819410
Euro                                 23.230921
Tweet_Average_Sentiment               1.211384
News_Article_Count                    1.749692
News_Sentiment                        1.087354
Crypto_Global_Ranking                 4.411150
BTC_Price_Movement_ID_10              1.034916
dtype: float64

# Model5 - Remove Ethereum_Price, Litecoin_Price, Doge_Price

In [20]:
# Model 5: model4's predictors without the three altcoin price columns.
model5 = smf.ols(
    formula=(
        'Bitcoin_Price ~ Bitcoin_Price_One_Week_Prior_Open'
        ' + SP500 + UST_10Y_Maturity'
        ' + Crude_Oil + Gold + Copper + Corn'
        ' + Singapore_Dollar + Euro'
        ' + Tweet_Average_Sentiment + News_Article_Count + News_Sentiment'
        ' + Crypto_Global_Ranking + BTC_Price_Movement_ID_10'
    ),
    data=DoD_train,
).fit()
print(model5.summary())

                            OLS Regression Results                            
Dep. Variable:          Bitcoin_Price   R-squared:                       0.963
Model:                            OLS   Adj. R-squared:                  0.962
Method:                 Least Squares   F-statistic:                     731.0
Date:                Fri, 21 Apr 2023   Prob (F-statistic):          9.17e-270
Time:                        15:28:30   Log-Likelihood:                -3778.1
No. Observations:                 405   AIC:                             7586.
Df Residuals:                     390   BIC:                             7646.
Df Model:                          14                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [21]:
# Out-of-sample R² of model5 on the held-out test rows.
OSR2(model5, DoD_train, DoD_test, 'Bitcoin_Price')

0.9063376405621735

In [22]:
# Predictors of model5, checked for multicollinearity on the training rows.
cols5 = [
    'Bitcoin_Price_One_Week_Prior_Open',
    'SP500',
    'UST_10Y_Maturity',
    'Crude_Oil',
    'Gold',
    'Copper',
    'Corn',
    'Singapore_Dollar',
    'Euro',
    'Tweet_Average_Sentiment',
    'News_Article_Count',
    'News_Sentiment',
    'Crypto_Global_Ranking',
    'BTC_Price_Movement_ID_10',
]
VIF(DoD_train, cols5)

Bitcoin_Price_One_Week_Prior_Open     9.232110
SP500                                 8.858079
UST_10Y_Maturity                     10.204937
Crude_Oil                             4.245119
Gold                                  7.611823
Copper                                6.041655
Corn                                  5.150158
Singapore_Dollar                      9.429922
Euro                                 19.859645
Tweet_Average_Sentiment               1.172229
News_Article_Count                    1.652004
News_Sentiment                        1.075182
Crypto_Global_Ranking                 4.128009
BTC_Price_Movement_ID_10              1.021267
dtype: float64

# Model6 - Remove UST_10Y_Maturity

In [23]:
# Model 6: model5's predictors without UST_10Y_Maturity.
model6 = smf.ols(
    formula=(
        'Bitcoin_Price ~ Bitcoin_Price_One_Week_Prior_Open'
        ' + SP500'
        ' + Crude_Oil + Gold + Copper + Corn'
        ' + Singapore_Dollar + Euro'
        ' + Tweet_Average_Sentiment + News_Article_Count + News_Sentiment'
        ' + Crypto_Global_Ranking + BTC_Price_Movement_ID_10'
    ),
    data=DoD_train,
).fit()
print(model6.summary())

                            OLS Regression Results                            
Dep. Variable:          Bitcoin_Price   R-squared:                       0.963
Model:                            OLS   Adj. R-squared:                  0.962
Method:                 Least Squares   F-statistic:                     788.1
Date:                Fri, 21 Apr 2023   Prob (F-statistic):          4.26e-271
Time:                        15:28:30   Log-Likelihood:                -3778.4
No. Observations:                 405   AIC:                             7585.
Df Residuals:                     391   BIC:                             7641.
Df Model:                          13                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [24]:
# Out-of-sample R² of model6 on the held-out test rows.
OSR2(model6, DoD_train, DoD_test, 'Bitcoin_Price')

0.8993773956301776

In [25]:
# Predictors of model6, checked for multicollinearity on the training rows.
cols6 = [
    'Bitcoin_Price_One_Week_Prior_Open',
    'SP500',
    'Crude_Oil',
    'Gold',
    'Copper',
    'Corn',
    'Singapore_Dollar',
    'Euro',
    'Tweet_Average_Sentiment',
    'News_Article_Count',
    'News_Sentiment',
    'Crypto_Global_Ranking',
    'BTC_Price_Movement_ID_10',
]
VIF(DoD_train, cols6)

Bitcoin_Price_One_Week_Prior_Open     8.974296
SP500                                 7.166474
Crude_Oil                             4.203872
Gold                                  6.566017
Copper                                5.940655
Corn                                  3.718565
Singapore_Dollar                      9.219864
Euro                                 18.941777
Tweet_Average_Sentiment               1.162073
News_Article_Count                    1.647164
News_Sentiment                        1.062205
Crypto_Global_Ranking                 4.102897
BTC_Price_Movement_ID_10              1.020873
dtype: float64

# Model7 - Remove Euro variable

In [26]:
# Model 7: model6's predictors without Euro.
model7 = smf.ols(
    formula=(
        'Bitcoin_Price ~ Bitcoin_Price_One_Week_Prior_Open'
        ' + SP500'
        ' + Crude_Oil + Gold + Copper + Corn'
        ' + Singapore_Dollar'
        ' + Tweet_Average_Sentiment + News_Article_Count + News_Sentiment'
        ' + Crypto_Global_Ranking + BTC_Price_Movement_ID_10'
    ),
    data=DoD_train,
).fit()
print(model7.summary())

                            OLS Regression Results                            
Dep. Variable:          Bitcoin_Price   R-squared:                       0.961
Model:                            OLS   Adj. R-squared:                  0.960
Method:                 Least Squares   F-statistic:                     804.0
Date:                Fri, 21 Apr 2023   Prob (F-statistic):          1.86e-267
Time:                        15:28:30   Log-Likelihood:                -3790.6
No. Observations:                 405   AIC:                             7607.
Df Residuals:                     392   BIC:                             7659.
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [27]:
# Out-of-sample R² of model7 on the held-out test rows.
OSR2(model7, DoD_train, DoD_test, 'Bitcoin_Price')

0.9401686179189963

In [28]:
# Predictors of model7, checked for multicollinearity on the training rows.
cols7 = [
    'Bitcoin_Price_One_Week_Prior_Open',
    'SP500',
    'Crude_Oil',
    'Gold',
    'Copper',
    'Corn',
    'Singapore_Dollar',
    'Tweet_Average_Sentiment',
    'News_Article_Count',
    'News_Sentiment',
    'Crypto_Global_Ranking',
    'BTC_Price_Movement_ID_10',
]
VIF(DoD_train, cols7)

Bitcoin_Price_One_Week_Prior_Open    7.266960
SP500                                6.648406
Crude_Oil                            4.197679
Gold                                 6.478796
Copper                               3.945597
Corn                                 3.581496
Singapore_Dollar                     4.383781
Tweet_Average_Sentiment              1.150574
News_Article_Count                   1.595632
News_Sentiment                       1.059624
Crypto_Global_Ranking                3.920740
BTC_Price_Movement_ID_10             1.017757
dtype: float64

# Model8 - Remove News_Sentiment variable - low significance

In [29]:
# Model 8: model7's predictors without News_Sentiment.
model8 = smf.ols(
    formula=(
        'Bitcoin_Price ~ Bitcoin_Price_One_Week_Prior_Open'
        ' + SP500'
        ' + Crude_Oil + Gold + Copper + Corn'
        ' + Singapore_Dollar'
        ' + Tweet_Average_Sentiment + News_Article_Count'
        ' + Crypto_Global_Ranking + BTC_Price_Movement_ID_10'
    ),
    data=DoD_train,
).fit()
print(model8.summary())

                            OLS Regression Results                            
Dep. Variable:          Bitcoin_Price   R-squared:                       0.961
Model:                            OLS   Adj. R-squared:                  0.960
Method:                 Least Squares   F-statistic:                     876.7
Date:                Fri, 21 Apr 2023   Prob (F-statistic):          1.10e-268
Time:                        15:28:30   Log-Likelihood:                -3791.2
No. Observations:                 405   AIC:                             7606.
Df Residuals:                     393   BIC:                             7654.
Df Model:                          11                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [30]:
# Out-of-sample R² of model8 on the held-out test rows.
OSR2(model8, DoD_train, DoD_test, 'Bitcoin_Price')

0.9419982297115143

In [31]:
# Predictors of model8, checked for multicollinearity on the training rows.
cols8 = [
    'Bitcoin_Price_One_Week_Prior_Open',
    'SP500',
    'Crude_Oil',
    'Gold',
    'Copper',
    'Corn',
    'Singapore_Dollar',
    'Tweet_Average_Sentiment',
    'News_Article_Count',
    'Crypto_Global_Ranking',
    'BTC_Price_Movement_ID_10',
]
VIF(DoD_train, cols8)

Bitcoin_Price_One_Week_Prior_Open    7.212291
SP500                                6.634364
Crude_Oil                            4.197040
Gold                                 6.455311
Copper                               3.945530
Corn                                 3.549932
Singapore_Dollar                     4.383734
Tweet_Average_Sentiment              1.147410
News_Article_Count                   1.591368
Crypto_Global_Ranking                3.893890
BTC_Price_Movement_ID_10             1.017727
dtype: float64

# Model9 - Remove Crypto_Global_Ranking

In [32]:
# Model 9: model8's predictors without Crypto_Global_Ranking.
model9 = smf.ols(
    formula=(
        'Bitcoin_Price ~ Bitcoin_Price_One_Week_Prior_Open'
        ' + SP500'
        ' + Crude_Oil + Gold + Copper + Corn'
        ' + Singapore_Dollar'
        ' + Tweet_Average_Sentiment + News_Article_Count'
        ' + BTC_Price_Movement_ID_10'
    ),
    data=DoD_train,
).fit()
print(model9.summary())

                            OLS Regression Results                            
Dep. Variable:          Bitcoin_Price   R-squared:                       0.961
Model:                            OLS   Adj. R-squared:                  0.960
Method:                 Least Squares   F-statistic:                     966.2
Date:                Fri, 21 Apr 2023   Prob (F-statistic):          3.86e-270
Time:                        15:28:31   Log-Likelihood:                -3791.3
No. Observations:                 405   AIC:                             7605.
Df Residuals:                     394   BIC:                             7649.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [33]:
# Out-of-sample R² of model9 on the held-out test rows.
OSR2(model9, DoD_train, DoD_test, 'Bitcoin_Price')

0.94081364542045

In [34]:
# Predictors of model9, checked for multicollinearity on the training rows.
cols9 = [
    'Bitcoin_Price_One_Week_Prior_Open',
    'SP500',
    'Crude_Oil',
    'Gold',
    'Copper',
    'Corn',
    'Singapore_Dollar',
    'Tweet_Average_Sentiment',
    'News_Article_Count',
    'BTC_Price_Movement_ID_10',
]
VIF(DoD_train, cols9)

Bitcoin_Price_One_Week_Prior_Open    5.372873
SP500                                6.633207
Crude_Oil                            4.178839
Gold                                 6.335405
Copper                               3.883477
Corn                                 3.300995
Singapore_Dollar                     4.284425
Tweet_Average_Sentiment              1.140640
News_Article_Count                   1.590407
BTC_Price_Movement_ID_10             1.014468
dtype: float64

# Model10 - Remove News_Article_Count

In [35]:
# Model 10: model9's predictors without News_Article_Count.
model10 = smf.ols(
    formula=(
        'Bitcoin_Price ~ Bitcoin_Price_One_Week_Prior_Open'
        ' + SP500'
        ' + Crude_Oil + Gold + Copper + Corn'
        ' + Singapore_Dollar'
        ' + Tweet_Average_Sentiment'
        ' + BTC_Price_Movement_ID_10'
    ),
    data=DoD_train,
).fit()
print(model10.summary())

                            OLS Regression Results                            
Dep. Variable:          Bitcoin_Price   R-squared:                       0.960
Model:                            OLS   Adj. R-squared:                  0.960
Method:                 Least Squares   F-statistic:                     1067.
Date:                Fri, 21 Apr 2023   Prob (F-statistic):          6.03e-271
Time:                        15:28:31   Log-Likelihood:                -3793.0
No. Observations:                 405   AIC:                             7606.
Df Residuals:                     395   BIC:                             7646.
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                                        coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------
Interc

In [36]:
# Out-of-sample R² of model10 on the held-out test rows.
OSR2(model10, DoD_train, DoD_test, 'Bitcoin_Price')

0.9382203644879736

In [37]:
# Predictors of model10, checked for multicollinearity on the training rows.
cols10 = [
    'Bitcoin_Price_One_Week_Prior_Open',
    'SP500',
    'Crude_Oil',
    'Gold',
    'Copper',
    'Corn',
    'Singapore_Dollar',
    'Tweet_Average_Sentiment',
    'BTC_Price_Movement_ID_10',
]
VIF(DoD_train, cols10)

Bitcoin_Price_One_Week_Prior_Open    4.833401
SP500                                6.627623
Crude_Oil                            4.178294
Gold                                 6.293295
Copper                               3.882604
Corn                                 3.247833
Singapore_Dollar                     4.282050
Tweet_Average_Sentiment              1.140482
BTC_Price_Movement_ID_10             1.013934
dtype: float64

- Based on the final model (Model10), the variables that are most significant in determining the price of Bitcoin are:
    - Price of bitcoin from one week prior
    - Price of the SP500 (USD)
    - Price of crude oil (USD)
    - Price of gold (USD)
    - Price of copper (USD)
    - Price of corn (USD)
    - USD to SGD exchange rate
    - Average twitter sentiment for the day
    - Intraday price movement
    
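- Each of these variables has high significance (P>|t| value of less than 0.05)
- Each of these variables has low multicollinearity with the others (VIF values < 10)
- Each of these variables is also available online and updated daily, making it easy to collect the data
- Caveat: the intraday price-movement indicator appears to be derived from the same day's closing price, so it would not be known in advance when predicting that day's price; confirm this before using the model for forecasting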