In [1143]:
import pandas as pd
import math
from math import sqrt
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# read in data predictions for BTC

In [1144]:
predictions_df = pd.read_csv("training_configs/btc_all_predictions.csv", parse_dates=True)
true_price_df = pd.read_csv("../tmp/historic_crypto_prices - bitcoin_jan_2017_sep_4_2021 copy.csv")

In [1145]:
predictions_df.head()

Unnamed: 0,date,nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,date_prediction_for,test_model_lookback_1
0,2010-01-01,0.0,0.0,0.0,0.0,0.0,0.0,2010-01-09,0.0
1,2019-04-02,4752.149527,253.537135,4730.976374,654.310204,3469.734359,795.20143,2019-04-09,0.0
2,2019-04-03,4546.248123,407.206977,4373.560445,255.627627,4437.900633,1413.509939,2019-04-10,0.0
3,2019-04-04,4550.99859,726.168584,4045.591506,1861.981408,4012.01123,523.09301,2019-04-11,0.0
4,2019-04-05,5003.264197,353.606543,4819.668698,817.215937,3977.238773,553.057301,2019-04-12,0.0


In [1146]:
predictions_df.date.min()

'2010-01-01'

In [1147]:
predictions_df.date.max()

'2021-06-16'

In [1148]:
predictions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 808 entries, 0 to 807
Data columns (total 9 columns):
 #   Column                                                  Non-Null Count  Dtype  
---  ------                                                  --------------  -----  
 0   date                                                    808 non-null    object 
 1   nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2  808 non-null    float64
 2   tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2     808 non-null    float64
 3   nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2  808 non-null    float64
 4   tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2     808 non-null    float64
 5   nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2  808 non-null    float64
 6   tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2     808 non-null    float64
 7   date_prediction_for                                     808 non-null    object 
 8   test_model_lookback_1                    

In [1149]:
true_close_df = true_price_df[['date','close']]

### Map the predictions for date to the true price date

In [1150]:
predictions_df.date_prediction_for

0       2010-01-09
1       2019-04-09
2       2019-04-10
3       2019-04-11
4       2019-04-12
          ...     
803     2021-06-19
804     2021-06-20
805     2021-06-21
806     2021-06-22
807     2021-06-23
Name: date_prediction_for, Length: 808, dtype: object

In [1151]:
merged_df = pd.merge(predictions_df, true_close_df, left_on='date_prediction_for',
                     right_on ='date', suffixes=['_pred','_true'])

In [1152]:
merged_df['day'] = [t.day for t in pd.to_datetime(merged_df.date_prediction_for)]
merged_df['month'] =  [t.month for t in pd.to_datetime(merged_df.date_prediction_for)]
merged_df['quarter'] = [t.quarter for t in pd.to_datetime(merged_df.date_prediction_for)]
merged_df['day_of_year'] = [t.strftime('%j') for t in pd.to_datetime(merged_df.date_prediction_for)]

In [1153]:
merged_df

Unnamed: 0,date_pred,nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,date_prediction_for,test_model_lookback_1,date_true,close,day,month,quarter,day_of_year
0,2019-04-02,4752.149527,253.537135,4730.976374,654.310204,3469.734359,795.201430,2019-04-09,0.0,2019-04-09,5204.96,9,4,2,099
1,2019-04-03,4546.248123,407.206977,4373.560445,255.627627,4437.900633,1413.509939,2019-04-10,0.0,2019-04-10,5324.55,10,4,2,100
2,2019-04-04,4550.998590,726.168584,4045.591506,1861.981408,4012.011230,523.093010,2019-04-11,0.0,2019-04-11,5064.49,11,4,2,101
3,2019-04-05,5003.264197,353.606543,4819.668698,817.215937,3977.238773,553.057301,2019-04-12,0.0,2019-04-12,5089.54,12,4,2,102
4,2019-04-06,5184.268504,197.768508,5123.185573,1596.333494,4093.602236,920.106602,2019-04-13,0.0,2019-04-13,5096.59,13,4,2,103
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
802,2021-06-12,38538.259919,31534.584129,31467.961211,35962.499904,29552.951504,35495.923901,2021-06-19,0.0,2021-06-19,35615.87,19,6,2,170
803,2021-06-13,34217.414448,36146.378804,35592.368263,36750.568589,31735.996384,36302.542550,2021-06-20,0.0,2021-06-20,35698.30,20,6,2,171
804,2021-06-14,37894.852144,36550.939321,33650.191827,37648.190534,30225.075563,36625.597963,2021-06-21,0.0,2021-06-21,31676.69,21,6,2,172
805,2021-06-15,38574.222909,40984.156096,45822.994318,38266.367164,30888.406876,37240.240984,2021-06-22,0.0,2021-06-22,32505.66,22,6,2,173


In [1154]:
merged_df.drop(['date_pred', 'date_true', 'date_prediction_for'], inplace=True, axis=1)

In [1155]:
merged_df.tail()

Unnamed: 0,nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,test_model_lookback_1,close,day,month,quarter,day_of_year
802,38538.259919,31534.584129,31467.961211,35962.499904,29552.951504,35495.923901,0.0,35615.87,19,6,2,170
803,34217.414448,36146.378804,35592.368263,36750.568589,31735.996384,36302.54255,0.0,35698.3,20,6,2,171
804,37894.852144,36550.939321,33650.191827,37648.190534,30225.075563,36625.597963,0.0,31676.69,21,6,2,172
805,38574.222909,40984.156096,45822.994318,38266.367164,30888.406876,37240.240984,0.0,32505.66,22,6,2,173
806,35937.900945,36651.530289,35609.455753,37073.789737,30458.488853,36378.993818,0.0,33723.03,23,6,2,174


In [1156]:
merged_df.describe()

Unnamed: 0,nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,test_model_lookback_1,close,day,month,quarter
count,807.0,807.0,807.0,807.0,807.0,807.0,807.0,807.0,807.0,807.0,807.0
mean,18284.57461,17532.185192,18378.716919,17578.528693,18353.732547,17465.723931,0.0,17916.182466,15.744734,6.377943,2.459727
std,16916.20454,16208.061385,17164.932888,16031.407627,17164.032977,16355.812803,0.0,15964.300123,8.747308,3.322927,1.074427
min,4546.248123,139.87784,3666.543647,-289.448271,3261.251466,-280.599843,0.0,4970.79,1.0,1.0,1.0
25%,8680.318501,8408.93661,8665.497577,8345.788075,8563.555148,8259.347023,0.0,8563.235,8.0,4.0,2.0
50%,10316.931078,10272.321734,10346.269373,10363.155508,10493.418536,10082.577239,0.0,10169.57,16.0,6.0,2.0
75%,18781.04101,18457.630081,18201.4225,18502.4044,18403.370431,18587.661388,0.0,18767.56,23.0,9.0,3.0
max,70219.723358,65873.591667,74388.810607,64053.516022,80122.167399,69524.744598,0.0,63503.46,31.0,12.0,4.0


### train test split

In [1403]:
split_pct = .80
l_merged = len(merged_df)
merged_df_train, merged_df_test = merged_df.iloc[:int(split_pct*l_merged),:],  merged_df.iloc[int(l_merged*split_pct):,:]
                                                                                                  

In [1404]:
print(f"train rows = {len(merged_df_train)}")

train rows = 645


In [1405]:
print(f"test rows = {len(merged_df_test)}")

test rows = 162


In [1406]:
merged_df_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 645 entries, 0 to 644
Data columns (total 12 columns):
 #   Column                                                  Non-Null Count  Dtype  
---  ------                                                  --------------  -----  
 0   nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2  645 non-null    float64
 1   tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2     645 non-null    float64
 2   nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2  645 non-null    float64
 3   tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2     645 non-null    float64
 4   nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2  645 non-null    float64
 5   tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2     645 non-null    float64
 6   test_model_lookback_1                                   645 non-null    float64
 7   close                                                   645 non-null    float64
 8   day                                     

In [1407]:
merged_df_test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 162 entries, 645 to 806
Data columns (total 12 columns):
 #   Column                                                  Non-Null Count  Dtype  
---  ------                                                  --------------  -----  
 0   nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2  162 non-null    float64
 1   tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2     162 non-null    float64
 2   nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2  162 non-null    float64
 3   tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2     162 non-null    float64
 4   nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2  162 non-null    float64
 5   tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2     162 non-null    float64
 6   test_model_lookback_1                                   162 non-null    float64
 7   close                                                   162 non-null    float64
 8   day                                   

In [1408]:
merged_df_train.iloc[:, ~merged_df.columns.isin(['close'])]

Unnamed: 0,nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,test_model_lookback_1,day,month,quarter,day_of_year
0,4752.149527,253.537135,4730.976374,654.310204,3469.734359,795.201430,0.0,9,4,2,099
1,4546.248123,407.206977,4373.560445,255.627627,4437.900633,1413.509939,0.0,10,4,2,100
2,4550.998590,726.168584,4045.591506,1861.981408,4012.011230,523.093010,0.0,11,4,2,101
3,5003.264197,353.606543,4819.668698,817.215937,3977.238773,553.057301,0.0,12,4,2,102
4,5184.268504,197.768508,5123.185573,1596.333494,4093.602236,920.106602,0.0,13,4,2,103
...,...,...,...,...,...,...,...,...,...,...,...
640,26913.349815,27713.340675,30167.456203,28349.326076,28917.651589,25862.850593,0.0,8,1,1,008
641,30278.225347,29304.713713,28881.018791,26750.182515,29745.745751,27160.888420,0.0,9,1,1,009
642,34649.988585,30729.619377,32179.851957,28317.446060,29403.035073,29349.469968,0.0,10,1,1,010
643,33688.542469,29673.923317,33352.718104,28307.698304,28193.253800,28340.491740,0.0,11,1,1,011


In [1409]:


merged_df_x_train, merged_df_y_train = merged_df_train.iloc[:, ~merged_df_train.columns.isin(['close'])], merged_df_train.iloc[:, merged_df_train.columns.isin(['close'])]
merged_df_x_test, merged_df_y_test =  merged_df_test.iloc[:, ~merged_df_test.columns.isin(['close'])], merged_df_test.iloc[:, merged_df_test.columns.isin(['close'])]
                                                                                                                                                 

In [1410]:
merged_df_x_train

Unnamed: 0,nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,test_model_lookback_1,day,month,quarter,day_of_year
0,4752.149527,253.537135,4730.976374,654.310204,3469.734359,795.201430,0.0,9,4,2,099
1,4546.248123,407.206977,4373.560445,255.627627,4437.900633,1413.509939,0.0,10,4,2,100
2,4550.998590,726.168584,4045.591506,1861.981408,4012.011230,523.093010,0.0,11,4,2,101
3,5003.264197,353.606543,4819.668698,817.215937,3977.238773,553.057301,0.0,12,4,2,102
4,5184.268504,197.768508,5123.185573,1596.333494,4093.602236,920.106602,0.0,13,4,2,103
...,...,...,...,...,...,...,...,...,...,...,...
640,26913.349815,27713.340675,30167.456203,28349.326076,28917.651589,25862.850593,0.0,8,1,1,008
641,30278.225347,29304.713713,28881.018791,26750.182515,29745.745751,27160.888420,0.0,9,1,1,009
642,34649.988585,30729.619377,32179.851957,28317.446060,29403.035073,29349.469968,0.0,10,1,1,010
643,33688.542469,29673.923317,33352.718104,28307.698304,28193.253800,28340.491740,0.0,11,1,1,011


In [1411]:
merged_df_y_train

Unnamed: 0,close
0,5204.96
1,5324.55
2,5064.49
3,5089.54
4,5096.59
...,...
640,40797.61
641,40254.55
642,38356.44
643,35566.66


# performance, rmse , on average prediction

In [1412]:
average_predictions_train = np.mean(merged_df_x_train.iloc[:,:6], axis=1)
average_predictions_test = np.mean(merged_df_x_test.iloc[:,:6], axis=1)

In [1413]:
average_predictions_train[:10]

0    2442.651505
1    2572.342291
2    2619.974055
3    2587.341908
4    2852.544153
5    2721.518844
6    3153.856190
7    2727.367343
8    3387.347636
9    2872.964842
dtype: float64

In [1414]:
merged_df_y_train.head(10)

Unnamed: 0,close
0,5204.96
1,5324.55
2,5064.49
3,5089.54
4,5096.59
5,5167.72
6,5067.11
7,5235.56
8,5251.94
9,5298.39


In [1415]:
average_rmse_train = sqrt(mean_squared_error(average_predictions_train, merged_df_y_train))
average_rmse_test = sqrt(mean_squared_error(average_predictions_test, merged_df_y_test))

In [1416]:
print(f" The average training rmse is {average_rmse_train}")
print(f" The average testing rmse is {average_rmse_test}")

 The average training rmse is 2190.499380083371
 The average testing rmse is 6691.979049915355


# train linear regression on ALL predictions

In [1417]:
lregr = LinearRegression()
lregr_last_180 = LinearRegression()

In [1418]:
merged_df_x_train.iloc[-180:,:]

Unnamed: 0,nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2,nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2,test_model_lookback_1,day,month,quarter,day_of_year
465,9567.780113,8620.578375,10040.550223,9192.953401,9712.171149,8918.906192,0.0,17,7,3,199
466,9760.981086,8953.552878,10029.810314,8974.965689,9380.270446,8653.565705,0.0,18,7,3,200
467,9026.348515,9054.237648,9275.661669,8509.094151,9287.236739,9345.738876,0.0,19,7,3,201
468,9554.235201,9158.205618,9114.356556,8742.131679,9065.785563,9000.832401,0.0,20,7,3,202
469,9091.804465,8884.531332,8547.082461,8862.531424,9332.539079,8619.927162,0.0,21,7,3,203
...,...,...,...,...,...,...,...,...,...,...,...
640,26913.349815,27713.340675,30167.456203,28349.326076,28917.651589,25862.850593,0.0,8,1,1,008
641,30278.225347,29304.713713,28881.018791,26750.182515,29745.745751,27160.888420,0.0,9,1,1,009
642,34649.988585,30729.619377,32179.851957,28317.446060,29403.035073,29349.469968,0.0,10,1,1,010
643,33688.542469,29673.923317,33352.718104,28307.698304,28193.253800,28340.491740,0.0,11,1,1,011


In [1419]:
lregr_last_180.fit(merged_df_x_train.iloc[-180:,:], merged_df_y_train.iloc[-180:,:])

LinearRegression()

In [1420]:
lregr.fit(merged_df_x_train, merged_df_y_train)

LinearRegression()

In [1421]:
train_predictions_lrgr = lregr.predict(merged_df_x_train)

In [1422]:
test_predictions_lrgr = lregr.predict(merged_df_x_test)

In [1423]:
test_predictions_lrgr_last_180 = lregr_last_180.predict(merged_df_x_test)

In [1424]:
test_predictions_lrgr_last_180[:15]

array([[42212.77272376],
       [44927.21290255],
       [50047.40702974],
       [50877.29124594],
       [50787.70849924],
       [53637.99185942],
       [47132.18865553],
       [51312.05374633],
       [55304.20315119],
       [52699.37230986],
       [51533.20352843],
       [51763.77018075],
       [53384.01645268],
       [51972.99894488],
       [51288.27229693]])

In [1425]:
test_predictions_lrgr[:15]


array([[40890.02755817],
       [44420.7816461 ],
       [45607.06913876],
       [46886.50844792],
       [51898.11440304],
       [49911.4775658 ],
       [44302.36506318],
       [50059.03210815],
       [53788.33432213],
       [52718.81345796],
       [50523.88316508],
       [49017.88679294],
       [45039.09393503],
       [48127.61018697],
       [46747.17360202]])

In [1426]:
merged_df_y_test.head(15)

Unnamed: 0,close
645,37316.36
646,39187.33
647,36825.37
648,36178.14
649,35791.28
650,36630.08
651,36069.81
652,35547.75
653,30825.7
654,33005.76


In [1427]:
average_rmse_test_lrgr_last_180 = sqrt(mean_squared_error(test_predictions_lrgr_last_180, merged_df_y_test))

In [1428]:
average_rmse_train_lrgr = sqrt(mean_squared_error(train_predictions_lrgr, merged_df_y_train))
average_rmse_test_lrgr = sqrt(mean_squared_error(test_predictions_lrgr, merged_df_y_test))

In [1429]:
print(f" The average training rmse with linear stacking is {average_rmse_train_lrgr}")
print(f" The average testing rmse with linear stacking is {average_rmse_test_lrgr}")
print(f" The average testing rmse with linear stacking last 180 is {average_rmse_test_lrgr_last_180}")

 The average training rmse with linear stacking is 1359.6128080778496
 The average testing rmse with linear stacking is 13894.758032025939
 The average testing rmse with linear stacking last 180 is 17877.47577021059




#### feature importance

In [1430]:
lregr.coef_.ravel()[np.argsort(lregr.coef_.ravel())[::-1]]

array([ 1.18785087e+03,  2.60114636e+02,  7.61648559e-01,  3.48315064e-01,
        3.03278548e-01, -3.47393225e-11, -6.32660174e-02, -7.48133697e-02,
       -9.01773157e-02, -2.66077947e+02, -8.39773872e+03])

In [1431]:
merged_df_x_train.columns[np.argsort(lregr.coef_)[::-1]]

array([['month', 'day',
        'tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2',
        'tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2',
        'tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2',
        'test_model_lookback_1',
        'nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2',
        'nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2',
        'nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2',
        'day_of_year', 'quarter']], dtype=object)

In [1432]:
lregr_last_180.coef_.ravel()[np.argsort(lregr_last_180.coef_.ravel())[::-1]]

array([ 2.37216633e+03,  5.42528071e+02,  6.67700741e-01,  5.30639058e-01,
        1.34123090e-01,  5.15409839e-02, -8.95106211e-11, -1.83389664e-02,
       -2.94354652e-02, -5.30419132e+02, -1.74738278e+04])

In [1433]:
merged_df_x_train.columns[np.argsort(lregr_last_180.coef_)[::-1]]

array([['month', 'day',
        'nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2',
        'tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2',
        'test_model_lookback_1',
        'nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2',
        'tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2',
        'tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2',
        'nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2',
        'day_of_year', 'quarter']], dtype=object)

## Test Random Forest

In [1434]:
rf_last_180 = RandomForestRegressor(n_estimators=500)
rf = RandomForestRegressor(n_estimators=500)

In [1435]:
rf.fit(merged_df_x_train, merged_df_y_train)
rf_last_180.fit(merged_df_x_train[-180:], merged_df_y_train[-180:])

  rf.fit(merged_df_x_train, merged_df_y_train)
  rf_last_180.fit(merged_df_x_train[-180:], merged_df_y_train[-180:])


RandomForestRegressor(n_estimators=500)

In [1436]:
train_predictions_rf = rf.predict(merged_df_x_train)
test_predictions_rf = rf.predict(merged_df_x_test)

test_predictions_rf_last_180 = rf_last_180.predict(merged_df_x_test)

In [1437]:
test_predictions_rf_last_180[:15].reshape(-1,1)


array([[35926.24138],
       [35602.99304],
       [35589.31674],
       [35565.14844],
       [35537.18392],
       [35454.41172],
       [35419.06586],
       [35419.06586],
       [35419.06586],
       [35419.06586],
       [35419.06586],
       [35419.06586],
       [35485.19298],
       [35419.06586],
       [35426.22468]])

In [1438]:
test_predictions_rf[:15].reshape(-1,1)


array([[36126.20738],
       [35892.99734],
       [35892.99734],
       [35892.99734],
       [35892.99734],
       [35892.99734],
       [35892.99734],
       [35850.6729 ],
       [35836.9966 ],
       [35836.9966 ],
       [35836.9966 ],
       [35836.9966 ],
       [35895.0807 ],
       [35836.9966 ],
       [35836.9966 ]])

In [1439]:
merged_df_y_test.head(15)

Unnamed: 0,close
645,37316.36
646,39187.33
647,36825.37
648,36178.14
649,35791.28
650,36630.08
651,36069.81
652,35547.75
653,30825.7
654,33005.76


In [1440]:
average_rmse_train_rf = sqrt(mean_squared_error(train_predictions_rf, merged_df_y_train))
average_rmse_test_rf = sqrt(mean_squared_error(test_predictions_rf, merged_df_y_test))

In [1441]:
average_rmse_test_rf_last_180 = sqrt(mean_squared_error(test_predictions_rf_last_180, merged_df_y_test))

In [1442]:
print(f" The average training rmse with RF stacking is {average_rmse_train_rf}")
print(f" The average testing rmse with RF stacking is {average_rmse_test_rf}")

 The average training rmse with RF stacking is 275.8194708022487
 The average testing rmse with RF stacking is 14822.534454061855


In [1443]:
print(f" The average testing rmse with RFlast 180 days  stacking is {average_rmse_test_rf_last_180}")


 The average testing rmse with RFlast 180 days  stacking is 15064.703782683142


In [1444]:
rf.feature_importances_.ravel()[np.argsort(rf.feature_importances_.ravel())[::-1]]

array([0.39833026, 0.3427265 , 0.11652126, 0.04347751, 0.0384032 ,
       0.02902339, 0.02196588, 0.00403823, 0.00290866, 0.00260511,
       0.        ])

In [1445]:
merged_df_x_train.columns[np.argsort(rf.feature_importances_)[::-1]]

Index(['nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2',
       'nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2',
       'nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2', 'day_of_year',
       'tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2',
       'tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2',
       'tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2', 'day', 'month',
       'quarter', 'test_model_lookback_1'],
      dtype='object')

## Test gradient boosting

In [1446]:
gb = GradientBoostingRegressor(n_estimators=200)
gb_last_180 = GradientBoostingRegressor(n_estimators=200)

In [1447]:
gb.fit(merged_df_x_train, merged_df_y_train)
gb_last_180.fit(merged_df_x_train[-180:], merged_df_y_train[-180:])

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


GradientBoostingRegressor(n_estimators=200)

In [1448]:
train_predictions_gb = gb.predict(merged_df_x_train)
test_predictions_gb = gb.predict(merged_df_x_test)
test_predictions_gb_last_180 = gb.predict(merged_df_x_test)

In [1449]:
test_predictions_gb_last_180

array([34124.28120335, 33957.51877973, 33957.51877973, 34022.16992994,
       34022.16992994, 34032.77400121, 34032.77400121, 33979.71559034,
       33979.71559034, 34000.24057805, 34000.24057805, 34111.61698439,
       34968.73223541, 34302.3217971 , 34371.95275248, 34451.78107785,
       34511.47855969, 35397.3712347 , 35624.45723602, 35366.4690131 ,
       35409.3412318 , 36119.54018293, 35409.3412318 , 35399.97647336,
       36110.17542448, 36151.91384505, 36297.95096688, 36170.27603414,
       34798.23725063, 34715.36579894, 34360.18828569, 34360.18828569,
       34093.99038846, 34093.99038846, 28390.86761925, 34042.33659882,
       34052.94067009, 33744.32853923, 33744.32853923, 33744.32853923,
       33834.1495989 , 33796.34213978, 33907.71854613, 34182.30977876,
       34098.42335883, 33917.71617382, 33903.31759963, 35122.2187796 ,
       35122.2187796 , 35122.2187796 , 35122.2187796 , 35155.72623986,
       35200.48312869, 35200.48312869, 35200.48312869, 34957.07058634,
      

In [1450]:
test_predictions_gb[:15]


array([34124.28120335, 33957.51877973, 33957.51877973, 34022.16992994,
       34022.16992994, 34032.77400121, 34032.77400121, 33979.71559034,
       33979.71559034, 34000.24057805, 34000.24057805, 34111.61698439,
       34968.73223541, 34302.3217971 , 34371.95275248])

In [1451]:
merged_df_y_test.head(15)

Unnamed: 0,close
645,37316.36
646,39187.33
647,36825.37
648,36178.14
649,35791.28
650,36630.08
651,36069.81
652,35547.75
653,30825.7
654,33005.76


In [1457]:
average_rmse_train_gb = sqrt(mean_squared_error(train_predictions_gb, merged_df_y_train))
average_rmse_test_gb = sqrt(mean_squared_error(test_predictions_gb, merged_df_y_test))
average_rmse_test_gb_last_180 = sqrt(mean_squared_error(test_predictions_gb_last_180, merged_df_y_test))

In [1458]:
print(f" The average training rmse with GB stacking is {average_rmse_train_gb}")
print(f" The average testing rmse with GB stacking is {average_rmse_test_gb}")
print(f" The average testing rmse with GB stacking last 180 is {average_rmse_test_gb_last_180}")

 The average training rmse with GB stacking is 281.1537776646442
 The average testing rmse with GB stacking is 16440.099258378792
 The average testing rmse with GB stacking last 180 is 16440.099258378792


In [1459]:
gb.feature_importances_.ravel()[np.argsort(gb.feature_importances_.ravel())[::-1]]

array([3.95549845e-01, 3.90881747e-01, 6.68986197e-02, 6.64146617e-02,
       5.19576669e-02, 1.70059798e-02, 6.97068543e-03, 2.80483011e-03,
       1.39657826e-03, 1.19386136e-04, 0.00000000e+00])

In [1460]:
merged_df_x_train.columns[np.argsort(gb.feature_importances_)[::-1]]

Index(['nbeats_btc_lookback_15_window_5_std_1.25_num_add_dfs_2',
       'nbeats_btc_lookback_45_window_5_std_1.25_num_add_dfs_2',
       'tcn_btc_lookback_15_window_5_std_1.25_num_add_dfs_2',
       'nbeats_btc_lookback_30_window_5_std_1.25_num_add_dfs_2', 'day_of_year',
       'tcn_btc_lookback_45_window_5_std_1.25_num_add_dfs_2', 'quarter',
       'tcn_btc_lookback_30_window_5_std_1.25_num_add_dfs_2', 'day', 'month',
       'test_model_lookback_1'],
      dtype='object')

# total comparison

In [1456]:
print(f"split_pct = {split_pct}")
print(f" The average training rmse with GB stacking is {average_rmse_train_gb}")
print(f" The average testing rmse with GB stacking is {average_rmse_test_gb}")
print("---")
print(f" The average training rmse with RF stacking is {average_rmse_train_rf}")
print(f" The average testing rmse with RF stacking is {average_rmse_test_rf}")

print(f" The average testing rmse with RFlast 180 days  stacking is {average_rmse_test_rf_last_180}")
print("---")
print(f" The average training rmse with linear stacking is {average_rmse_train_lrgr}")
print(f" The average testing rmse with linear stacking is {average_rmse_test_lrgr}")
print(f" The average testing rmse with linear stacking last 180 is {average_rmse_test_lrgr_last_180}")

split_pct = 0.8
 The average training rmse with GB stacking is 281.1537776646442
 The average testing rmse with GB stacking is 16440.099258378792
---
 The average training rmse with RF stacking is 275.8194708022487
 The average testing rmse with RF stacking is 14822.534454061855
 The average testing rmse with RFlast 180 days  stacking is 15064.703782683142
---
 The average training rmse with linear stacking is 1359.6128080778496
 The average testing rmse with linear stacking is 13894.758032025939
 The average testing rmse with linear stacking last 180 is 17877.47577021059


In [1388]:
print(f"split_pct = {split_pct}")
print(f" The average training rmse with GB stacking is {average_rmse_train_gb}")
print(f" The average testing rmse with GB stacking is {average_rmse_test_gb}")
print("---")
print(f" The average training rmse with RF stacking is {average_rmse_train_rf}")
print(f" The average testing rmse with RF stacking is {average_rmse_test_rf}")

print(f" The average testing rmse with RFlast 180 days  stacking is {average_rmse_test_rf_last_180}")
print("---")
print(f" The average training rmse with linear stacking is {average_rmse_train_lrgr}")
print(f" The average testing rmse with linear stacking is {average_rmse_test_lrgr}")
print(f" The average testing rmse with linear stacking last 180 is {average_rmse_test_lrgr_last_180}")

split_pct = 0.999
 The average training rmse with GB stacking is 543.2154785815712
 The average testing rmse with GB stacking is 625.7115020418714
---
 The average training rmse with RF stacking is 497.5581799718235
 The average testing rmse with RF stacking is 1006.3554999999396
 The average testing rmse with RFlast 180 days  stacking is 876.4936399999497
---
 The average training rmse with linear stacking is 3156.0739048320597
 The average testing rmse with linear stacking is 296.45511818701925
 The average testing rmse with linear stacking last 180 is 1633.7360591751494


In [1335]:
print(f"split_pct = {split_pct}")
print(f" The average training rmse with GB stacking is {average_rmse_train_gb}")
print(f" The average testing rmse with GB stacking is {average_rmse_test_gb}")
print("---")
print(f" The average training rmse with RF stacking is {average_rmse_train_rf}")
print(f" The average testing rmse with RF stacking is {average_rmse_test_rf}")

print(f" The average testing rmse with RFlast 180 days  stacking is {average_rmse_test_rf_last_180}")
print("---")
print(f" The average training rmse with linear stacking is {average_rmse_train_lrgr}")
print(f" The average testing rmse with linear stacking is {average_rmse_test_lrgr}")
print(f" The average testing rmse with linear stacking last 180 is {average_rmse_test_lrgr_last_180}")

split_pct = 0.9
 The average training rmse with GB stacking is 398.3968961593961
 The average testing rmse with GB stacking is 11809.633043687045
---
 The average training rmse with RF stacking is 426.32643578365344
 The average testing rmse with RF stacking is 12065.246682644456
 The average testing rmse with RFlast 180 days  stacking is 14480.443329239819
---
 The average training rmse with linear stacking is 2563.452663216802
 The average testing rmse with linear stacking is 7263.468865717031
 The average testing rmse with linear stacking last 180 is 7333.384577561145


In [1282]:
print(f"split_pct = {split_pct}")
print(f" The average training rmse with GB stacking is {average_rmse_train_gb}")
print(f" The average testing rmse with GB stacking is {average_rmse_test_gb}")
print("---")
print(f" The average training rmse with RF stacking is {average_rmse_train_rf}")
print(f" The average testing rmse with RF stacking is {average_rmse_test_rf}")

print(f" The average testing rmse with RFlast 180 days  stacking is {average_rmse_test_rf_last_180}")
print("---")
print(f" The average training rmse with linear stacking is {average_rmse_train_lrgr}")
print(f" The average testing rmse with linear stacking is {average_rmse_test_lrgr}")
print(f" The average testing rmse with linear stacking last 180 is {average_rmse_test_lrgr_last_180}")

split_pct = 0.98
 The average training rmse with GB stacking is 513.2638571259248
 The average testing rmse with GB stacking is 2801.4446271218635
---
 The average training rmse with RF stacking is 487.19981157068366
 The average testing rmse with RF stacking is 2382.961306560452
 The average testing rmse with RFlast 180 days  stacking is 2459.42696409997
---
 The average training rmse with linear stacking is 3138.0824024002163
 The average testing rmse with linear stacking is 3914.3291560455996
 The average testing rmse with linear stacking last 180 is 3959.8209059686365


In [1229]:
print(f"split_pct = {split_pct}")
print(f" The average training rmse with GB stacking is {average_rmse_train_gb}")
print(f" The average testing rmse with GB stacking is {average_rmse_test_gb}")
print("---")
print(f" The average training rmse with RF stacking is {average_rmse_train_rf}")
print(f" The average testing rmse with RF stacking is {average_rmse_test_rf}")

print(f" The average testing rmse with RFlast 180 days  stacking is {average_rmse_test_rf_last_180}")
print("---")
print(f" The average training rmse with linear stacking is {average_rmse_train_lrgr}")
print(f" The average testing rmse with linear stacking is {average_rmse_test_lrgr}")
print(f" The average testing rmse with linear stacking last 180 is {average_rmse_test_lrgr_last_180}")

split_pct = 0.99
 The average training rmse with GB stacking is 553.2994917776629
 The average testing rmse with GB stacking is 2093.9728076575398
---
 The average training rmse with RF stacking is 493.5938294046072
 The average testing rmse with RF stacking is 2665.69200196458
 The average testing rmse with RFlast 180 days  stacking is 2755.7830292177173
---
 The average training rmse with linear stacking is 3140.8259123810662
 The average testing rmse with linear stacking is 4229.820848517844
 The average testing rmse with linear stacking last 180 is 3116.627389423628
