# Random Forest Training

---

### Import Libraries and Dependencies

In [1]:
# Import libraries and dependencies
import pandas as pd
import numpy as np
from pathlib import Path
%matplotlib inline

import warnings
# Suppress every warning issued from this point on: no category or message
# filter is given, so the "ignore" action applies to all of them. This also
# hides deprecation warnings raised by the libraries used in later cells.
# NOTE(review): consider narrowing this to specific warning categories — confirm intent.
warnings.filterwarnings("ignore")

### Read in CSV as Pandas DataFrame

In [2]:
# Locate the trading-signals CSV (path is relative to the notebook) and load it
csv_path = Path("../Resources/trading_signals.csv")
trading_signals_df = pd.read_csv(filepath_or_buffer=csv_path)

# Preview the first five rows
trading_signals_df.head(n=5)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume,Symbol,hourly_return,fast_close,slow_close,...,vol_trend_long,vol_trend_short,vol_trend_signal,bollinger_mid_band,bollinger_std,bollinger_upper_band,bollinger_lower_band,bollinger_long,bollinger_short,bollinger_signal
0,2019-08-26 17:00:00+00:00,10324.6,10333.9,10292.7,10300.0,134.471765,BTC/USD,,10300.0,10300.0,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
1,2019-08-26 18:00:00+00:00,10300.0,10305.0,10261.0,10290.9,131.790626,BTC/USD,-0.000883,10293.933333,10295.292372,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
2,2019-08-26 19:00:00+00:00,10290.9,10325.1,10290.0,10310.0,78.29489,BTC/USD,0.001856,10303.114286,10300.538382,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
3,2019-08-26 20:00:00+00:00,10311.5,10312.8,10147.5,10262.6,354.93019,BTC/USD,-0.004597,10281.506667,10290.046097,...,0.0,-1.0,-1.0,,,,,0.0,0.0,0.0
4,2019-08-26 21:00:00+00:00,10265.4,10308.1,10189.7,10274.6,570.039335,BTC/USD,0.001169,10277.941935,10286.514507,...,0.0,-1.0,-1.0,,,,,0.0,0.0,0.0


### Set Index, Infer DateTimeFormat, and Drop Extraneous Columns

In [3]:
# Set index as datetime object and drop extraneous columns.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0, where `pd.to_datetime` already infers a consistent format by default.
# The "Timestamp" column is dropped afterwards because it duplicates the new index.
trading_signals_df = (
    trading_signals_df
    .set_index(pd.to_datetime(trading_signals_df["Timestamp"]))
    .drop(columns=["Timestamp"])
)
trading_signals_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Symbol,hourly_return,fast_close,slow_close,crossover_long,...,vol_trend_long,vol_trend_short,vol_trend_signal,bollinger_mid_band,bollinger_std,bollinger_upper_band,bollinger_lower_band,bollinger_long,bollinger_short,bollinger_signal
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-08-26 17:00:00+00:00,10324.6,10333.9,10292.7,10300.0,134.471765,BTC/USD,,10300.000000,10300.000000,0.0,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
2019-08-26 18:00:00+00:00,10300.0,10305.0,10261.0,10290.9,131.790626,BTC/USD,-0.000883,10293.933333,10295.292372,0.0,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
2019-08-26 19:00:00+00:00,10290.9,10325.1,10290.0,10310.0,78.294890,BTC/USD,0.001856,10303.114286,10300.538382,1.0,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
2019-08-26 20:00:00+00:00,10311.5,10312.8,10147.5,10262.6,354.930190,BTC/USD,-0.004597,10281.506667,10290.046097,0.0,...,0.0,-1.0,-1.0,,,,,0.0,0.0,0.0
2019-08-26 21:00:00+00:00,10265.4,10308.1,10189.7,10274.6,570.039335,BTC/USD,0.001169,10277.941935,10286.514507,0.0,...,0.0,-1.0,-1.0,,,,,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-09-25 12:00:00+00:00,8336.7,8363.9,8000.0,8305.7,1295.424881,BTC/USD,-0.003957,8336.263582,8799.354978,0.0,...,1.0,0.0,1.0,8556.905,254.811023,8811.716023,8302.093977,0.0,0.0,0.0
2019-09-25 13:00:00+00:00,8305.7,8442.5,8278.0,8388.2,459.187772,BTC/USD,0.009933,8362.231791,8771.821159,0.0,...,1.0,0.0,1.0,8502.845,139.738302,8642.583302,8363.106698,0.0,0.0,0.0
2019-09-25 14:00:00+00:00,8392.0,8478.4,8375.0,8379.0,466.040826,BTC/USD,-0.001097,8370.615896,8745.515101,0.0,...,1.0,0.0,1.0,8490.345,139.039292,8629.384292,8351.305708,0.0,0.0,0.0
2019-09-25 15:00:00+00:00,8385.4,8430.0,8294.5,8294.5,259.406669,BTC/USD,-0.010085,8332.557948,8715.311969,0.0,...,1.0,0.0,1.0,8484.235,145.003014,8629.238014,8339.231986,1.0,0.0,1.0


### Set X-Variable List and Filter to Obtain Associated Values

In [4]:
# Set x variable list of features


# Filter by x-variable list


Unnamed: 0_level_0,crossover_signal,vol_trend_signal,bollinger_signal
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-09-25 12:00:00+00:00,-1.0,1.0,0.0
2019-09-25 13:00:00+00:00,-1.0,1.0,0.0
2019-09-25 14:00:00+00:00,-1.0,1.0,0.0
2019-09-25 15:00:00+00:00,-1.0,1.0,1.0
2019-09-25 16:00:00+00:00,-1.0,1.0,1.0


### Shift the DataFrame Feature Values by 1 (Index Unchanged)

In [5]:
# Shift DataFrame values by 1


Unnamed: 0_level_0,crossover_signal,vol_trend_signal,bollinger_signal
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-09-25 12:00:00+00:00,-1.0,1.0,0.0
2019-09-25 13:00:00+00:00,-1.0,1.0,0.0
2019-09-25 14:00:00+00:00,-1.0,1.0,0.0
2019-09-25 15:00:00+00:00,-1.0,1.0,0.0
2019-09-25 16:00:00+00:00,-1.0,1.0,1.0


### Drop NAs and Replace Infs (Positive/Negative Infinity) 

In [6]:
# Drop NAs


# Replace positive/negative infinity values


# Display sample data


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Symbol,hourly_return,fast_close,slow_close,crossover_long,...,vol_trend_long,vol_trend_short,vol_trend_signal,bollinger_mid_band,bollinger_std,bollinger_upper_band,bollinger_lower_band,bollinger_long,bollinger_short,bollinger_signal
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-08-26 18:00:00+00:00,10300.0,10305.0,10261.0,10290.9,131.790626,BTC/USD,-0.000883,10293.933333,10295.292372,0.0,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
2019-08-26 19:00:00+00:00,10290.9,10325.1,10290.0,10310.0,78.29489,BTC/USD,0.001856,10303.114286,10300.538382,1.0,...,0.0,0.0,0.0,,,,,0.0,0.0,0.0
2019-08-26 20:00:00+00:00,10311.5,10312.8,10147.5,10262.6,354.93019,BTC/USD,-0.004597,10281.506667,10290.046097,0.0,...,0.0,-1.0,0.0,,,,,0.0,0.0,0.0
2019-08-26 21:00:00+00:00,10265.4,10308.1,10189.7,10274.6,570.039335,BTC/USD,0.001169,10277.941935,10286.514507,0.0,...,0.0,-1.0,-1.0,,,,,0.0,0.0,0.0
2019-08-26 22:00:00+00:00,10274.6,10345.1,10274.6,10303.0,176.130751,BTC/USD,0.002764,10290.669841,10289.759171,1.0,...,0.0,-1.0,-1.0,,,,,0.0,0.0,0.0


### Construct the Dependent Variable

In [7]:
# Construct the dependent variable: 1 if the hourly return is greater than 0, otherwise 0.


# Display sample data


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Symbol,hourly_return,fast_close,slow_close,crossover_long,...,vol_trend_short,vol_trend_signal,bollinger_mid_band,bollinger_std,bollinger_upper_band,bollinger_lower_band,bollinger_long,bollinger_short,bollinger_signal,Positive Return
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-08-26 18:00:00+00:00,10300.0,10305.0,10261.0,10290.9,131.790626,BTC/USD,-0.000883,10293.933333,10295.292372,0.0,...,0.0,0.0,,,,,0.0,0.0,0.0,0.0
2019-08-26 19:00:00+00:00,10290.9,10325.1,10290.0,10310.0,78.294890,BTC/USD,0.001856,10303.114286,10300.538382,1.0,...,0.0,0.0,,,,,0.0,0.0,0.0,1.0
2019-08-26 20:00:00+00:00,10311.5,10312.8,10147.5,10262.6,354.930190,BTC/USD,-0.004597,10281.506667,10290.046097,0.0,...,-1.0,0.0,,,,,0.0,0.0,0.0,0.0
2019-08-26 21:00:00+00:00,10265.4,10308.1,10189.7,10274.6,570.039335,BTC/USD,0.001169,10277.941935,10286.514507,0.0,...,-1.0,-1.0,,,,,0.0,0.0,0.0,1.0
2019-08-26 22:00:00+00:00,10274.6,10345.1,10274.6,10303.0,176.130751,BTC/USD,0.002764,10290.669841,10289.759171,1.0,...,-1.0,-1.0,,,,,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-09-25 12:00:00+00:00,8336.7,8363.9,8000.0,8305.7,1295.424881,BTC/USD,-0.003957,8336.263582,8799.354978,0.0,...,0.0,1.0,8556.905,254.811023,8811.716023,8302.093977,0.0,0.0,0.0,0.0
2019-09-25 13:00:00+00:00,8305.7,8442.5,8278.0,8388.2,459.187772,BTC/USD,0.009933,8362.231791,8771.821159,0.0,...,0.0,1.0,8502.845,139.738302,8642.583302,8363.106698,0.0,0.0,0.0,1.0
2019-09-25 14:00:00+00:00,8392.0,8478.4,8375.0,8379.0,466.040826,BTC/USD,-0.001097,8370.615896,8745.515101,0.0,...,0.0,1.0,8490.345,139.039292,8629.384292,8351.305708,0.0,0.0,0.0,0.0
2019-09-25 15:00:00+00:00,8385.4,8430.0,8294.5,8294.5,259.406669,BTC/USD,-0.010085,8332.557948,8715.311969,0.0,...,0.0,1.0,8484.235,145.003014,8629.238014,8339.231986,1.0,0.0,0.0,0.0


### Assign Training and Testing Windows

In [8]:
# Construct training start and end dates
# Training opens at the earliest timestamp in the data and closes on a fixed split date.
# Dates are kept as "YYYY-MM-DD" strings.
training_start = trading_signals_df.index.min().strftime("%Y-%m-%d")
training_end = "2019-09-14"

# Construct testing start and end dates
# Testing begins the day after the training window ends and runs through the
# latest timestamp in the data.
testing_start = "2019-09-15"
testing_end = trading_signals_df.index.max().strftime("%Y-%m-%d")

# Print training and testing start/end dates
print(f"Training Start: {training_start}")
print(f"Training End: {training_end}")
print(f"Testing Start: {testing_start}")
print(f"Testing End: {testing_end}")

Training Start: 2019-08-26
Training End: 2019-09-14
Testing Start: 2019-09-15
Testing End: 2019-09-25


### Separate X and y Training Datasets

In [9]:
# Construct the X_train and y_train datasets


# Display sample data


Unnamed: 0_level_0,crossover_signal,vol_trend_signal,bollinger_signal
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-09-14 19:00:00+00:00,1.0,1.0,-1.0
2019-09-14 20:00:00+00:00,1.0,1.0,-1.0
2019-09-14 21:00:00+00:00,1.0,1.0,-1.0
2019-09-14 22:00:00+00:00,1.0,1.0,-1.0
2019-09-14 23:00:00+00:00,1.0,1.0,0.0


Timestamp
2019-09-14 19:00:00+00:00    0.0
2019-09-14 20:00:00+00:00    1.0
2019-09-14 21:00:00+00:00    0.0
2019-09-14 22:00:00+00:00    0.0
2019-09-14 23:00:00+00:00    1.0
Name: Positive Return, dtype: float64

### Separate X and y Testing Datasets

In [11]:
# Construct the X_test and y_test datasets


# Display sample data


Unnamed: 0_level_0,crossover_signal,vol_trend_signal,bollinger_signal
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-09-25 12:00:00+00:00,-1.0,1.0,0.0
2019-09-25 13:00:00+00:00,-1.0,1.0,0.0
2019-09-25 14:00:00+00:00,-1.0,1.0,0.0
2019-09-25 15:00:00+00:00,-1.0,1.0,0.0
2019-09-25 16:00:00+00:00,-1.0,1.0,1.0


Timestamp
2019-09-25 12:00:00+00:00    0.0
2019-09-25 13:00:00+00:00    1.0
2019-09-25 14:00:00+00:00    0.0
2019-09-25 15:00:00+00:00    0.0
2019-09-25 16:00:00+00:00    1.0
Name: Positive Return, dtype: float64

### Import SKLearn Library and Classes

In [12]:
# Import sklearn required libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

### Train Random Forest Model 

In [14]:
# Fit a SKLearn random forest using just the training set (X_train, y_train):


# Make a prediction of "y" values from the X_test dataset


# Assemble actual y data (y_test) with predicted y data (from just above) into two columns in a DataFrame


# Display sample data


Unnamed: 0_level_0,Positive Return,Predicted Value
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-09-15 00:00:00+00:00,0.0,0.0
2019-09-15 01:00:00+00:00,1.0,0.0
2019-09-15 02:00:00+00:00,0.0,0.0
2019-09-15 03:00:00+00:00,0.0,0.0
2019-09-15 04:00:00+00:00,1.0,1.0
...,...,...
2019-09-25 12:00:00+00:00,0.0,1.0
2019-09-25 13:00:00+00:00,1.0,1.0
2019-09-25 14:00:00+00:00,0.0,1.0
2019-09-25 15:00:00+00:00,0.0,1.0


### Save Pre-Trained Model Using Joblib

In [15]:
# Save the pre-trained model
from joblib import dump, load


['random_forest_model.joblib']