In [1]:
# Imports
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
from imblearn.metrics import classification_report_imbalanced
import keras.metrics

In [2]:
# Import the BITCOIN csv into the notebook
btc_df = pd.read_csv(
    Path("./Resources/bitcoin.csv"),
    index_col = 'Date',
    infer_datetime_format=True, 
    parse_dates=True
)

# Review the DataFrame
btc_df.head()

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,STOCHk_14_3_3,STOCHd_14_3_3,MACD_Signal,RSI_Signal,Stoch_Signal,custom_signal,MACD_Entry/Exit,stoch_diff,Stoch_Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2021-04-14 15:00:00+00:00,63200.328125,63143.21875,63459.949219,560734208,62603.304688,352.438978,-227.296078,579.735056,47.549614,30.00574,45.569075,-1.0,0.0,-1.0,0,,-15.563335,
2021-04-14 16:00:00+00:00,63636.90625,63171.070312,63826.519531,0,62987.664062,316.196716,-210.830672,527.027388,52.571302,29.383467,33.556675,-1.0,0.0,-1.0,0,0.0,-4.173208,0.0
2021-04-14 17:00:00+00:00,62808.371094,63655.71875,63854.359375,0,62728.757812,218.104428,-247.138368,465.242796,43.967897,27.077575,28.822261,-1.0,0.0,-1.0,0,0.0,-1.744686,0.0
2021-04-14 18:00:00+00:00,62198.378906,62545.9375,62730.660156,623951872,61868.546875,90.105629,-300.109733,390.215362,38.918127,21.942582,26.134541,-1.0,0.0,-1.0,0,0.0,-4.191959,0.0
2021-04-14 19:00:00+00:00,62244.957031,62217.371094,62557.453125,1172676608,61554.796875,-7.489541,-318.163923,310.674382,39.489616,13.650148,20.890102,-1.0,0.0,-1.0,0,0.0,-7.239954,0.0


In [3]:
btc_df.dtypes

Close               float64
Open                float64
High                float64
Volume                int64
Low                 float64
MACD_12_26_9        float64
MACDh_12_26_9       float64
MACDs_12_26_9       float64
RSI_14              float64
STOCHk_14_3_3       float64
STOCHd_14_3_3       float64
MACD_Signal         float64
RSI_Signal          float64
Stoch_Signal        float64
custom_signal         int64
MACD_Entry/Exit     float64
stoch_diff          float64
Stoch_Entry/Exit    float64
dtype: object

In [4]:
# Create a list of categorical variables 
#categorical_variables = list(stoch_df.dtypes[stoch_df.dtypes == "object"].index)
categorical_variables = list(btc_df[['custom_signal']])
# Display the categorical variables list
display(categorical_variables[0:6])

['custom_signal']

In [5]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)
    # sparse = False, results in an array
    # sparse = True (default), results in a sparse matrix

In [6]:
# Encode the categorcal variables using OneHotEncoder
encoded_data = enc.fit_transform(btc_df[categorical_variables])
encoded_data[0:1]

array([[0., 1., 0.]])

In [7]:
# Create a DataFrame with the encoded variables
encoded_df = pd.DataFrame(
    encoded_data,
    columns = enc.get_feature_names(categorical_variables)
        # function gathers column names and assigns them to the new DataFrame
)

# set index of encoded_df
encoded_df.set_index(btc_df.index, inplace=True)

# Review the DataFrame
encoded_df.head()



Unnamed: 0_level_0,custom_signal_-2,custom_signal_0,custom_signal_2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-04-14 15:00:00+00:00,0.0,1.0,0.0
2021-04-14 16:00:00+00:00,0.0,1.0,0.0
2021-04-14 17:00:00+00:00,0.0,1.0,0.0
2021-04-14 18:00:00+00:00,0.0,1.0,0.0
2021-04-14 19:00:00+00:00,0.0,1.0,0.0


In [8]:
#encoded_df.drop(columns=['Stoch_Entry/Exit_nan','MACD_Entry/Exit_nan'],inplace=True)

# Review the DataFrame
#encoded_df.head()

In [9]:
# Add the numerical variables from the original DataFrame to the one-hot encoding DataFrame
side_numeric = btc_df.drop(columns=['custom_signal'])

# Review the DataFrame
side_numeric.head()

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,STOCHk_14_3_3,STOCHd_14_3_3,MACD_Signal,RSI_Signal,Stoch_Signal,MACD_Entry/Exit,stoch_diff,Stoch_Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-04-14 15:00:00+00:00,63200.328125,63143.21875,63459.949219,560734208,62603.304688,352.438978,-227.296078,579.735056,47.549614,30.00574,45.569075,-1.0,0.0,-1.0,,-15.563335,
2021-04-14 16:00:00+00:00,63636.90625,63171.070312,63826.519531,0,62987.664062,316.196716,-210.830672,527.027388,52.571302,29.383467,33.556675,-1.0,0.0,-1.0,0.0,-4.173208,0.0
2021-04-14 17:00:00+00:00,62808.371094,63655.71875,63854.359375,0,62728.757812,218.104428,-247.138368,465.242796,43.967897,27.077575,28.822261,-1.0,0.0,-1.0,0.0,-1.744686,0.0
2021-04-14 18:00:00+00:00,62198.378906,62545.9375,62730.660156,623951872,61868.546875,90.105629,-300.109733,390.215362,38.918127,21.942582,26.134541,-1.0,0.0,-1.0,0.0,-4.191959,0.0
2021-04-14 19:00:00+00:00,62244.957031,62217.371094,62557.453125,1172676608,61554.796875,-7.489541,-318.163923,310.674382,39.489616,13.650148,20.890102,-1.0,0.0,-1.0,0.0,-7.239954,0.0


In [10]:
btc_ohe_df = pd.concat([encoded_df,side_numeric],axis=1)

# Review the number of columns
len(btc_ohe_df.columns)

20

In [11]:
X = btc_ohe_df.drop(columns=['custom_signal_-2','custom_signal_0','custom_signal_2'])

# Review the number of columns
len(X.columns)

17

In [12]:
# Create the target set selecting the Signal column and assiging it to y
y = btc_ohe_df[['custom_signal_-2','custom_signal_0','custom_signal_2']]

# Review the number of columns
len(y.columns)

3

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 1)
    # test_size=x
# Review the DataFrame
X_test.head()

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,STOCHk_14_3_3,STOCHd_14_3_3,MACD_Signal,RSI_Signal,Stoch_Signal,MACD_Entry/Exit,stoch_diff,Stoch_Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-07-26 06:00:00+00:00,38631.367188,38577.894531,39163.28125,1277196288,38507.214844,1164.25447,397.270048,766.984421,88.159784,90.37865,88.62873,1.0,-1.0,1.0,0.0,1.74992,0.0
2021-07-02 03:00:00+00:00,32898.644531,33273.632812,33273.632812,0,32811.226562,-275.076243,18.464916,-293.541159,36.796906,26.477782,47.689659,1.0,0.0,-1.0,0.0,-21.211876,0.0
2022-04-07 05:00:00+00:00,43406.117188,43428.773438,43438.523438,191303680,43280.898438,-526.234586,21.757158,-547.991744,31.657352,34.886151,29.748615,1.0,0.0,1.0,0.0,5.137536,0.0
2022-03-12 20:00:00+00:00,39155.347656,39047.027344,39183.449219,0,39018.796875,-5.338445,6.519041,-11.857486,51.885047,50.797374,50.911851,1.0,0.0,-1.0,0.0,-0.114476,0.0
2021-12-01 10:00:00+00:00,57071.1875,57164.050781,57245.332031,0,56958.078125,-78.125308,-48.989652,-29.135656,47.385186,24.480059,25.08122,-1.0,0.0,-1.0,0.0,-0.601161,-2.0


In [14]:
# Select the start of the training period
training_begin = X.index.min() + DateOffset(hours=1)

# Display the training begin date
print(training_begin)

2021-04-14 16:00:00+00:00


In [15]:
# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=3)
    # Keep training less than 50% of total DataFrame

# Display the training end date
print(training_end)

2021-07-14 15:00:00+00:00


In [16]:
# Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Review the X_train DataFrame
X_train.head()

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,STOCHk_14_3_3,STOCHd_14_3_3,MACD_Signal,RSI_Signal,Stoch_Signal,MACD_Entry/Exit,stoch_diff,Stoch_Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-04-14 16:00:00+00:00,63636.90625,63171.070312,63826.519531,0,62987.664062,316.196716,-210.830672,527.027388,52.571302,29.383467,33.556675,-1.0,0.0,-1.0,0.0,-4.173208,0.0
2021-04-14 17:00:00+00:00,62808.371094,63655.71875,63854.359375,0,62728.757812,218.104428,-247.138368,465.242796,43.967897,27.077575,28.822261,-1.0,0.0,-1.0,0.0,-1.744686,0.0
2021-04-14 18:00:00+00:00,62198.378906,62545.9375,62730.660156,623951872,61868.546875,90.105629,-300.109733,390.215362,38.918127,21.942582,26.134541,-1.0,0.0,-1.0,0.0,-4.191959,0.0
2021-04-14 19:00:00+00:00,62244.957031,62217.371094,62557.453125,1172676608,61554.796875,-7.489541,-318.163923,310.674382,39.489616,13.650148,20.890102,-1.0,0.0,-1.0,0.0,-7.239954,0.0
2021-04-14 20:00:00+00:00,62485.046875,62266.152344,62549.261719,488300544,62022.703125,-64.715176,-300.311646,235.59647,42.47715,19.998181,18.530303,-1.0,0.0,1.0,0.0,1.467877,2.0


In [17]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end+DateOffset(hours=1):]
y_test = y.loc[training_end+DateOffset(hours=1):]

# Review the X_test DataFrame
X_test.head()

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,STOCHk_14_3_3,STOCHd_14_3_3,MACD_Signal,RSI_Signal,Stoch_Signal,MACD_Entry/Exit,stoch_diff,Stoch_Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-07-14 16:00:00+00:00,32843.324219,32792.773438,32902.371094,0,32737.091797,-3.165757,124.400933,-127.56669,57.212453,91.708334,89.760162,1.0,0.0,1.0,0.0,1.948172,0.0
2021-07-14 17:00:00+00:00,32858.398438,32847.039062,32910.703125,0,32799.476562,23.02393,120.472496,-97.448566,57.520856,91.777837,92.402362,1.0,0.0,-1.0,0.0,-0.624525,-2.0
2021-07-14 18:00:00+00:00,32755.757812,32845.46875,33061.398438,105054208,32737.658203,35.092678,106.032995,-70.940317,54.63332,88.12486,90.53701,1.0,0.0,-1.0,0.0,-2.41215,0.0
2021-07-14 19:00:00+00:00,32734.552734,32753.484375,32789.386719,139253760,32621.011719,42.45676,90.717662,-48.260902,54.02987,83.013345,87.638681,1.0,0.0,-1.0,0.0,-4.625336,0.0
2021-07-14 20:00:00+00:00,32819.949219,32734.529297,32847.5,0,32724.105469,54.554757,82.252527,-27.69777,56.131338,79.517873,83.552026,1.0,0.0,-1.0,0.0,-4.034153,0.0


In [18]:
# Scale the features DataFrames
# Create a StandardScaler instance
scaler = StandardScaler()

# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)

## Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [19]:
# MACD Neural Network
# Define the the number of inputs (features) to the model
number_input_features = len(X_train.iloc[0])

# Review the number of features
number_input_features

17

In [20]:
# Define the number of neurons in the output layer
number_output_neurons = 3
    # equal to one because we only have one target, y.
    # should be equal to the number of target columns we are trying to predict.

In [21]:
n = '\n'

# Define the number of hidden nodes for the first hidden layer and second layer
hidden_nodes_layer1 =  (number_input_features + number_output_neurons) //2
hidden_nodes_layer2 = (hidden_nodes_layer1 + number_output_neurons) //2
hidden_nodes_layer3 = (hidden_nodes_layer2 + number_output_neurons) //2 
hidden_nodes_layer4 = (hidden_nodes_layer3 + number_output_neurons) //2 
hidden_nodes_layer5 = (hidden_nodes_layer4 + number_output_neurons) //2 

# Review the number hidden nodes in the first and second layer
print(f'# of neurons in the first hidden layer: {hidden_nodes_layer1}{n}# of neurons in the second hidden layer: {hidden_nodes_layer2}{n}# of neurons in the third hidden layer: {hidden_nodes_layer3}'
    f'{n}# of neurons in the fourth hidden layer: {hidden_nodes_layer4}{n}# of neurons in the fifth hidden layer: {hidden_nodes_layer5}{n}')

# of neurons in the first hidden layer: 10
# of neurons in the second hidden layer: 6
# of neurons in the third hidden layer: 4
# of neurons in the fourth hidden layer: 3
# of neurons in the fifth hidden layer: 3



In [22]:
# Create the Sequential model instance
nn = Sequential()

In [23]:
# Add the first hidden layer
nn.add(Dense(units=hidden_nodes_layer1,input_dim=number_input_features,activation='relu'))

In [24]:
# Add the second hidden layer
nn.add(Dense(units=hidden_nodes_layer2,activation='relu'))

In [25]:
# Add the third hidden layer
nn.add(Dense(units=hidden_nodes_layer3,activation='relu'))

In [26]:
# Add the fourth hidden layer
#nn.add(Dense(units=hidden_nodes_layer4,activation='relu'))

In [27]:
# Add the fifth hidden layer
#nn.add(Dense(units=hidden_nodes_layer5,activation='relu'))

In [28]:
# Add the output layer to the model specifying the number of output neurons and activation function
nn.add(Dense(units=number_output_neurons,activation='softmax'))

In [29]:
# Display the Sequential model summary
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                180       
                                                                 
 dense_1 (Dense)             (None, 6)                 66        
                                                                 
 dense_2 (Dense)             (None, 4)                 28        
                                                                 
 dense_3 (Dense)             (None, 3)                 15        
                                                                 
Total params: 289
Trainable params: 289
Non-trainable params: 0
_________________________________________________________________


In [30]:
# Compile the Sequential model
nn.compile(loss=keras.losses.CategoricalCrossentropy(), optimizer='adam', metrics=[keras.metrics.CategoricalAccuracy()])

    # metric exploration

In [31]:
# Fit the model using 50 epochs and the training data
nn.fit(X_train_scaled,y_train,epochs=100, verbose=3)
    # make sure to use X_train_scaled rather than X_train
    # verbose=3, reduces the graphics displayed per epoch. in turn this increases the overall speed of the epochs.

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1f7b6f092c8>

In [32]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the test data
nn_btc_keras= nn.evaluate(X_test_scaled,y_test, verbose=3)

# Display the model loss and accuracy results
abc = print(f"KERAS: {nn_btc_keras}")

KERAS: [0.004160259384661913, 0.9989104866981506]


In [33]:
# Loss: 0.2134855091571808, Accuracy: 0.9853846430778503
# # of neurons in the first hidden layer: 14
# of neurons in the second hidden layer: 8
# of neurons in the third hidden layer: 5
# of neurons in the fourth hidden layer: 4
# of neurons in the fifth hidden layer: 3
# relu activations functions with softmax for the output layer
# loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy, mse']
# 3 output neurons

In [34]:
btc_df.head()

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,STOCHk_14_3_3,STOCHd_14_3_3,MACD_Signal,RSI_Signal,Stoch_Signal,custom_signal,MACD_Entry/Exit,stoch_diff,Stoch_Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2021-04-14 15:00:00+00:00,63200.328125,63143.21875,63459.949219,560734208,62603.304688,352.438978,-227.296078,579.735056,47.549614,30.00574,45.569075,-1.0,0.0,-1.0,0,,-15.563335,
2021-04-14 16:00:00+00:00,63636.90625,63171.070312,63826.519531,0,62987.664062,316.196716,-210.830672,527.027388,52.571302,29.383467,33.556675,-1.0,0.0,-1.0,0,0.0,-4.173208,0.0
2021-04-14 17:00:00+00:00,62808.371094,63655.71875,63854.359375,0,62728.757812,218.104428,-247.138368,465.242796,43.967897,27.077575,28.822261,-1.0,0.0,-1.0,0,0.0,-1.744686,0.0
2021-04-14 18:00:00+00:00,62198.378906,62545.9375,62730.660156,623951872,61868.546875,90.105629,-300.109733,390.215362,38.918127,21.942582,26.134541,-1.0,0.0,-1.0,0,0.0,-4.191959,0.0
2021-04-14 19:00:00+00:00,62244.957031,62217.371094,62557.453125,1172676608,61554.796875,-7.489541,-318.163923,310.674382,39.489616,13.650148,20.890102,-1.0,0.0,-1.0,0,0.0,-7.239954,0.0


In [35]:
btc_df.dtypes

Close               float64
Open                float64
High                float64
Volume                int64
Low                 float64
MACD_12_26_9        float64
MACDh_12_26_9       float64
MACDs_12_26_9       float64
RSI_14              float64
STOCHk_14_3_3       float64
STOCHd_14_3_3       float64
MACD_Signal         float64
RSI_Signal          float64
Stoch_Signal        float64
custom_signal         int64
MACD_Entry/Exit     float64
stoch_diff          float64
Stoch_Entry/Exit    float64
dtype: object

In [36]:
categorical_variables = list(btc_df[['MACD_Signal','RSI_Signal','Stoch_Signal','MACD_Entry/Exit','Stoch_Entry/Exit']])
categorical_variables

['MACD_Signal',
 'RSI_Signal',
 'Stoch_Signal',
 'MACD_Entry/Exit',
 'Stoch_Entry/Exit']

In [37]:
# Encode the categorcal variables using OneHotEncoder
encoded_data = enc.fit_transform(btc_df[categorical_variables])
encoded_data[0:5]

array([[1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1.],
       [1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0.],
       [1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0.],
       [1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0.],
       [1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0.]])

In [38]:
# Create a DataFrame with the encoded variables
encoded_df = pd.DataFrame(
    encoded_data,
    columns = enc.get_feature_names(categorical_variables)
        # function gathers column names and assigns them to the new DataFrame
)

# set index of encoded_df
encoded_df.set_index(btc_df.index, inplace=True)

# Review the DataFrame
encoded_df.head()



Unnamed: 0_level_0,MACD_Signal_-1.0,MACD_Signal_1.0,RSI_Signal_-1.0,RSI_Signal_0.0,RSI_Signal_1.0,Stoch_Signal_-1.0,Stoch_Signal_1.0,MACD_Entry/Exit_-2.0,MACD_Entry/Exit_0.0,MACD_Entry/Exit_2.0,MACD_Entry/Exit_nan,Stoch_Entry/Exit_-2.0,Stoch_Entry/Exit_0.0,Stoch_Entry/Exit_2.0,Stoch_Entry/Exit_nan
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2021-04-14 15:00:00+00:00,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
2021-04-14 16:00:00+00:00,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
2021-04-14 17:00:00+00:00,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
2021-04-14 18:00:00+00:00,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
2021-04-14 19:00:00+00:00,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


In [39]:
# Add the numerical variables from the original DataFrame to the one-hot encoding DataFrame
#side_numeric = btc_df.drop(columns=['MACD_Signal_-1.0','MACD_Signal_1.0','RSI_Signal_-1.0','RSI_Signal_0.0','RSI_Signal_1.0','Stoch_Signal_-1.0','Stoch_Signal_1.0','','','','',''])
#side_numeric.head()

In [40]:
#btc_df = pd.concat([encoded_df,side_numeric],axis=1)
#btc_df.head()

In [41]:
btc_df

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,STOCHk_14_3_3,STOCHd_14_3_3,MACD_Signal,RSI_Signal,Stoch_Signal,custom_signal,MACD_Entry/Exit,stoch_diff,Stoch_Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2021-04-14 15:00:00+00:00,63200.328125,63143.218750,63459.949219,560734208,62603.304688,352.438978,-227.296078,579.735056,47.549614,30.005740,45.569075,-1.0,0.0,-1.0,0,,-15.563335,
2021-04-14 16:00:00+00:00,63636.906250,63171.070312,63826.519531,0,62987.664062,316.196716,-210.830672,527.027388,52.571302,29.383467,33.556675,-1.0,0.0,-1.0,0,0.0,-4.173208,0.0
2021-04-14 17:00:00+00:00,62808.371094,63655.718750,63854.359375,0,62728.757812,218.104428,-247.138368,465.242796,43.967897,27.077575,28.822261,-1.0,0.0,-1.0,0,0.0,-1.744686,0.0
2021-04-14 18:00:00+00:00,62198.378906,62545.937500,62730.660156,623951872,61868.546875,90.105629,-300.109733,390.215362,38.918127,21.942582,26.134541,-1.0,0.0,-1.0,0,0.0,-4.191959,0.0
2021-04-14 19:00:00+00:00,62244.957031,62217.371094,62557.453125,1172676608,61554.796875,-7.489541,-318.163923,310.674382,39.489616,13.650148,20.890102,-1.0,0.0,-1.0,0,0.0,-7.239954,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-13 02:00:00+00:00,40076.019531,39900.718750,40116.257812,94822400,39790.132812,-152.940047,51.444522,-204.384569,48.955389,48.301882,48.923804,1.0,0.0,-1.0,0,0.0,-0.621922,-2.0
2022-04-13 03:00:00+00:00,40110.710938,40065.468750,40233.812500,19312640,40057.527344,-129.417894,59.973340,-189.391234,49.669797,55.028137,51.088304,1.0,0.0,1.0,0,0.0,3.939834,2.0
2022-04-13 04:00:00+00:00,40107.257812,40110.140625,40125.730469,0,39983.550781,-109.789471,63.681410,-173.470881,49.595390,66.498858,56.609626,1.0,0.0,1.0,0,0.0,9.889232,0.0
2022-04-13 05:00:00+00:00,40135.281250,40136.691406,40153.757812,17272832,40110.160156,-90.924428,66.037163,-156.961591,50.246772,74.238791,65.255262,1.0,0.0,1.0,0,0.0,8.983529,0.0


In [42]:
X = btc_df.drop(columns=['custom_signal'])

X.head()

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,STOCHk_14_3_3,STOCHd_14_3_3,MACD_Signal,RSI_Signal,Stoch_Signal,MACD_Entry/Exit,stoch_diff,Stoch_Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-04-14 15:00:00+00:00,63200.328125,63143.21875,63459.949219,560734208,62603.304688,352.438978,-227.296078,579.735056,47.549614,30.00574,45.569075,-1.0,0.0,-1.0,,-15.563335,
2021-04-14 16:00:00+00:00,63636.90625,63171.070312,63826.519531,0,62987.664062,316.196716,-210.830672,527.027388,52.571302,29.383467,33.556675,-1.0,0.0,-1.0,0.0,-4.173208,0.0
2021-04-14 17:00:00+00:00,62808.371094,63655.71875,63854.359375,0,62728.757812,218.104428,-247.138368,465.242796,43.967897,27.077575,28.822261,-1.0,0.0,-1.0,0.0,-1.744686,0.0
2021-04-14 18:00:00+00:00,62198.378906,62545.9375,62730.660156,623951872,61868.546875,90.105629,-300.109733,390.215362,38.918127,21.942582,26.134541,-1.0,0.0,-1.0,0.0,-4.191959,0.0
2021-04-14 19:00:00+00:00,62244.957031,62217.371094,62557.453125,1172676608,61554.796875,-7.489541,-318.163923,310.674382,39.489616,13.650148,20.890102,-1.0,0.0,-1.0,0.0,-7.239954,0.0


In [43]:
# Create the target set selecting the Signal column and assiging it to y
y = btc_df['custom_signal']

y

Date
2021-04-14 15:00:00+00:00    0
2021-04-14 16:00:00+00:00    0
2021-04-14 17:00:00+00:00    0
2021-04-14 18:00:00+00:00    0
2021-04-14 19:00:00+00:00    0
                            ..
2022-04-13 02:00:00+00:00    0
2022-04-13 03:00:00+00:00    0
2022-04-13 04:00:00+00:00    0
2022-04-13 05:00:00+00:00    0
2022-04-13 05:37:00+00:00    0
Name: custom_signal, Length: 8520, dtype: int64

In [44]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 1)
    # test_size=x
len(X_test)

2130

In [45]:
y_train.value_counts()

 0    6284
-2      57
 2      49
Name: custom_signal, dtype: int64

In [46]:
# Select the start of the training period
training_begin = X.index.min() + DateOffset(hours=1)

# Display the training begin date
print(training_begin)

2021-04-14 16:00:00+00:00


In [47]:
# Select the ending period for the training data with an offset of 3 months
training_end = X.index.min() + DateOffset(months=3)
    # Keep training less than 50% of total DataFrame

# Display the training end date
print(training_end)

2021-07-14 15:00:00+00:00


In [48]:
# # Generate the X_train and y_train DataFrames
X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Review the X_train DataFrame
X_train.head()

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,STOCHk_14_3_3,STOCHd_14_3_3,MACD_Signal,RSI_Signal,Stoch_Signal,MACD_Entry/Exit,stoch_diff,Stoch_Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-04-14 16:00:00+00:00,63636.90625,63171.070312,63826.519531,0,62987.664062,316.196716,-210.830672,527.027388,52.571302,29.383467,33.556675,-1.0,0.0,-1.0,0.0,-4.173208,0.0
2021-04-14 17:00:00+00:00,62808.371094,63655.71875,63854.359375,0,62728.757812,218.104428,-247.138368,465.242796,43.967897,27.077575,28.822261,-1.0,0.0,-1.0,0.0,-1.744686,0.0
2021-04-14 18:00:00+00:00,62198.378906,62545.9375,62730.660156,623951872,61868.546875,90.105629,-300.109733,390.215362,38.918127,21.942582,26.134541,-1.0,0.0,-1.0,0.0,-4.191959,0.0
2021-04-14 19:00:00+00:00,62244.957031,62217.371094,62557.453125,1172676608,61554.796875,-7.489541,-318.163923,310.674382,39.489616,13.650148,20.890102,-1.0,0.0,-1.0,0.0,-7.239954,0.0
2021-04-14 20:00:00+00:00,62485.046875,62266.152344,62549.261719,488300544,62022.703125,-64.715176,-300.311646,235.59647,42.47715,19.998181,18.530303,-1.0,0.0,1.0,0.0,1.467877,2.0


In [49]:
# Generate the X_test and y_test DataFrames
X_test = X.loc[training_end+DateOffset(hours=1):]
y_test = y.loc[training_end+DateOffset(hours=1):]

# Review the X_test DataFrame
display(X_test.head())
display(X_test.tail())
#     # NOT SURE IF DATEOFFSET IS NECESSARY FOR X/Y TEST

Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,STOCHk_14_3_3,STOCHd_14_3_3,MACD_Signal,RSI_Signal,Stoch_Signal,MACD_Entry/Exit,stoch_diff,Stoch_Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-07-14 16:00:00+00:00,32843.324219,32792.773438,32902.371094,0,32737.091797,-3.165757,124.400933,-127.56669,57.212453,91.708334,89.760162,1.0,0.0,1.0,0.0,1.948172,0.0
2021-07-14 17:00:00+00:00,32858.398438,32847.039062,32910.703125,0,32799.476562,23.02393,120.472496,-97.448566,57.520856,91.777837,92.402362,1.0,0.0,-1.0,0.0,-0.624525,-2.0
2021-07-14 18:00:00+00:00,32755.757812,32845.46875,33061.398438,105054208,32737.658203,35.092678,106.032995,-70.940317,54.63332,88.12486,90.53701,1.0,0.0,-1.0,0.0,-2.41215,0.0
2021-07-14 19:00:00+00:00,32734.552734,32753.484375,32789.386719,139253760,32621.011719,42.45676,90.717662,-48.260902,54.02987,83.013345,87.638681,1.0,0.0,-1.0,0.0,-4.625336,0.0
2021-07-14 20:00:00+00:00,32819.949219,32734.529297,32847.5,0,32724.105469,54.554757,82.252527,-27.69777,56.131338,79.517873,83.552026,1.0,0.0,-1.0,0.0,-4.034153,0.0


Unnamed: 0_level_0,Close,Open,High,Volume,Low,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,STOCHk_14_3_3,STOCHd_14_3_3,MACD_Signal,RSI_Signal,Stoch_Signal,MACD_Entry/Exit,stoch_diff,Stoch_Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2022-04-13 02:00:00+00:00,40076.019531,39900.71875,40116.257812,94822400,39790.132812,-152.940047,51.444522,-204.384569,48.955389,48.301882,48.923804,1.0,0.0,-1.0,0.0,-0.621922,-2.0
2022-04-13 03:00:00+00:00,40110.710938,40065.46875,40233.8125,19312640,40057.527344,-129.417894,59.97334,-189.391234,49.669797,55.028137,51.088304,1.0,0.0,1.0,0.0,3.939834,2.0
2022-04-13 04:00:00+00:00,40107.257812,40110.140625,40125.730469,0,39983.550781,-109.789471,63.68141,-173.470881,49.59539,66.498858,56.609626,1.0,0.0,1.0,0.0,9.889232,0.0
2022-04-13 05:00:00+00:00,40135.28125,40136.691406,40153.757812,17272832,40110.160156,-90.924428,66.037163,-156.961591,50.246772,74.238791,65.255262,1.0,0.0,1.0,0.0,8.983529,0.0
2022-04-13 05:37:00+00:00,40169.285156,40169.285156,40169.285156,0,40169.285156,-72.395385,67.652965,-140.048349,51.073012,81.9476,74.228416,1.0,0.0,1.0,0.0,7.719183,0.0


In [50]:
# Scale the features DataFrames
# Create a StandardScaler instance

# Apply the scaler model to fit the X-train data
X_scaler = scaler.fit(X_train)

# Transform the X_train and X_test DataFrames using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [51]:
# From SVM, instantiate SVC classifier model instance
btc_tree = RandomForestClassifier(random_state=1)
 
# Fit the model to the data using the training data
btc_tree.fit(X_train_scaled,y_train)
 
# Use the testing data to make the model predictions
y_btc_tree_pred = btc_tree.predict(X_test_scaled)

btc_tree_class = classification_report(y_test,y_btc_tree_pred)
btc_tree_matrix = confusion_matrix(y_test,y_btc_tree_pred)

In [52]:
# Instantiate the random oversampler model

random_sampler = RandomOverSampler(random_state=1)

# Fit the original training data to the random_oversampler model
X_resampled, y_resampled = random_sampler.fit_resample(X_train,y_train)


y_resampled.value_counts()

# Do we have to create this before 

 0    2064
-2    2064
 2    2064
Name: custom_signal, dtype: int64

In [53]:
# Stoch RandomForestClassifier Oversampled
btc_tree_os = RandomForestClassifier(random_state=1)
 
# Fit the model to the data using the training data
btc_tree_os.fit(X_resampled,y_resampled)
 
# Use the testing data to make the model predictions
y_btc_tree_pred_os = btc_tree_os.predict(X_test)

btc_tree_class_os = classification_report(y_test,y_btc_tree_pred_os)
btc_tree_matrix_os = confusion_matrix(y_test,y_btc_tree_pred_os)

In [54]:
# # From LogisticRegression, instantiate LogisticRegression classifier model instance
btc_log = LogisticRegression(random_state=1)
 
# Fit the model to the data using the training data
btc_log.fit(X_train_scaled,y_train)
 
# # Use the testing data to make the model predictions
y_btc_log_pred = btc_log.predict(X_test_scaled)

# # Create and save confusion matrix and classification report to a variable name
btc_log_matrix = confusion_matrix(y_test,y_btc_log_pred)
btc_log_class = classification_report(y_test,y_btc_log_pred)

In [55]:
print(btc_log_matrix)
print(btc_log_class)

[[  58    0    0]
 [   1 6314    0]
 [   0    0   52]]
              precision    recall  f1-score   support

          -2       0.98      1.00      0.99        58
           0       1.00      1.00      1.00      6315
           2       1.00      1.00      1.00        52

    accuracy                           1.00      6425
   macro avg       0.99      1.00      1.00      6425
weighted avg       1.00      1.00      1.00      6425



In [56]:
# From LogisticRegression, instantiate LogisticRegression classifier model instance
btc_log_os = LogisticRegression(random_state=1)
 
# Fit the model to the data using the training data
btc_log_os.fit(X_resampled,y_resampled)
 
# # Use the testing data to make the model predictions
y_btc_log_pred_os = btc_log_os.predict(X_test)

# Create and save confusion matrix and classification report to a variable name
btc_log_matrix_os = confusion_matrix(y_test,y_btc_log_pred_os)
btc_log_class_os = classification_report(y_test,y_btc_log_pred_os)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [57]:
# try to create a model which uses a lot more OHE catagorical variables

In [58]:
print('NN')
print(f'KERAS:{nn_btc_keras}')
print('Random Forest')
print(btc_tree_class)
print(btc_tree_matrix)
print('OS -- Random Forest')
print(btc_tree_class_os)
print(btc_tree_matrix_os)
print('log Reg')
print(btc_log_class_os)
print(btc_log_matrix_os)
print('OS -- Log Reg')
print(btc_log_class_os)
print(btc_log_matrix_os)

NN
KERAS:[0.004160259384661913, 0.9989104866981506]
Random Forest
              precision    recall  f1-score   support

          -2       1.00      0.84      0.92        58
           0       0.99      1.00      1.00      6315
           2       1.00      0.17      0.30        52

    accuracy                           0.99      6425
   macro avg       1.00      0.67      0.74      6425
weighted avg       0.99      0.99      0.99      6425

[[  49    9    0]
 [   0 6315    0]
 [   0   43    9]]
OS -- Random Forest
              precision    recall  f1-score   support

          -2       1.00      0.71      0.83        58
           0       1.00      1.00      1.00      6315
           2       1.00      0.83      0.91        52

    accuracy                           1.00      6425
   macro avg       1.00      0.84      0.91      6425
weighted avg       1.00      1.00      1.00      6425

[[  41   17    0]
 [   0 6315    0]
 [   0    9   43]]
log Reg
              precision    recall 