In [3]:
import tensorflow as tf
import keras_tuner as kt
import matplotlib.pyplot as plt
import datetime
import numpy as np
import matplotlib.dates as mdates
from datetime import datetime, timedelta
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout,BatchNormalization
import warnings
import pandas as pd
pd.set_option("display.max_columns", None)
from tensorflow.keras.callbacks import ReduceLROnPlateau,EarlyStopping
from keras.regularizers import l2
import time
from keras.optimizers import Adam
warnings.filterwarnings("ignore")



In [4]:
df = pd.read_pickle("df.pkl")
df.set_index("Date", inplace=True)
holder = df.pop("signal")

In [5]:
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df)
scaled_data = pd.DataFrame(scaled_data, columns=df.columns)
scaled_data["signal"] = holder.values
scaled_data = np.array(scaled_data)

In [6]:
def reshape(df,candles):
    num_cols = df.shape[1]-1
    arr = []
    for j in range(num_cols):
        arr.append([])
        for i in range(candles,df.shape[0]):
            arr[j].append(df[i-candles:i,j])
    arr = np.moveaxis(arr,[0],[2])
    arr,yi = np.array(arr),np.array(df[candles:,-1])
    y=np.reshape(yi,(len(yi),1))
    return arr,y

In [7]:
candles = 30
X_std,Y_std = reshape(scaled_data,candles)

In [8]:
splitlitmit = int(len(scaled_data)*0.8)
x_train_std,x_test_std = X_std[:splitlitmit],X_std[splitlitmit:]
y_train_std,y_test_std = Y_std[:splitlitmit],Y_std[splitlitmit:]

In [9]:
print(x_train_std.shape,y_train_std.shape,x_test_std.shape,y_test_std.shape)

(2646281, 30, 22) (2646281, 1) (661541, 30, 22) (661541, 1)


In [10]:
# Define constants
LAYERS = [8, 8, 1]  # Reduced to 2 LSTM layers and 1 output layer
EPOCHS = 1
LR = 5e-2
N = x_train_std.shape[2]
# Build the model
model = Sequential([
    LSTM(units=LAYERS[0], activation="tanh", recurrent_activation="hard_sigmoid",
         kernel_regularizer=l2(LR), recurrent_regularizer=l2(LR), dropout=0.2,
         recurrent_dropout=0.2, return_sequences=True, input_shape=(candles, N)),
    LSTM(units=LAYERS[1], activation="tanh", recurrent_activation="hard_sigmoid",
         kernel_regularizer=l2(LR), recurrent_regularizer=l2(LR), dropout=0.2,
         recurrent_dropout=0.2, return_sequences=False),
    Dense(units=LAYERS[2], activation="sigmoid")
])

# Compile the model
model.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=LR), metrics=["accuracy"])

# Define callbacks
lr_decay = ReduceLROnPlateau(monitor="loss", patience=1, verbose=1, factor=0.5, min_lr=1e-8)
early_stop = EarlyStopping(monitor="val_accuracy", patience=10, verbose=1, mode="auto",
                           baseline=0, restore_best_weights=True)

# Train the model
start_time = time.time()
history = model.fit(x_train_std, y_train_std, epochs=EPOCHS, batch_size=64, 
                    validation_data=(x_test_std, y_test_std), callbacks=[lr_decay, early_stop], verbose=1)
training_time = time.time() - start_time

# Evaluate the model
train_loss, train_acc = model.evaluate(x_train_std, y_train_std, verbose=0)
test_loss, test_acc = model.evaluate(x_test_std, y_test_std, verbose=0)

# Print results
print("-" * 65)
print(f"Training was completed in {training_time:.2f} secs")
print("-" * 65)
print(f"Train accuracy: {train_acc * 100:.4f}%")
print(f"Test accuracy: {test_acc * 100:.4f}%")


[1m41349/41349[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m303s[0m 7ms/step - accuracy: 0.8852 - loss: 0.3566 - val_accuracy: 0.8919 - val_loss: 0.3002 - learning_rate: 0.0500
Restoring model weights from the end of the best epoch: 1.
-----------------------------------------------------------------
Training was completed in 316.92 secs
-----------------------------------------------------------------
Train accuracy: 89.0563%
Test accuracy: 89.1895%


In [11]:
y_predict = model.predict(x_test_std)

[1m20674/20674[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 2ms/step


In [13]:
threshold = 0.5
binary_predictions = [1 if p >= threshold else 0 for p in y_predict]

In [16]:
binary_predictions = pd.DataFrame(binary_predictions)

In [18]:
binary_predictions.value_counts()

0
0    362731
1    298810
Name: count, dtype: int64

In [21]:
model.save("btc_ml_model.keras")