In [1]:
import os
import pandas as pd 
from collections import deque
import random 
import time
from sklearn import preprocessing
import numpy as np 

import tensorflow as tf 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

2024-12-16 13:54:41.306058: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1734337481.350263   53342 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734337481.363085   53342 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-16 13:54:41.459229: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Ticker whose "<ticker>_close" column is used to build the future/target columns.
CRYPTO_TO_PREDICT = "LTC-USD"
# Number of consecutive rows that make up one input sequence for the network.
SEQ_LEN = 60
# How many rows ahead the "future" close price is taken from.
FUTURE_PERIOD_PREDICT = 3

# Fixed seed so the shuffling and class balancing done with `random` (and any
# NumPy randomness) give the same train/validation sets on every
# Restart & Run All.
# NOTE(review): TensorFlow's own generator is not seeded here, so weight
# initialisation and dropout still vary between runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)


In [3]:
def classify(current, future):
    """Label a price move: 1 if `future` is strictly greater than `current`, else 0.

    Both arguments are converted with float() before they are compared, so
    numeric strings are accepted. Equal prices (and a NaN on either side,
    for which the comparison is False) produce 0.
    """
    went_up = float(future) > float(current)
    return int(went_up)

## DATASET PREPROCESSING

In [4]:
def preprocess_df(df, seq_len=None):
    """Turn a merged price frame into balanced, shuffled training sequences.

    Steps:
      1. Drop the helper "future" column.
      2. Replace every feature column (everything except "target") by its
         percentage change and standardise it with sklearn's
         `preprocessing.scale`.
      3. Slide a window of `seq_len` rows over the frame; each full window is
         paired with the "target" value of the window's last row.
      4. Balance the two classes by truncating the larger one, then shuffle.

    Args:
        df: DataFrame containing a "future" column, a "target" column holding
            0/1 labels, and numeric feature columns. The caller's frame is not
            modified (the initial `drop` returns a new frame).
        seq_len: Window length. Defaults to the module-level SEQ_LEN.

    Returns:
        (X, y) where X is `np.array` of the stacked windows and y is a plain
        Python list of the matching targets (callers rely on `list.count`).
    """
    if seq_len is None:
        seq_len = SEQ_LEN

    df = df.drop("future", axis=1)

    feature_cols = [col for col in df.columns if col != "target"]
    for col in feature_cols:
        df[col] = df[col].pct_change()
        # A non-zero value following a zero gives +/-inf, which dropna() does
        # not remove and which makes preprocessing.scale raise. Treat those
        # rows like any other undefined change.
        df[col] = df[col].replace([np.inf, -np.inf], np.nan)
        df.dropna(inplace=True)
        df[col] = preprocessing.scale(df[col].values)

    df.dropna(inplace=True)

    # Put "target" last explicitly so the row slicing below does not depend on
    # the column order of the incoming frame.
    rows = df[feature_cols + ["target"]].values

    sequential_data = []
    window = deque(maxlen=seq_len)
    for row in rows:
        window.append(row[:-1])
        if len(window) == seq_len:
            sequential_data.append([np.array(window), row[-1]])

    # No shuffle of sequential_data is needed here: each class is shuffled
    # below and the combined result is shuffled again.
    buys = [pair for pair in sequential_data if pair[1] == 1]
    sells = [pair for pair in sequential_data if pair[1] == 0]

    # Shuffle before truncating so the samples discarded from the larger
    # class are chosen at random.
    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))
    balanced = buys[:lower] + sells[:lower]
    # Mix the classes, otherwise all buys would precede all sells.
    random.shuffle(balanced)

    X = [seq for seq, _ in balanced]
    y = [target for _, target in balanced]

    return np.array(X), y

In [5]:
# Build one frame holding "<ticker>_close" and "<ticker>_volume" for every
# ticker, indexed by the "time" column of the CSV files.
merged_df = pd.DataFrame()

cryptos = ["BTC-USD", "LTC-USD", "BCH-USD", "ETH-USD"]
for crypto in cryptos:
    dataset = f'data/{crypto}.csv'
    # The CSV files are read without a header row; column names are supplied here.
    df = pd.read_csv(dataset, names=['time', 'low', 'high', 'open', 'close', 'volume'])

    # Prefix close/volume with the ticker so the columns stay distinct after joining.
    df = df.rename(columns={"close": f"{crypto}_close", "volume": f"{crypto}_volume"})
    df = df.set_index("time")
    df = df[[f"{crypto}_close", f"{crypto}_volume"]]

    if len(merged_df) == 0:
        merged_df = df
    else:
        # Left join: rows follow the index of the first ticker loaded.
        merged_df = merged_df.join(df)

# Forward-fill gaps left by the join. DataFrame.ffill() replaces the
# deprecated fillna(method="ffill"), which emits a FutureWarning.
merged_df = merged_df.ffill()

# "future" is the close FUTURE_PERIOD_PREDICT rows ahead; "target" is 1 when
# that future close is higher than the current one.
merged_df['future'] = merged_df[f'{CRYPTO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICT)
merged_df['target'] = list(map(classify, merged_df[f'{CRYPTO_TO_PREDICT}_close'], merged_df['future']))

# Drops the trailing rows whose "future" is NaN, plus any rows still
# incomplete after the forward fill.
merged_df = merged_df.dropna()

# Hold out the most recent 5% of timestamps for validation (split by time,
# not at random, so validation data lies strictly after the training data).
times = sorted(merged_df.index.values)
last_5pct = times[-int(0.05*len(times))]

validation_merged_df = merged_df[(merged_df.index >= last_5pct)]
merged_df = merged_df[(merged_df.index < last_5pct)]

X_train, y_train = preprocess_df(merged_df)
X_val, y_val = preprocess_df(validation_merged_df)

print(f"train data: {len(X_train)}  || validation: {len(X_val)}")
print(f"Dont buys: {y_train.count(0)}, buys: {y_train.count(1)}")
print(f"VALIDATION Dont buys: {y_val.count(0)}, buys: {y_val.count(1)}")

  merged_df.fillna(method="ffill", inplace=True)


train data: 77922  || validation: 3860
Dont buys: 38961, buys: 38961
VALIDATION Dont buys: 1930, buys: 1930


## RNN-MODEL

In [6]:
EPOCHS = 10
BATCH_SIZE = 64
# Unique run name (timestamped) used for the TensorBoard log directory.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

model = Sequential()
# Declare the input shape with an explicit Input object instead of passing
# `input_shape=` to the first layer, which Keras 3 warns about.
model.add(tf.keras.Input(shape=X_train.shape[1:]))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())  # normalizes activation outputs, same reason you want to normalize your input data.

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

# Last recurrent layer returns only its final output, which feeds the dense head.
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

# Two output units (class 0 / class 1), matching the integer targets used
# with sparse_categorical_crossentropy below.
model.add(Dense(2, activation='softmax'))

# The optimizer-level `decay` argument is not supported by current Keras
# optimizers. InverseTimeDecay with decay_steps=1 gives the same schedule:
# lr = 0.001 / (1 + 1e-6 * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=1e-6,
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy']
)

tensorBoard = TensorBoard(log_dir="logs/{}".format(NAME))

# Keep a checkpoint only when validation accuracy improves; the epoch number
# and the score are embedded in the file name.
filepath = "models/RNN_Final-{epoch:02d}-{val_accuracy:.3f}.keras"
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor="val_accuracy",
    verbose=1,
    save_best_only=True,
    mode="max"
)



I0000 00:00:1734337488.755277   53342 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3620 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 6GB Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6
  super().__init__(**kwargs)


In [7]:
# Sanity check before training: the number of sequences must match the number
# of labels. y_train / y_val are still plain Python lists at this point (they
# are converted to arrays in a later cell), so len() is reported for them
# rather than a .shape.
print(f"X_train shape: {X_train.shape}, y_train shape: {len(y_train)}")
print(f"X_val shape: {X_val.shape}, y_val shape: {len(y_val)}")


X_train shape: (77922, 60, 8), y_train shape: 77922
X_val shape: (3860, 60, 8), y_val shape: 3860


In [8]:
# Convert the label lists to NumPy arrays before they are handed to model.fit.
y_train, y_val = (np.array(labels) for labels in (y_train, y_val))


In [9]:
# Train the network; TensorBoard logging and best-model checkpointing run as
# callbacks, and the held-out split is evaluated after every epoch.
fit_callbacks = [tensorBoard, checkpoint]
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=fit_callbacks,
)

Epoch 1/10


I0000 00:00:1734337493.426130   53457 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m1217/1218[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 33ms/step - accuracy: 0.4952 - loss: 0.7586
Epoch 1: val_accuracy improved from -inf to 0.52073, saving model to models/RNN_Final-01-0.521.keras
[1m1218/1218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 35ms/step - accuracy: 0.4952 - loss: 0.7586 - val_accuracy: 0.5207 - val_loss: 0.6921
Epoch 2/10
[1m1217/1218[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 32ms/step - accuracy: 0.5135 - loss: 0.6936
Epoch 2: val_accuracy improved from 0.52073 to 0.54326, saving model to models/RNN_Final-02-0.543.keras
[1m1218/1218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 32ms/step - accuracy: 0.5135 - loss: 0.6936 - val_accuracy: 0.5433 - val_loss: 0.6887
Epoch 3/10
[1m1217/1218[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 32ms/step - accuracy: 0.5301 - loss: 0.6899
Epoch 3: val_accuracy improved from 0.54326 to 0.55052, saving model to models/RNN_Final-03-0.551.keras
[1m1218/1218[0m [32m