In [33]:
import pandas as pd
from sklearn import preprocessing
from collections import deque
import numpy as np
import random
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,LSTM,BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

In [41]:

SEQ_LEN = 60
FUTURE_PERIOD_PREDICT = 3
RATIO_TO_PREDECT = "LTC-USD"

EPOCHS = 5
BATCH_SIZE = 64
NAME =f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

def classify(current, future):
    if float(future) > float(current):
        return 1
    else:
        return 0

main_df = pd.DataFrame()

ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]

for ratio in ratios:
    dataset = f"C:/Users/HP/OneDrive/Desktop/python files/crypto_data/{ratio}.csv"
    df = pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"])
    df.rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"}, inplace=True)
    df.set_index("time", inplace=True)
    df = df[[f"{ratio}_close", f"{ratio}_volume"]]

    if main_df.empty:
        main_df = df
    else:
        main_df = main_df.join(df)

# Calculate 'future' column after all data is processed
if f"{RATIO_TO_PREDECT}_close" in main_df.columns:
    main_df['future'] = main_df[f"{RATIO_TO_PREDECT}_close"].shift(-FUTURE_PERIOD_PREDICT)
else:
    raise KeyError(f"Column {RATIO_TO_PREDECT}_close is missing in the DataFrame.")



main_df['target'] = list(map(classify, main_df[f"{RATIO_TO_PREDECT}_close"],main_df["future"]))

print(main_df[[f"{RATIO_TO_PREDECT}_close", "future","target"]].head())


            LTC-USD_close     future  target
time                                        
1528968660      96.580002  96.500000       0
1528968720      96.660004  96.389999       0
1528968780      96.570000  96.519997       0
1528968840      96.500000  96.440002       0
1528968900      96.389999  96.470001       1


In [49]:
def preproccess_df(df):
    df = df.drop('future',axis=1)

    #preprocessing data
    for col in df.columns:
        if col !="target": #not preprocessing target column
            df[col] = df[col].pct_change() #normalizing data 
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values) # scaling data 
    df.dropna(inplace=True) 

    sequential_data = []
    prev_days = deque(maxlen = SEQ_LEN )
    
    for i in df.values:
        prev_days.append([n for n in i[:-1]])
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days),i[-1]])
    random.shuffle(sequential_data)# shuffling data

    print("Sequential data length:", len(sequential_data))

    if len(sequential_data) == 0:
        raise ValueError("No data available after sequential processing!")
    
     # balancing the data 

    buys = []
    sells = []
    for seq,target in sequential_data:
        if target ==0:
            sells.append([seq,target])
        elif target ==1:
            buys.append([seq,target])
    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys),len(sells))
    buys = buys[:lower]
    sells = sells[:lower]

    sequential_data = buys + sells
    random.shuffle(sequential_data)

    X = []
    Y = []

    for seq, target in sequential_data:
        X.append(seq)
        Y.append(target)

    return np.array(X), np.array(Y)



    

In [50]:
times = sorted(main_df.index.values)

last_5pct=times[-int(0.05*len(times))]



In [51]:
validation_main_df = main_df[(main_df.index >= last_5pct)]
main_df =main_df[(main_df.index < last_5pct)]


train_x,train_y = preproccess_df(main_df)
validation_x,validation_y = preproccess_df(validation_main_df)

# After preprocessing
print("train_x shape:", train_x.shape)
print("train_y shape:", train_y.shape)
print("validation_x shape:", validation_x.shape)
print("validation_y shape:", validation_y.shape)




Sequential data length: 78313
Sequential data length: 3798
train_x shape: (65962, 60, 8)
train_y shape: (65962,)
validation_x shape: (3174, 60, 8)
validation_y shape: (3174,)


In [38]:
print(f"train data: {len(train_x)} -- validation :{len(validation_x)}")

print(f"DONT buys :{train_y.count(0)}, buys{train_y.count(1)}")
print(f"VALIDATION DONT buys :{validation_y.count(0)}, buys: {validation_y.count(1)}")

train data: 62210 -- validation :3692
DONT buys :31105, buys31105
VALIDATION DONT buys :1846, buys: 1846


In [None]:
model = Sequential()
model.add(LSTM(128,input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128,input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128,input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32,activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(2,activation="softmax"))

opt = tf.keras.optimizers.Adam(learning_rate=1e-3, decay = 1e-5)

model.compile(loss = 'sparse_categorical_crossentropy',
              optimizer = opt,
              metrics=['accuracy'])

tensorboard = TensorBoard(log_dir=f"logs/{NAME}")
filepath = "RNN_Final-{epoch:0d}"
checkpoint = ModelCheckpoint("models/{}.keras".format(filepath,monitor = 'val_acc',verbose =1,save_best_only=True,model='max'))

history = model.fit(train_x,train_y,batch_size = BATCH_SIZE, epochs=EPOCHS, validation_data=(validation_x,validation_y),callbacks = [tensorboard,checkpoint])



  super().__init__(**kwargs)


Epoch 1/5
[1m 605/1031[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m1:49[0m 257ms/step - accuracy: 0.5074 - loss: 0.7695