In [29]:
import pandas as pd
from sklearn import preprocessing
import numpy as np
from collections import deque
import random
import pickle

In [30]:
# Look-ahead horizon for the prediction target, expressed in rows.
# NOTE(review): presumably meant to drive the label shift - confirm that the
# labelling code actually reads this value.
future_window = 3
# Ticker whose '<ticker>_close' column is used to derive the future price and
# the binary target.
security_to_predict = 'ETH-USD'

In [31]:
def future_predict(current, future):
    """Label a price move: 1 when `future` is strictly above `current`, else 0.

    Any comparison that evaluates falsy (equal prices, a lower future price,
    or a NaN on either side) yields 0.
    """
    return 1 if future > current else 0

In [37]:
def preprocessing_df(df:pd.DataFrame):
    """Convert every feature column to scaled percent changes, in place.

    The 'future_price' column is removed, then each column other than
    'target' is replaced by its row-over-row percent change and standardised
    with `preprocessing.scale`. Rows containing NaN are dropped as they
    appear, so each processed feature column costs at least one leading row.

    Args:
        df: frame holding a 'future_price' column, the feature columns and
            (optionally) a 'target' column. It is modified in place.

    Returns:
        The same DataFrame object, after mutation.

    Raises:
        KeyError: if 'future_price' is not a column of `df`.
    """
    df.drop(['future_price'], axis = 1, inplace = True)
    for c in df.columns:
        if c != 'target':
            # A previous value of 0 (e.g. a zero-volume bar) makes pct_change
            # return +/-inf, which dropna() keeps and preprocessing.scale
            # rejects. Map it to NaN so the row is dropped like any other
            # incomplete row.
            df[c] = df[c].pct_change().replace([np.inf, -np.inf], np.nan)
            df.dropna(inplace=True)
            df[c] = preprocessing.scale(df[c].values)
        # Also runs on the 'target' iteration, so NaN rows already present in
        # the input are removed even when no feature column precedes it.
        df.dropna(inplace=True)
    return df

def create_sequences(lookback_period:int, df:pd.DataFrame, seed=None):
    """Build class-balanced, shuffled look-back windows from `df`.

    Every row contributes its values except the last column to a sliding
    window of `lookback_period` rows; the last column is taken as the label
    of the window that ends on that row. Windows are split into label == 1
    ("buys") and everything else ("sells"), both groups are truncated to the
    size of the smaller one, and the result is shuffled.

    Args:
        lookback_period: number of consecutive rows in each window.
        df: frame whose last column is the label and whose other columns are
            the features.
        seed: optional seed. When given, a private `random.Random(seed)` does
            the shuffling so the output is reproducible and the global random
            state is untouched. When None, the module-level `random`
            generator is used.

    Returns:
        Tuple `(X, y)`: `X` is a numpy array of the stacked windows and `y`
        is a plain list of the matching labels.
    """
    rng = random.Random(seed) if seed is not None else random

    window = deque(maxlen = lookback_period)
    sequences = []
    for row in df.values:
        window.append(list(row[:-1]))
        # Emit a sample only once the window holds a full look-back period.
        if len(window) == lookback_period:
            sequences.append([np.array(window), row[-1]])
    # Shuffle before truncating so the samples discarded from the larger
    # class are picked at random rather than always being the newest ones.
    rng.shuffle(sequences)

    buys = []
    sells = []
    for sample in sequences:
        if sample[-1] == 1:
            buys.append(sample)
        else:
            sells.append(sample)

    # Balance the two classes by trimming both to the smaller size.
    min_num = min(len(buys), len(sells))
    buys = buys[:min_num]
    sells = sells[:min_num]
    print(f'Buys: {len(buys)}, Sells: {len(sells)}')

    # Re-shuffle so the two classes are interleaved.
    sequences = buys + sells
    rng.shuffle(sequences)

    X = [features for features, _ in sequences]
    y = [label for _, label in sequences]
    return np.array(X), y

In [39]:
sec_names = ['BTC-USD', 'BCH-USD', 'LTC-USD', 'ETH-USD']
main_df = pd.DataFrame()

# Build one wide frame indexed by 'time' that holds the close and volume of
# every security, keeping only timestamps present in all of the files.
for sec in sec_names:
    temp = pd.read_csv(f'./crypto_data/{sec}.csv', 
                       names = ['time', 'low', 'high','open','close','volume'])
    temp = pd.DataFrame({'time': temp['time'], f'{sec}_close': temp['close'], f'{sec}_volume': temp['volume']})
    temp.set_index('time', inplace=True)
    if len(main_df) == 0:
        main_df = temp
    else:
        main_df = pd.merge(main_df, temp, on=['time'])

# Label each row by comparing the current close with the close
# `future_window` rows ahead. The horizon comes from the config constant
# rather than a literal so that changing the constant changes the labels.
main_df['future_price'] = main_df[f'{security_to_predict}_close'].shift(-future_window)
main_df['target'] = list(map(future_predict,main_df[f'{security_to_predict}_close'], main_df['future_price']))
# The last `future_window` rows have no future price and are dropped here.
main_df.dropna(inplace=True)


# NOTE(review): scaling is applied to the full frame before the split below,
# so validation rows contribute to the scaling statistics.
main_df = preprocessing_df(main_df)

# Positional split: the first 90% of rows train, the final 10% validate.
split_idx = int(len(main_df) * 0.9)
validation_df = main_df.iloc[split_idx:]
main_df = main_df.iloc[:split_idx]

X_train, y_train = create_sequences(60, main_df)
X_val, y_val = create_sequences(60, validation_df)

print(f'X_train: {len(X_train)}, y_train: {len(y_train)}')
print(f'X_val: {len(X_val)}, y_val: {len(y_val)}')



Buys: 35357, Sells: 35357
Buys: 3909, Sells: 3909
X_train: 70714, y_train: 70714
X_val: 7818, y_val: 7818


In [52]:
# Persist every split to '<name>.pickle' in the current working directory.
data = dict(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val)
for name, value in data.items():
    with open(f'{name}.pickle', 'wb') as out_file:
        pickle.dump(value, out_file)