In [1]:
import pandas as pd
from sklearn import preprocessing
import numpy as np
from collections import deque
import random
import pickle

In [2]:
# Look-ahead horizon for the prediction target — presumably a row offset into the
# price series; TODO confirm against the cell that builds 'future_price'.
future_window = 3
# Ticker whose '<ticker>_close' column is used to derive the future price and the target label.
security_to_predict = 'ETH-USD'

In [3]:
def future_predict(current, future):
    """Label a price move as 1 (up) or 0 (not up).

    Returns 1 only when `future` is strictly greater than `current`.
    Ties, and comparisons that evaluate falsy (e.g. a NaN operand), give 0.
    """
    return 1 if future > current else 0

In [4]:
def preprocessing_df(df:pd.DataFrame):
    """Turn raw feature columns into scaled percentage changes.

    The 'future_price' column is removed, then every column except 'target'
    is replaced by its row-over-row percentage change and standardised with
    `preprocessing.scale`. Rows containing NaN are dropped along the way.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing a 'future_price' column, a 'target' column and any
        number of numeric feature columns. It is not modified.

    Returns
    -------
    pd.DataFrame
        A new frame without 'future_price', with the feature columns
        transformed and incomplete rows removed.

    Raises
    ------
    KeyError
        If `df` has no 'future_price' column.
    """
    # drop() hands back a new frame, so the caller's frame stays untouched.
    df = df.drop(columns=['future_price'])
    for c in df.columns:
        if c == 'target':
            continue
        # pct_change gives NaN for the first row and +/-inf after a zero value.
        # Infinities are mapped to NaN so dropna removes them before scaling.
        df[c] = df[c].pct_change().replace([np.inf, -np.inf], np.nan)
        # NOTE: rows are dropped per column, so each feature column costs at
        # least one leading row; kept as-is to preserve existing results.
        df.dropna(inplace=True)
        df[c] = preprocessing.scale(df[c].values)
    # Final sweep also covers NaNs in 'target' or a frame with no feature columns.
    df.dropna(inplace=True)
    return df

def create_sequences(lookback_period:int, df:pd.DataFrame, seed=None):
    """Build shuffled, class-balanced sliding-window samples from `df`.

    Every row's last column is treated as the label and all preceding columns
    as features. Each sample is the window of the `lookback_period` most
    recent feature rows, labelled with the last row's label. The samples are
    split into label == 1 ("buys") and everything else ("sells"); the larger
    group is truncated to the size of the smaller one, and the result is
    shuffled. The per-class counts are printed.

    Parameters
    ----------
    lookback_period : int
        Number of consecutive rows in each window.
    df : pd.DataFrame
        Frame whose last column is the label.
    seed : int, optional
        When given, shuffling uses a private `random.Random(seed)` so the
        result is reproducible. When None (the default) the global `random`
        module state is used, as before.

    Returns
    -------
    tuple
        `(X, y)` where `X` is an ndarray of stacked windows and `y` is a list
        of the matching labels.
    """
    # A private generator gives reproducible output without touching global state.
    rng = random if seed is None else random.Random(seed)

    sequences = []
    lookback = deque(maxlen=lookback_period)
    for row in df.values:
        lookback.append(list(row[:-1]))
        # The deque discards the oldest row itself once it is full.
        if len(lookback) == lookback_period:
            sequences.append([np.array(lookback), row[-1]])
    rng.shuffle(sequences)

    buys = []
    sells = []
    for sample in sequences:
        if sample[-1] == 1:
            buys.append(sample)
        else:
            sells.append(sample)

    # Balance the classes by truncating the larger group.
    min_num = min(len(buys), len(sells))
    buys = buys[:min_num]
    sells = sells[:min_num]
    print(f'Buys: {len(buys)}, Sells: {len(sells)}')

    # Shuffle again so the two classes are interleaved.
    sequences = buys + sells
    rng.shuffle(sequences)

    X = [window for window, _ in sequences]
    y = [label for _, label in sequences]
    return np.array(X), y

In [5]:
sec_names = ['BTC-USD', 'BCH-USD', 'LTC-USD', 'ETH-USD']
main_df = pd.DataFrame()

# Build one frame indexed by 'time' with a close and a volume column per security.
for sec in sec_names:
    temp = pd.read_csv(f'./crypto_data/{sec}.csv', 
                       names = ['time', 'low', 'high','open','close','volume'])
    temp = pd.DataFrame({'time': temp['time'], f'{sec}_close': temp['close'], f'{sec}_volume': temp['volume']})
    temp.set_index('time', inplace=True)
    if len(main_df) == 0:
        main_df = temp
    else:
        # Default inner merge: only timestamps present for every security survive.
        main_df = pd.merge(main_df, temp, on=['time'])

# Use the configured horizon instead of a hard-coded offset so the label
# follows `future_window` when it is changed.
main_df['future_price'] = main_df[f'{security_to_predict}_close'].shift(-future_window)
main_df['target'] = list(map(future_predict,main_df[f'{security_to_predict}_close'], main_df['future_price']))
# The last `future_window` rows have no future price (NaN) and are removed here.
main_df.dropna(inplace=True)


# NOTE(review): scaling is applied before the train/validation split, so the
# validation rows contribute to the scaling statistics — confirm this is intended.
main_df = preprocessing_df(main_df)
# Chronological split: the final 10% of rows are held out for validation.
validation_df = main_df.iloc[int(len(main_df)*0.9):]
main_df = main_df.iloc[:int(len(main_df) * 0.9)]

# 60 = number of consecutive rows in each input window.
X_train, y_train = create_sequences(60, main_df)
X_val, y_val = create_sequences(60, validation_df)

print(f'X_train: {len(X_train)}, y_train: {len(y_train)}')
print(f'X_val: {len(X_val)}, y_val: {len(y_val)}')



Buys: 35357, Sells: 35357
Buys: 3909, Sells: 3909
X_train: 70714, y_train: 70714
X_val: 7818, y_val: 7818


In [52]:
# Write each prepared split to '<name>.pickle' in the working directory.
data = dict(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val)
for name, value in data.items():
    with open(f'{name}.pickle', mode='wb') as j:
        pickle.dump(value, j)

In [6]:
# Reload the training split from disk. Context managers close the file handles,
# which the bare open() calls never did.
# NOTE: pickle.load can execute arbitrary code — only load pickle files that
# were produced by a trusted source such as this notebook.
with open('X_train.pickle', 'rb') as pickle_file:
    X_train = pickle.load(pickle_file)
with open('y_train.pickle', 'rb') as pickle_file:
    y_train = pickle.load(pickle_file)

In [13]:
# Show the rows whose 'BTC-USD_close' value is greater than 1.
btc_close_above_one = main_df['BTC-USD_close'].values > 1
main_df[btc_close_above_one]

Unnamed: 0_level_0,BTC-USD_close,BTC-USD_volume,BCH-USD_close,BCH-USD_volume,LTC-USD_close,LTC-USD_volume,ETH-USD_close,ETH-USD_volume,target
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1528972380,1.437367,1.146036,-0.927850,-0.005754,0.238800,-0.072141,-0.529577,-0.046571,0
1528972920,1.676028,-0.003892,0.705120,-0.006146,1.023147,0.018937,0.992259,0.011761,0
1528974900,1.378163,-0.001189,0.588875,-0.006161,-0.465111,-0.103417,0.004585,-0.023371,1
1528975020,1.171690,0.090186,1.583138,-0.006009,1.332813,-0.075844,1.670955,0.112606,1
1528975140,1.074962,0.025006,1.869705,-0.003357,2.188042,-0.095620,0.021232,-0.031794,1
...,...,...,...,...,...,...,...,...,...
1534576020,1.730480,0.625268,3.567936,-0.006147,2.871633,-0.097839,0.480036,-0.048810,0
1534576740,1.749708,0.238871,-0.008896,-0.006177,1.556001,-0.096597,0.321696,0.070656,1
1534578120,2.268779,-0.078789,1.382287,-0.006128,2.072376,-0.090798,0.878607,0.071119,1
1534578720,1.970116,4.971648,1.772778,0.005081,0.522499,-0.081327,1.728736,0.604529,1
