# --> Importations

In [None]:
import tensorflow as tf
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
import numpy as np
from sklearn import preprocessing
from collections import deque
import random

# --> Global variables

In [None]:
SEQ_LEN = 60                 # length of each input window, in rows (60 minutes, assuming one row per minute -- TODO confirm against the CSV files)
FUTURE_PERIOD_PREDICT = 3    # how many rows ahead of the current one the predicted price lies
RATIO_TO_PREDICT = "BTC-USD" # pair whose close price is being predicted

# --> Définition des classes

In [None]:
def classify(current, future):
    """Label a price move as buy (1) or sell (0).

    Both arguments are converted with ``float()`` before being compared.

    Args:
        current: price at the current row.
        future: price at the later row being predicted.

    Returns:
        1 when ``future`` is strictly greater than ``current`` (buy),
        otherwise 0 (sell).
    """
    price_rises = float(future) > float(current)
    return 1 if price_rises else 0

# --> Data processing

In [None]:
# Build a single frame, indexed by "time", holding the close price and the
# volume of every pair listed in `ratios`.
ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]
main_df = pd.DataFrame()
for ratio in ratios:
    close_col = f"{ratio}_close"
    volume_col = f"{ratio}_volume"
    dataset = f"crypto_data/{ratio}.csv"
    # Column names are supplied explicitly, so pandas reads the first line of
    # the file as data. Only close and volume are kept, prefixed with the pair
    # name so the columns stay distinct after the join.
    df = (
        pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"])
        .rename(columns={"close": close_col, "volume": volume_col})
        .set_index("time")[[close_col, volume_col]]
    )
    # The first pair seeds the frame; later pairs are joined on the time index.
    main_df = df if len(main_df) == 0 else main_df.join(df)

print(main_df.head(3))

In [None]:
# "future" holds the close price found FUTURE_PERIOD_PREDICT rows further down,
# aligned on the current row.
predicted_close_col = f"{RATIO_TO_PREDICT}_close"
main_df["future"] = main_df[predicted_close_col].shift(-FUTURE_PERIOD_PREDICT)
print(main_df[[predicted_close_col, "future"]].head(3))

In [None]:
# shift() leaves the last FUTURE_PERIOD_PREDICT rows without a future price
# (NaN). classify() would label those rows 0 ("sell"), because any comparison
# with NaN is False, so they are dropped rather than given an invented label.
# The copy keeps the column assignment below from acting on a view.
main_df = main_df.dropna(subset=["future"]).copy()
main_df["target"] = list(map(classify,
                             main_df[f"{RATIO_TO_PREDICT}_close"],
                             main_df["future"]))
print(main_df[[f"{RATIO_TO_PREDICT}_close", "future", "target"]].head(10))
# A target of 1 means the price is higher FUTURE_PERIOD_PREDICT rows later.

# --> Normalize data

In [None]:
# Timestamp at which the last 5% of the sorted index begins; it is used as the
# boundary between training and validation data.
times = sorted(main_df.index.values)
validation_rows = int(0.05 * len(times))
last_5_percent = times[-validation_rows]
print(last_5_percent)

In [None]:
# Rows at or after the boundary timestamp form the validation set; everything
# before it stays in main_df as training data.
in_validation = main_df.index >= last_5_percent
in_training = main_df.index < last_5_percent
validation_main_df = main_df.loc[in_validation]
main_df = main_df.loc[in_training]
print(validation_main_df)

In [None]:
def preprocess_df(df):
    """Turn a price/volume frame into shuffled fixed-length training sequences.

    Args:
        df: DataFrame containing the feature columns, a "future" column
            (removed here so the network cannot use it) and a "target"
            column (kept unscaled and used as the label).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` stacks one window of
        ``SEQ_LEN`` consecutive feature rows per sample; ``y`` holds the
        "target" value of the last row of each window. Samples are shuffled,
        with ``X`` and ``y`` kept in the same order.
    """
    # `columns=` replaces the positional axis argument, which pandas 2.0
    # removed. Rows with missing values are dropped up front, and the copy
    # keeps the assignments below away from the caller's frame.
    df = df.drop(columns="future").dropna().copy()
    feature_cols = [col for col in df.columns if col != "target"]

    # Normalise every feature to its row-over-row percent change.
    df[feature_cols] = df[feature_cols].pct_change()
    # pct_change() yields NaN on the first row and +/-inf after a zero value;
    # scale() rejects non-finite input, so both are removed in a single pass.
    df = df.replace([np.inf, -np.inf], np.nan).dropna().copy()
    # scale() standardises each column independently (axis=0 is its default).
    df[feature_cols] = preprocessing.scale(df[feature_cols].values)
    df = df.dropna()

    # Features and labels are selected by name, so the result does not depend
    # on "target" being the last column.
    features = df[feature_cols].values
    targets = df["target"].values

    sequential_data = []
    # The deque keeps only the most recent SEQ_LEN rows; older rows fall out.
    prev_days = deque(maxlen=SEQ_LEN)
    for row, target in zip(features, targets):
        prev_days.append(list(row))
        if len(prev_days) == SEQ_LEN:
            sequential_data.append([np.array(prev_days), target])
    # Shuffle the (window, label) pairs together so they stay aligned.
    random.shuffle(sequential_data)

    X = np.array([window for window, _ in sequential_data])
    y = np.array([label for _, label in sequential_data])
    return X, y

In [None]:
train_x, train_y = preprocess_df(main_df)
valid_x, valid_y = preprocess_df(validation_main_df)
# Show the shapes of the first two samples, then of the first two labels.
for samples in (train_x, train_y):
    print(samples[0].shape, samples[1].shape)

# --> Balance the data

# --> Building model

> Ce modèle est un RNN (LSTM) qui prédit si le prix de la paire choisie (`RATIO_TO_PREDICT`) va monter ou baisser, à partir des prix et des volumes de plusieurs cryptomonnaies.

In [None]:
# Two stacked LSTM layers followed by a small dense head. The labels produced
# by classify() are 0 or 1, so the softmax output layer has two units.
model = Sequential()

# The LSTM activation is left at its default; per the Keras documentation the
# default arguments are what allow the cuDNN kernel to be used on a GPU.
# The input shape is taken from the training sequences built by preprocess_df().
model.add(LSTM(128, input_shape=train_x.shape[1:], return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(128))
model.add(Dropout(0.2))

model.add(Dense(32, activation="relu"))
model.add(Dropout(0.2))

model.add(Dense(2, activation="softmax"))

# Current Keras optimizers reject the `lr=` and `decay=` arguments.
# InverseTimeDecay with decay_steps=1 gives the same schedule the legacy
# `decay` argument produced: lr = 1e-3 / (1 + 1e-5 * step).
learning_rate = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=1e-3,
    decay_steps=1,
    decay_rate=1e-5,
)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

model.summary()

# Train on the sequences returned by preprocess_df() and validate on the
# held-out last 5% of the data.
history = model.fit(train_x, train_y, epochs=3, validation_data=(valid_x, valid_y))

