In [1]:
import tensorflow as tf
from tensorflow.python.client import device_lib
import numpy as np
import pandas as pd
import sklearn
import sklearn.model_selection
import os

from sklearn import preprocessing as pp

from yahoo_fin import stock_info as si
import talib as tlb

# Price prediction

![logo](jupyter-logo.png)

# Utilisation du GPU

In [None]:
print(device_lib.list_local_devices())

In [4]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [None]:
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

# Importation des données

In [3]:
possible_features = ['open', 'high', 'low', 'close', 'volume', 'close 7 days mean', 'close 14 days mean', 'close 20 days mean', 'close 50 days mean', 'ema', 'upperband', 'middleband', 'lowerband', 'rsi', 'macd', 'bop', 'obv', 'natr', 'sine', 'leadsine', 'CDLKICKINGBYLENGTH', 'CDLTAKURI', 'CDLHARAMI', 'CDLDOJI', 'CDLDRAGONFLYDOJI', 'CDLLONGLEGGEDDOJI']

In [4]:
"""Paramètres"""
ticker = 'KO' #choix de l'action à étudier

n = 60 #nombre de jours considérés par le modèle précédant la date voulue
days_predicted = 2 #on prévoit le prix de l'action à la date t + days_predicted
test_size = 0.2 #part des données réservée au test du modèle
shffl = False
features = possible_features #liste d'indicateurs financiers utiles à la prédiction




In [5]:
"""On charge la dataset"""

df = si.get_data(ticker) 
df.head()

Unnamed: 0,open,high,low,close,adjclose,volume,ticker
1970-03-25,0.828125,0.842448,0.828125,0.828125,0.195108,1862400,KO
1970-03-26,0.828125,0.83724,0.828125,0.834635,0.196642,854400,KO
1970-03-30,0.835938,0.846354,0.835938,0.838542,0.197562,1008000,KO
1970-03-31,0.83724,0.83724,0.829427,0.835938,0.196949,710400,KO
1970-04-01,0.835938,0.838542,0.833333,0.83724,0.197256,940800,KO


In [6]:
df.drop(columns=['ticker', 'adjclose'], inplace = True)
df = df.astype(np.float64)
df.head()

Unnamed: 0,open,high,low,close,volume
1970-03-25,0.828125,0.842448,0.828125,0.828125,1862400.0
1970-03-26,0.828125,0.83724,0.828125,0.834635,854400.0
1970-03-30,0.835938,0.846354,0.835938,0.838542,1008000.0
1970-03-31,0.83724,0.83724,0.829427,0.835938,710400.0
1970-04-01,0.835938,0.838542,0.833333,0.83724,940800.0


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 12939 entries, 1970-03-25 to 2021-07-09
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   open    12939 non-null  float64
 1   high    12939 non-null  float64
 2   low     12939 non-null  float64
 3   close   12939 non-null  float64
 4   volume  12939 non-null  float64
dtypes: float64(5)
memory usage: 606.5 KB


In [8]:
"""print les colonnes"""

df['open'].plot(figsize = (10, 10))

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.

In [None]:
df.dropna(inplace = True)

In [9]:
df.shape

(12939, 5)

# Calibrage de la fenêtre 

Pour certaines action, le cours ne se comporte pas du tout pareil selon la période, il faut ajuster la fenêtre, en selectionnant la durée souhaitée avec la commande
df = df.iloc[n_supp:,:] où n_supp est le nombre de jours supprimés. Cet ajustement est à faire manuellement pour chaque action, dans le cas du btc, on garde tout car la période 
est assez récente


In [10]:
df = df.iloc[8000:,:]

# Indicateurs d'analyse technique

In [11]:
"""Fonctions des indicateurs 
(yahoo_fin permet uniquement d'obtenir les 5 colonnes 'adjclose', 'volume', 'open', 'high', 'low')
"""
########Overlap Studies Functions


#Moyennes mobiles :
#7j
df['close 7 days mean'] = df['close'].rolling(window = 7).mean()
#14j
df['close 14 days mean'] = df['close'].rolling(window = 14).mean()
#20j
df['close 20 days mean'] = df['close'].rolling(window = 20).mean()
#50j
df['close 50 days mean'] = df['close'].rolling(window = 50).mean()


#On utilise le package talib pour avoir les indicateurs suivants

df['ema'] = tlb.EMA(df['close'], timeperiod=30)

df['upperband'], df['middleband'], df['lowerband'] = tlb.BBANDS(df['close'], timeperiod=5, nbdevup=2, nbdevdn=2, matype=0)



In [12]:
#######Momentum Indicator Functions : 

#RSI
df['rsi'] = tlb.RSI(df['close'], timeperiod=14)

#MACD
df['macd'], df['macdsignal'], df['macdhist'] = tlb.MACD(df['close'], fastperiod=12, slowperiod=26, signalperiod=9)

#balance of power
df['bop'] = tlb.BOP(df['open'], df['high'], df['low'], df['close']) #balance of power

In [13]:
##########Volume Indicator Functions
df['obv'] = tlb.OBV(df['close'], df['volume']) #on balance volume

In [14]:
##########Volatility Indicator Functions
df['natr'] = tlb.NATR(df['high'], df['low'], df['close'], timeperiod=14) #Normalized Average True Range

In [15]:
###########Price Transform Functions
#Ne sert a rien, on pert de l'information

In [16]:
###########Cycle Indicator Functions très important apparament
df['sine'], df['leadsine'] = tlb.HT_SINE(df['close'])

In [17]:
############## patern recognition

df['CDLKICKINGBYLENGTH'] = tlb.CDLKICKINGBYLENGTH(df['open'], df['high'], df['low'], df['close']) #bull / bear
df['CDLTAKURI'] = tlb.CDLTAKURI(df['open'], df['high'], df['low'], df['close'])
df['CDLHARAMI'] = tlb.CDLHARAMI(df['open'], df['high'], df['low'], df['close'])
df['CDLDOJI'] = tlb.CDLDOJI(df['open'], df['high'], df['low'], df['close'])
df['CDLDRAGONFLYDOJI'] = tlb.CDLDRAGONFLYDOJI(df['open'], df['high'], df['low'], df['close'])
df['CDLLONGLEGGEDDOJI'] = tlb.CDLLONGLEGGEDDOJI(df['open'], df['high'], df['low'], df['close'])

In [18]:
df.head(60)

Unnamed: 0,open,high,low,close,volume,close 7 days mean,close 14 days mean,close 20 days mean,close 50 days mean,ema,...,obv,natr,sine,leadsine,CDLKICKINGBYLENGTH,CDLTAKURI,CDLHARAMI,CDLDOJI,CDLDRAGONFLYDOJI,CDLLONGLEGGEDDOJI
2001-11-26,24.475,24.545,24.049999,24.155001,8057400.0,,,,,,...,8057400.0,,,,0,0,0,0,0,0
2001-11-27,24.155001,24.190001,23.754999,23.995001,7262400.0,,,,,,...,795000.0,,,,0,0,0,0,0,0
2001-11-28,23.605,23.690001,23.285,23.375,9075800.0,,,,,,...,-8280800.0,,,,0,0,0,0,0,0
2001-11-29,23.475,23.639999,23.23,23.540001,7146400.0,,,,,,...,-1134400.0,,,,0,0,0,0,0,0
2001-11-30,23.5,23.625,23.379999,23.48,6975200.0,,,,,,...,-8109600.0,,,,0,0,0,0,0,0
2001-12-03,23.174999,23.375,23.055,23.264999,8425800.0,,,,,,...,-16535400.0,,,,0,0,0,0,0,0
2001-12-04,23.469999,23.525,23.145,23.275,7857600.0,23.583572,,,,,...,-8677800.0,,,,0,0,0,0,0,0
2001-12-05,23.639999,23.700001,23.48,23.559999,12115800.0,23.498571,,,,,...,3438000.0,,,,0,0,0,0,0,0
2001-12-06,23.35,23.445,23.08,23.215,7025800.0,23.387143,,,,,...,-3587800.0,,,,0,0,0,0,0,0
2001-12-07,23.09,23.450001,23.049999,23.299999,6653800.0,23.376428,,,,,...,3066000.0,,,,0,0,0,0,0,0


## Affiche de certains indicateurs
On peut plot les indicateurs

In [19]:
df[['close', 'close 50 days mean']].plot(figsize = (10, 10))

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.

In [20]:
df[['close', 'upperband', 'middleband', 'lowerband']].plot(figsize = (10, 10))

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.

# Pre-processing de la data

In [21]:
# Pre-processing de la data



scaler_dict = {}

"""Pre-processing de la data

/!\ il faut bien sauvegarder le scaler, car il dépend du min et du max de la colonne !, on ne pourra pas de-scale après sinon

"""



for column in features:
    scaler = pp.MinMaxScaler()
    df[column] = scaler.fit_transform(np.expand_dims(df[column].values, axis=1))
    #on conserve le scaler de chaque colonne car il dépend de la colonne
    scaler_dict[column] = scaler
    
    
    
    

In [22]:
scaler_dict

{'open': MinMaxScaler(),
 'high': MinMaxScaler(),
 'low': MinMaxScaler(),
 'close': MinMaxScaler(),
 'volume': MinMaxScaler(),
 'close 7 days mean': MinMaxScaler(),
 'close 14 days mean': MinMaxScaler(),
 'close 20 days mean': MinMaxScaler(),
 'close 50 days mean': MinMaxScaler(),
 'ema': MinMaxScaler(),
 'upperband': MinMaxScaler(),
 'middleband': MinMaxScaler(),
 'lowerband': MinMaxScaler(),
 'rsi': MinMaxScaler(),
 'macd': MinMaxScaler(),
 'bop': MinMaxScaler(),
 'obv': MinMaxScaler(),
 'natr': MinMaxScaler(),
 'sine': MinMaxScaler(),
 'leadsine': MinMaxScaler(),
 'CDLKICKINGBYLENGTH': MinMaxScaler(),
 'CDLTAKURI': MinMaxScaler(),
 'CDLHARAMI': MinMaxScaler(),
 'CDLDOJI': MinMaxScaler(),
 'CDLDRAGONFLYDOJI': MinMaxScaler(),
 'CDLLONGLEGGEDDOJI': MinMaxScaler()}

In [23]:
df.head()

Unnamed: 0,open,high,low,close,volume,close 7 days mean,close 14 days mean,close 20 days mean,close 50 days mean,ema,...,obv,natr,sine,leadsine,CDLKICKINGBYLENGTH,CDLTAKURI,CDLHARAMI,CDLDOJI,CDLDRAGONFLYDOJI,CDLLONGLEGGEDDOJI
2001-11-26,0.143602,0.139003,0.134866,0.135112,0.048434,,,,,,...,0.135274,,,,0.0,0.0,0.5,0.0,0.0,0.0
2001-11-27,0.135846,0.130414,0.127691,0.131266,0.041919,,,,,,...,0.132691,,,,0.0,0.0,0.5,0.0,0.0,0.0
2001-11-28,0.122516,0.118316,0.116259,0.11636,0.05678,,,,,,...,0.129463,,,,0.0,0.0,0.5,0.0,0.0,0.0
2001-11-29,0.119365,0.117106,0.114922,0.120327,0.040968,,,,,,...,0.132005,,,,0.0,0.0,0.5,0.0,0.0,0.0
2001-11-30,0.119971,0.116743,0.11857,0.118884,0.039565,,,,,,...,0.129524,,,,0.0,0.0,0.5,0.0,0.0,0.0


In [24]:
"""On crée futur"""
df['futur'] = df['close'].shift(-days_predicted)

In [25]:
df.shape

(4939, 29)

Ici, on supprime de nouveau les NaN, créés lors du calcul des indicateurs

In [26]:
df.dropna(inplace = True)

In [27]:
df.shape

(4874, 29)

## On shuffle y et c de la même manière

In [28]:
"""Préparation des séquences de prix des 50 derniers jours que l'on va considérer"""

sequence_tot = []
sequence = []
i = 0
for input_, tgt, current in zip(df[features].values, df['futur'].values, df['close'].values):
    sequence.append(input_)
    if len(sequence) == n: #dès que len(sequence) = n, on stock la sequence dans sequence_tot
                            #dès lors, on supprime le premier élément de sequence, et on ajoute l'entrée à la fin, 
                            #ça garde une séquence de 50
        sequence_tot.append([np.array(sequence), tgt, current])
        sequence.pop(0)


#tous les n, on ajoute tgt = target, avec df['futur'] qui est décalé de days_predicted 
#donc avec les n valeurs des nb_features features, on va essayer de trouver la target 


    
x_data, y_data = [], [] #c correspond à current, c'est le prix à la date considérée, on en aura besoin pour déterminer le bénéfice
#y est la donnée à déterminer, à savoir le prix à t + 10
for s, tgt, current in sequence_tot:
    x_data.append(s)
    y_data.append((tgt, current))
x_data, y_data = np.array(x_data), np.array(y_data)


In [29]:
y_data.shape

(4815, 2)

## Création de x, y, c sans sklearn

In [None]:
"""/!\ ne pas executer le bloc qui suit"""

In [None]:
#Nous avons essayé de coder par nous-mêmes la séparation des données... ça n'a pas bien fonctionné

In [None]:
"""/!\ Ne pas lancer cette cellule"""
"""Préparation des séquences de prix des 50 derniers jours que l'on va considérer"""

sequence_tot = []
sequence = []
i = 0
for inpt, tgt, current in zip(df[features].values, df['futur'].values, df['close'].values):
    sequence.append(inpt)
    if len(sequence) == n: #dès que len(sequence) = n, on stock la sequence dans sequence_tot
                            #dès lors, on supprime le premier élément de sequence, et on ajoute l'entrée à la fin, 
                            #ça garde une séquence de 50
        sequence_tot.append([np.array(sequence), tgt, current])
        sequence.pop(0)


#tous les n, on ajoute tgt = target, avec df['futur'] qui est décalé de days_predicted 
#donc avec les n valeurs des nb_features features, on va essayer de trouver la target 
#donc, si n=50 et days_predicted=10, last_sequence devrait être de longueur 60 (50+10)


    
x_data, y_data, c_data = [], [], [] #c correspond à current, c'est le prix à la date considérée, on en aura besoin pour déterminer le bénéfice
#y est la donnée à déterminer, à savoir le prix à t + 10
for s, tgt, current in sequence_tot:
    x_data.append(s)
    y_data.append(tgt)
    c_data.append(current)
x_data, y_data, c_data = np.array(x_data), np.array(y_data), np.array(c_data)


In [None]:
"""/!\ Ne pas lancer cette cellule"""
idx = int((1 - test_size) * len(x_data))
          
x_train = x_data[:idx] #"""on prend les premiers pour le train"""
y_train = y_data[:idx] #"""on prend les premiers pour le train"""
x_test  = x_data[idx:] #"""on prend les derniers pour le test"""
y_test = y_data[idx:] #"""on prend les derniers pour le test"""
c_test = c_data[idx:]

# Création de x, y, c avec sklearn

In [30]:
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(x_data, y_data, test_size=test_size, shuffle=shffl)

In [31]:
y_test

array([[0.67760549, 0.66750808],
       [0.68121164, 0.67279721],
       [0.66294026, 0.67760549],
       ...,
       [0.86031972, 0.85695395],
       [0.8557519 , 0.84974156],
       [0.8636855 , 0.86031972]])

In [32]:
y_test, c_test = y_test[:,0], y_test[:,1]

In [33]:
y_test

array([0.67760549, 0.68121164, 0.66294026, 0.66462314, 0.66294026,
       0.65981485, 0.65500657, 0.64587092, 0.64803464, 0.65284284,
       0.6499579 , 0.62759944, 0.63409062, 0.63649472, 0.63144605,
       0.64082217, 0.64827502, 0.64875586, 0.64803464, 0.64611129,
       0.65717029, 0.66269979, 0.66294026, 0.66462314, 0.6752013 ,
       0.67279721, 0.66991227, 0.67448009, 0.66943143, 0.66798892,
       0.66462314, 0.66149774, 0.66582519, 0.66197858, 0.65692992,
       0.65981485, 0.6554874 , 0.65741076, 0.65957448, 0.64755381,
       0.65885318, 0.66462314, 0.66582519, 0.67327804, 0.67760549,
       0.69467483, 0.67976922, 0.67351842, 0.65332368, 0.64731334,
       0.65500657, 0.65644908, 0.65741076, 0.65789159, 0.65620871,
       0.64827502, 0.65476619, 0.65957448, 0.66582519, 0.6665464 ,
       0.67111432, 0.65500657, 0.64370719, 0.64418803, 0.64322636,
       0.65789159, 0.6610169 , 0.66486352, 0.65861281, 0.66342109,
       0.66221904, 0.65067912, 0.65043874, 0.65572787, 0.65861

On a pas besion de c_train, on a juste besoin de c_test pour calculer les benefices, on garde juste y_train

In [34]:
y_train

array([[0.22995553, 0.22995553],
       [0.21120326, 0.23584563],
       [0.20531315, 0.22995553],
       ...,
       [0.66702724, 0.65837234],
       [0.66750808, 0.65933402],
       [0.67279721, 0.66702724]])

In [35]:
y_train = y_train[:,0]

## Conversion en float64 pour éviter toute erreur

In [36]:
x_train = np.asarray(x_train).astype('float64')
y_train = np.asarray(y_train).astype('float64')
x_test = np.asarray(x_test).astype('float64')
y_test = np.asarray(y_test).astype('float64')

In [37]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((3852, 60, 26), (3852,), (963, 60, 26), (963,))

In [38]:
df.tail(days_predicted)

Unnamed: 0,open,high,low,close,volume,close 7 days mean,close 14 days mean,close 20 days mean,close 50 days mean,ema,...,natr,sine,leadsine,CDLKICKINGBYLENGTH,CDLTAKURI,CDLHARAMI,CDLDOJI,CDLDRAGONFLYDOJI,CDLLONGLEGGEDDOJI,futur
2021-07-06,0.858943,0.853859,0.852365,0.849742,0.10761,0.861783,0.870485,0.889858,0.932561,0.902333,...,0.058852,0.4974,0.85171,0.0,0.0,0.5,0.0,0.0,0.0,0.855752
2021-07-07,0.852399,0.860876,0.8565,0.86032,0.100231,0.861783,0.86986,0.888168,0.932917,0.901991,...,0.058747,0.576383,0.903415,0.0,0.0,0.5,0.0,0.0,0.0,0.863685


# Modèle

In [39]:
"""Paramètres"""
#paramètres sur les données
n = 60
days_predicted = 2
test_size = 0.2
features = possible_features

#paramètres du modèle
nb_neurons = 256
nb_layers = 6
dropout = 0.4 #"désactive" aléatoirement certains neurones a chaque forward pour réduire l'overfitting
nb_features = len(features) #len de la dataframe
loss_fn = 'huber_loss'
e = 350 #nombre d'epoch
b_s = 32 #c'est la taille du batch
bidi = False
mtrcs = ['mean_absolute_error']
optm = 'adam'
activation_fn = 'tanh'
reg = None


In [40]:
def mod_name(ticker, n, days_predicted, test_size, nb_neurons, nb_layers, dropout, nb_features, loss_fn, e, b_s, bidi, mtrcs, optm, activation_fn, reg):
    return 'n' + str(n) + '__' + 'days_predicted' + str(days_predicted) + '__' + 'test_size' + str(test_size) + '__' + 'nb_neurons' + str(nb_neurons) + '__' + 'nb_layers' + str(nb_layers) + '__' + 'dropout' + str(dropout) + '__' + 'nb_features' + str(nb_features) + '__' + 'loss_fn' + str(loss_fn) + '__' + 'e' + str(e) + '__' + 'b_s' + str(b_s) + '__' + 'bidi' + str(bidi) + '__' + 'mtrcs' + str(mtrcs) + '__' + 'optm' + str(optm) + '__' + 'activation_fn' + str(activation_fn) + '__' + 'reg' + str(reg) 


In [41]:
name = mod_name(ticker, n, days_predicted, test_size, nb_neurons, nb_layers, dropout, nb_features, loss_fn, e, b_s, bidi, mtrcs, optm, activation_fn, reg)

In [42]:
name

"n60__days_predicted2__test_size0.2__nb_neurons256__nb_layers6__dropout0.4__nb_features26__loss_fnhuber_loss__e350__b_s32__bidiFalse__mtrcs['mean_absolute_error']__optmadam__activation_fntanh__regNone"

In [43]:
def mod(nb_neurons, nb_layers, dropout, n, nb_features, loss_fn, bidi, mtrcs, optm, activation_fn, reg):

    if bidi :
        """Bidirectionnel, n_layers = 2"""

        mod = tf.keras.Sequential()

        for l in range(nb_layers - 1):
            mod.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(nb_neurons, 
                                                                       return_sequences=True, 
                                                                       batch_input_shape=(None, n, nb_features)
                                                                      )))
            mod.add(tf.keras.layers.Dropout(dropout))
        
        mod.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(nb_neurons, return_sequences=False)))
        mod.add(tf.keras.layers.Dropout(dropout))

        mod.add(tf.keras.layers.Dense(1, activation = activation_fn)) 
        mod.compile(loss = loss_fn, metrics = mtrcs, optimizer = optm)
        
        
    else :
        mod = tf.keras.Sequential()
        
        for l in range(nb_layers - 1):
            mod.add(tf.keras.layers.LSTM(nb_neurons, return_sequences=True, batch_input_shape=(None, n, nb_features)))
            mod.add(tf.keras.layers.Dropout(dropout))
        
        mod.add(tf.keras.layers.LSTM(nb_neurons, return_sequences=False))
        mod.add(tf.keras.layers.Dropout(dropout))

        mod.add(tf.keras.layers.Dense(1, activation = activation_fn)) 
        mod.compile(loss = loss_fn, metrics = mtrcs, optimizer = optm) #adam est un algorithme d'optimisation (ADAptative Moment)

    return mod
        






In [None]:
mod = mod(nb_neurons, nb_layers, dropout, n, nb_features, loss_fn, bidi, mtrcs, optm, activation_fn, reg)

In [None]:
mod.summary()

In [None]:
"""création des dossiers pour les logs"""

if not os.path.isdir("logs"):
    os.mkdir("logs")


In [93]:
#on utilise tensorboard pour visualiser la loss etc au fil des epochs

tensorboard = tf.keras.callbacks.TensorBoard(log_dir=os.path.join("logs", name))

In [None]:
mod.fit(x_train, y_train,
                    batch_size = b_s,
                    epochs = e,
                    validation_data = (x_test, y_test),
                    callbacks = [tensorboard],
                    verbose = 1)

Train on 3836 samples, validate on 959 samples
Epoch 1/350
Epoch 2/350

# Sauvegarde du modèle

In [62]:
mod.save(name + '.h5')

# Chargement d'un modèle déjà existant

In [1]:
loaded = tf.keras.models.load_model('BTC-USD__BiDiFalse__n60__days_predicted10__test_size0.2__nb_neurons256__dropout0.4__nb_features26__e650__b_s32.h5')

NameError: name 'tf' is not defined

# Calcul des prédictions

In [82]:
y_pred = mod.predict(x_test)

In [83]:
y_pred.shape

(463, 1)

In [84]:
y_test.shape

(463, 1)

In [85]:
"""on met y_test sous la meme shape que y_pred pour la suite"""

'on met y_test sous la meme shape que y_pred pour la suite'

In [86]:
y_test

array([[3.80780645e-02],
       [7.34666406e-04],
       [8.29260249e-03],
       [3.78972998e-03],
       [1.39231762e-03],
       [1.66413132e-02],
       [1.65910256e-01],
       [1.14587392e-01],
       [4.02663373e-03],
       [1.82424556e-03],
       [1.12812402e-01],
       [2.52740808e-03],
       [1.20882657e-01],
       [8.50275695e-04],
       [5.85327476e-02],
       [4.12065472e-01],
       [8.98786927e-04],
       [1.36191216e-01],
       [1.05974944e-03],
       [3.86680634e-01],
       [8.14805272e-02],
       [2.92995095e-01],
       [7.69821226e-01],
       [8.48984923e-02],
       [6.41206605e-01],
       [1.44310711e-01],
       [1.12072848e-01],
       [1.46245073e-01],
       [9.65304857e-02],
       [1.42072749e-01],
       [5.24995566e-02],
       [3.97799575e-03],
       [5.61798702e-03],
       [1.28833905e-01],
       [1.48486643e-01],
       [1.49418817e-03],
       [2.75834277e-01],
       [9.04014042e-04],
       [1.37409688e-01],
       [3.85911786e-03],


In [87]:
y_test = np.reshape(y_test, (y_pred.shape))

In [88]:
y_test.shape

(463, 1)

In [89]:
c_test = np.reshape(c_test, (y_pred.shape))

In [90]:
c_test.shape

(463, 1)

## On descale les sortie pour comparer les prix en USD

In [92]:
#Une fois les données obtenues, on "descale" les prix pour trouver les vraies valeurs
y_test_descaled = np.squeeze(scaler_dict['close'].inverse_transform(y_test))
y_pred_descaled = np.squeeze(scaler_dict['close'].inverse_transform(y_pred))
c_test_descaled = np.squeeze(scaler_dict['close'].inverse_transform(c_test))

In [74]:
buy_profit  = lambda current, true_futur, pred_futur: true_futur - current if pred_futur > current else 0
sell_profit = lambda current, true_futur, pred_futur: current - true_futur if pred_futur < current else 0
#la gestion de portefeuille n'est pas l'objet de ce projet : on achete une unité d'action (ou de bitcoin ici), et on revend également une unité, independament de ce que cela représente comme part de notre capital.
#la stratégie d'investissement est basique : si on s'attend à ce que les prix augmentent, on achète (dans l'optique de revendre après)
#si on s'attend à ce que les prix diminuent, on vend (et on achète après)

# Calcul des profits

In [93]:
buy_prft = list(map(buy_profit, 
                                c_test_descaled, 
                                y_test_descaled, 
                                y_pred_descaled)
                                
                                )

sell_prft = list(map(sell_profit, 
                                c_test_descaled, 
                                y_test_descaled, 
                                y_pred_descaled)
                                )

In [94]:
buy_prft

[124.829833984375,
 -11.445999145507812,
 45.646972656249886,
 6.464996337890625,
 19.222000122070312,
 0,
 0,
 0,
 -22.56201171875,
 14.152008056640625,
 25.41064453125,
 -23.0369873046875,
 0,
 -9.884994506835938,
 1174.0400390625,
 3467.1328125,
 9.188003540039062,
 913.2099609375,
 16.409011840820312,
 5247.71484375,
 13.33447265625,
 2776.533203125,
 102.87890625,
 0,
 0,
 610.384765625,
 122.85400390625,
 0,
 0,
 0,
 0,
 -24.9739990234375,
 91.18801879882812,
 -56.486328125,
 337.4580078125,
 33.98799133300781,
 2811.65234375,
 -29.1199951171875,
 0,
 4.52301025390625,
 -255.97509765625,
 0,
 11.871002197265625,
 852.60791015625,
 0,
 0,
 320.5166015625,
 10552.068359375,
 0,
 0,
 21.10198974609375,
 3039.326171874998,
 31.891006469726562,
 0,
 22.836975097656136,
 -73.7998046875,
 0,
 0,
 1169.19921875,
 0,
 1585.4443359375,
 0,
 0,
 206.419921875,
 -2.22601318359375,
 110.85003662109375,
 0,
 244.92333984375,
 411.280029296875,
 -184.119140625,
 0,
 0.1820068359375,
 4829.625,


In [95]:
sell_prft

[0,
 0,
 0,
 0,
 0,
 43.0699462890625,
 61.1201171875,
 989.87939453125,
 0,
 0,
 0,
 0,
 83.83984375,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 975.81005859375,
 15077.05859375,
 0,
 0,
 289.9140625,
 285.759765625,
 2184.490234375,
 514.6125488281255,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 315.2294921875,
 0,
 0,
 664.6123046875,
 0,
 0,
 245.5166015625,
 983.248046875,
 0,
 0,
 115.6552734375,
 342.14013671875,
 0,
 0,
 0,
 463.1098632812509,
 0,
 0,
 61.2041015625,
 -28.1640625,
 0,
 109.38037109375,
 0,
 547.7197265625,
 7.81201171875,
 0,
 0,
 0,
 1464.9501953125,
 0,
 0,
 0,
 -2.1594238281245453,
 0,
 0,
 0,
 0,
 -25.469970703125,
 5358.73046875,
 0,
 0,
 -97.5546875,
 0,
 1033.8779296875,
 9.47021484375,
 397.435546875,
 420.130859375,
 0,
 0,
 12.1710205078125,
 0,
 0,
 1011.5869140625,
 0,
 0,
 0,
 0,
 7161.859375,
 0,
 0,
 1599.19921875,
 -631.785400390625,
 0,
 0,
 -156.310546875,
 6.31500244140625,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 -107.87646484375045,
 0,
 0,
 0,
 0,
 -47.4697265

In [96]:
# on calcule l'accuracy en regardant la proportion de profit positif
accuracy = (len([e for e in sell_prft if e > 0]) + len([e for e in buy_prft if e > 0])) / len(y_pred)

In [97]:
tot_buy_profit  = sum(buy_prft)
tot_sell_profit = sum(sell_prft)
tot_profit = tot_buy_profit + tot_sell_profit
profit_per_trade = tot_profit / len(y_pred)

In [98]:
tot_profit, profit_per_trade

(408729.9564361572, 882.7860830154584)

In [81]:
accuracy

0.8099352051835853