In [2]:
import os
import warnings
import json
from datetime import datetime 
import pandas as pd
import numpy as np
from math import sqrt
from numpy.random import seed
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
from keras.models import load_model
from sklearn.metrics import mean_squared_error
import mlflow
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
plt.rcParams['figure.facecolor'] = 'white'
warnings.simplefilter('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

2022-11-13 13:04:11.789614: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-13 13:04:11.882359: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-11-13 13:04:11.882374: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-11-13 13:04:11.900809: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-11-13 13:04:12.397839: W tensorflow/stream_executor/platform/de

In [13]:
class DataFormatting():
      
    def __init__(self):
        self.df_data = None
        self.df_datetime = None
       
    def dataset(df):

        # converting time colum from object type to datetime format
        df['date'] = pd.to_datetime(df['date'],dayfirst = True, format = '%d/%m/%Y')
        df = df.dropna()
        # splitting the dataframe in to X and y 
        df_data = df[['open','high','low','close']] #'high','low',,'CRUDE_OIL_CLOSE','US500_CLOSE','open','EXCHANGE_RATE',
        df_datetime =df[['date']]

        return df_data, df_datetime


# Data transformation (changing data shape to model requirement)

def data_transformation(data, lags = 5, n_fut = 1):
    
    """ this function transforms dataframe to required input shape for the model.
    It required 2 input arguments:
    1. data: this will be the pandas dataframe
    2. lags: how many previous price points to be used to predict the next future value, in
    this case the default is set to 5 for 'EURUSD' commodity"""

    # initialize lists to store the dataset
    X_data = []
    y_data = []
    
    for i in range(lags, len(data)- n_fut +1):
        X_data.append(data[i-lags: i, 0: data.shape[1]])
        y_data.append(data[i+ n_fut-1:i+n_fut,3]) # extracts close price with specific lag as price to be predicted.

    # convert the list to numpy array

    X_data = np.array(X_data)
    y_data = np.array(y_data)

    return X_data, y_data

In [14]:
lag = 5
n_fut = 1

In [19]:
import joblib
std_scaler = joblib.load('EU_scaler_std.bin')
data = pd.read_csv('../data/forecast_EURUSD.csv',index_col=[0]) 
# initializing DataFormatting class
data_init = DataFormatting()
df_data, df_datetime = DataFormatting.dataset(data)
print('\n')
print('Displaying top 5 rows of the dataset:')
print('\n')
print(df_data.head())
print(df_data.shape)
print(df_data.columns)
df_colnames = list(df_data.columns)
# normalize train, val and test dataset

# initialize StandartScaler()
#scaler = StandardScaler()
#std_scaler = std_scaler.fit(df_data)
data_fit_transformed = std_scaler.transform(df_data)


print('\n')
print('Displaying top 5 rows of all the scaled dataset:')
print('\n')
#print('The train dateset:','\n''\n',data_fit_transformed[0:5],'\n''\n', 'The validation dataset:','\n''\n',val_transformed[0:5],'\n''\n','The test dataset:','\n''\n',test_transformed[0:5])
print('The train dateset:','\n''\n',data_fit_transformed)

# changing shape of the data to match the model requirement!

X_data, y_data = data_transformation(data_fit_transformed, lags = lag, n_fut = n_fut)
print('\n')
print('Displaying the shape of the dataset required by the model:')
print('\n')
print(f' Input shape X:',X_data.shape, f'Input shape y:',y_data.shape)
print('\n')
print(X_data)
#print(y_data[0:5])
print(y_data)






Displaying top 5 rows of the dataset:


         open     high      low    close
9994  0.98734  0.99768  0.98486  0.99678
9995  0.99610  1.00886  0.99437  1.00833
9996  1.00783  1.00938  0.99576  0.99649
9997  0.99642  0.99982  0.99268  0.99658
9998  0.99465  0.99658  0.98729  0.98825
(6, 4)
Index(['open', 'high', 'low', 'close'], dtype='object')


Displaying top 5 rows of all the scaled dataset:


The train dateset: 

 [[-0.96239694 -0.92667316 -0.95543531 -0.91028885]
 [-0.91363094 -0.86484    -0.90213312 -0.84598282]
 [-0.84833127 -0.86196404 -0.89434236 -0.91190347]
 [-0.91184954 -0.91483747 -0.91160532 -0.91140238]
 [-0.92170294 -0.93275692 -0.9418155  -0.95778067]
 [-0.96295363 -0.94791102 -0.94231994 -0.93300475]]


Displaying the shape of the dataset required by the model:


 Input shape X: (1, 5, 4) Input shape y: (1, 1)


[[[-0.96239694 -0.92667316 -0.95543531 -0.91028885]
  [-0.91363094 -0.86484    -0.90213312 -0.84598282]
  [-0.84833127 -0.86196404 -0.89434236 -0.91190347

In [16]:
path_model = "../Model_Outputs/2022_11_12/EURUSD/model_Bilstm/model/lstm_192.h5"
model_eval = load_model(path_model, compile=False)
forecast = model_eval.predict(X_data)
forecast_copies = np.repeat(forecast, df_data.shape[1], axis = -1 )
y_pred_fut = std_scaler.inverse_transform(forecast_copies)[:,0]



In [17]:
print(y_pred_fut)

[1.0015833]
