In [14]:
"""
This is a trained Recurrent Neural Network (LSTM) to predict Brent price

Guide to Keras:
    https://keras.io/getting-started/sequential-model-guide/#training

@author: valentin
"""

## Import the functions and classes we'll need
# import winsound
import os

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras import backend as K

from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error



########### Assign values to parameters

# Number of epochs to run
_epochs = 10

# Read in the data
cwd = os.getcwd()
dataLocation = "/home/valentint/EAA_Analytics/Personal/VT/"

brentPriceDf = pd.read_csv(dataLocation + "brent_forecast_data_mihaela_for_lstm.csv")
brentPriceDf.head(10)

# Remove the forecasts from the data
# inputData.BrentPrice[[inputData.Date > 2017-01-01 00:00:00]] = 0

# Format "Date" field as date
brentPriceDf[['date']] = pd.to_datetime(brentPriceDf.date)

# Convert the data frame to a Numpy array
brentPriceArr = brentPriceDf.iloc[:, 0:].values


#####################################################
#####################################################
# Select random seed
randSeed = 7896

# Provide names of input features
inputDataFrame = brentPriceDf
inputData = brentPriceArr

# Create a Numpy array from the input data
dataframe = inputData[:, 1:, ]
# xVarColumns = [1, 2, 3]                                   # Select features: BY DEFAULT, it uses all features in columns 1:END for predictors
yVarColumns = [0]                                           # select target: The target should always be in the first column
number_of_features = len(list(brentPriceDf)) - 2            # Calculate the number of features to be used in the network



#####################################################
#####################################################
# fix random seed for reproducibility
np.random.seed(randSeed)


## NO NEED FOR ARRAYS - Extract the NumPy array from the dataframe and convert the integer values to
# floating point values, which are more suitable for modeling with a neural network
# dataset = dataframe.values
# dataset = dataset.astype('float32')

# Normalize the data between 0 - 1
scaler = MinMaxScaler(feature_range = (0, 1), copy = True)
dataset = scaler.fit_transform(dataframe)

# Split the data in train and validaiton
trainSize = int(len(dataset) * 2 / 3)
train = dataset[0:trainSize, :]
validate = dataset[trainSize:len(dataset), :]

print(len(train), len(validate), len(dataset))

## Modify the data for the LSTM network - The LSTM network expects the input data (X)
# to be provided with a specific array structure in the form of: [samples, time steps, features].
trainX = train[:, 1:]
validateX = validate[:, 1:]

trainY = train[:, yVarColumns]
validateY = validate[:, yVarColumns]

dataframe_length = len(trainY)
# dataframe_dim = Need to figure out how to count the columns of the array

# reshape input to be [samples, time steps, features]
trainX = trainX.reshape(trainX.shape[0], 1, trainX.shape[1])
validateX = validateX.reshape(validateX.shape[0], 1, validateX.shape[1])


## The LSTM network expects the input data (X) to be provided with a specific
# array structure in the form of: [samples, time steps, features]
# Define the network
modelFit = Sequential()
modelFit.add(LSTM(20,
                  activation = 'sigmoid',
                  input_shape = (1, number_of_features)))
modelFit.add(Dropout(.1))
modelFit.add(Dense(1, activation = 'linear'))

# Before training the model, configure the learning process via the compile method
modelFit.compile(optimizer = 'adagrad',                              # adam, adagrad
                 loss = 'mean_squared_error',                        # poisson, mean_squared_error, binary_crossentropy
                 metrics = ['accuracy'])

print(modelFit.summary())

# Train the model
modelEstimate = modelFit.fit(trainX, trainY,
                             epochs = _epochs,
                             batch_size = 1,
                             verbose = 1,
                             validation_data = (validateX, validateY))

# make predictions
trainPredict = modelFit.predict(trainX)
validatePredict = modelFit.predict(validateX)

# print the training accuracy and validation loss at each epoch
# print the number of models of the network
print(modelEstimate.history)
print(len(modelFit.layers))


# Invert predictions
df_train = np.column_stack((trainPredict, train[:, 1:]))
trainPredict2 = scaler.inverse_transform(df_train)

df_validate = np.column_stack((validatePredict, validate[:, 1:]))
validatePredict2 = scaler.inverse_transform(df_validate)


# Plot the errors of the epochs and MSE
plt.plot(modelEstimate.history['loss'])
plt.plot(modelEstimate.history['val_loss'])
#  plt.plot(modelEstimate.history['val_acc'])
plt.title('Model Error History')
plt.ylabel('Mean Squared Error')
plt.xlabel('Epochs')
plt.legend(['Training Error', 'Validation Error'])
plt.show()
    

###################################################
# Combine the final datasest - merge the training and validation datasets and rename columns
combined_dataframe = pd.concat([pd.DataFrame(trainPredict2), pd.DataFrame(validatePredict2)])
combined_dataframe.index = range(len(combined_dataframe))

# Add columns names to the data frame with the forecasts
names_list = list(inputDataFrame)[1:]
names_list[0] = 'forecast_brent_price'

combined_dataframe.columns = names_list

actual_value_target = pd.DataFrame(dataframe[:, 0])
actual_value_target.columns = ['actual_brent_price']



# Create the dataframe and write it to a CSV file
final_forecast_file = pd.concat([actual_value_target, combined_dataframe], axis = 1)
final_forecast_file.to_csv(dataLocation + "brent_forecast_lstm.csv", sep = ',')




TypeError: float() argument must be a string or a number, not 'Timestamp'

In [11]:
brentPriceDf.head(10)

Unnamed: 0,date,recessionUS,oilPrice,ipGlobal,ipXOECD,ipOECD,rmspreadGlobal,rmspreadXOECD,rmspreadOECD,usDollar,...,oecdLiquidsDemand,xoecdLiquidsDemand,vehicleSales,oecdStocks,globalSupply,globalUnconv,opecSupply,xopecSupply,refineryOutput,globalInventory
0,2000-01-01,0,25,85,52,98,2,2,2,115,...,47971,28189,4156574,3791208,75148,1156,29066,46081,1341800,-15
1,2000-02-01,0,28,86,52,99,1,2,1,117,...,49954,28554,4458208,3767290,75932,1165,29844,46088,1269944,-2639
2,2000-03-01,0,27,86,53,99,1,2,1,117,...,48873,27977,5775554,3751942,76023,1035,29868,46155,1356073,-1102
3,2000-04-01,0,23,87,53,100,1,2,1,117,...,46842,27624,4568956,3790346,76688,1171,30736,45952,1330019,2984
4,2000-05-01,0,27,88,54,100,1,2,1,120,...,48059,27874,4988072,3814601,77134,1348,31258,45876,1343813,2137
5,2000-06-01,0,30,88,54,100,1,2,1,119,...,48300,28183,4992669,3845371,76903,1475,30827,46076,1322686,998
6,2000-07-01,0,29,88,54,101,1,2,1,119,...,47717,28357,4704606,3906943,77585,1491,31192,46393,1395705,2167
7,2000-08-01,0,30,88,55,101,0,1,0,120,...,50015,28425,4242338,3871379,78115,1473,31844,46271,1417507,72
8,2000-09-01,0,33,88,55,101,0,1,0,121,...,49245,28476,4760440,3882086,78078,1424,31853,46225,1342904,952
9,2000-10-01,0,31,88,55,101,0,1,0,123,...,48831,28179,4384189,3872714,78863,1389,32466,46397,1390521,2520
