## Multivariate LSTM

# Import packages

In [217]:
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import pandas as pd

from pandas_datareader import data as wb
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from sklearn.metrics import mean_squared_error
from imblearn.under_sampling import RandomUnderSampler

%matplotlib inline

# Set figure parameters
plt.rcParams['figure.figsize'] = [10, 7.5]


# Input data

## FX

In [237]:
yf.pdr_override()

data = wb.get_data_yahoo('VND=x', start = '2003-01-01', end = '2024-12-31', interval = '1mo')

# Obtain latest vnd to usd rate
df_conv = wb.get_data_yahoo('VND=x', start = '2003-01-01', end = '2024-12-31')
data1 = 1/df_conv
vnd_to_usd = data1.iloc[-1,0]

# Clean up statistics 
df = pd.DataFrame(data['Adj Close'])
df.rename(columns = {'Adj Close':'USDVND'}, inplace = True)

# change the datetime format
usdvnd_up = pd.DataFrame(df['USDVND'].resample('YS').mean())
usdvnd_up.index = pd.to_datetime(usdvnd_up.index, format = '%Y')

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


## IMF data

In [238]:
def imf_data(endpoint):
    url = f"https://www.imf.org/external/datamapper/api/v1/{endpoint}/VNM"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        values = data['values'][endpoint]['VNM']
        df = pd.DataFrame(list(values.items()), columns=['Year', endpoint.replace('/', ' ')])
        df['Year'] = pd.to_numeric(df['Year'])
        df = df[(df['Year'] >= 2003) & (df['Year'] <= 2024)]
        df.set_index('Year', inplace=True)
        return df

# Define endpoints for each indicator
endpoints = {'NGDPDPC': 'GDP per Capita (Current Prices)',
             'BCA': 'Current Account Balance',
             'PCPIPCH': 'Average CPI'}

# Fetch data for each endpoint and concatenate DataFrames
dfs = {title: imf_data(endpoint) for endpoint, title in endpoints.items()}

In [239]:
# Rename columns
imfdata = pd.concat(dfs.values(), axis=1)
imfdata.columns = endpoints.values()
imfdata.index = pd.to_datetime(imfdata.index, format = '%Y')

# Prepare data

In [240]:
all_data = pd.merge(usdvnd_up,imfdata, left_index=True, right_index=True)
all_data

Unnamed: 0_level_0,USDVND,GDP per Capita (Current Prices),Current Account Balance,Average CPI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-01-01,15002.0,610.357,-1.931,3.3
2004-01-01,15266.333333,756.981,-1.591,7.9
2005-01-01,15795.583333,873.136,-0.56,8.4
2006-01-01,15537.0,996.255,-0.164,7.5
2007-01-01,15693.333333,1152.267,-6.992,8.3
2008-01-01,16286.583333,1446.562,-10.787,23.1
2009-01-01,17517.833333,1481.442,-6.608,6.7
2010-01-01,19017.583333,1628.013,-4.276,9.2
2011-01-01,20408.0,1949.826,0.233,18.7
2012-01-01,20665.166667,2197.619,9.267,9.1


In [241]:
dataset = all_data.values
dataset = dataset.astype('float32')

In [247]:
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

# separate into train and test data
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]

In [None]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back= 3):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), :]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)

look_back = 3
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [249]:
# reshape input to be [samples, time steps, features]
n = 4 # number of columns
trainX = np.reshape(trainX, (trainX.shape[0], look_back, n))
testX = np.reshape(testX, (testX.shape[0], look_back, n))

# Build model

In [None]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(50, input_shape=(look_back, n)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=700, batch_size=50, verbose=1)

model.save('fundamental_model_eurusd_300epochs.h5')

trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

trainPredict = np.squeeze(trainPredict)
testPredict = np.squeeze(testPredict)


def inverse_transform(arr):
    extended = np.zeros((len(arr), n))
    extended[:, 0] = arr
    return scaler.inverse_transform(extended)[:, 0]


trainPredict = inverse_transform(trainPredict)
testPredict = inverse_transform(testPredict)
trainY = inverse_transform(trainY)
testY = inverse_transform(testY)

Epoch 1/700
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 0.3365
Epoch 2/700
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - loss: 0.3061
Epoch 3/700
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 0.2776
Epoch 4/700
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - loss: 0.2509
Epoch 5/700
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.2260
Epoch 6/700
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - loss: 0.2028
Epoch 7/700
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 0.1812
Epoch 8/700
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - loss: 0.1612
Epoch 9/700
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - loss: 0.1428
Epoch 10/700
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 0.1259
Epoch 11/70

In [253]:
# shift predictions up by one
testPredict = np.delete(testPredict, -1)
testY = np.delete(testY, 0)

plt.plot(testPredict, color="blue")
plt.plot(testY, color="red")
plt.show()
testScore = np.sqrt(mean_squared_error(testY, testPredict))

testScore = testScore*vnd_to_usd

print('Test Score: %.6f RMSE' % (testScore))

IndexError: index 0 is out of bounds for axis 0 with size 0