# __Libraries__

In [251]:
import pandas as pd
import numpy as np
import math

import matplotlib.pyplot as plt
import plotly.express as px

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score 
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
from sklearn.preprocessing import MinMaxScaler

# __Loading data__

In [252]:
# https://www.investing.com/currencies/usd-cop-historical-data
df = pd.read_csv("/content/USD_COP1.csv")

# __Clearing data__

In [253]:
df = df.sort_values(by='Date')
df = df.set_index('Date')
df.head()

Unnamed: 0_level_0,Price,Open,High,Low,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-01-02,2776.65,2780.55,2781.05,2776.55,-0.0012
2004-01-05,2770.25,2775.65,2783.45,2768.25,-0.0023
2004-01-06,2758.0,2771.2,2772.6,2757.7,-0.0044
2004-01-07,2763.0,2751.0,2764.6,2750.2,0.0018
2004-01-08,2751.5,2763.2,2764.2,2740.9,-0.0042


# __Data exploration__

In [254]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4905 entries, 2004-01-02 to 2022-10-21
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Price   4905 non-null   float64
 1   Open    4905 non-null   float64
 2   High    4905 non-null   float64
 3   Low     4905 non-null   float64
 4   Change  4905 non-null   float64
dtypes: float64(5)
memory usage: 229.9+ KB


In [255]:
df.describe()

Unnamed: 0,Price,Open,High,Low,Change
count,4905.0,4905.0,4905.0,4905.0,4905.0
mean,2588.442118,2588.119307,2601.888255,2574.575609,0.000147
std,705.26086,704.663794,710.653533,699.443379,0.007855
min,1656.5,1656.75,1670.7,1635.3,-0.0615
25%,1935.2,1935.49,1943.5,1927.65,-0.0036
50%,2378.2,2378.3,2390.5,2366.75,0.0
75%,3056.57,3055.81,3074.75,3036.37,0.0037
max,4906.35,4904.0,4960.13,4877.55,0.0623


In [256]:
fig = px.line(df[['Price', 'Open', 'High', 'Low']], 
              title = "U.S. DOLLAR / COLOMBIAN PESO",
              labels={
                     "Date": "Time",
                     "value": "Value",
                     "variable": ""
                 },
              template="plotly_white",
              color_discrete_sequence=px.colors.qualitative.G10,
              width=900, height=500,
              )
fig.update_layout(margin=dict(l=100, b=80, r=120, t=100))
fig.show()

In [257]:
fig = px.line(df[['Change']], 
              title = "U.S. DOLLAR / COLOMBIAN PESO",
              labels={
                     "Date": "Time",
                     "value": "Value",
                     "variable": ""
                 },
              template="plotly_white",
              color_discrete_sequence=px.colors.qualitative.G10,
              width=900, height=500,
              )
fig.update_layout(margin=dict(l=100, b=80, r=120, t=100))
fig.show()

In [338]:
df_price = df['Price']

In [339]:
fig = px.line(df_price, 
              title = "U.S. DOLLAR / COLOMBIAN PESO",
              labels={
                     "Date": "Time",
                     "value": "Value",
                     "variable": ""
                 },
              template="plotly_white",
              color_discrete_sequence=px.colors.qualitative.G10,
              width=900, height=500,
              )
fig.update_layout(margin=dict(l=100, b=80, r=120, t=100))
fig.show()

# __Dataset__

## Split DataFrame


In [340]:
training_size = int(len(df_price)*0.80)
test_size = len(df_price)-training_size
df_train_price = df_price.reset_index().iloc[:training_size,:]
df_test_price = df_price.reset_index().iloc[training_size:len(df_price),]

## Split np.array

In [341]:
scaler=MinMaxScaler(feature_range=(0,1))
df_price=scaler.fit_transform(np.array(df_price).reshape(-1,1))
print(df_price.shape)

(4905, 1)


In [342]:
training_size=int(len(df_price)*0.80)
test_size=len(df_price)-training_size
train_data,test_data=df_price[0:training_size,:],df_price[training_size:len(df_price),:1]
print("train_data: ", train_data.shape)
print("test_data: ", test_data.shape)

train_data:  (3924, 1)
test_data:  (981, 1)


## Create DataSet

In [343]:
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), 0]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)

In [344]:
time_step = 6
X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test = create_dataset(test_data, time_step)

print("X_train: ", X_train.shape)
print("y_train: ", y_train.shape)
print("X_test: ", X_test.shape)
print("y_test", y_test.shape)

X_train:  (3917, 6)
y_train:  (3917,)
X_test:  (974, 6)
y_test (974,)


In [345]:
X_train =X_train.reshape(X_train.shape[0],X_train.shape[1] , 1)
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1] , 1)

print("X_train: ", X_train.shape)
print("X_test: ", X_test.shape)

X_train:  (3917, 6, 1)
X_test:  (974, 6, 1)


# __Model and Training__

In [346]:
model=Sequential()
model.add(LSTM(1000,input_shape=(None,1),activation="relu"))
model.add(Dense(500))
model.add(Dense(1))
model.compile(loss="mean_squared_error",optimizer="adam")

In [347]:
history = model.fit(X_train,y_train,validation_data=(X_test,y_test),epochs=200,batch_size=32,verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

# __Evaluation__

In [348]:
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(loss))

In [349]:
fig = px.line(pd.DataFrame({'Loss':loss, 'Val Loss':val_loss}, index=epochs),
              title = "Training and validation loss",
              labels={
                     "value": "",
                     "index": "Epoch",
                     "variable": ""
                 },
              template="plotly_white",
              color_discrete_sequence=px.colors.qualitative.G10,
              width=900, height=500,
              )
fig.update_layout(margin=dict(l=100, b=80, r=120, t=100))
fig.show()

In [350]:
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)
train_predict.shape, test_predict.shape



((3917, 1), (974, 1))

In [351]:
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)
original_ytrain = scaler.inverse_transform(y_train.reshape(-1,1)) 
original_ytest = scaler.inverse_transform(y_test.reshape(-1,1)) 

In [352]:
print("Train data RMSE: ", math.sqrt(mean_squared_error(original_ytrain,train_predict)))
print("Train data MSE: ", mean_squared_error(original_ytrain,train_predict))
print("Train data MAE: ", mean_absolute_error(original_ytrain,train_predict))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(mean_squared_error(original_ytest,test_predict)))
print("Test data MSE: ", mean_squared_error(original_ytest,test_predict))
print("Test data MAE: ", mean_absolute_error(original_ytest,test_predict))

Train data RMSE:  20.1350745908849
Train data MSE:  405.4212287804988
Train data MAE:  13.829699660284895
-------------------------------------------------------------------------------------
Test data RMSE:  38.25325507163687
Test data MSE:  1463.311523575712
Test data MAE:  26.654917102476908


In [353]:
print("Train data explained variance regression score:", 
      explained_variance_score(original_ytrain, train_predict))
print("Test data explained variance regression score:", 
      explained_variance_score(original_ytest, test_predict))

Train data explained variance regression score: 0.9981041787639715
Test data explained variance regression score: 0.9881300221327449


In [354]:
print("Train data R2 score:", r2_score(original_ytrain, train_predict))
print("Test data R2 score:", r2_score(original_ytest, test_predict))

Train data R2 score: 0.9980480989947772
Test data R2 score: 0.9876046320660044


In [355]:
print("Train data MGD: ", mean_gamma_deviance(original_ytrain, train_predict))
print("Test data MGD: ", mean_gamma_deviance(original_ytest, test_predict))
print("----------------------------------------------------------------------")
print("Train data MPD: ", mean_poisson_deviance(original_ytrain, train_predict))
print("Test data MPD: ", mean_poisson_deviance(original_ytest, test_predict))

Train data MGD:  6.527778887309377e-05
Test data MGD:  9.505519616732326e-05
----------------------------------------------------------------------
Train data MPD:  0.15968993370474785
Test data MPD:  0.37108627079577367


In [356]:
test_predict.shape

(974, 1)

In [315]:
df_test_price_new = df_test_price.iloc[5:1961,].set_index('Date').copy()
df_test_price_new['Forecast'] = test_predict

In [357]:
fig = px.line(df_test_price_new, 
              title = "Forecast: U.S. DOLLAR / COLOMBIAN PESO",
              labels={
                     "Date": "Time",
                     "value": "Value",
                     "variable": ""
                 },
              template="plotly_white",
              color_discrete_sequence=px.colors.qualitative.G10,
              width=900, height=500,
              )
fig.update_layout(margin=dict(l=100, b=80, r=120, t=100))
fig.show()