# Gold Price Prediction
Predicts Gold Prices from [Yahoo Finance](https://query1.finance.yahoo.com/v7/finance/download/GOOG?period1=1092873600&period2=1589414400&interval=1d&events=history)

## Imports

In [1]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
plt.rcParams['figure.figsize'] = (14, 10)

In [3]:
print(tf.__version__)

2.2.0


## Constants

In [4]:
DATA_URL = "https://query1.finance.yahoo.com/v7/finance/download/GOLD?period1=476323200&period2=1590019200&interval=1d&events=history"
TRAIN_DATE_BOUNDARY = '2019-01-01'
EPOCHS = 30
BATCH_SIZE = 31
STEPS = 30

## Download data
Download data from Yahoo finance and partition to test and training

In [5]:
# data = pd.read_csv(DATA_URL, index_col="Date", parse_dates=["Date"])
data = pd.read_csv(DATA_URL)

In [6]:
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1985-02-04,0.5523,0.58137,0.5523,0.5523,0.189186,50400
1,1985-02-05,0.5523,0.61044,0.5523,0.5523,0.189186,18400
2,1985-02-06,0.58137,0.65406,0.58137,0.58137,0.199143,65600
3,1985-02-07,0.58137,0.61044,0.58137,0.58137,0.199143,40000
4,1985-02-08,0.58137,0.61044,0.58137,0.58137,0.199143,25600


In [7]:
data_training = data[data['Date'] < TRAIN_DATE_BOUNDARY]
training_data = data_training.drop(['Date', 'Adj Close', 'Volume'], axis=1)
data_test = data[data['Date'] >= TRAIN_DATE_BOUNDARY]

Scale data to improve training efficiency

In [8]:
scaler = MinMaxScaler()
training_data = scaler.fit_transform(training_data)

A function for converting data into time steps dataset

In [9]:
def data_timesteps(dataset, steps = 1):
    x_data = []
    y_data = []
    for i in range(steps, dataset.shape[0]):
        x_data.append(training_data[(i - steps): i])
        y_data.append(training_data[i, 0])
        
    return np.array(x_data), np.array(y_data)
    

In [10]:
X_train, y_train = data_timesteps(training_data, STEPS)
X_train.shape, y_train.shape

((8517, 30, 4), (8517,))

## Model Configuration

In [11]:
model = keras.models.Sequential()
model.add(keras.layers.LSTM(units=50, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.LSTM(units=60, activation='relu', return_sequences=True))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.LSTM(units=80, activation='relu', return_sequences=True))
model.add(keras.layers.Dropout(0.4))
model.add(keras.layers.LSTM(units=120, activation='relu', return_sequences=True))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(1))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 30, 50)            11000     
_________________________________________________________________
dropout (Dropout)            (None, 30, 50)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 30, 60)            26640     
_________________________________________________________________
dropout_1 (Dropout)          (None, 30, 60)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 30, 80)            45120     
_________________________________________________________________
dropout_2 (Dropout)          (None, 30, 80)            0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 30, 120)           9

Compile and Train model

In [12]:
model.compile(optimizer='adam', loss='mean_squared_error')

In [13]:
y_train.shape

(8517,)

In [None]:
model_results = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_split=0.1, shuffle=False)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30

Plot for training and validation

In [None]:
plt.plot(model_results.history['loss'], label='Train', color='green')
plt.plot(model_results.history['val_loss'], label='Validation', color='red')
plt.show()

## Test Model

In [None]:
past_60_days = data_training.tail(60)
df = past_60_days.append(data_test, ignore_index=True)
df = df.drop(['Date', 'Adj Close', 'Volume'], axis=1)
inputs = scaler.transform(df)
    
X_test, y_test = data_timesteps(inputs, 60)
X_test.shape, y_test.shape

Predict test data

In [None]:
y_predict = model.predict(X_test)
y_predict = np.argmax(y_predict, axis=1)
y_predict.shape

Scale data back to original form

In [None]:
scale = 1/scaler.scale_[0]
y_predict = y_predict * scale
y_test = y_test * scale
# y_predict = scaler.inverse_transform(y_predict)
# y_test = scaler.inverse_transform(y_test)

## Visualize Test Prediction

In [None]:
plt.plot(y_test, color="green", label="Real Price")
plt.plot(y_predict, color="blue", label="Predicted Price")
plt.title("Gold Price Prediction")
plt.xlabel("Time")
plt.ylabel("Price")
plt.legend()
plt.show()

## Credits
1. [KGP Talkie](https://youtu.be/arydWPLDnEc) LSTM Tutorial