<a href="https://colab.research.google.com/github/achmadbauravindah/Prediction-LSTM-GoldPrice/blob/master/Prediction_LSTM_GoldPrice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Nama: Achmad Bauravindah

Email: baurav99@gmail.com

Asal: Pasuruan, Jawa Timur

### Connect Colab to Github

In [None]:
# Clone the project repository into the Colab runtime and set the git identity
# used by the commit/push cell at the end of the notebook.
# NOTE(review): `[token]` is a placeholder for a personal access token embedded
# in the clone URL. Never commit a real token with the notebook; prefer reading
# it at runtime (environment variable or prompt) — TODO confirm the workflow.
! git clone  https://achmadbauravindah:[token]@github.com/achmadbauravindah/Prediction-LSTM-GoldPrice.git
! git config --global user.email "baurav99@gmail.com"
! git config --global user.name "achmadbauravindah"
# Work from app/files so that files saved with relative paths land in that folder.
%cd '/content/Prediction-LSTM-GoldPrice/app/files'

Cloning into 'Prediction-LSTM-GoldPrice'...
remote: Enumerating objects: 119, done.[K
remote: Counting objects: 100% (119/119), done.[K
remote: Compressing objects: 100% (89/89), done.[K
remote: Total 119 (delta 51), reused 97 (delta 29), pack-reused 0[K
Receiving objects: 100% (119/119), 6.32 MiB | 20.73 MiB/s, done.
Resolving deltas: 100% (51/51), done.
/content/Prediction-LSTM-GoldPrice/app/files


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LSTM
from keras.callbacks import Callback, ModelCheckpoint, CSVLogger # Callback
from sklearn.preprocessing import MinMaxScaler
from keras.models import load_model

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
import plotly.express as px
import pickle

### Get Data from directory

In [None]:
# Load the historical gold price spreadsheet from the cloned repository
data_path = '/content/Prediction-LSTM-GoldPrice/historical-gold-data.xlsx'
data = pd.read_excel(data_path)

### Show Data
- The `Price` column is the series used to predict the future gold price
- The `Price` column contains the price in US dollars ($)
- The conversion cell further down treats that price as quoted per ounce (it divides by 28.3495 grams)
- So this column is converted to Rupiah and divided down to the price of 1 gram of gold

In [None]:
# Preview the first rows to inspect the raw columns
data.head()

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,12/31/1984,311.7,311.7,311.7,311.7,,-0.10%
1,12/28/1984,312.0,312.0,312.0,312.0,,0.91%
2,12/27/1984,309.2,310.5,312.0,307.3,20.43K,-1.59%
3,12/26/1984,314.2,313.8,315.6,313.5,10.94K,0.32%
4,12/21/1984,313.2,313.0,315.0,310.6,19.46K,1.16%


### Show count data

In [None]:
# Number of non-null values in each column
data.count()

Date        12272
Price       12272
Open        12272
High        12272
Low         12272
Vol.         9591
Change %    12272
dtype: int64

### Change Date Column to Date Index in Pandas

In [None]:
# Parse the Date strings and use them as the DataFrame index.
# The source dates are month-first (e.g. "12/31/1984"), so the format is stated
# explicitly. With dayfirst=True pandas swaps day and month on every ambiguous
# date (such as "01/02/1985") and only warns on the ones it cannot swap.
data["Date"] = pd.to_datetime(data["Date"], format="%m/%d/%Y")
data.set_index("Date", inplace=True)

  data["Date"] = pd.to_datetime(data.Date, dayfirst=True)


### Add Missing Calendar Days as NaN Rows
(The source data does not contain a row for every calendar date, so the index is expanded to a continuous daily range and the missing days are filled with NaN.)

In [None]:
# Reindex onto a continuous daily calendar; dates with no row in the source become all-NaN rows
data_new = data.reindex(pd.date_range('1985-01-01', '2023-05-18'))

### Create price_data Dataframe from Price Column

In [None]:
# Keep only the Price column as a numeric series; values that cannot be parsed become NaN
price_data = pd.to_numeric(data_new['Price'], errors='coerce')

In [None]:
# Inspect the start of the daily series (days that were missing from the source show as NaN)
price_data.head()

1985-01-01      NaN
1985-01-02    306.5
1985-01-03    293.7
1985-01-04    322.7
1985-01-05    317.3
Freq: D, Name: Price, dtype: float64

### Count NaN Price Values

In [None]:
# Number of days that have no price yet
print("NaN: ", price_data.isna().sum())

NaN:  4284


### Impute Missing Prices with the Previous Day's Value (the First Row Uses the Next Value)

In [None]:
# Fill every gap with the most recent earlier price (forward fill), then
# back-fill whatever is still missing at the very start of the series, which has
# no earlier value to copy.
# `.ffill()` / `.bfill()` replace the deprecated `fillna(method=...)` call and the
# integer-key access (`price_data[0]`), which is ambiguous on a DatetimeIndex.
price_data = price_data.ffill().bfill()
print("NaN: ", price_data.isna().sum())

NaN:  0


### Plot Dataset

In [None]:
def showPlotExpress(DataFrame, x_axes, y_axes, title='N Last Days Data'):
  """Show an interactive Plotly Express line chart with a range slider.

  Args:
    DataFrame: Data passed to `px.line` as the chart's data frame.
    x_axes: Values (or column reference) used for the x axis.
    y_axes: Values (or column reference) used for the y axis.
    title: Chart title. Defaults to the previously hard-coded
      'N Last Days Data' so existing calls render exactly as before.

  Returns:
    None. The figure is displayed with `fig.show()`.
  """
  # Quick-zoom buttons shown above the chart
  range_buttons = [
      dict(count=1, label="1m", step="month", stepmode="backward"),
      dict(count=6, label="6m", step="month", stepmode="backward"),
      dict(count=1, label="YTD", step="year", stepmode="todate"),
      dict(count=1, label="1y", step="year", stepmode="backward"),
      dict(step="all"),
  ]

  fig = px.line(DataFrame, x=x_axes, y=y_axes, title=title)
  fig.update_xaxes(
      rangeslider_visible=True,
      rangeselector=dict(buttons=range_buttons),
  )

  fig.show()

In [None]:
# Create an interactive plot with Plotly Express

# Wrap the series in a DataFrame so its date index and Price column can be passed to the plot helper
price_data_df = pd.DataFrame(price_data)
x_axes = price_data_df.index
y_axes = price_data_df['Price']
showPlotExpress(price_data_df, x_axes, y_axes)

### Change Data to Rupiah and 1gr Gold Price

In [None]:
# Most recent prices, still in the source currency, before conversion
price_data.tail()

2023-05-14    2024.80
2023-05-15    2022.70
2023-05-16    1993.00
2023-05-17    1984.90
2023-05-18    1959.75
Freq: D, Name: Price, dtype: float64

In [None]:
# Convert the quoted price to Rupiah per gram: divide by the grams in one ounce,
# then multiply by a fixed exchange rate.
# NOTE(review): 28.3495 g is the avoirdupois ounce; gold is normally quoted per
# troy ounce (31.1035 g) — confirm which unit the source data uses.
# NOTE(review): 14687 is presumably a fixed USD->IDR rate — confirm its date/source.
GRAMS_PER_OUNCE = 28.3495
RUPIAH_PER_USD = 14687

price_per_gram = pd.to_numeric(price_data) / GRAMS_PER_OUNCE
price_data_rupiah = price_per_gram * RUPIAH_PER_USD
pd.DataFrame(price_data_rupiah).tail()

Unnamed: 0,Price
2023-05-14,1048986.0
2023-05-15,1047898.0
2023-05-16,1032512.0
2023-05-17,1028315.0
2023-05-18,1015286.0


### Skala data

In [None]:
# Data scale: spread between the highest and the lowest Rupiah price
skala_data = price_data_rupiah.max()-price_data_rupiah.min()
print('Skala Data: ', skala_data)

Skala Data:  940658.0680435281


### Normalization

In [None]:
# Scale the Rupiah prices into [0, 1]. The series is reshaped to a single column
# because the scaler works on 2-D input.
# NOTE(review): the scaler is fitted on the full series before the
# train/validation split, so the validation min/max leak into the training
# normalization — consider fitting on the training part only.
scaler = MinMaxScaler(feature_range=(0,1))
price_data_rupiah_arr = np.array(price_data_rupiah)
price_data_rupiah_norm = scaler.fit_transform(price_data_rupiah_arr.reshape(-1,1))

### Split data (train: 90%, val: 10%)

In [None]:
# First 90% of the rows (rounded down) go to training; the remainder is validation
train_size = int(len(price_data_rupiah_norm) * 0.90)
val_size = len(price_data_rupiah_norm) - train_size

In [None]:
# Display the number of rows in each split
train_size, val_size

(12615, 1402)

In [None]:
# Chronological split: earliest rows for training, latest rows for validation.
# The normalized array has a single column, so plain row slices are sufficient.
train_data = price_data_rupiah_norm[:train_size]
val_data = price_data_rupiah_norm[train_size:]

### Create label from window/timestep (500 timestep)

In [None]:
def createWindowDataset(dataset, time_step = 1):
  """Turn a single-column series into sliding windows and next-step targets.

  Args:
    dataset: 2-D array-like of shape (n_rows, n_cols); only column 0 is read.
    time_step: Number of consecutive values in each input window.

  Returns:
    Tuple `(X, y)` of NumPy arrays. `X[i]` holds `dataset[i:i+time_step, 0]` and
    `y[i]` holds the value that immediately follows it, `dataset[i+time_step, 0]`.
    `X` has shape (n_rows - time_step, time_step) and `y` has shape
    (n_rows - time_step,). Both are empty (with those shapes) when the series is
    too short to form a single window.
  """
  dataset = np.asarray(dataset)
  # Every start index whose target row still exists: i + time_step <= n_rows - 1.
  # (The previous bound, n_rows - time_step - 1, silently dropped the last window.)
  n_windows = len(dataset) - time_step
  if n_windows <= 0:
    # Keep the 2-D shape so callers can still read X.shape[1]
    return np.empty((0, time_step)), np.empty((0,))

  X = [dataset[i:(i + time_step), 0] for i in range(n_windows)] # run of features preceding the target
  y = [dataset[i + time_step, 0] for i in range(n_windows)] # next value of the time series
  return np.array(X), np.array(y)

In [None]:
# Build 500-step windows and their next-step targets for both splits
X_train, y_train = createWindowDataset(train_data, 500)
X_val, y_val = createWindowDataset(val_data, 500)

# Report the shape of every array; each label now matches the array it prints
print("Shape X_train: ", X_train.shape)
print("Shape X_val: ", X_val.shape)
print("Shape y_train: ", y_train.shape)
print("Shape y_val: ", y_val.shape)


Shape X_train:  (12114, 500)
Shape X_val:  (12114, 500)
Shape y_val:  (12114,)
Shape y_val:  (12114,)


### Reshape data for Training Model LSTM 

In [None]:
# Add a trailing feature axis so the inputs are (samples, time steps, 1), the 3-D layout the LSTM layers consume
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], 1)

# Report the shape of every array; each label now matches the array it prints
print("Shape X_train: ", X_train.shape)
print("Shape X_val: ", X_val.shape)
print("Shape y_train: ", y_train.shape)
print("Shape y_val: ", y_val.shape)

Shape X_train:  (12114, 500, 1)
Shape X_val:  (12114, 500, 1)
Shape y_val:  (12114,)
Shape y_val:  (12114,)


### Model Building

In [None]:
# Two stacked LSTM layers followed by a single-unit regression output
model = tf.keras.models.Sequential()
model.add(LSTM(64, return_sequences=True))  # pass the whole sequence on to the second LSTM
model.add(LSTM(64))
model.add(Dense(1))

# Huber loss for training, MAE reported as the metric
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.Huber(),
    metrics=["mae"],
)

### Callback Setting

In [None]:
# Create Callback

# 1 Callback that saves the weights at the end of an epoch whenever val_mae improves (lower is better).
# The epoch number and val_mae value are written into the file name.
# NOTE(review): this callback is defined here but is not part of `callback_list` — confirm whether that is intended.
checkpoint_filepath = '/content/weights-improvement-{epoch:02d}-{val_mae:.2f}.hdf5'
checkpointCallback = ModelCheckpoint(filepath=checkpoint_filepath,
                                            save_weights_only=True,
                                            monitor='val_mae',
                                            mode='min',
                                            save_best_only=True)

# 2 Callback that stops training once the training and validation MAE are both small enough
class stopTrainingCallback(Callback):
  """Stop training when both `mae` and `val_mae` reach a threshold.

  Args:
    threshold: Largest MAE, measured on the normalized targets, that counts as
      good enough. Defaults to 0.002, the value that was previously hard-coded.
  """

  def __init__(self, threshold=0.002):
    super().__init__()
    self.threshold = threshold

  def on_epoch_end(self, epoch, logs=None):
    """Check the metrics of the finished epoch and request a stop if both qualify.

    Args:
      epoch: Index of the epoch that just ended (unused).
      logs: Metric dictionary supplied by Keras; may be None or lack a metric.
    """
    logs = logs or {}
    mae = logs.get('mae')
    val_mae = logs.get('val_mae')
    # A metric can be absent (for example when no validation data is given);
    # comparing None with a float would raise TypeError, so skip the check.
    if mae is None or val_mae is None:
      return
    # The data scale (45) is deliberately not used as the threshold, because
    # training would then stop after only a few epochs.
    if mae <= self.threshold and val_mae <= self.threshold:
      print(f'\n Yeyy mae mencapai nilai kurang dari {self.threshold} 🤩')
      self.model.stop_training = True

# 3 Callback that writes the per-epoch training and validation metrics to a ';'-separated CSV file.
# append=False means an existing log file is overwritten.
log_csv = CSVLogger('prediction_goldprice_result.csv', separator=';', append=False)

In [None]:
# Callbacks passed to model.fit: early stop on low MAE plus CSV logging
callback_list = [stopTrainingCallback(), log_csv]

### Model Training

In [None]:
# Train for up to 100 epochs, evaluating on the validation windows after each epoch.
# `history` keeps the per-epoch metrics used by the plots in the next cell.
history = model.fit(X_train, y_train, validation_data = (X_val, y_val), epochs=100, batch_size=256, callbacks=callback_list)

Epoch 1/100

KeyboardInterrupt: ignored

In [None]:
# Draw one figure per tracked quantity (MAE first, then loss), each comparing
# the training curve with the validation curve across epochs.
for metric in ('mae', 'loss'):
  plt.plot(history.history[metric])
  plt.plot(history.history['val_' + metric])
  plt.title('model ' + metric)
  plt.ylabel(metric)
  plt.xlabel('epoch')
  plt.legend(['train', 'val'], loc='upper left')
  plt.show()

### Get Last Days Price Data (1 Row, 500 Column (Windowset))

In [None]:
# Take one 500-step window as the seed for the forecast.
# NOTE(review): index 500 selects a window from inside the training set, not the
# most recent window of the series — confirm this is the intended seed.
last_data_input = X_train[500]
last_data_input = last_data_input.reshape(1,-1) # Flatten the (500, 1) window into one row, shape (1, 500), for model.predict
print("Shape of last_data_input: ", last_data_input.shape)

### Predict after n-days 

In [None]:
def predictAfterNDays(n_days, X_before_days, predictor=None):
  """Forecast `n_days` steps ahead by feeding each prediction back into the window.

  Args:
    n_days: Number of consecutive steps to predict.
    X_before_days: The most recent window of normalized values. Any shape is
      accepted (for example (1, T) or (T, 1)); it is flattened to T values.
    predictor: Object with a Keras-style `predict(x, verbose=0)` method.
      Defaults to the notebook-level `model`.

  Returns:
    NumPy array of the raw `predict` outputs, one per step. With a
    single-output model that returns shape (1, 1) this is (n_days, 1, 1).
  """
  if predictor is None:
    predictor = model  # trained model defined earlier in the notebook

  # Keep the window in the (batch, time steps, features) layout the network was
  # trained on, instead of relying on the framework to add the feature axis.
  window = np.asarray(X_before_days).reshape(1, -1, 1)

  predicted_values = []
  for _ in range(n_days):
    # Predict the next value from the current window
    predicted_value = predictor.predict(window, verbose=0)
    predicted_values.append(predicted_value)
    # Slide the window: drop the oldest value and append the new prediction
    next_value = np.asarray(predicted_value).reshape(1, 1, 1)
    window = np.concatenate([window[:, 1:, :], next_value], axis=1)
  return np.array(predicted_values)

# Forecast 7 steps ahead, starting from the seed window
results = predictAfterNDays(7, last_data_input)

In [None]:
# Show Plot
# Map the normalized forecasts back to Rupiah with the fitted scaler and plot
# them against their step number (the default integer index of the DataFrame).
results_denorm = scaler.inverse_transform(results.reshape(-1, 1))
results_denorm_df = pd.DataFrame(results_denorm)
x_axes = results_denorm_df.index
y_axes = results_denorm_df[0]
showPlotExpress(results_denorm_df, x_axes, y_axes)

In [None]:
# Plot the actual (denormalized) targets for the 7 steps that were forecast.
# The seed window X_train[500] is paired with the target y_train[500], so a
# 7-step forecast lines up with y_train[500:507]; the previous slice [501:508]
# was shifted one step late.
y_train_denorm = scaler.inverse_transform(y_train.reshape(-1, 1))
y_train_denorm_df = pd.DataFrame(y_train_denorm[500:507])
x_axes = y_train_denorm_df.index
y_axes = y_train_denorm_df[0]
showPlotExpress(y_train_denorm_df, x_axes, y_axes)

### Save Trained Model to Github

In [None]:
# Save the trained model to the current working directory
model.save('lstm_trained_model.h5')

### Save Scaler Model (MinMaxScaler) as Pickle File to Github

In [None]:
# Save the fitted scaler to a file (for re-normalizing inputs and de-normalizing predictions later)
with open('scaler_model.pkl', 'wb') as file:
    pickle.dump(scaler, file)

### Save Preprocessed Dataset to Github

In [None]:
# Turn the Rupiah series into a two-column table: the date index becomes the
# 'Tanggal' column and the price column is renamed to 'Harga'.
price_data_rupiah_df = (
    pd.DataFrame(price_data_rupiah)
    .reset_index()
    .rename(columns={'index': 'Tanggal', 'Price': 'Harga'})
)

In [None]:
# Write the preprocessed table to an Excel file in the current working directory.
# NOTE(review): the integer row index is written as an extra unnamed column;
# pass index=False if consumers of the file do not need it — verify against the app.
price_data_rupiah_df.to_excel('preprocessed_dataset.xlsx')

### Save All New Files
- lstm_trained_model.h5
- scaler_model.pkl
- preprocessed_dataset.xlsx

In [None]:
# Stage everything in the current directory, commit it, and push to the master branch
! git add .
! git commit -m 'from Colab'
! git push origin master