In [None]:
import numpy as np
import pandas as pd
import yfinance as yf

from datetime import datetime
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

Get the Historical Data of the Stock


In [None]:
# Download the daily price history of one ticker over a fixed date window.
symbol = 'BBNI.JK'
start_date, end_date = "2010-01-01", "2024-04-01"

historical_data = yf.download(symbol, start=start_date, end=end_date)

# Turn the index returned by yfinance into an ordinary column and use a plain
# 0..n-1 integer index instead.
df = pd.DataFrame(historical_data).reset_index()

[*********************100%%**********************]  1 of 1 completed


In [None]:
# Preview the first rows of the downloaded data as plain text.
print(df.head())

        Date        Open        High         Low       Close   Adj Close  \
0 2010-01-04  943.262085  959.852112  938.522034  943.262085  647.517517   
1 2010-01-05  943.262085  959.852112  938.522034  943.262085  647.517517   
2 2010-01-06  938.522034  948.002075  933.782043  938.522034  644.263550   
3 2010-01-07  933.782043  948.002075  924.302063  933.782043  641.009766   
4 2010-01-08  933.782043  943.262085  929.042053  933.782043  641.009766   

     Volume  
0  17285826  
1  31016809  
2  32037904  
3  21151852  
4  12106513  


In [None]:
# Save the data to a CSV file in the current working directory.
# The row index is written too (pandas' default), which is why the reloaded
# frame carries an extra "Unnamed: 0" column.
df.to_csv('BBNI.JK2010to2024.csv')

Load the dataset from the csv file

In [None]:
# Reload the dataset from the CSV file written earlier in this notebook.
# The path is relative, matching the one passed to `to_csv`, so the notebook
# reads back exactly the file it wrote regardless of where it runs; the
# previous hard-coded `/content/...` path only resolves on Google Colab.
df = pd.read_csv('BBNI.JK2010to2024.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,0,2010-01-04,943.262085,959.852112,938.522034,943.262085,647.517517,17285826
1,1,2010-01-05,943.262085,959.852112,938.522034,943.262085,647.517517,31016809
2,2,2010-01-06,938.522034,948.002075,933.782043,938.522034,644.26355,32037904
3,3,2010-01-07,933.782043,948.002075,924.302063,933.782043,641.009766,21151852
4,4,2010-01-08,933.782043,943.262085,929.042053,933.782043,641.009766,12106513


Rename Columns

In [None]:
# Normalise the column names to snake_case and discard the "Unnamed: 0" column
# (the row index that was written into the CSV).
# `errors='ignore'` keeps this cell re-runnable: on a second execution the
# column is already gone and a plain drop would raise KeyError.
df = (
    df.rename(columns={'Date': 'date', 'Open': 'open', 'High': 'high', 'Low': 'low',
                       'Close': 'close', 'Adj Close': 'adj_close', 'Volume': 'volume'})
      .drop(columns=['Unnamed: 0'], errors='ignore')
)
df.head()

Unnamed: 0,date,open,high,low,close,adj_close,volume
0,2010-01-04,943.262085,959.852112,938.522034,943.262085,647.517517,17285826
1,2010-01-05,943.262085,959.852112,938.522034,943.262085,647.517517,31016809
2,2010-01-06,938.522034,948.002075,933.782043,938.522034,644.26355,32037904
3,2010-01-07,933.782043,948.002075,924.302063,933.782043,641.009766,21151852
4,2010-01-08,933.782043,943.262085,929.042053,933.782043,641.009766,12106513


Convert the date column from string to datetime

In [None]:
# Parse the `date` column (read from CSV as text) into pandas datetime values.
df = df.assign(date=pd.to_datetime(df['date']))
df.head()

Unnamed: 0,date,open,high,low,close,adj_close,volume
0,2010-01-04,943.262085,959.852112,938.522034,943.262085,647.517517,17285826
1,2010-01-05,943.262085,959.852112,938.522034,943.262085,647.517517,31016809
2,2010-01-06,938.522034,948.002075,933.782043,938.522034,644.26355,32037904
3,2010-01-07,933.782043,948.002075,924.302063,933.782043,641.009766,21151852
4,2010-01-08,933.782043,943.262085,929.042053,933.782043,641.009766,12106513


Sort the dataframe by date

In [None]:
# Order the rows chronologically; the windowing and the train/test split
# further down both rely on row order.
df = df.sort_values(by='date')
df.head()

Unnamed: 0,date,open,high,low,close,adj_close,volume
0,2010-01-04,943.262085,959.852112,938.522034,943.262085,647.517517,17285826
1,2010-01-05,943.262085,959.852112,938.522034,943.262085,647.517517,31016809
2,2010-01-06,938.522034,948.002075,933.782043,938.522034,644.26355,32037904
3,2010-01-07,933.782043,948.002075,924.302063,933.782043,641.009766,21151852
4,2010-01-08,933.782043,943.262085,929.042053,933.782043,641.009766,12106513


In [None]:
# (rows, columns) of the cleaned frame.
df.shape

(3522, 7)

Get the duration of the dataframe

In [None]:
# Report the first and last dates in the frame and the time span between them.
# The `date` column is selected by name: the previous `df.iloc[0][0]` indexed a
# label-indexed row Series with an integer key, relying on a positional
# fallback that pandas has deprecated.
first_date = df['date'].iloc[0]
last_date = df['date'].iloc[-1]

print('starting date: ', first_date)
print('ending date: ', last_date)
print('duration: ', last_date - first_date)


starting date:  2010-01-04 00:00:00
ending date:  2024-03-28 00:00:00
duration:  5197 days 00:00:00


Normalizing/scaling close value between 0 and 1

In [None]:
# Keep only the date and the closing price for the modelling steps.
close_df = df.loc[:, ['date', 'close']]
close_df.shape

(3522, 2)

In [None]:
# Snapshot of date + unscaled close, used later when plotting predictions
# against the original prices. It is built straight from `df` so this cell
# gives the same result on every run; previously it consumed and then
# overwrote `close_df`, so executing the cell a second time failed.
close_stock = df[['date', 'close']].copy()

# Scale the closing price into [0, 1]. From here on `close_df` is a
# (n_rows, 1) NumPy array of scaled closes, not a DataFrame.
# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split, so the test period's min/max influence how the training data is
# scaled. Consider fitting on the training rows only.
scaler = MinMaxScaler(feature_range=(0,1))
close_df = scaler.fit_transform(close_stock[['close']].to_numpy().reshape(-1,1))
close_df.shape

(3522, 1)

Split the data into training and test sets. The train:test ratio is 70:30.

In [None]:
# Chronological split: the first 70% of the rows train the model and the
# remaining rows are held out for testing (no shuffling).
TRAINING_PORTION = .70
TESTING_PORTION = 1 - TRAINING_PORTION

n_rows = len(close_df)
train_size = int(n_rows * TRAINING_PORTION)
test_size = int(n_rows - train_size)

train = close_df[:train_size, :]
test = close_df[train_size:, :1]

for part in (close_df, train, test):
    print(part.shape)

(3522, 1)
(2465, 1)
(1057, 1)


Prepare the dataset in the sliding-window form required for time-series prediction

Suppose you have a dataset with 100 data points [1, 2, 3, ..., 100], and time_step is set to 3. The function will create input-output pairs as follows:

Input sequence [1, 2, 3], Output value 4
Input sequence [2, 3, 4], Output value 5
Input sequence [3, 4, 5], Output value 6
and so on...

In [None]:
def prepare_dataset(dataset, time_step, include_last=False):
  """Turn a series into sliding-window samples for next-step prediction.

  Each sample is `time_step` consecutive values taken from column 0 of
  `dataset`; its target is the value that immediately follows the window.

  Args:
    dataset: 2-D array-like indexed as `dataset[row, 0]`.
    time_step: number of consecutive values in each input window.
    include_last: when False (the default, and the historical behaviour) the
      final valid window is skipped, so `len(dataset) - time_step - 1` samples
      are produced. Pass True to also emit that last window, giving
      `len(dataset) - time_step` samples. The default is left unchanged because
      existing callers may depend on the historical sample count.

  Returns:
    A tuple `(X, y)` of NumPy arrays: `X` has one window per row and `y` holds
    the matching next values. Both are empty when the dataset is too short to
    form a single sample.
  """
  # The last index `i` with a target available is len(dataset) - time_step - 1,
  # hence len(dataset) - time_step windows in total; the legacy bound stops one
  # short of that.
  n_windows = len(dataset) - time_step - (0 if include_last else 1)

  dataX, dataY = [], []
  for i in range(n_windows):
    dataX.append(dataset[i:(i + time_step), 0])
    dataY.append(dataset[i + time_step, 0])

  return np.array(dataX), np.array(dataY)

In [None]:
# Window length: each sample uses TIME_STEP consecutive closes to predict the next one.
TIME_STEP = 15

X_train, y_train = prepare_dataset(train, TIME_STEP)
X_test, y_test = prepare_dataset(test, TIME_STEP)

for label, arr in (("X_train: ", X_train), ("y_train: ", y_train),
                   ("X_test: ", X_test), ("y_test: ", y_test)):
    print(label, arr.shape)


X_train:  (2449, 15)
y_train:  (2449,)
X_test:  (1041, 15)
y_test:  (1041,)


In [None]:
# The recurrent layers take input shaped [samples, timesteps, features];
# give each window a trailing feature axis of size 1.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

for label, arr in (('X_train: ', X_train), ('X_test: ', X_test)):
    print(label, arr.shape)

X_train:  (2449, 15, 1)
X_test:  (1041, 15, 1)


# Models

1.   Model 1 - LSTM: 3 LSTM layers and 1 Dense layer
2.   Model 2 - LSTM + GRU: 2 LSTM layers + 2 GRU layers and 1 Dense layer




LSTM Model Structure

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, GRU
from tensorflow.keras.utils import plot_model

In [None]:
# Reset Keras' global state, then seed the Python, NumPy and TensorFlow random
# generators so that weight initialisation and batch shuffling are repeatable
# from one run to the next.
# NOTE(review): bit-exact results on GPU may additionally require TensorFlow's
# op-determinism setting -- confirm if exact reproducibility matters.
tf.keras.backend.clear_session()
tf.keras.utils.set_random_seed(42)

# Model 1: three stacked 32-unit LSTM layers followed by a single-unit Dense
# output. The first two LSTMs return the full sequence so the following LSTM
# still receives 3-D input; the last one returns only its final state.
model1 = Sequential([
    LSTM(32, return_sequences=True, input_shape=(TIME_STEP, 1)),
    LSTM(32, return_sequences=True),
    LSTM(32),
    Dense(1),
])

model1.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Print a layer-by-layer text summary of the model (output shapes and parameter counts).
model1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 15, 32)            4352      
                                                                 
 lstm_1 (LSTM)               (None, 15, 32)            8320      
                                                                 
 lstm_2 (LSTM)               (None, 32)                8320      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 21025 (82.13 KB)
Trainable params: 21025 (82.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Train model 1. The test windows are passed as validation data, so the
# per-epoch validation loss is measured on the held-out period.
model1.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=200,
    batch_size=5,
    verbose=1,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x7b7d910dceb0>

In [None]:
# Run the trained model over both splits. The predictions are still in the
# scaled space here; they are converted back to prices in a later cell.
train_predict, test_predict = model1.predict(X_train), model1.predict(X_test)

(train_predict.shape, test_predict.shape)



((2449, 1), (1041, 1))

In [None]:
# Persist the trained model to disk under the given path.
model1.save("model1Stockpred")

In [None]:
# Undo the MinMax scaling so predictions and targets are back in price units.
# Caution: this overwrites `train_predict` / `test_predict`, so running the
# cell twice applies the inverse transform to them a second time.
train_predict, test_predict = (
    scaler.inverse_transform(scaled) for scaled in (train_predict, test_predict)
)

# The targets are 1-D; reshape them into a single column before inverting.
original_ytrain, original_ytest = (
    scaler.inverse_transform(target.reshape(-1, 1)) for target in (y_train, y_test)
)

Evaluations for model 1

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score, r2_score
from sklearn.metrics import mean_poisson_deviance, mean_gamma_deviance, accuracy_score
import math

**Evaluation Metrics: RMSE, MSE, and MAE**


In [None]:
# Evaluation metrics in original price units: RMSE, MSE and MAE per split.
# Fix: the third line reports the MAE of the *training* data and is now
# labelled accordingly (it was printed as "Test data MAE").
train_mse = mean_squared_error(original_ytrain, train_predict)
test_mse = mean_squared_error(original_ytest, test_predict)

print("Train data RMSE: ", math.sqrt(train_mse))
print("Train data MSE: ", train_mse)
print("Train data MAE: ", mean_absolute_error(original_ytrain,train_predict))
print("-------------------------------------------------------------------------------------")
print("Test data RMSE: ", math.sqrt(test_mse))
print("Test data MSE: ", test_mse)
print("Test data MAE: ", mean_absolute_error(original_ytest,test_predict))

Train data RMSE:  56.222548470131215
Train data MSE:  3160.9749564762537
Test data MAE:  39.5396076543519
-------------------------------------------------------------------------------------
Test data RMSE:  76.89963896998243
Test data MSE:  5913.5544737136415
Test data MAE:  55.45479845382294


**Explained variance regression score**

In [None]:
# Explained variance of the predictions for each split.
train_evs = explained_variance_score(original_ytrain, train_predict)
test_evs = explained_variance_score(original_ytest, test_predict)

print("Train data explained variance regression score:", train_evs)
print("Test data explained variance regression score:", test_evs)

Train data explained variance regression score: 0.9966445497087817
Test data explained variance regression score: 0.9947702500642257


**R2 Score**

In [None]:
# Coefficient of determination (R^2) for each split.
train_r2 = r2_score(original_ytrain, train_predict)
test_r2 = r2_score(original_ytest, test_predict)

print("Train data R2 score:", train_r2)
print("Test data R2 score:", test_r2)

Train data R2 score: 0.9966329424566946
Test data R2 score: 0.9946503597624402


**Regression losses: Mean Gamma Deviance (MGD) and Mean Poisson Deviance (MPD)**

In [None]:
# Mean gamma deviance (MGD) and mean Poisson deviance (MPD) for each split.
train_mgd = mean_gamma_deviance(original_ytrain, train_predict)
test_mgd = mean_gamma_deviance(original_ytest, test_predict)
train_mpd = mean_poisson_deviance(original_ytrain, train_predict)
test_mpd = mean_poisson_deviance(original_ytest, test_predict)

print("Train data MGD: ", train_mgd)
print("Test data MGD: ", test_mgd)
print("----------------------------------------------------------------------")
print("Train data MPD: ", train_mpd)
print("Test data MPD: ", test_mpd)

Train data MGD:  0.00045128022101614725
Test data MGD:  0.0005274472716320341
----------------------------------------------------------------------
Train data MPD:  1.1145271714169134
Test data MPD:  1.6549775958924


Comparison between original stock close price and predicted close price

In [None]:
from itertools import cycle
import plotly.express as px

In [None]:
# Line the predictions up with the original series for plotting. Each
# prediction belongs to the row right after its input window, so both
# prediction arrays are written into NaN-filled columns as long as the full
# series, at the row positions they refer to.
look_back = TIME_STEP

# Train predictions start after the first `look_back` rows.
trainPredictPlot = np.full(close_df.shape, np.nan)
train_stop = look_back + len(train_predict)
trainPredictPlot[look_back:train_stop, :] = train_predict
print("Train predicted data: ", trainPredictPlot.shape)

# Test predictions: the offset skips the training rows plus the first
# `look_back` rows of the test split. The bounds assume the sample counts
# produced by `prepare_dataset` with its default settings.
testPredictPlot = np.full(close_df.shape, np.nan)
test_start = len(train_predict) + (look_back * 2) + 1
test_stop = len(close_df) - 1
testPredictPlot[test_start:test_stop, :] = test_predict
print("Test predicted data: ", testPredictPlot.shape)

# Legend labels, handed out one per trace in plotting order.
names = cycle(['Original close price','Train predicted close price','Test predicted close price'])

plotdf = pd.DataFrame({'date': close_stock['date'],
                       'original_close': close_stock['close'],
                       'train_predicted_close': trainPredictPlot.ravel().tolist(),
                       'test_predicted_close': testPredictPlot.ravel().tolist()})

series_to_plot = [plotdf['original_close'], plotdf['train_predicted_close'],
                  plotdf['test_predicted_close']]
fig = px.line(plotdf, x=plotdf['date'], y=series_to_plot,
              labels={'value':'Stock price','date': 'Date'})
fig.update_layout(title_text='Comparision between original close price vs predicted close price',
                  plot_bgcolor='white', font_size=15, font_color='black', legend_title_text='Close Price')
fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=True)
fig.update_yaxes(showgrid=True)
fig.show()

Train predicted data:  (3522, 1)
Test predicted data:  (3522, 1)


Predicting the next 100 days

In [None]:
from numpy import array

In [None]:
# Recursive multi-step forecast with model 1: predict one step ahead, append
# the prediction to the input window, drop the oldest value, and repeat.
# All values stay in the scaled space; they are converted back when plotted.
pred_days = 100
n_steps = TIME_STEP

# Seed the rolling window with the last TIME_STEP scaled closes of the test split.
temp_input = test[len(test)-TIME_STEP:].reshape(1,-1)[0].tolist()

lst_output = []
for _ in range(pred_days):
    # Shape the current window as a single sample: (1, timesteps, 1).
    x_input = np.array(temp_input).reshape((1, n_steps, 1))
    yhat = model1.predict(x_input, verbose=0)

    # Slide the window forward by one step.
    temp_input = temp_input[1:] + yhat[0].tolist()
    lst_output.extend(yhat.tolist())

print("Output of predicted next days: ", len(lst_output))

Output of predicted next days:  100


In [None]:
# 1-based day numbers: the last TIME_STEP observed days, followed by the forecast days.
last_days = np.arange(TIME_STEP) + 1
day_pred = np.arange(pred_days) + (TIME_STEP + 1)

for day_numbers in (last_days, day_pred):
    print(day_numbers)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
[ 16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33
  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51
  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69
  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87
  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105
 106 107 108 109 110 111 112 113 114 115]


In [None]:
# Plot the last TIME_STEP observed closes next to the pred_days forecasted closes.
#
# Fix: the two series used to be the *same* list object (both names were bound
# to `temp_mat`), so the slice assignments below wrote into one shared list and
# the two traces came out identical. Each series now has its own NaN-filled
# array; NaN marks the positions where that series has no value.
n_points = TIME_STEP + pred_days

last_original_days_value = np.full(n_points, np.nan)
next_predicted_days_value = np.full(n_points, np.nan)

# First TIME_STEP slots: the most recent observed closes, back in price units.
last_original_days_value[:TIME_STEP] = scaler.inverse_transform(
    close_df[len(close_df)-TIME_STEP:]).ravel()
# Remaining slots: the forecast, back in price units.
next_predicted_days_value[TIME_STEP:] = scaler.inverse_transform(
    np.array(lst_output).reshape(-1,1)).ravel()

new_pred_plot = pd.DataFrame({
    'last_original_days_value':last_original_days_value,
    'next_predicted_days_value':next_predicted_days_value
})

# Labels are derived from the actual window and horizon lengths; they used to
# say "next 10 days" although pred_days values are forecast.
names = cycle([f'Last {TIME_STEP} days close price',
               f'Predicted next {pred_days} days close price'])

fig = px.line(new_pred_plot,x=new_pred_plot.index, y=[new_pred_plot['last_original_days_value'],
                                                      new_pred_plot['next_predicted_days_value']],
              labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(title_text=f'Compare last {TIME_STEP} days vs next {pred_days} days',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Close Price')

fig.for_each_trace(lambda t:  t.update(name = next(names)))
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

In [None]:
# Append the forecast (still scaled) to the full scaled close series, convert
# the combined column back to prices, and plot it as one line.
forecast_scaled = np.array(lst_output).reshape(-1,1)
combined_scaled = np.vstack([close_df, forecast_scaled])
lstmdf = scaler.inverse_transform(combined_scaled).ravel().tolist()

names = cycle(['Close price'])

fig = px.line(lstmdf,labels={'value': 'Stock price','index': 'Timestamp'})
fig.update_layout(title_text='Plotting whole closing stock price with prediction',
                  plot_bgcolor='white', font_size=15, font_color='black',legend_title_text='Stock')

fig.for_each_trace(lambda t:  t.update(name = next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

NOTE: DEBUG THIS PLOT LATER

LSTM + GRU

In [None]:
# Ensure the inputs are shaped [samples, timesteps, features] for model 2.
# This leaves arrays that already have a trailing feature axis of size 1 unchanged.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

for label, arr in (("X_train: ", X_train), ("X_test: ", X_test)):
    print(label, arr.shape)
print("X_test: ", X_test.shape)

X_train:  (2449, 15, 1)
X_test:  (1041, 15, 1)


In [None]:
# Reset Keras' global state, then seed the Python, NumPy and TensorFlow random
# generators so that weight initialisation and batch shuffling are repeatable
# from one run to the next.
# NOTE(review): bit-exact results on GPU may additionally require TensorFlow's
# op-determinism setting -- confirm if exact reproducibility matters.
tf.keras.backend.clear_session()
tf.keras.utils.set_random_seed(42)

# Model 2: two 32-unit LSTM layers, then two 32-unit GRU layers, then a
# single-unit Dense output. Every recurrent layer except the last returns the
# full sequence so the next recurrent layer still receives 3-D input.
model2 = Sequential([
    LSTM(32, return_sequences=True, input_shape=(TIME_STEP, 1)),
    LSTM(32, return_sequences=True),
    GRU(32, return_sequences=True),
    GRU(32),
    Dense(1),
])
model2.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Print a layer-by-layer text summary of model 2 (output shapes and parameter counts).
model2.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 15, 32)            4352      
                                                                 
 lstm_1 (LSTM)               (None, 15, 32)            8320      
                                                                 
 gru (GRU)                   (None, 15, 32)            6336      
                                                                 
 gru_1 (GRU)                 (None, 32)                6336      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 25377 (99.13 KB)
Trainable params: 25377 (99.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Train model 2 with the same settings as model 1. The test windows are passed
# as validation data, so the per-epoch validation loss is measured on the
# held-out period.
model2.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=200,
    batch_size=5,
    verbose=1,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.src.callbacks.History at 0x7b7d86ba9060>

In [None]:
# Persist the trained LSTM + GRU model to disk under the given path.
model2.save("model2StockPred")