## Load library

In [1]:
!pip install pandas_datareader
!pip install plotly

import plotly.graph_objects as go
import plotly.subplots as sp
import pandas as pd
from pandas_datareader.data import DataReader
from pandas_datareader import data as pdr
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

print('Library loaded')



2024-08-17 11:33:20.669975: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-17 11:33:20.670092: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-17 11:33:20.808873: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Library loaded


## Load data

In [2]:
base_data = pd.read_csv("/kaggle/input/nvidia-dataset-july-2024/NVDA.csv")
base_data['Stock Name'] = 'NVDA'
base_data["Date"] = pd.to_datetime(base_data["Date"])
base_data.set_index("Date", inplace=True)
base_data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Stock Name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-07-28,0.446750,0.446750,0.435500,0.443000,0.421708,330352000,NVDA
2014-07-29,0.443250,0.450500,0.443000,0.444500,0.423136,217972000,NVDA
2014-07-30,0.447250,0.453250,0.446000,0.452000,0.430276,246184000,NVDA
2014-07-31,0.446500,0.450000,0.436750,0.437500,0.416473,401880000,NVDA
2014-08-01,0.437500,0.444750,0.436000,0.442250,0.420994,219884000,NVDA
...,...,...,...,...,...,...,...
2024-07-19,120.349998,121.599998,117.370003,117.930000,117.930000,217223800,NVDA
2024-07-22,120.349998,124.070000,119.860001,123.540001,123.540001,258068900,NVDA
2024-07-23,122.779999,124.690002,122.099998,122.589996,122.589996,173911000,NVDA
2024-07-24,119.169998,119.949997,113.440002,114.250000,114.250000,327776900,NVDA


## EDA / Pengenalan data

In [3]:
stock_names = base_data["Stock Name"].unique()

num_stock_names = len(stock_names)
print(f"Jumlah jenis stock name: {num_stock_names}")

print("Daftar stock name:")
print(stock_names)

Jumlah jenis stock name: 1
Daftar stock name:
['NVDA']


In [4]:
stock_counts = base_data["Stock Name"].value_counts()
stock_counts

Stock Name
NVDA    2516
Name: count, dtype: int64

In [5]:
stock_info = base_data.info()

print("Informasi tentang saham Nvidia:")
stock_info

print("Statistik tentang saham Nvidia:")
stock_describe = base_data.describe()
stock_describe

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2516 entries, 2014-07-28 to 2024-07-25
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Open        2516 non-null   float64
 1   High        2516 non-null   float64
 2   Low         2516 non-null   float64
 3   Close       2516 non-null   float64
 4   Adj Close   2516 non-null   float64
 5   Volume      2516 non-null   int64  
 6   Stock Name  2516 non-null   object 
dtypes: float64(5), int64(1), object(1)
memory usage: 157.2+ KB
Informasi tentang saham Nvidia:
Statistik tentang saham Nvidia:


Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,2516.0,2516.0,2516.0,2516.0,2516.0,2516.0
mean,15.214306,15.489683,14.918306,15.219211,15.191314,466934100.0
std,22.715711,23.123062,22.236979,22.701487,22.707632,254558500.0
min,0.42325,0.4325,0.41925,0.41975,0.401343,45644000.0
25%,2.556625,2.578688,2.50425,2.541688,2.505952,302250000.0
50%,6.069875,6.159625,5.957125,6.055,5.999637,414271000.0
75%,17.915064,18.245999,17.641188,17.992687,17.969387,564732200.0
max,139.800003,140.759995,132.419998,135.580002,135.580002,3692928000.0


In [32]:
fig = sp.make_subplots(rows=1, cols=1, subplot_titles=stock_names, shared_xaxes=False)

for i, stock_name in enumerate(stock_names, 1):
    stock_data = base_data[base_data["Stock Name"] == stock_name]
    row = (i - 1) // 5 + 1
    col = (i - 1) % 5 + 1
    fig.add_trace(
        go.Scatter(x=stock_data.index, y=stock_data["Adj Close"], mode='lines', name=stock_name),
        row=row, col=col
    )
    fig.update_xaxes(title_text="Date", row=row, col=col)
    fig.update_yaxes(title_text="Adj Close", row=row, col=col)

fig.update_layout(
    height=500, width=1000,
    title_text="Harga Closing Saham",
    showlegend=False,
)
fig.show()

In [7]:
fig = sp.make_subplots(rows=1, cols=1, subplot_titles=stock_names, shared_xaxes=False)

for i, stock_name in enumerate(stock_names, 1):
    stock_data = base_data[base_data["Stock Name"] == stock_name]
    row = (i - 1) // 5 + 1
    col = (i - 1) % 5 + 1
    fig.add_trace(
        go.Scatter(x=stock_data.index, y=stock_data["Volume"], mode='lines', name=stock_name),
        row=row, col=col
    )
    fig.update_xaxes(title_text="Date", row=row, col=col)
    fig.update_yaxes(title_text="Volume", row=row, col=col)

fig.update_layout(
    height=500, width=1000,
    title_text="Total Penjualan Saham",
    showlegend=False,
)
fig.show()

In [None]:
fig = go.Figure()
stock_data = base_data[base_data["Stock Name"] == "NVDA"]

fig.add_trace(go.Scatter(x=stock_data.index, y=stock_data["Adj Close"], mode='lines', name='Adj Close'))

ma_day = [5, 10, 20, 30, 50]
for ma in ma_day:
    column_name = f"MA for {ma} days"
    stock_data[column_name] = stock_data["Adj Close"].rolling(ma).mean()
    fig.add_trace(go.Scatter(x=stock_data.index, y=stock_data[column_name], mode='lines', name=column_name))

fig.update_layout(
    title="Moving Avarage dari saham NVDA",
    xaxis_title="Date",
    yaxis_title="Price",
    height=600,
    width=1200
)

fig.update_layout(
    xaxis_rangeslider_visible=True,
    xaxis_type="date"
)
fig.show()

In [9]:
fig = sp.make_subplots(rows=1, cols=1, subplot_titles=stock_names, shared_xaxes=False)

for i, stock_name in enumerate(stock_names, 1):
    stock_data = base_data[base_data["Stock Name"] == stock_name]
    row = (i - 1) // 5 + 1
    col = (i - 1) % 5 + 1
    fig.add_trace(
        go.Scatter(x=stock_data.index, y=stock_data["Close"], mode='lines', name=stock_name),
        row=row, col=col
    )
    fig.update_xaxes(title_text="Date", row=row, col=col)
    fig.update_yaxes(title_text="Close", row=row, col=col)

fig.update_layout(
    height=500, width=1000,
    title_text="Harga Close Saham",
    showlegend=False,
)
fig.show()

## Data modeling

In [10]:
def get_stock_data(stock_name, data):
    stock_data = data[data["Stock Name"] == stock_name].copy()
    return stock_data

stock_name_to_get = 'NVDA'
selected_stock_data = get_stock_data(stock_name_to_get, base_data)
selected_stock_data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Stock Name
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-07-28,0.446750,0.446750,0.435500,0.443000,0.421708,330352000,NVDA
2014-07-29,0.443250,0.450500,0.443000,0.444500,0.423136,217972000,NVDA
2014-07-30,0.447250,0.453250,0.446000,0.452000,0.430276,246184000,NVDA
2014-07-31,0.446500,0.450000,0.436750,0.437500,0.416473,401880000,NVDA
2014-08-01,0.437500,0.444750,0.436000,0.442250,0.420994,219884000,NVDA
...,...,...,...,...,...,...,...
2024-07-19,120.349998,121.599998,117.370003,117.930000,117.930000,217223800,NVDA
2024-07-22,120.349998,124.070000,119.860001,123.540001,123.540001,258068900,NVDA
2024-07-23,122.779999,124.690002,122.099998,122.589996,122.589996,173911000,NVDA
2024-07-24,119.169998,119.949997,113.440002,114.250000,114.250000,327776900,NVDA


In [11]:
data = selected_stock_data.filter(['Close'])
data_stock = data.values

In [12]:
len(data_stock)

2516

In [13]:
train_data_len = int(np.ceil(len(data_stock)*0.90))
train_data_len

2265

In [14]:
train_data = data_stock[0:train_data_len,]
valid_data = data_stock[train_data_len:,]

In [15]:
base_data = pd.read_csv(
    '/kaggle/input/nvidia-dataset-july-2024/NVDA.csv', parse_dates=["Date"], index_col="Date"
)


train_df = pd.DataFrame()
valid_df = pd.DataFrame()

train_df["Close"] = train_data.flatten()
train_df.index = data.index[:train_data_len]

valid_df["Close"] = valid_data.flatten()
valid_df.index = data.index[train_data_len:]

In [16]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data_stock.reshape(-1, 1))

x_train_data, y_train_data = [], []

#time window terbaik yang digunakan adalah 30 hari 
days = 30 
for i in range(days, len(train_data)):
    x_train_data.append(scaled_data[i - days : i, 0])
    y_train_data.append(scaled_data[i, 0])

x_train_data, y_train_data = np.array(x_train_data), np.array(y_train_data)

x_train_data = np.reshape(x_train_data, (x_train_data.shape[0], x_train_data.shape[1], 1))

In [17]:
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape=(x_train_data.shape[1], 1)))
model.add(Dropout(0.3))
model.add(LSTM(64, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(32, return_sequences=False))
model.add(Dense(16))
model.add(Dense(1))

model.summary()


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [18]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss="mean_squared_error")

history = model.fit(x_train_data, y_train_data, epochs=200, batch_size=32, verbose=1, shuffle=False)

Epoch 1/200
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - loss: 2.5680e-04
Epoch 2/200
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0040
Epoch 3/200
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 9.8358e-04
Epoch 4/200
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 6.8242e-05
Epoch 5/200
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 1.5058e-04
Epoch 6/200
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 1.6946e-04
Epoch 7/200
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 1.3556e-04
Epoch 8/200
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 1.0304e-04
Epoch 9/200
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 8.7020e-05
Epoch 10/200
[1m70/70[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

## Evaluasi

In [19]:
inputs_data = data_stock[len(data_stock) - len(valid_data) - days :]
inputs_data = inputs_data.reshape(-1, 1)
inputs_data = scaler.transform(inputs_data)

In [20]:
X_test = []
for i in range(days, inputs_data.shape[0]):
    X_test.append(inputs_data[i - days : i, 0])
X_test = np.array(X_test)

X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_closing_price = model.predict(X_test)
predicted_closing_price = scaler.inverse_transform(predicted_closing_price)

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step


In [21]:
valid_df["Predictions"] = predicted_closing_price

In [22]:
from sklearn.metrics import mean_squared_error

print(
    "MSE : ",
    mean_squared_error(valid_df["Close"].values, valid_df["Predictions"].values),
)

MSE :  12.1674261164283


## Data prediction

In [23]:
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=train_df.index, y=train_df["Close"], mode="lines", name="Train Data")
)
fig.add_trace(
    go.Scatter(x=valid_df.index, y=valid_df["Close"], mode="lines", name="Valid Data")
)
fig.add_trace(
    go.Scatter(
        x=valid_df.index, y=valid_df["Predictions"], mode="lines", name="Prediction"
    )
)

fig.update_layout(
    xaxis=dict(title="Date"),
    yaxis=dict(title="Closing Price"),
    title="Hasil training model",
)

fig.show()

In [24]:
predictions_df = pd.DataFrame(index=valid_df.index)
predictions_df["Predictions"] = valid_df["Predictions"]

print("Valid Data VS Predictions:")
combined_df = pd.concat([valid_df["Close"], predictions_df], axis=1)
print(combined_df)

Valid Data VS Predictions:
                 Close  Predictions
Date                               
2023-07-27   45.900002    46.166943
2023-07-28   46.750000    45.845127
2023-07-31   46.729000    45.769707
2023-08-01   46.507000    45.910503
2023-08-02   44.269001    46.160069
...                ...          ...
2024-07-19  117.930000   126.742737
2024-07-22  123.540001   123.528610
2024-07-23  122.589996   121.137024
2024-07-24  114.250000   119.732544
2024-07-25  112.279999   118.211777

[251 rows x 2 columns]


In [25]:
model.save('nvda_stock_prediction_using_lstm.h5')

In [26]:
loaded_model = tf.keras.models.load_model('nvda_stock_prediction_using_lstm.h5')


## Implementasi dan prediksi

In [27]:
base_data = pd.read_csv("/kaggle/input/nvidia-dataset-july-2024/NVDA.csv")
base_data['Stock Name'] = 'NVDA'
base_data["Date"] = pd.to_datetime(base_data["Date"])
base_data.set_index("Date", inplace=True)

def get_stock_data(stock_name, data):
    stock_data = data[data["Stock Name"] == stock_name].copy()
    return stock_data

stock_name_to_get = 'NVDA'
selected_stock_data = get_stock_data(stock_name_to_get, base_data)
data = selected_stock_data.filter(['Close'])
data_stock = data.values

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data_stock.reshape(-1, 1))

# prediksi
days = 30
full_X = []
for i in range(days, len(scaled_data)):
    full_X.append(scaled_data[i - days : i, 0])
full_X = np.array(full_X)
full_X = np.reshape(full_X, (full_X.shape[0], full_X.shape[1], 1))

loaded_model = tf.keras.models.load_model('nvda_stock_prediction_using_lstm.h5')
predicted_full_price = loaded_model.predict(full_X)
predicted_full_price = scaler.inverse_transform(predicted_full_price)

full_df = pd.DataFrame(index=data.index[days:], data={"Close": data_stock.flatten()[days:], "Predictions": predicted_full_price.flatten()})
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=full_df.index,
    y=full_df["Close"],
    mode='lines',
    name='Actual'
))
fig.add_trace(go.Scatter(
    x=full_df.index,
    y=full_df["Predictions"],
    mode='lines',
    name='Predicted'
))
fig.update_layout(title='Harga saham NVDA Asli vs Prediksi model',
                  xaxis_title='Date',
                  yaxis_title='Close Price',
                  legend_title='Legend')
fig.show()

[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step


In [28]:
from datetime import timedelta

days = 30
full_X = []
for i in range(days, len(scaled_data)):
    full_X.append(scaled_data[i - days : i, 0])
full_X = np.array(full_X)
full_X = np.reshape(full_X, (full_X.shape[0], full_X.shape[1], 1))

loaded_model = tf.keras.models.load_model('nvda_stock_prediction_using_lstm.h5')
predicted_full_price = loaded_model.predict(full_X)
predicted_full_price = scaler.inverse_transform(predicted_full_price)

# memasukkan prediksi ke DataFrame
full_df = pd.DataFrame(index=data.index[days:], data={"Close": data_stock.flatten()[days:], "Predictions": predicted_full_price.flatten()})

# forecast 30 hari ke depan
last_known_data = scaled_data[-days:].reshape(1, days, 1)
forecast = []
for _ in range(30):
    next_pred = loaded_model.predict(last_known_data)
    forecast.append(next_pred[0, 0])
    last_known_data = np.append(last_known_data[:, 1:, :], next_pred.reshape(1, 1, 1), axis=1)

# inverse transform kembali hasil forecast ke skala asli
forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1))

# tanggal forecast
last_date = data.index[-1]
forecast_dates = [last_date + timedelta(days=i) for i in range(1, 31)]

# menambahkan hasil forecast ke DataFrame
forecast_df = pd.DataFrame(index=forecast_dates, data={"Forecast": forecast.flatten()})

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=full_df.index,
    y=full_df["Close"],
    mode='lines',
    name='Actual'
))

fig.add_trace(go.Scatter(
    x=full_df.index,
    y=full_df["Predictions"],
    mode='lines',
    name='Predicted'
))

fig.add_trace(go.Scatter(
    x=forecast_df.index,
    y=forecast_df["Forecast"],
    mode='lines',
    name='Forecast'
))

fig.update_layout(title='Harga Saham NVDA - Actual, Predicted, and Forecast 30 hari selanjutnya',
                  xaxis_title='Date',
                  yaxis_title='Close Price',
                  legend_title='Legend')
fig.show()

[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

In [29]:
# tanggal forecast
last_date = data.index[-1]
forecast_dates = [last_date + timedelta(days=i) for i in range(1, 31)]

forecast_df = pd.DataFrame(index=forecast_dates, data={"Forecast": forecast.flatten()})

# cetak DataFrame hasil prediksi 30 hari ke depan
print("Hasil Prediksi 30 Hari ke Depan:")
print(forecast_df)

Hasil Prediksi 30 Hari ke Depan:
              Forecast
2024-07-26  116.303383
2024-07-27  114.738663
2024-07-28  113.568726
2024-07-29  112.721649
2024-07-30  112.110016
2024-07-31  111.647636
2024-08-01  111.319794
2024-08-02  111.106827
2024-08-03  111.016548
2024-08-04  111.001434
2024-08-05  111.085175
2024-08-06  111.274651
2024-08-07  111.561790
2024-08-08  111.940056
2024-08-09  112.412331
2024-08-10  112.952728
2024-08-11  113.577377
2024-08-12  114.264893
2024-08-13  115.004616
2024-08-14  115.785950
2024-08-15  116.629959
2024-08-16  117.502449
2024-08-17  118.403572
2024-08-18  119.329895
2024-08-19  120.292664
2024-08-20  121.254044
2024-08-21  122.226418
2024-08-22  123.179184
2024-08-23  124.124260
2024-08-24  125.076012
