## Importing necessary libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split

<br><br>

## Load the stock dataset from a CSV file

In [None]:
# Read the raw CSV into a DataFrame.
# NOTE(review): the path is an absolute Google Drive mount location, so this
# cell only runs where that mount exists.
df = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Colab Notebooks/dataset/DATA.csv'
)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,NASDAQ,NYSE,S&P 500,FTSE100,...,RUSSELL2000,HENG SENG,SSE,Crude Oil,Gold,VIX,USD index,Amazon,Google,Microsoft
0,7/2/2010,8.946072,8.961785,8.685715,8.819285,693842800,2091.790039,6434.810059,1022.580017,4838.09,...,598.969971,19905.32031,2382.900879,72.139999,1207.400024,30.120001,84.43,109.139999,109.139999,23.27
1,7/1/2010,9.082143,9.1,8.686429,8.874286,1022896000,2101.360107,6462.029785,1027.369995,4805.75,...,604.76001,,2373.791992,72.949997,1206.300049,32.860001,84.72,110.959999,110.959999,23.16
2,7/6/2010,8.964286,9.028571,8.791429,8.879642,615235600,2093.879883,6486.089844,1028.060059,4965.0,...,590.030029,20084.11914,2409.424072,71.980003,1194.800049,29.65,84.08,110.059998,110.059998,23.82
3,7/7/2010,8.946072,9.241786,8.919642,9.238214,654556000,2159.469971,6685.779785,1060.27002,5014.82,...,611.659973,19857.07031,2421.116943,74.07,1198.599976,26.84,83.82,113.43,113.43,24.299999
4,7/8/2010,9.374286,9.389286,9.103214,9.2175,738144400,2175.399902,6755.810059,1070.25,5105.45,...,620.27002,20050.56055,2415.149902,75.440002,1195.800049,25.709999,83.82,116.220001,116.220001,24.41


<br><br>

## Data Preprocessing

In [None]:
# Count the missing (NaN) entries per column; axis=0 sums down the rows so the
# result has one entry per column.
missing_data = df.isnull().sum(axis=0)
missing_data

Date             0
Open             0
High             0
Low              0
Close            0
Volume           0
NASDAQ           0
NYSE             0
S&P 500          0
FTSE100          3
NIKKI225       134
BSE SENSEX     127
RUSSELL2000      0
HENG SENG      112
SSE            170
Crude Oil        2
Gold             2
VIX              0
USD index        0
Amazon           0
Google           0
Microsoft        0
dtype: int64

In [None]:
# Remove, in place, every row that has a missing value in any column.
# NOTE(review): the cells visible here only use the 'Gold' column afterwards;
# confirm whether dropping on all columns is intended.
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Assign the column labels positionally (one label per existing column).
df.columns = [
    'Date', 'Open', 'High', 'Low', 'Close', 'Volume',
    'NASDAQ', 'NYSE', 'S&P 500', 'FTSE100', 'NIKKI225', 'BSE SENSEX',
    'RUSSELL2000', 'HENG SENG', 'SSE',
    'Crude Oil', 'Gold', 'VIX', 'USD index',
    'Amazon', 'Google', 'Microsoft',
]

In [None]:
# Parse the 'Date' strings into datetime values (format is inferred by pandas).
df['Date'] = pd.to_datetime(df['Date'])

# Order the rows chronologically, oldest first (ascending is the default).
# The sort is done in place and the original index labels are kept.
df.sort_values('Date', inplace=True)

In [None]:
# Preview the first five rows after date parsing and sorting.
df.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,NASDAQ,NYSE,S&P 500,FTSE100,...,RUSSELL2000,HENG SENG,SSE,Crude Oil,Gold,VIX,USD index,Amazon,Google,Microsoft
0,2010-07-02,8.946072,8.961785,8.685715,8.819285,693842800,2091.790039,6434.810059,1022.580017,4838.09,...,598.969971,19905.32031,2382.900879,72.139999,1207.400024,30.120001,84.43,109.139999,109.139999,23.27
2,2010-07-06,8.964286,9.028571,8.791429,8.879642,615235600,2093.879883,6486.089844,1028.060059,4965.0,...,590.030029,20084.11914,2409.424072,71.980003,1194.800049,29.65,84.08,110.059998,110.059998,23.82
3,2010-07-07,8.946072,9.241786,8.919642,9.238214,654556000,2159.469971,6685.779785,1060.27002,5014.82,...,611.659973,19857.07031,2421.116943,74.07,1198.599976,26.84,83.82,113.43,113.43,24.299999
4,2010-07-08,9.374286,9.389286,9.103214,9.2175,738144400,2175.399902,6755.810059,1070.25,5105.45,...,620.27002,20050.56055,2415.149902,75.440002,1195.800049,25.709999,83.82,116.220001,116.220001,24.41
5,2010-07-09,9.174643,9.282143,9.112857,9.272142,433322400,2196.449951,6808.709961,1077.959961,5132.94,...,629.429993,20378.66016,2470.923096,76.089996,1209.599976,24.98,83.95,117.260002,117.260002,24.27


In [None]:
# `data` is a second name for the same DataFrame object as `df`; no copy is made,
# so in-place changes through either name are visible through both.
data = df

In [None]:
# Pull the 'Gold' column out as an independent float NumPy array.
prices = data['Gold'].to_numpy(dtype=float, copy=True)

In [None]:
# Rescale the prices into the [0, 1] range. MinMaxScaler expects a 2-D input,
# so the 1-D price vector is turned into a single-feature column first.
# NOTE(review): the scaler is fitted on the whole series, before the train/test
# split performed later, so the test period influences the scaling — confirm
# whether that is acceptable.
scaler = MinMaxScaler(feature_range=(0, 1))
prices_column = prices.reshape(-1, 1)
prices_scaled = scaler.fit_transform(prices_column)

<br><br>

## Model Training

In [None]:
# Define a function to create sequences for LSTM
def create_sequences(data, sequence_length):
    """Slice ``data`` into every overlapping window of ``sequence_length`` steps.

    Window ``i`` is ``data[i:i + sequence_length]``; consecutive windows are
    shifted by one step along the first axis.

    Args:
        data: Array-like indexed along its first axis (one entry per time step).
        sequence_length: Number of consecutive steps per window; must be >= 1.

    Returns:
        np.ndarray of shape ``(n_windows, sequence_length, *data.shape[1:])``
        with ``n_windows = max(len(data) - sequence_length + 1, 0)``.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be at least 1")

    data = np.asarray(data)

    # The "+ 1" keeps the final full window, i.e. the one ending on the last
    # element of `data`; without it the most recent sample is never used.
    n_windows = len(data) - sequence_length + 1

    if n_windows <= 0:
        # Not enough data for a single window: return a correctly shaped empty
        # array so that callers can still slice along the window axis.
        return np.empty((0, sequence_length) + data.shape[1:], dtype=data.dtype)

    return np.array([data[i:i + sequence_length] for i in range(n_windows)])

# Choose the sequence length (e.g., 10 days)
sequence_length = 10

# Create sequences for LSTM
sequences = create_sequences(prices_scaled, sequence_length)

# Split the data into training and testing sets.
# Each window contributes its first `sequence_length - 1` steps as the model
# input and its last step as the target.
# shuffle=False keeps the split chronological: the final 20% of the windows
# become the test set. Consecutive windows overlap, so a shuffled split would
# place test targets inside training inputs and inflate the evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    sequences[:, :-1],
    sequences[:, -1],
    test_size=0.2,
    shuffle=False,
)

In [None]:
# Stacked LSTM regressor: two 50-unit LSTM layers followed by a single-output
# dense layer. The first LSTM returns the full sequence so the second LSTM
# receives one vector per time step; each input sample has
# `sequence_length - 1` steps with one feature per step.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(sequence_length - 1, 1)),
    LSTM(50),
    Dense(1),
])

# Compile the model: Adam optimizer, mean squared error as the training loss.
model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model for 50 epochs in mini-batches of 32 samples.
model.fit(x=X_train, y=y_train, batch_size=32, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7f517a3a4430>