In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

# Read the cleaned stock history from disk
df = pd.read_csv("cleaned_stock_data_no_nan.csv")

# Keep only the five price columns: Open, High, Low, Close and Adj Close
price = df.loc[:, ['Open', 'High', 'Low', 'Close', 'Adj Close']]

In [15]:
# Train a two-layer LSTM that predicts the next row of prices from the
# previous `time_steps` rows, then report error metrics in original units.

# Load the dataset (repeated here so this cell can run on its own)
df = pd.read_csv("cleaned_stock_data_no_nan.csv")

# Select the columns 'Open', 'High', 'Low', 'Close', 'Adj Close'
price = df[['Open', 'High', 'Low', 'Close', 'Adj Close']]

# Split point for an 80/20 split by row position (no shuffling);
# assumes the rows are already in chronological order -- TODO confirm.
train_size = int(len(price) * 0.8)

# Normalize the data.
# The scaler is fitted on the training rows only, so the min/max of the
# held-out period cannot leak into training. As a consequence, scaled test
# values may fall outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(price.iloc[:train_size])
normalized_price = scaler.transform(price)

# Split the data into train and test sets
train_data, test_data = normalized_price[:train_size], normalized_price[train_size:]

# Define the number of time steps
time_steps = 30


def make_sequences(data, steps):
    """Turn a 2-D array into sliding input windows and next-row targets.

    Parameters
    ----------
    data : numpy.ndarray of shape (n_rows, n_features)
        Rows to be windowed, in the order they should be consumed.
    steps : int
        Number of consecutive rows in each input window.

    Returns
    -------
    (windows, targets) : tuple of numpy.ndarray
        ``windows`` has shape (n_rows - steps, steps, n_features) and
        ``targets`` has shape (n_rows - steps, n_features); ``targets[k]``
        is the row that immediately follows ``windows[k]``.

    Raises
    ------
    ValueError
        If ``data`` has no more than ``steps`` rows, i.e. no window fits.
    """
    if len(data) <= steps:
        raise ValueError(
            f"Need more than {steps} rows to build sequences, got {len(data)}"
        )
    windows = np.array([data[i - steps:i] for i in range(steps, len(data))])
    # Copy so the targets do not alias the scaled source array
    targets = data[steps:].copy()
    return windows, targets


# Create the input sequences and target values for both splits
X_train, y_train = make_sequences(train_data, time_steps)
X_test, y_test = make_sequences(test_data, time_steps)

# Build the LSTM model: two stacked 64-unit LSTM layers followed by a dense
# layer that emits one value per price column
model = Sequential()
model.add(LSTM(64, input_shape=(time_steps, price.shape[1]), return_sequences=True))
model.add(LSTM(64))
model.add(Dense(price.shape[1]))
model.compile(optimizer='adam', loss='mse')

# Train the model.
# NOTE(review): the test split doubles as the validation set here, so
# val_loss must not be used for tuning if the test metrics below are meant
# to be an unbiased estimate.
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), verbose=1)

# Make predictions
train_predictions = model.predict(X_train)
test_predictions = model.predict(X_test)

# Inverse transform the predictions and actual values.
# After this point y_train / y_test hold prices in original units, not the
# scaled values the model was trained on.
train_predictions = scaler.inverse_transform(train_predictions)
test_predictions = scaler.inverse_transform(test_predictions)
y_train = scaler.inverse_transform(y_train)
y_test = scaler.inverse_transform(y_test)

# Evaluate the model (metrics are aggregated over all five price columns)
train_rmse = np.sqrt(mean_squared_error(y_train, train_predictions))
test_rmse = np.sqrt(mean_squared_error(y_test, test_predictions))
train_mae = mean_absolute_error(y_train, train_predictions)
test_mae = mean_absolute_error(y_test, test_predictions)
train_r2 = r2_score(y_train, train_predictions)
test_r2 = r2_score(y_test, test_predictions)

# Print the evaluation metrics
print("Training RMSE:", train_rmse)
print("Testing RMSE:", test_rmse)
print("Training MAE:", train_mae)
print("Testing MAE:", test_mae)
print("Training R^2:", train_r2)
print("Testing R^2:", test_r2)

# Plot the training loss and validation loss over epochs
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
plt.legend()  # must be called; the bare attribute reference draws nothing
plt.show()

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100

The output above shows the training and validation loss for each epoch of the LSTM training run. The model reaches a relatively low loss (mean squared error on the min-max-scaled prices), which indicates that its predictions stay close to the actual values.

Here are a few observations from the training process:

- The training and validation loss values decrease gradually over the epochs, which suggests that the model is learning and improving its performance.
- The training loss decreases faster than the validation loss, which could indicate some overfitting as the model may be fitting the training data too closely.
- The validation loss reaches a minimum value around epoch 29, after which it starts to fluctuate slightly.
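- The training loss continues to decrease until the final epoch, so the model is still fitting the training data. Because the validation loss stopped improving around epoch 29, however, additional training is only likely to help if it is combined with regularization or early stopping.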

In [17]:
# Re-evaluate the fitted model on a single price column.
# Relies on model, scaler, X_train, X_test, y_train and y_test from the
# training cell above. That cell already converted y_train / y_test back to
# original price units, so only the fresh predictions are inverse-transformed
# here (transforming the targets a second time would corrupt them).

# Make predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverse transform the predictions to their original scale
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)

# Column 0 is 'Open', the first of the price columns the scaler was fitted on
target_col = 0
train_actual = y_train[:, target_col]
test_actual = y_test[:, target_col]

# Calculate RMSE
train_rmse = np.sqrt(mean_squared_error(train_actual, train_predict[:, target_col]))
test_rmse = np.sqrt(mean_squared_error(test_actual, test_predict[:, target_col]))

# Calculate R^2 score
train_r2 = r2_score(train_actual, train_predict[:, target_col])
test_r2 = r2_score(test_actual, test_predict[:, target_col])

print("Train RMSE:", train_rmse)
print("Test RMSE:", test_rmse)
print("Train R^2 score:", train_r2)
print("Test R^2 score:", test_r2)