### Stock market prediction: 5 YEAR S&P - Using Long Short Term Memory

In [None]:
# Import the necessary libraries
from datetime import timedelta

import matplotlib.pyplot as plt  # the plotting cells call `plt`
import matplotlib.pyplot as pltS  # original (mistyped) alias, kept so no existing reference breaks
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

In [None]:
# Read the price history CSV into a DataFrame
snp = pd.read_csv(filepath_or_buffer='ES_5Years_8_11_2024.csv')

In [None]:
# Take the 'Close' column as a single-column 2-D array (one row per record)
# and parse the 'Time' column into datetimes for use on the plot x-axis
close_prices = np.reshape(snp['Close'].values, (-1, 1))
dates = pd.to_datetime(snp['Time'])

In [None]:
# Positional (unshuffled) hold-out split: the first 70% of the rows form the
# training set and the remaining 30% form the test set
train_size = int(0.7 * len(close_prices))
train_data = close_prices[:train_size]
test_data = close_prices[train_size:]

In [None]:
# Scale prices into the [0, 1] range. The scaler learns its min/max from the
# training rows only, so no information from the test rows leaks into it.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train_data)

# Apply the already-fitted scaler to both splits (the test split is never
# used for fitting)
train_data_scaled = scaler.transform(train_data)
test_data_scaled = scaler.transform(test_data)


In [None]:
# Build supervised samples from the scaled training series: every input is a
# window of `time_step` consecutive values and its target is the value that
# immediately follows the window.
time_step = 60
train_series = train_data_scaled[:, 0]
target_positions = range(time_step, len(train_series))

X_train = np.array([train_series[pos - time_step:pos] for pos in target_positions])
Y_train = np.array([train_series[pos] for pos in target_positions])

# Add a trailing feature axis so the inputs are [samples, timesteps, features]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

In [None]:
# Two stacked 50-unit LSTM layers, each followed by 20% dropout, feeding a
# single-unit Dense layer that outputs the predicted (scaled) price.
model = Sequential([
    # First LSTM returns the full sequence so the second LSTM can consume it
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    # Second LSTM returns only its final output
    LSTM(units=50, return_sequences=False),
    Dropout(0.2),
    Dense(1),
])

# Adam optimizer with mean-squared-error loss
model.compile(loss='mean_squared_error', optimizer='adam')


In [None]:
# Fit the network on the training windows: 10 epochs, mini-batches of 64
model.fit(x=X_train, y=Y_train, batch_size=64, epochs=10)

In [None]:
# Prepare test data for prediction.
# Windows are built exactly like the training windows: each input is
# `time_step` consecutive scaled values and the target is the value that
# follows, so prediction k lines up with test row `time_step + k`.
if len(test_data_scaled) <= time_step:
    # Without this check the reshape below fails with an opaque IndexError
    raise ValueError(
        f"Test set has {len(test_data_scaled)} rows; more than "
        f"time_step={time_step} are needed to form at least one window"
    )

X_test, Y_test = [], []
for i in range(time_step, len(test_data_scaled)):
    X_test.append(test_data_scaled[i - time_step:i, 0])  # previous `time_step` values
    Y_test.append(test_data_scaled[i, 0])  # value to be predicted (scaled)

# Convert to numpy arrays and reshape the inputs to [samples, timesteps, features]
X_test, Y_test = np.array(X_test), np.array(Y_test)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Predict on the test data
test_predictions = model.predict(X_test)

# Rescale the predictions back to the original price scale
test_predictions = scaler.inverse_transform(test_predictions)

In [None]:
# Recursively forecast the next 10 steps: each prediction is appended to the
# input window and fed back in to produce the following one.
future_days = 10
future_predictions = []

# Seed with the most recent `time_step` scaled values. The window length must
# match what the model was trained on, so it is taken from `time_step` rather
# than hard-coded.
input_window = test_data_scaled[-time_step:, 0]

for _ in range(future_days):
    # Model input layout is [samples, timesteps, features]
    model_input = input_window.reshape(1, time_step, 1)
    predicted_price_scaled = model.predict(model_input)
    future_predictions.append(predicted_price_scaled[0, 0])

    # Slide the window forward: drop the oldest value, append the prediction
    input_window = np.append(input_window[1:], predicted_price_scaled[0, 0])

# Rescale the future predictions back to the original price scale
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))

# Create future dates for plotting, one calendar day apart.
# NOTE(review): this includes weekends and assumes `dates` is in ascending
# order with daily spacing — confirm against the dataset.
future_dates = [dates.iloc[-1] + pd.Timedelta(days=i) for i in range(1, future_days + 1)]

# Plot future predictions
plt.figure(figsize=(10, 6))
plt.plot(dates, close_prices, label='Actual Stock Price')
plt.plot(future_dates, future_predictions, label='Future Predictions', color='green')
plt.title('S&P Stock Price Prediction with Future Prices')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.legend()
plt.show()


In [None]:
# Calculate error metrics on the test set

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# 1-D view of the rescaled test predictions produced by model.predict
test_predictions_flat = np.ravel(test_predictions)

# Test prediction k was produced from test rows [k, k + time_step), so it
# forecasts row train_size + time_step + k of the full series. The actual
# prices must start at that offset, not at train_size.
test_start = train_size + time_step
actual_test_prices = np.ravel(
    close_prices[test_start:test_start + len(test_predictions_flat)]
)

# Calculate the metrics
mse = mean_squared_error(actual_test_prices, test_predictions_flat)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actual_test_prices, test_predictions_flat)
r2 = r2_score(actual_test_prices, test_predictions_flat)

print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")
print(f"R-squared: {r2}")

In [None]:
# Training-set metrics, for comparison with the test metrics.

# Predict on the training windows and map the outputs back to the price scale
train_predictions = scaler.inverse_transform(model.predict(X_train))

# Number of training predictions (one per training window)
train_size_adjusted = len(train_predictions)

# Training prediction k forecasts row time_step + k (the first `time_step`
# rows only ever serve as inputs), so the actual prices start at `time_step`.
actual_train_prices = close_prices[time_step:time_step + train_size_adjusted]

# Calculate training metrics on the aligned series
mse_train = mean_squared_error(actual_train_prices, train_predictions)
rmse_train = np.sqrt(mse_train)
r2_train = r2_score(actual_train_prices, train_predictions)

print(f"Training RMSE: {rmse_train}")
print(f"Training R-squared: {r2_train}")

In [None]:
# Since the model appears to be overfitting, raise the dropout rate from 0.2
# to 0.3. Appending a Dropout layer to the already-trained model would place
# it after the Dense output layer, and dropout is only active during
# training, so that would not regularize anything. Instead the network is
# rebuilt with the higher rate between the LSTM layers and retrained.
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.3))  # 30% dropout
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.3))  # 30% dropout
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

# Retrain with the same settings as the original fit so results are comparable
model.fit(X_train, Y_train, epochs=10, batch_size=64)

In [None]:
# Recursively forecast the next 100 steps
future_days = 100
future_predictions = []

# The forecast horizon (100) is independent of the input window length. The
# model was built and trained on windows of `time_step` values, so the seed
# window must have that length too, not 100.
input_window = test_data_scaled[-time_step:, 0]

for _ in range(future_days):
    # Model input layout is [samples, timesteps, features]
    model_input = input_window.reshape(1, time_step, 1)
    predicted_price_scaled = model.predict(model_input)
    future_predictions.append(predicted_price_scaled[0, 0])

    # Slide the window forward: drop the oldest value, append the prediction
    input_window = np.append(input_window[1:], predicted_price_scaled[0, 0])

# Rescale the future predictions back to the original stock price scale
future_predictions = scaler.inverse_transform(np.array(future_predictions).reshape(-1, 1))

# Generate dates for the future predictions, one calendar day apart.
# NOTE(review): this includes weekends and assumes `dates` is in ascending
# order with daily spacing — confirm against the dataset.
future_dates = [dates.iloc[-1] + pd.Timedelta(days=i) for i in range(1, future_days + 1)]

# Plot future predictions
plt.figure(figsize=(10, 6))
plt.plot(dates, close_prices, label='Actual Stock Price')
plt.plot(future_dates, future_predictions, label='Future Predictions', color='green')
plt.title('S&P Stock Price Prediction with Future Prices')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
