## Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import yfinance as yf

from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

from pyswarm import pso

## EDA

In [None]:
# Download the daily price history for a single ticker.
# `period` is passed explicitly so the amount of history does not depend on
# the library default. NOTE(review): the default look-back window of
# `yf.download` appears to differ between yfinance releases (full history in
# older ones, one month in newer ones) -- confirm against the installed version.
# To pull several tickers at once, pass a space-separated string instead,
# e.g. "MSFT AMZN AAPL NFLX GOOG".
data = yf.download("AMZN", period="max")

In [None]:
# Bare expression: in a notebook this renders `data` as the cell output.
data

In [None]:
# Summarise the downloaded data via its `info()` method
# (presumably a pandas DataFrame: column dtypes and non-null counts -- confirm).
data.info()

In [None]:
# Closing prices: the single series every model below is fitted on.
close_prices = data['Close']
# NOTE(review): some yfinance releases return MultiIndex columns
# (field, ticker), in which case `data['Close']` is a one-column DataFrame
# rather than a Series. Collapse that case so later cells always receive a
# 1-D Series; a genuine multi-ticker frame is left untouched.
if isinstance(close_prices, pd.DataFrame) and close_prices.shape[1] == 1:
    close_prices = close_prices.iloc[:, 0]

In [None]:
# Line chart of the closing-price series (object-oriented Matplotlib API).
fig, ax = plt.subplots(figsize=(7, 3))
ax.plot(close_prices, label='Close Price')
ax.set(title='Stock Prices Over Time', xlabel='Date', ylabel='Price')
ax.legend()
plt.show()

## ARIMA

### Determine p, d, q with PSO

In [None]:
## ARIMA Model Parameters Optimization with PSO

# AIC already computed for an integer order. Many swarm positions decode to
# the same (p, d, q), so this avoids refitting identical models. The cache is
# recreated whenever this cell is re-run.
_arima_aic_cache = {}


def _decode_arima_order(params):
    """Map a continuous PSO position to an integer ``(p, d, q)`` tuple.

    Values are rounded to the nearest integer. Plain ``int()`` truncation
    would reach an upper bound only when a particle sits exactly on it.
    """
    return tuple(int(round(float(value))) for value in params[:3])


def arima_fitness_function(params):
    """Return the AIC of a fixed-order ARIMA fitted to ``close_prices``.

    Args:
        params: sequence of three floats (a PSO particle position) that is
            decoded to an integer ``(p, d, q)`` order.

    Returns:
        The model's AIC (lower is better), or ``inf`` when the fit fails or
        yields a non-finite AIC, so the swarm simply never selects that order.
    """
    # NOTE: `auto_arima` performs its own order search and has no `order`
    # argument in its signature, so it cannot score one specific candidate.
    # A fixed-order pmdarima `ARIMA` is fitted instead.
    from pmdarima import ARIMA as FixedOrderARIMA

    order = _decode_arima_order(params)
    if order not in _arima_aic_cache:
        try:
            fitted = FixedOrderARIMA(order=order, suppress_warnings=True).fit(close_prices)
            aic = float(fitted.aic())
        except (ValueError, np.linalg.LinAlgError):
            # A candidate that cannot be estimated must not abort the search.
            aic = np.inf
        _arima_aic_cache[order] = aic if np.isfinite(aic) else np.inf
    return _arima_aic_cache[order]


# Define PSO optimizer for ARIMA parameters
def optimize_arima_params():
    """Search ``(p, d, q)`` with particle swarm optimisation.

    Returns:
        Tuple ``(p, d, q)`` of ints, decoded from the best swarm position
        with the same rounding the fitness function uses.
    """
    lb = [0, 0, 0]  # Lower bounds for p, d, q
    ub = [5, 2, 5]  # Upper bounds for p, d, q
    best_params, _ = pso(arima_fitness_function, lb, ub, swarmsize=10, maxiter=5)
    return _decode_arima_order(best_params)


# Get the best ARIMA parameters
p, d, q = optimize_arima_params()
print(f'Optimized ARIMA Parameters: p={p}, d={d}, q={q}')

### Fit ARIMA model and make predictions

In [None]:
# Fit ARIMA model and make predictions
from statsmodels.tsa.arima.model import ARIMA

# Use the (p, d, q) order selected by the optimisation step above.
arima_order = (p, d, q)
arima_model = ARIMA(close_prices, order=arima_order)
arima_results = arima_model.fit()

In [None]:
# Non-dynamic predictions over the full span of the fitted series,
# from the first observation to the last.
last_position = len(close_prices) - 1
arima_predictions = arima_results.predict(start=0, end=last_position, dynamic=False)

In [None]:
# Overlay the ARIMA predictions on the observed closing prices.
fig, ax = plt.subplots(figsize=(7, 3))
ax.plot(close_prices, label='Actual Close Price', color='blue')
ax.plot(arima_predictions, label='ARIMA Predicted Price', color='red')
ax.set(title='Actual vs ARIMA Predicted Stock Prices', xlabel='Date', ylabel='Price')
ax.legend()
plt.show()

In [None]:
# Error metrics of the ARIMA predictions against the observed closes.
# NOTE(review): these are predictions over the same data the model was
# fitted on, so they are not an out-of-sample score -- confirm that is intended.
mse_arima = mean_squared_error(close_prices, arima_predictions)
rmse_arima = np.sqrt(mse_arima)
mae_arima = mean_absolute_error(close_prices, arima_predictions)
r2_arima = r2_score(close_prices, arima_predictions)

print(
    f'ARIMA Mean Absolute Error (MAE): {mae_arima:.4f}',
    f'ARIMA Mean Squared Error (MSE): {mse_arima:.4f}',
    f'ARIMA Root Mean Squared Error (RMSE): {rmse_arima:.4f}',
    f'ARIMA R^2 Score: {r2_arima:.4f}',
    sep='\n',
)

## LSTM

In [None]:
# Prepare the dataset for LSTM
def create_dataset(data, time_step=1):
    """Slice a series into supervised (window, next value) pairs.

    Args:
        data: 2-D array-like; only column 0 is read.
        time_step: number of consecutive rows in each input window.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_windows, time_step)`` and ``y``
        has shape ``(n_windows,)``, with
        ``n_windows = max(len(data) - time_step, 0)``. ``X`` stays 2-D even
        when there are no windows, so a following reshape does not fail.
    """
    series = np.asarray(data)[:, 0]
    n_windows = max(len(series) - time_step, 0)
    windows = [series[start:start + time_step] for start in range(n_windows)]
    X = np.array(windows).reshape(n_windows, time_step)
    # The target of window `start` is the row right after it.
    y = series[time_step:time_step + n_windows].copy()
    return X, y


# Define time_step (number of previous days to use for prediction)
time_step = 10  # You can adjust this value

close_values = close_prices.values.reshape(-1, 1)

# Chronological 80/20 split, counted in windows (one window per target row).
n_windows = max(len(close_values) - time_step, 0)
train_size = int(n_windows * 0.8)

# Scale the data.
# The scaler is fitted only on the rows the training windows can see: their
# inputs plus their targets, i.e. the first `train_size + time_step` rows.
# Fitting on the full series would leak the test period's min/max into
# training. Test-period values may therefore fall outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(close_values[:train_size + time_step])
scaled_data = scaler.transform(close_values)

X, y = create_dataset(scaled_data, time_step)
X = X.reshape(X.shape[0], X.shape[1], 1)  # Reshape for LSTM [samples, time steps, features]

# Split data into train and test sets
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

## Build and Train the LSTM Model

In [None]:
## LSTM Hyperparameter Tuning with PSO
def _build_lstm_model(lstm_units, learning_rate):
    """Return a compiled two-layer LSTM regressor.

    Single definition shared by the PSO fitness function and the final
    model, so the architecture being tuned is the one that gets trained.
    """
    model = Sequential()
    model.add(LSTM(lstm_units, return_sequences=True, input_shape=(time_step, 1)))
    model.add(LSTM(lstm_units, return_sequences=False))
    model.add(Dense(1))
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return model


def lstm_fitness_function(params):
    """Train one candidate LSTM and return its best validation loss.

    Args:
        params: PSO particle position
            ``[lstm_units, learning_rate, batch_size]``; the first and last
            entries are truncated to ``int``.

    Returns:
        The smallest finite ``val_loss`` seen during training, or ``inf``
        when every epoch produced a non-finite loss (diverged training).
    """
    from tensorflow.keras import backend as keras_backend

    lstm_units, learning_rate, batch_size = int(params[0]), params[1], int(params[2])

    # The swarm builds dozens of throw-away models; drop Keras' global state
    # from earlier candidates so memory does not grow across evaluations.
    keras_backend.clear_session()

    candidate = _build_lstm_model(lstm_units, learning_rate)
    history = candidate.fit(X_train, y_train, epochs=50, batch_size=batch_size, validation_split=0.2, verbose=0)

    # Built-in min() gives an order-dependent answer when the list contains
    # NaN, so non-finite epochs are filtered out explicitly.
    val_losses = np.asarray(history.history['val_loss'], dtype=float)
    finite_losses = val_losses[np.isfinite(val_losses)]
    return float(finite_losses.min()) if finite_losses.size else np.inf  # Lower validation loss indicates a better model


# Define PSO optimizer for LSTM hyperparameters
def optimize_lstm_hyperparameters():
    """Search LSTM hyperparameters with particle swarm optimisation.

    Returns:
        Tuple ``(lstm_units, learning_rate, batch_size)`` decoded from the
        best swarm position the same way the fitness function decodes it.
    """
    lb = [10, 0.001, 16]  # Lower bounds for lstm_units, learning_rate, batch_size
    ub = [100, 0.1, 64]   # Upper bounds for lstm_units, learning_rate, batch_size
    best_params, _ = pso(lstm_fitness_function, lb, ub, swarmsize=10, maxiter=5)
    return int(best_params[0]), best_params[1], int(best_params[2])


# Get the best LSTM hyperparameters
lstm_units, learning_rate, batch_size = optimize_lstm_hyperparameters()
print(f'Optimized LSTM Hyperparameters: lstm_units={lstm_units}, learning_rate={learning_rate}, batch_size={batch_size}')

# Build and compile the final LSTM model with the selected hyperparameters
model_lstm = _build_lstm_model(lstm_units, learning_rate)

In [None]:
# Train the final model; the fit options are collected in one place.
training_options = dict(epochs=50, batch_size=batch_size, validation_split=0.2, verbose=1)
history = model_lstm.fit(X_train, y_train, **training_options)

In [None]:
# Optional: training vs validation loss per epoch for the LSTM model.
loss_curves = history.history
fig, ax = plt.subplots(figsize=(7, 3))
ax.plot(loss_curves['loss'], label='Training Loss')
ax.plot(loss_curves['val_loss'], label='Validation Loss')
ax.set(title='LSTM Model Loss', xlabel='Epoch', ylabel='Loss')
ax.legend()
plt.show()

## Make Predictions with the LSTM Model

In [None]:
# Make predictions with the LSTM Model
lstm_predictions = model_lstm.predict(X_test)
lstm_predictions = scaler.inverse_transform(lstm_predictions)  # Inverse transform to get actual values

# The target of test window i is the close at row train_size + time_step + i.
test_start = train_size + time_step

# Actual prices for the test period, read straight from the source series.
# Overwriting `y_test` with its own inverse transform would make this cell
# non-idempotent: re-running it would un-scale already un-scaled prices.
y_test = close_prices.values[test_start:].reshape(-1, 1)

# Create a DataFrame to hold the LSTM predictions
lstm_pred_df = pd.DataFrame({
    'Date': close_prices.index[test_start:],      # Date index for test data
    'Actual': y_test.flatten(),                   # Actual close prices
    'LSTM_Predicted': lstm_predictions.flatten()  # LSTM predicted close prices
})

In [None]:
# Actual vs LSTM-predicted closes over the test period.
test_dates = lstm_pred_df['Date']
fig, ax = plt.subplots(figsize=(7, 3))
ax.plot(test_dates, lstm_pred_df['Actual'], label='Actual Close Price', color='blue')
ax.plot(test_dates, lstm_pred_df['LSTM_Predicted'], label='LSTM Predicted Close Price', color='red', linestyle='--')
ax.set(title='Actual vs Predicted Stock Prices (LSTM)', xlabel='Date', ylabel='Price')
ax.legend()
plt.show()

In [None]:
# Error metrics of the LSTM predictions on the test period.
actual_test = lstm_pred_df['Actual']
predicted_test = lstm_pred_df['LSTM_Predicted']

mse_lstm = mean_squared_error(actual_test, predicted_test)
rmse_lstm = np.sqrt(mse_lstm)
mae_lstm = mean_absolute_error(actual_test, predicted_test)
r2_lstm = r2_score(actual_test, predicted_test)

print(
    f'LSTM Mean Absolute Error (MAE): {mae_lstm:.4f}',
    f'LSTM Mean Squared Error (MSE): {mse_lstm:.4f}',
    f'LSTM Root Mean Squared Error (RMSE): {rmse_lstm:.4f}',
    f'LSTM R^2 Score: {r2_lstm:.4f}',
    sep='\n',
)

## Combining ARIMA and LSTM Forecasts with PSO

In [None]:
# ARIMA predictions restricted to the rows covered by the LSTM test set.
series_length = len(close_prices)
test_length = len(y_test)
arima_forecast = arima_results.predict(start=series_length - test_length, end=series_length - 1, dynamic=False)


def combine_forecasts_fitness_function(weights):
    """Return the MSE of a weighted sum of the ARIMA and LSTM forecasts.

    ``weights[0]`` scales the ARIMA forecast and ``weights[1]`` the LSTM
    predictions; the blend is scored against ``y_test`` (lower is better).
    """
    # NOTE(review): the weights are tuned on the same test data that the
    # combined forecast is later evaluated on -- confirm this is intended.
    blended = weights[0] * arima_forecast + weights[1] * lstm_predictions.flatten()
    return mean_squared_error(y_test.flatten(), blended)


def optimize_combination_weights():
    """Run PSO over both blend weights, each bounded to [0, 1].

    Returns the best position found by the swarm (ARIMA weight first,
    LSTM weight second).
    """
    lower_bounds = [0, 0]  # ARIMA weight, LSTM weight
    upper_bounds = [1, 1]  # ARIMA weight, LSTM weight
    swarm_result = pso(combine_forecasts_fitness_function, lower_bounds, upper_bounds, swarmsize=10, maxiter=5)
    return swarm_result[0]


# Get the best weights for combining forecasts
best_weights = optimize_combination_weights()
arima_weight, lstm_weight = best_weights
print(f'Optimized Weights for Combining Forecasts: ARIMA_weight={arima_weight:.4f}, LSTM_weight={lstm_weight:.4f}')

In [None]:
# Blend the two forecasts with the optimised weights.
lstm_component = lstm_weight * lstm_predictions.flatten()
combined_predictions = arima_weight * arima_forecast + lstm_component

## Evaluate the Model Performance

In [None]:
# Actual closes vs the blended ARIMA/LSTM forecast over the test period.
test_dates = lstm_pred_df['Date']
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(test_dates, lstm_pred_df['Actual'], label='Actual Close Price', color='blue')
ax.plot(test_dates, combined_predictions, label='Combined Forecasted Price', color='green', linestyle='--')
ax.set(title='Actual vs Combined Forecasted Stock Prices', xlabel='Date', ylabel='Price')
ax.legend()
plt.show()

In [None]:
# Error metrics of the blended forecast on the test period.
actual_flat = y_test.flatten()

mse_combined = mean_squared_error(actual_flat, combined_predictions)
rmse_combined = np.sqrt(mse_combined)
mae_combined = mean_absolute_error(actual_flat, combined_predictions)
r2_combined = r2_score(actual_flat, combined_predictions)

print(
    f'Combined Forecasts Mean Absolute Error (MAE): {mae_combined:.4f}',
    f'Combined Forecasts Mean Squared Error (MSE): {mse_combined:.4f}',
    f'Combined Forecasts Root Mean Squared Error (RMSE): {rmse_combined:.4f}',
    f'Combined Forecasts R^2 Score: {r2_combined:.4f}',
    sep='\n',
)