# LSTM-Based Stock Price Prediction with TensorFlow & Keras
_Predicting future stock prices using historical Microsoft data and deep learning._

In [None]:
# Import every library the notebook uses: data handling, plotting, modeling, and evaluation.
import os
import time
from datetime import datetime

# TF_CPP_MIN_LOG_LEVEL must be in the environment before TensorFlow is imported
# for its native logging to pick it up, so it is set ahead of the tensorflow import below.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # '2' filters out INFO and WARNING messages

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from tensorflow import keras

In [None]:
# Load the Microsoft stock dataset and inspect its first rows, schema, and summary statistics
data = pd.read_csv("MicrosoftStock.csv")
print(data.head())
# DataFrame.info() writes its report to stdout and returns None, so wrapping it
# in print() would only append a stray "None" line to the output.
data.info()
print(data.describe())

In [None]:
# Visualize key stock trends: Open vs Close prices and Trading Volume over time
# Parse the dates into a local series so matplotlib draws a real time axis.
# `data` itself is left untouched; pd.to_datetime is a no-op if the column has
# already been converted.
plot_dates = pd.to_datetime(data['date'])

plt.figure(figsize=(12,6))
plt.plot(plot_dates, data['open'], label="Open", color="blue")
plt.plot(plot_dates, data['close'], label="Close", color="red")
plt.title("Open-Close Price over Time")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()

plt.figure(figsize=(12,6))
plt.plot(plot_dates, data['volume'], label="Volume", color="orange")
plt.title("Stock Volume over Time")
plt.xlabel("Date")
plt.ylabel("Volume")
plt.legend()
plt.show()

In [None]:
# Correlation heatmap over the int64/float64 columns to show how the features move together
numeric_data = data.select_dtypes(include=["int64", "float64"])
feature_corr = numeric_data.corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(feature_corr, annot=True, cmap="coolwarm", ax=ax)
ax.set_title("Feature Correlation Heatmap")

In [None]:
# Convert 'date' column to datetime format and visualize close prices from 2013 to 2018
data['date'] = pd.to_datetime(data['date'])

# Keep rows strictly between 2013-01-01 and 2018-01-01 (both bounds exclusive).
prediction = data.loc[
    (data['date'] > datetime(2013,1,1)) &
    (data['date'] < datetime(2018,1,1))
]

# Plot the filtered window, not the full frame, so the figure matches the
# date range selected above.
plt.figure(figsize=(12,6))
plt.plot(prediction['date'], prediction['close'], color="blue")
plt.xlabel("Date")
plt.ylabel("Close")
plt.title("Price over Time")
plt.show()
In [None]:
# Prepare Training Data (Scaling + Sliding Window)
stock_close = data.filter(["close"])
dataset = stock_close.values
training_data_len = int(np.ceil(len(dataset) * 0.95))  # first 95% of rows are used for training

# Fit the scaler on the training rows only, then apply it to the full series.
# Fitting on every row would let the mean/std of the held-out period leak into
# the training inputs and make the test metrics look better than they are.
scaler = StandardScaler()
scaler.fit(dataset[:training_data_len])
scaled_data = scaler.transform(dataset)

# Each sample is the previous `window_size` scaled closes; the target is the next close.
window_size = 60
training_data = scaled_data[:training_data_len]
if len(training_data) <= window_size:
    raise ValueError(
        f"Need more than {window_size} training rows to build sequences, "
        f"got {len(training_data)}"
    )

X_train = np.array([
    training_data[end - window_size:end, 0]
    for end in range(window_size, len(training_data))
])
y_train = np.array(training_data[window_size:, 0])

# Reshape for LSTM input: (samples, timesteps, features)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

In [None]:
# Build and train LSTM Model
# Seed the Python, NumPy and backend random generators so weight initialisation,
# dropout masks and batch shuffling repeat across Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic — confirm if
# bit-exact reproducibility is required.
keras.utils.set_random_seed(42)

# Two stacked LSTM layers followed by a small dense head that outputs one value
# (the next scaled close). The explicit Input layer fixes the
# (timesteps, features) shape instead of passing input_shape to the first layer.
model = keras.models.Sequential([
    keras.Input(shape=(X_train.shape[1], 1)),
    keras.layers.LSTM(64, return_sequences=True),
    keras.layers.LSTM(64, return_sequences=False),
    keras.layers.Dense(128, activation="relu"),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1),
])

model.summary()

# MAE is the training loss; RMSE is tracked as an additional metric.
model.compile(optimizer="adam",
              loss="mae",
              metrics=[keras.metrics.RootMeanSquaredError()])

training = model.fit(X_train, y_train, epochs=20, batch_size=32)

In [None]:
# Build the test windows and predict, mapping the outputs back through the scaler
# The slice starts 60 rows before the split so the first test target has a full window.
test_data = scaled_data[training_data_len - 60:]
y_test = dataset[training_data_len:]  # unscaled closes for the test period

X_test = np.array([test_data[end - 60:end, 0] for end in range(60, len(test_data))])
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

scaled_predictions = model.predict(X_test)
predictions = scaler.inverse_transform(scaled_predictions)

In [None]:
# Plot training prices, held-out prices, and the model's predictions on one chart
train = data[:training_data_len]
test = data[training_data_len:].copy()  # copy so adding a column does not touch `data`
test['Predictions'] = predictions

# (frame, column, legend label, line colour) for each curve, drawn in this order
curves = (
    (train, 'close', "Train (Actual)", 'blue'),
    (test, 'close', "Test (Actual)", 'orange'),
    (test, 'Predictions', "Predictions", 'red'),
)

plt.figure(figsize=(12,8))
for frame, column, curve_label, curve_color in curves:
    plt.plot(frame['date'], frame[column], label=curve_label, color=curve_color)
plt.title("LSTM Stock Predictions")
plt.xlabel("Date")
plt.ylabel("Close Price")
plt.legend()
plt.show()

In [None]:
# Evaluate model performance on test set using MAE and RMSE
# `y_test` holds unscaled closing prices while the model outputs scaled values,
# so model.evaluate(X_test, y_test) would compare numbers on different scales.
# The errors are computed from `predictions` instead, which were already mapped
# back through the scaler, so both metrics are in price units.
test_errors = np.ravel(y_test) - np.ravel(predictions)
test_loss = float(np.mean(np.abs(test_errors)))
test_rmse = float(np.sqrt(np.mean(np.square(test_errors))))
print(f"Test MAE: {test_loss:.4f}, Test RMSE: {test_rmse:.4f}")

In [None]:
# Naive baseline: yesterday's price is today's prediction
# "Yesterday" for the first test day is the last training-day close, so the
# slice starts one row before the split. This scores the baseline on every
# test day, the same set of days the model is evaluated on.
naive_preds = dataset[training_data_len - 1:-1]
actual = y_test

naive_mae = mean_absolute_error(actual, naive_preds)
print(f"Naive Baseline MAE: {naive_mae:.4f}")

In [None]:
# R² on the test set: how much of the variance in the actual closes the predictions account for
r2 = r2_score(y_true=y_test, y_pred=predictions)
print(f"R² Score: {r2:.4f}")

In [None]:
# Measure and display model training time for benchmarking
# Time a fresh clone instead of calling fit() on `model` again: a second fit()
# would continue from the trained weights, changing the model the reported
# metrics came from and timing continued training, not a full run.
# clone_model builds new layers with newly initialised weights, so the clone
# is compiled here with the same settings used for `model`.
benchmark_model = keras.models.clone_model(model)
benchmark_model.compile(optimizer="adam",
                        loss="mae",
                        metrics=[keras.metrics.RootMeanSquaredError()])

# perf_counter is a monotonic, high-resolution clock intended for measuring elapsed time.
start = time.perf_counter()
training = benchmark_model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=0)
end = time.perf_counter()

print(f"Training time: {end - start:.2f} seconds")

In [None]:
# Plot the per-epoch training loss recorded by fit() to judge convergence
epoch_losses = training.history['loss']

fig, ax = plt.subplots()
ax.plot(epoch_losses, label='Train Loss')
ax.set_title('Loss Curve')
ax.set_xlabel('Epoch')
ax.set_ylabel('MAE Loss')
ax.legend()
plt.show()