
# Stock Price Prediction with Sequential Models (PyTorch)

This notebook demonstrates **stock price prediction** using sequential neural networks — **LSTM** and **GRU** — implemented in PyTorch. 

It supports two data sources:

1. **Yahoo Finance via `yfinance`** (set `USE_YFINANCE=True` below).  
2. **A local CSV** (default; the synthetic file shipped with this notebook: `synthetic_stock_AAPL_like.csv`).

> **Note:** If `yfinance` isn't installed, the notebook automatically falls back to the included synthetic dataset, even when `USE_YFINANCE=True`. When running offline, leave `USE_YFINANCE=False` so the local CSV is used.

We: 
- load OHLCV data;  
- prepare windowed sequences;  
- train **both** LSTM and GRU models;  
- evaluate with RMSE/MAE/MAPE;  
- plot predicted vs actual closes.




### References
- `yfinance` project page: https://pypi.org/project/yfinance/  
- A PyTorch LSTM stock prediction walkthrough (community resources exist across blogs & repos).  



In [None]:

# Imports
import os
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
rcParams['figure.figsize'] = (12, 5)

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# yfinance is an optional dependency. HAS_YF records whether the import
# succeeded so the data-loading cell can decide between Yahoo Finance and the
# local CSV. Any exception raised while importing (not only ImportError) is
# treated as "yfinance unavailable".
try:
    import yfinance as yf
    HAS_YF = True
except Exception:
    HAS_YF = False

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Pick the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
print('Using device:', DEVICE)


In [None]:

# Configuration
# Symbol requested from Yahoo Finance when USE_YFINANCE is enabled.
TICKER = 'AAPL'
# Download window, formatted as YYYY-MM-DD; END is today's date at run time.
START = '2015-01-01'
END = datetime.today().strftime('%Y-%m-%d')
USE_YFINANCE = False  # set True to fetch live data if online
LOCAL_CSV = 'synthetic_stock_AAPL_like.csv'  # default synthetic dataset shipped with this project
# NOTE(review): TARGET_COL is not referenced by the cells visible here; the
# target column is selected with features.index('Close') — confirm before relying on it.
TARGET_COL = 'Close'
# Number of consecutive time steps in each input window.
SEQ_LEN = 60
# Mini-batch size used by the DataLoaders.
BATCH_SIZE = 64
# Default number of training epochs and Adam learning rate for train_model().
EPOCHS = 20
LR = 1e-3
# Fractions of the windowed samples held out (chronologically) for validation and test.
VAL_SPLIT = 0.1
TEST_SPLIT = 0.1
# Seed the NumPy and PyTorch random number generators.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)


In [None]:

# Data loading
# Try Yahoo Finance only when requested AND yfinance imported; in every other
# case (or when the download comes back empty, e.g. offline) use the local CSV.
df = None
if USE_YFINANCE and HAS_YF:
    print('Fetching data from Yahoo Finance for', TICKER)
    # auto_adjust=False keeps the raw OHLC columns plus 'Adj Close'; recent
    # yfinance releases default to auto_adjust=True, which drops 'Adj Close'.
    fetched = yf.download(tickers=TICKER, start=START, end=END, auto_adjust=False)
    if fetched is not None and not fetched.empty:
        # Recent yfinance releases return (field, ticker) MultiIndex columns
        # even for a single ticker; keep only the field level.
        if isinstance(fetched.columns, pd.MultiIndex):
            fetched.columns = fetched.columns.get_level_values(0)
        df = fetched.reset_index()
        df['Ticker'] = TICKER
    else:
        print('Download returned no rows; falling back to the local CSV.')
elif USE_YFINANCE:
    print('yfinance is not installed; falling back to the local CSV.')

if df is None:
    print('Using local CSV:', LOCAL_CSV)
    df = pd.read_csv(LOCAL_CSV)
    df['Date'] = pd.to_datetime(df['Date'])

# Sort by date just in case
df = df.sort_values('Date').reset_index(drop=True)
print('Rows:', len(df), 'Date range:', df['Date'].min(), '->', df['Date'].max())

# Basic sanity: raise (rather than assert, which is stripped under -O) and
# report every missing column at once.
missing = [c for c in ['Open','High','Low','Close','Adj Close','Volume'] if c not in df.columns]
if missing:
    raise ValueError(f"Missing column(s): {', '.join(missing)}")


In [None]:

# Feature scaling
features = ['Open','High','Low','Close','Volume']
close_idx = features.index('Close')

# Train/Val/Test split by time. The sizes are computed before scaling so the
# scaler can be fitted on training rows only (fitting on the full series would
# leak validation/test min/max values into training).
n = len(df) - SEQ_LEN  # number of (window, next-day target) samples
if n <= 0:
    raise ValueError(f'Need more than SEQ_LEN={SEQ_LEN} rows to build sequences, got {len(df)}')
test_size = int(n * TEST_SPLIT)
val_size = int(n * VAL_SPLIT)
train_size = n - val_size - test_size
if train_size <= 0:
    raise ValueError('VAL_SPLIT and TEST_SPLIT leave no samples for training')

raw = df[features].to_numpy(dtype=np.float64)
scaler = MinMaxScaler()
# The last training window ends at row train_size + SEQ_LEN - 2 and its target
# is row train_size + SEQ_LEN - 1, so these are exactly the rows training sees.
scaler.fit(raw[:train_size + SEQ_LEN])
scaled = scaler.transform(raw)

# We predict next-day Close (scaled)
series = scaled[:, close_idx].reshape(-1, 1)

# Create sequences: X[i] holds rows i .. i+SEQ_LEN-1, y[i] is the Close of row i+SEQ_LEN
X = np.stack([scaled[i:i+SEQ_LEN, :] for i in range(n)])
y = series[SEQ_LEN:].copy()

X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size+val_size], y[train_size:train_size+val_size]
X_test, y_test = X[train_size+val_size:], y[train_size+val_size:]

print(f'Train/Val/Test: {len(X_train)}/{len(X_val)}/{len(X_test)} samples')

class SeqDataset(Dataset):
    """In-memory dataset pairing each input window with its target.

    Args:
        X: array-like of input windows; converted to a float32 tensor.
        y: array-like of targets, one per window; converted to a float32 tensor.

    Raises:
        ValueError: if ``X`` and ``y`` do not contain the same number of samples.
    """
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        # Fail here rather than with an IndexError in the middle of an epoch
        # (or with silently ignored targets when y is the longer one).
        if len(self.X) != len(self.y):
            raise ValueError(
                f'X and y must have the same number of samples, got {len(self.X)} and {len(self.y)}'
            )

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Wrap each chronological split in a dataset, then in a DataLoader. Only the
# training loader shuffles; validation and test keep their original order.
train_ds = SeqDataset(X_train, y_train)
val_ds = SeqDataset(X_val, y_val)
test_ds = SeqDataset(X_test, y_test)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)


In [None]:

# Define LSTM and GRU models
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head that emits one value per sequence.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        dropout: dropout applied between stacked layers. Forced to 0 when
            ``num_layers`` is 1, where there is no inter-layer connection.
    """
    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.2):
        super().__init__()
        # nn.LSTM applies dropout only between layers and emits a UserWarning
        # when it is non-zero with a single layer.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a (batch, seq_len, input_size) tensor to (batch, 1) predictions."""
        out, _ = self.lstm(x)
        # take last time-step
        out = out[:, -1, :]
        return self.fc(out)

class GRUModel(nn.Module):
    """Stacked GRU followed by a linear head that emits one value per sequence.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the GRU hidden state.
        num_layers: number of stacked GRU layers.
        dropout: dropout applied between stacked layers. Forced to 0 when
            ``num_layers`` is 1, where there is no inter-layer connection.
    """
    def __init__(self, input_size, hidden_size=64, num_layers=2, dropout=0.2):
        super().__init__()
        # nn.GRU applies dropout only between layers and emits a UserWarning
        # when it is non-zero with a single layer.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a (batch, seq_len, input_size) tensor to (batch, 1) predictions."""
        out, _ = self.gru(x)
        # take last time-step
        out = out[:, -1, :]
        return self.fc(out)

# One input channel per feature column. The LSTM is built before the GRU so
# the seeded weight initialisation order stays the same.
input_size = len(features)
lstm_model = LSTMModel(input_size=input_size)
lstm_model = lstm_model.to(DEVICE)
gru_model = GRUModel(input_size=input_size)
gru_model = gru_model.to(DEVICE)


In [None]:

# Training utilities

def train_model(model, train_loader, val_loader, epochs=EPOCHS, lr=LR):
    """Train ``model`` with Adam on MSE loss and restore its best validation weights.

    Args:
        model: network to optimise in place; expected to already live on DEVICE.
        train_loader: iterable of ``(inputs, targets)`` training batches.
        val_loader: iterable of ``(inputs, targets)`` validation batches.
        epochs: number of passes over ``train_loader``.
        lr: Adam learning rate.

    Returns:
        dict with ``'train_loss'`` and ``'val_loss'`` lists holding the mean
        batch loss of every epoch (NaN for an epoch whose loader was empty).
    """
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    best_val = float('inf')
    # Stays None when validation loss never improves (empty val_loader or NaN
    # losses); the model then simply keeps its final weights.
    best_state = None
    history = {'train_loss': [], 'val_loss': []}
    for epoch in range(1, epochs + 1):
        model.train()
        train_losses = []
        for xb, yb in train_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            pred = model(xb)
            loss = criterion(pred, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        # validation
        model.eval()
        val_losses = []
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(DEVICE), yb.to(DEVICE)
                pred = model(xb)
                loss = criterion(pred, yb)
                val_losses.append(loss.item())
        avg_train = float(np.mean(train_losses)) if train_losses else float('nan')
        avg_val = float(np.mean(val_losses)) if val_losses else float('nan')
        history['train_loss'].append(avg_train)
        history['val_loss'].append(avg_val)
        if avg_val < best_val:
            best_val = avg_val
            # clone() is required: state_dict() tensors share storage with the
            # live parameters and .cpu() is a no-op for CPU tensors, so without
            # a copy the "snapshot" would keep changing as training continues.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
        if epoch % 5 == 0:
            print(f'Epoch {epoch}/{epochs} - train {avg_train:.4f} - val {avg_val:.4f}')
    # restore best state
    if best_state is not None:
        model.load_state_dict(best_state)
    return history

# Fit both networks on the same loaders, LSTM first, keeping each loss history.
print('Training LSTM...')
lstm_hist = train_model(
    model=lstm_model,
    train_loader=train_loader,
    val_loader=val_loader,
)
print('Training GRU...')
gru_hist = train_model(
    model=gru_model,
    train_loader=train_loader,
    val_loader=val_loader,
)


In [None]:

# Evaluate on test set and invert scaling

def _inverse_close(values):
    """Map scaled Close values back to price units using the fitted scaler."""
    # scaler.inverse_transform needs every feature column; the other columns
    # are zero-filled and discarded, only the Close column is read back.
    padded = np.zeros((len(values), len(features)))
    padded[:, close_idx] = np.asarray(values, dtype=np.float64).reshape(-1)
    return scaler.inverse_transform(padded)[:, close_idx]


def evaluate(model, loader):
    """Score ``model`` on ``loader`` in original price units.

    Args:
        model: trained network, expected to live on DEVICE.
        loader: iterable of ``(inputs, scaled_targets)`` batches.

    Returns:
        ``(inv_preds, inv_targets, rmse, mae, mape)`` where the first two are
        1-D arrays of un-scaled predictions and targets and ``mape`` is a
        percentage.

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for xb, yb in loader:
            xb = xb.to(DEVICE)
            preds.append(model(xb).cpu().numpy())
            targets.append(yb.numpy())
    if not preds:
        raise ValueError('evaluate() received an empty loader; nothing to score')
    preds = np.concatenate(preds, axis=0)
    targets = np.concatenate(targets, axis=0)
    # invert scaling for Close
    inv_preds = _inverse_close(preds)
    inv_targets = _inverse_close(targets)
    # np.sqrt instead of math.sqrt: this notebook never imports math
    rmse = float(np.sqrt(mean_squared_error(inv_targets, inv_preds)))
    mae = mean_absolute_error(inv_targets, inv_preds)
    # small epsilon guards against division by zero for zero-valued targets
    mape = np.mean(np.abs((inv_targets - inv_preds) / (inv_targets + 1e-8))) * 100
    return inv_preds, inv_targets, rmse, mae, mape

# Score both models on the held-out test loader, then report the metrics.
lstm_results = evaluate(lstm_model, test_loader)
gru_results = evaluate(gru_model, test_loader)
lstm_preds, lstm_true, lstm_rmse, lstm_mae, lstm_mape = lstm_results
gru_preds, gru_true, gru_rmse, gru_mae, gru_mape = gru_results

print(f'LSTM - RMSE: {lstm_rmse:.4f}, MAE: {lstm_mae:.4f}, MAPE: {lstm_mape:.2f}%')
print(f'GRU  - RMSE: {gru_rmse:.4f}, MAE: {gru_mae:.4f}, MAPE: {gru_mape:.2f}%')


In [None]:

# Plot predictions vs actual (test portion)
# Same figure as the pyplot state-machine version, drawn through an explicit Axes.
fig, ax = plt.subplots(figsize=(12, 5))
for curve, curve_label in (
    (lstm_true, 'Actual (Close)'),
    (lstm_preds, 'LSTM Predicted'),
    (gru_preds, 'GRU Predicted'),
):
    ax.plot(curve, label=curve_label)
ax.set_title('Predicted vs Actual Closing Prices (Test set)')
ax.set_xlabel('Time index in test set')
ax.set_ylabel('Price')
ax.legend()
ax.grid(True)
plt.show()


In [None]:

# Save models and predictions
os.makedirs('artifacts', exist_ok=True)

# Model weights (state dicts only), LSTM first.
for weights_path, net in (
    ('artifacts/lstm_model.pt', lstm_model),
    ('artifacts/gru_model.pt', gru_model),
):
    torch.save(net.state_dict(), weights_path)

# Un-scaled test predictions and the matching ground truth.
for array_path, values in (
    ('artifacts/lstm_preds.npy', lstm_preds),
    ('artifacts/gru_preds.npy', gru_preds),
    ('artifacts/test_true.npy', lstm_true),
):
    np.save(array_path, values)

print('Artifacts saved to ./artifacts')



### Notes & Next Steps
- Try toggling `USE_YFINANCE=True` to fetch real data for any ticker (requires internet and `yfinance`).  
- Experiment with feature engineering: add technical indicators (SMA, EMA, RSI), macro data, or news sentiment for richer inputs.  
- Adjust hyperparameters (sequence length, hidden sizes, layers) and consider **early stopping** or learning rate schedules.  
- For production-grade models, use **walk-forward validation**, check for **data leakage** (for example, fit scalers on the training split only), and consider **robust scaling** (e.g., `RobustScaler`).
