### This notebook trains a deep learning model to predict stock prices (OHLCV) from historical data

In [None]:
import torch
import pandas as pd
import numpy as np
from yahoo_fin import stock_info as si
import torch.nn as nn

# Smoke test: fetch some tickers (a.k.a. stock symbols) to confirm that yahoo_fin is reachable.
# Both calls go through the yahoo_fin `stock_info` module imported as `si`.
nasdaq_tickers = si.tickers_nasdaq()
print(nasdaq_tickers[:10])  # preview only the first ten symbols

# S&P 500 symbols; not referenced again in the cells visible here.
sp500_tickers = si.tickers_sp500()

['AACB', 'AACBR', 'AACBU', 'AACG', 'AADR', 'AAL', 'AAME', 'AAOI', 'AAON', 'AAPB']


### Ideas: 
1. First, we will pick some popular stocks (Apple, Tesla, ...); an easy way is to select them by total market value (market capitalization).
2. Through the `yahoo_fin` API we will get their historical data.
3. For each stock, we will take the `k` latest timestamps (days, minutes, ...) of OHLCV data, which form a `k` x `5` 2D matrix (`k` is the `sequence_length` below, i.e. how far back in the past we look to predict future values).
4. We will get about 30-60 samples for each stock (as many as possible); for example, if `k` = 15, days 1-15 form the first sample, days 16-30 the second, and so on.
5. All of them will be fed into the model; the final model is the result of the training process.

Estimation: 50 stocks, each stock will have ~ 100 samples, `k` = 15 -> 50 x 100 x 15 x 5 

Train/valid/test ratio = 7/1.5/1.5

# Model declaration

In [None]:
class StockPriceModel(nn.Module):
    """LSTM encoder + multi-head attention + MLP head for next-step OHLCV prediction.

    The model consumes a window of past observations shaped
    ``[batch_size, sequence_length, input_features]`` and returns one
    prediction per sample shaped ``[batch_size, input_features]``.

    Args:
        sequence_length: Look-back window length ``k``. None of the layers
            depend on it (the LSTM and the attention accept any length); it is
            kept for API compatibility and stored as ``self.sequence_length``.
        input_features: Features per time step; 5 for OHLCV
            (4 prices: Open/High/Low/Close, plus Volume). The output has the
            same number of features.
        hidden_size: LSTM hidden size, also the attention embedding size.
            Must be divisible by ``num_heads``.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability used between LSTM layers (only when
            ``num_layers > 1``) and before the output layer.
        num_heads: Number of attention heads.
    """

    def __init__(self, sequence_length=30,
                 input_features=5,
                 hidden_size=128,
                 num_layers=2,
                 dropout=0.2,
                 num_heads=8):
        super(StockPriceModel, self).__init__()

        self.sequence_length = sequence_length

        # Input: [batch_size, sequence_length, input_features]
        # where sequence_length = k and input_features = 4 (OHLC) + 1 (Volume) = 5.
        self.lstm = nn.LSTM(
            input_size=input_features,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            # nn.LSTM only applies dropout *between* stacked layers and warns
            # when a non-zero value is passed for a single-layer network.
            dropout=dropout if num_layers > 1 else 0.0
        )

        self.attention = nn.MultiheadAttention(hidden_size, num_heads=num_heads)
        self.fc1 = nn.Linear(hidden_size, 64)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

        # Output layer predicts every input feature (OHLCV): output size == input size.
        self.fc2 = nn.Linear(64, out_features=input_features)

    def forward(self, x):
        """Predict the next feature vector for each window in ``x``.

        Args:
            x: Tensor shaped ``[batch_size, k, input_features]``.

        Returns:
            Tensor shaped ``[batch_size, input_features]``.
        """
        lstm_out, _ = self.lstm(x)  # [batch_size, k, hidden_size]

        # nn.MultiheadAttention defaults to sequence-first tensors.
        keys = lstm_out.transpose(0, 1)  # [k, batch_size, hidden_size]

        # Only the last time step feeds the prediction head, so attend with
        # that single query over the whole sequence. The result equals the
        # last row of full self-attention at O(k) instead of O(k^2) cost.
        # The averaged attention weights are never used, so skip them.
        query = keys[-1:]  # [1, batch_size, hidden_size]
        attn_out, _ = self.attention(query, keys, keys, need_weights=False)

        out = self.fc1(attn_out[0])  # [batch_size, 64]
        out = self.relu(out)
        out = self.dropout(out)

        # Output all features
        out = self.fc2(out)
        return out

# Train

In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=100,
                early_stopping_patience=10, checkpoint_path='best_model.pth'):
    """Train ``model`` with LR scheduling, gradient clipping and early stopping.

    Each epoch runs one pass over ``train_loader`` and one over ``val_loader``.
    The learning rate is halved by ``ReduceLROnPlateau`` when the validation
    loss stalls, and whenever the validation loss improves the model's
    ``state_dict`` is written to ``checkpoint_path``.

    Args:
        model: The ``nn.Module`` to train.
        train_loader: Iterable yielding ``(inputs, targets)`` training batches.
        val_loader: Iterable yielding ``(inputs, targets)`` validation batches.
        criterion: Loss callable, invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Maximum number of epochs.
        early_stopping_patience: Stop after this many consecutive epochs
            without a validation improvement.
        checkpoint_path: Where the best ``state_dict`` is saved.

    Returns:
        dict with keys ``'train_loss'`` and ``'val_loss'``, each a list holding
        the mean per-batch loss of every completed epoch.

    Raises:
        ValueError: If either loader yields no batches.
    """
    # Batches are moved to wherever the model lives, so a model on GPU works
    # with loaders that yield CPU tensors.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    def _to_device(batch):
        """Move tensor batches to the model's device; pass anything else through."""
        if device is not None and isinstance(batch, torch.Tensor):
            return batch.to(device)
        return batch

    best_val_loss = float('inf')
    early_stopping_counter = 0
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)
    history = {'train_loss': [], 'val_loss': []}

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss, train_batches = 0.0, 0

        for inputs, targets in train_loader:
            inputs, targets = _to_device(inputs), _to_device(targets)

            optimizer.zero_grad()
            outputs = model(inputs)

            # Loss across all output features
            loss = criterion(outputs, targets)
            loss.backward()

            # Clip the global gradient norm to keep LSTM updates stable.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()
            train_loss += loss.item()
            train_batches += 1

        # Validation phase
        model.eval()
        val_loss, val_batches = 0.0, 0

        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = _to_device(inputs), _to_device(targets)
                outputs = model(inputs)
                val_loss += criterion(outputs, targets).item()
                val_batches += 1

        # Batches are counted while iterating so loaders without __len__ work
        # and an empty loader gives a clear error instead of a division by zero.
        if train_batches == 0 or val_batches == 0:
            raise ValueError("train_loader and val_loader must each yield at least one batch")

        avg_train_loss = train_loss / train_batches
        avg_val_loss = val_loss / val_batches
        history['train_loss'].append(avg_train_loss)
        history['val_loss'].append(avg_val_loss)

        # Learning rate scheduling
        scheduler.step(avg_val_loss)

        # Report before the early-stopping check so the last epoch is logged too.
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.6f}, Val Loss: {avg_val_loss:.6f}")

        # Early stopping check
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), checkpoint_path)
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1
            if early_stopping_counter >= early_stopping_patience:
                # 1-based, consistent with the per-epoch progress line.
                print(f"Early stopping at epoch {epoch+1}")
                break

    return history

# Evaluation


In [None]:

import torch.nn.functional as F

def evaluate_model(model, test_loader, feature_names=("Open", "High", "Low", "Close", "Volume")):
    """Compute overall and per-feature MSE of ``model`` on ``test_loader``.

    Squared errors are accumulated over every sample, so the reported values
    are true dataset-level means even when the last batch is smaller than the
    others (averaging per-batch means would over-weight that batch).

    Args:
        model: Module whose output is shaped ``[batch_size, n_features]``.
        test_loader: Iterable yielding ``(inputs, targets)`` batches, with
            ``targets`` shaped like the model output.
        feature_names: Labels used when printing per-feature errors. Output
            columns beyond the supplied names are labelled ``Feature <index>``.

    Returns:
        ``(avg_mse, avg_feature_mse)``: the overall MSE as a float and a list
        with one MSE per output feature.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()

    # Evaluate on whatever device the model lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    squared_error_sum = None  # per-feature running sum, float64 on CPU
    num_samples = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            if device is not None:
                inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)

            # One vectorised reduction per batch instead of one loss call
            # (and one host sync) per feature.
            batch_sq_err = ((outputs - targets) ** 2).sum(dim=0).double().cpu()
            if squared_error_sum is None:
                squared_error_sum = batch_sq_err
            else:
                squared_error_sum = squared_error_sum + batch_sq_err
            num_samples += outputs.shape[0]

    if num_samples == 0:
        raise ValueError("test_loader produced no samples to evaluate")

    # Calculate averages; every feature has the same sample count, so the
    # overall MSE is the mean of the per-feature MSEs.
    avg_feature_mse = (squared_error_sum / num_samples).tolist()
    avg_mse = sum(avg_feature_mse) / len(avg_feature_mse)

    print(f"Overall MSE: {avg_mse:.6f}")
    for index, feature_error in enumerate(avg_feature_mse):
        label = feature_names[index] if index < len(feature_names) else f"Feature {index}"
        print(f"{label} MSE: {feature_error:.6f}")

    return avg_mse, avg_feature_mse