In [None]:
pip install -r requirements.txt

In [10]:
import os
import requests
import pandas as pd
from datetime import datetime, timedelta
import pytorch_lightning as pl
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from io import StringIO
import numpy as np
import time

# Alpha Vantage API settings
# NOTE(security): supply the key through the ALPHAVANTAGE_API_KEY environment
# variable. The literal below is kept only as a backward-compatible fallback;
# a key committed to source should be treated as leaked and rotated.
API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY", "FGU3D4XLZIJ9Q8YJ")
BASE_URL = "https://www.alphavantage.co/query"

def download_stock_data(symbol, interval="1min", max_retries=5, retry_wait=60, timeout=30):
    """Download intraday price data for one symbol from Alpha Vantage.

    Args:
        symbol: Ticker symbol to request.
        interval: Bar interval forwarded to the API (default ``"1min"``).
        max_retries: Number of retries after an HTTP 429 before giving up.
        retry_wait: Seconds to sleep between rate-limit retries.
        timeout: Seconds to wait for the HTTP response before aborting.

    Returns:
        A DataFrame whose ``timestamp`` column is parsed to datetimes, or
        ``None`` when the download fails (the error is printed).
    """
    # Let requests build and escape the query string instead of formatting it by hand.
    params = {
        "function": "TIME_SERIES_INTRADAY",
        "symbol": symbol,
        "interval": interval,
        "apikey": API_KEY,
        "datatype": "csv",
        "extended_hours": "false",
        "outputsize": "full",
    }
    attempts = max(int(max_retries), 0) + 1
    try:
        # Bounded loop instead of unbounded recursion while the API keeps answering 429.
        for attempt in range(attempts):
            response = requests.get(BASE_URL, params=params, timeout=timeout)
            if response.status_code != 429:
                break
            if attempt == attempts - 1:
                raise ValueError(
                    f"Rate limit still exceeded for {symbol} after {attempts - 1} retries"
                )
            print(f"Rate limit exceeded for {symbol}. Waiting {retry_wait} seconds...")
            time.sleep(retry_wait)

        if response.status_code != 200:
            raise ValueError(f"Error fetching data for {symbol}: {response.status_code}")

        df = pd.read_csv(StringIO(response.text))
        if df.empty:
            raise ValueError(f"Empty data received for {symbol}")
        # A 200 response may still carry an error/notice payload instead of CSV;
        # report it explicitly rather than failing with an opaque KeyError.
        if 'timestamp' not in df.columns:
            raise ValueError(
                f"Unexpected response for {symbol}: {response.text[:200]!r}"
            )
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        return df
    except (requests.RequestException, ValueError) as e:
        # Best-effort download: report the failure and let the caller decide what to do.
        print(f"Error downloading data for {symbol}: {str(e)}")
        return None

def get_stock_data(symbols):
    """Fetch data for every symbol and return a ``{symbol: result}`` mapping.

    Each value is whatever ``download_stock_data`` returned for that symbol,
    so a failed download is stored as ``None``.
    """
    downloaded = {}
    for ticker in symbols:
        print(f"Downloading data for {ticker}...")
        frame = download_stock_data(ticker)
        downloaded[ticker] = frame
    return downloaded

def split_data(stock_data):
    """Split each symbol's rows chronologically by calendar day into train/val/test.

    The first 60% of distinct days go to train, the next 20% to validation
    and the remainder to test, so no day is shared between splits.

    Args:
        stock_data: Mapping of symbol -> DataFrame with a datetime ``timestamp``
            column. Entries that are ``None`` or empty are skipped.

    Returns:
        Tuple ``(train, val, test)`` of ``{symbol: DataFrame}`` dicts. Each frame
        is sorted by timestamp and carries an added ``date`` column.
    """
    train, val, test = {}, {}, {}
    for symbol, df in stock_data.items():
        # Failed downloads are represented as None; skip them instead of crashing.
        if df is None or df.empty:
            print(f"Skipping {symbol}: no data available")
            continue

        # sort_values/assign return new frames, so the caller's DataFrame is not mutated.
        df = df.sort_values('timestamp')
        df = df.assign(date=df['timestamp'].dt.date)

        # Rows are already in time order, so unique() yields the days chronologically.
        days = list(df['date'].unique())

        train_end = int(len(days) * 0.6)
        val_end = int(len(days) * 0.8)

        train_days = days[:train_end]
        val_days = days[train_end:val_end]
        test_days = days[val_end:]

        train[symbol] = df[df['date'].isin(train_days)]
        val[symbol] = df[df['date'].isin(val_days)]
        test[symbol] = df[df['date'].isin(test_days)]

    return train, val, test

class StockDataset(Dataset):
    """One sample per (calendar day, symbol): early-day closes -> a later close.

    For each day of each symbol, the input is the first ``input_ratio`` share of
    that day's ``close`` values (in timestamp order) and the target is the
    ``close`` at the ``target_ratio`` position of the same day.

    NOTE: the input length follows the number of rows in the day, so days with
    different row counts produce inputs of different lengths.
    """

    def __init__(self, data_dict, input_ratio=0.7, target_ratio=0.9):
        """Build the dataset from a ``{symbol: DataFrame}`` mapping.

        Args:
            data_dict: Mapping of symbol -> DataFrame with ``timestamp``
                (datetime) and ``close`` columns. ``None`` values are ignored.
            input_ratio: Share of each day's rows used as the input window.
            target_ratio: Relative position within the day of the target row.

        Raises:
            ValueError: If ``data_dict`` contains no DataFrame at all.
        """
        # Combine all stock data into a single dataframe with a 'symbol' column
        frames = [
            df.assign(symbol=symbol)
            for symbol, df in data_dict.items()
            if df is not None
        ]
        if not frames:
            raise ValueError("StockDataset needs at least one DataFrame")
        self.data = pd.concat(frames)
        self.input_ratio = input_ratio
        self.target_ratio = target_ratio
        numerical_cols = self.data.select_dtypes(include=np.number).columns
        self.data[numerical_cols] = self.data[numerical_cols].astype('float64')
        self.numerical_cols = numerical_cols
        self.grouped = self.data.groupby([self.data['timestamp'].dt.date, 'symbol'])
        self.date_symbols = list(self.grouped.groups.keys())
        # Extract and sort each day's closes once here, so __getitem__ does not
        # repeat the group lookup and sort on every access.
        self._closes = {
            key: self.grouped.get_group(key)
            .sort_values('timestamp')['close']
            .to_numpy(dtype='float32')
            for key in self.date_symbols
        }

    def __len__(self):
        """Return the number of (day, symbol) samples."""
        return len(self.date_symbols)

    def __getitem__(self, idx):
        """Return ``(inputs, target)`` float32 tensors for sample ``idx``.

        ``inputs`` has shape ``(input_len, 1)`` and ``target`` has shape ``(1,)``.
        """
        closes = self._closes[self.date_symbols[idx]]
        n_rows = len(closes)

        input_len = int(n_rows * self.input_ratio)
        # Clamp so that target_ratio >= 1.0 selects the last row instead of overrunning.
        target_idx = min(int(n_rows * self.target_ratio), n_rows - 1)

        inputs = torch.tensor(closes[:input_len].reshape(-1, 1), dtype=torch.float32)
        target = torch.tensor([closes[target_idx]], dtype=torch.float32)
        return inputs, target

class QuantileLoss(nn.Module):
    """Pinball (quantile) loss, averaged over every element."""

    def __init__(self, quantile):
        """Store the quantile level that the loss targets."""
        super().__init__()
        self.quantile = quantile

    def forward(self, preds, targets):
        """Return the mean of ``max((q - 1) * e, q * e)`` with ``e = targets - preds``."""
        q = self.quantile
        residual = targets - preds
        over_penalty = (q - 1) * residual
        under_penalty = q * residual
        return torch.maximum(over_penalty, under_penalty).mean()

class StockPredictor(pl.LightningModule):
    """Fully connected regressor trained with a 0.5-quantile (median) pinball loss.

    The network flattens its input and passes it through linear layers of
    width 1024, 512 and 256 (ReLU between them) down to a single output.
    """

    def __init__(self, input_size, hidden_dim):
        """Build the network.

        Args:
            input_size: Number of features per sample after flattening.
            hidden_dim: Accepted for the interface; the layer widths below are
                fixed and do not read it.
        """
        super(StockPredictor, self).__init__()
        self.save_hyperparameters()

        widths = [input_size, 1024, 512, 256]
        layers = [nn.Flatten()]
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], 1))
        self.model = nn.Sequential(*layers)

        self.loss_fn = QuantileLoss(quantile=0.5)

    def forward(self, x):
        """Run the network on a batch."""
        return self.model(x)

    def _batch_loss(self, batch):
        """Compute the loss for one ``(inputs, targets)`` batch."""
        inputs, targets = batch
        predictions = self(inputs)
        # Reshape targets to match output shape
        return self.loss_fn(predictions, targets.view(-1, 1))

    def training_step(self, batch, batch_idx):
        """Compute, log (as ``train_loss``) and return the training loss."""
        loss = self._batch_loss(batch)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute, log (as ``val_loss``) and return the validation loss."""
        loss = self._batch_loss(batch)
        self.log('val_loss', loss, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Return Adam plus a plateau scheduler that monitors ``val_loss``."""
        adam = optim.Adam(self.parameters(), lr=1e-5)
        plateau = optim.lr_scheduler.ReduceLROnPlateau(
            adam, mode='min', factor=0.5, patience=5, verbose=True
        )
        config = {"optimizer": adam, "lr_scheduler": plateau}
        config["monitor"] = "val_loss"
        return config

def create_dataloader(stock_split, batch_size=128):
    """Build one DataLoader per symbol for each of the train/val/test splits.

    Args:
        stock_split: Dict with ``'train'``, ``'val'`` and ``'test'`` keys, each
            mapping symbol -> DataFrame.
        batch_size: Batch size used for every loader.

    Returns:
        Tuple ``(train_loaders, val_loaders, test_loaders)`` of
        ``{symbol: DataLoader}`` dicts; only the train loaders shuffle.
    """
    def _loaders(split_name, shuffle):
        # StockDataset expects a {symbol: DataFrame} mapping, so wrap each frame
        # instead of passing the bare DataFrame.
        return {
            symbol: DataLoader(
                StockDataset({symbol: df}), batch_size=batch_size, shuffle=shuffle
            )
            for symbol, df in stock_split[split_name].items()
        }

    train_loaders = _loaders('train', True)
    val_loaders = _loaders('val', False)
    test_loaders = _loaders('test', False)

    return train_loaders, val_loaders, test_loaders

def main():
    """Download data for a fixed ticker list, train one model on all of it, save the weights.

    Raises:
        RuntimeError: If no symbol could be downloaded.
    """
    symbols = ["AAPL", "MSFT", "GOOGL", "TSLA","AMZN","FB","NVDA","PYPL",
               "INTC","ADBE","CSCO","NFLX","CMCSA","PEP","COST","AMGN","AVGO",
               "TXN","QCOM","GILD","SBUX","INTU","BKNG","AMD","MU","ADP",
               "ISRG","FISV","CSX","VRTX","REGN","ILMN"]
    stock_data = get_stock_data(symbols)

    # A failed download is stored as None; drop those entries so the split and
    # the datasets only ever see real DataFrames.
    stock_data = {symbol: df for symbol, df in stock_data.items() if df is not None}
    if not stock_data:
        raise RuntimeError("No stock data could be downloaded; aborting training")

    train, val, test = split_data(stock_data)

    # Create single dataloader for all stocks
    train_loader = DataLoader(StockDataset(train), batch_size=128, shuffle=True)
    val_loader = DataLoader(StockDataset(val), batch_size=128, shuffle=False)
    # Held-out split; it is built here but not evaluated in this function.
    test_loader = DataLoader(StockDataset(test), batch_size=128, shuffle=False)

    # Get input size from a sample batch: the model flattens each sample, so
    # count every element of one sample rather than reading a single dimension.
    sample_inputs, _ = next(iter(train_loader))
    input_size = sample_inputs[0].numel()

    model = StockPredictor(input_size=input_size, hidden_dim=512)

    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        monitor='val_loss',
        dirpath='checkpoints',
        filename='stock-predictor-{epoch:02d}-{val_loss:.2f}',
        save_top_k=3,
        mode='min'
    )

    early_stop_callback = pl.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        mode='min'
    )

    trainer = pl.Trainer(
        max_epochs=1000,
        callbacks=[checkpoint_callback, early_stop_callback],
        log_every_n_steps=1,
        deterministic=True
    )

    # Train single model on all stocks
    print("\nTraining unified model for all stocks")
    trainer.fit(model, train_loader, val_loader)

    # Save unified model (the weights as they are when fitting stops)
    torch.save(model.state_dict(), 'unified_stock_model.pt')

# Run the full download-and-train pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()


Downloading data for AAPL...
Downloading data for MSFT...
Downloading data for GOOGL...
Downloading data for TSLA...


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\amitb\Python\playground\.venv\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:654: Checkpoint directory C:\Users\amitb\Python\playground\checkpoints exists and is not empty.

  | Name    | Type         | Params | Mode 
-------------------------------------------------
0 | model   | Sequential   | 936 K  | train
1 | loss_fn | QuantileLoss | 0      | train
-------------------------------------------------
936 K     Trainable params
0         Non-trainable params
936 K     Total params
3.748     Total estimated model params size (MB)
10        Modules in train mode
0         Modules in eval mode



Training unified model for all stocks
                                                                            

c:\Users\amitb\Python\playground\.venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\amitb\Python\playground\.venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 116: 100%|██████████| 1/1 [00:00<00:00,  8.36it/s, v_num=9, train_loss=5.810, val_loss=7.150]
