## Entire pipeline

#### Old

In [None]:
# """
# LSTM pipeline with transfer learning (PyTorch)
# ------------------------------------------------------
# - Train parent model once on S&P 500 (^GSPC)
# - Train child model(s) per ticker by fine-tuning parent
# - Predict next-day Open/Close and next-week High/Low (5 trading days)
# - Save plots, JSON, scalers, and evaluation metrics (MSE, RMSE, R²)

# Quick start:
#     pip install -U yfinance pandas numpy matplotlib torch scikit-learn joblib

# Usage:
#     parent_dir = train_parent("^GSPC", start="2000-01-01", epochs=8)
#     summary = train_child("NVDA", start="2000-01-01", epochs=4, parent_dir=parent_dir)
#     preds = predict_child("NVDA", parent_dir=parent_dir)
# """

# import os, json, joblib
# from typing import Dict

# import numpy as np
# import pandas as pd
# import yfinance as yf
# import matplotlib.pyplot as plt

# import torch
# import torch.nn as nn
# from torch.utils.data import Dataset, DataLoader
# from sklearn.preprocessing import StandardScaler
# from sklearn.metrics import mean_squared_error, r2_score

# DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# CONTEXT_LEN = 60  # lookback days
# PRED_LEN = 5      # forecast horizon (days)
# INPUT_SIZE = 5    # OHLCV
# BATCH_SIZE = 32

# # -----------------------------
# # Data utilities
# # -----------------------------

# def fetch_ohlcv(ticker: str, start: str = "2000-01-01", end: str | None = None) -> pd.DataFrame:
#     df = yf.download(ticker, start=start, end=end, interval="1d", auto_adjust=True, progress=False)
#     df = df.reset_index().rename(columns={"Date": "date"})
#     df = df[["date", "Open", "High", "Low", "Close", "Volume"]].dropna()
#     return df


# class StockDataset(Dataset):
#     def __init__(self, df: pd.DataFrame, scaler: StandardScaler, context_len=CONTEXT_LEN, pred_len=PRED_LEN):
#         vals = scaler.transform(df[["Open", "High", "Low", "Close", "Volume"]])
#         vals = vals.astype("float32")

#         self.samples = []
#         for t in range(context_len, len(df) - pred_len):
#             past = vals[t - context_len:t]
#             fut = vals[t:t + pred_len]
#             self.samples.append((past, fut))

#     def __len__(self):
#         return len(self.samples)

#     def __getitem__(self, idx):
#         past, fut = self.samples[idx]
#         return torch.tensor(past), torch.tensor(fut)


# # -----------------------------
# # LSTM Model
# # -----------------------------

# class LSTMModel(nn.Module):
#     def __init__(self, input_size=INPUT_SIZE, hidden_size=64, num_layers=2, pred_len=PRED_LEN):
#         super().__init__()
#         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
#         self.fc = nn.Linear(hidden_size, input_size * pred_len)
#         self.pred_len = pred_len
#         self.input_size = input_size

#     def forward(self, x):
#         out, _ = self.lstm(x)
#         out = out[:, -1, :]  # last hidden state
#         out = self.fc(out)
#         out = out.view(-1, self.pred_len, self.input_size)
#         return out


# # -----------------------------
# # Training
# # -----------------------------

# def fit_model(model: nn.Module, loader: DataLoader, epochs=8, lr=1e-3):
#     model.to(DEVICE)
#     opt = torch.optim.Adam(model.parameters(), lr=lr)
#     criterion = nn.MSELoss()

#     for ep in range(1, epochs + 1):
#         model.train()
#         total_loss = 0.0
#         for X, Y in loader:
#             X, Y = X.to(DEVICE), Y.to(DEVICE)
#             opt.zero_grad()
#             pred = model(X)
#             loss = criterion(pred, Y)
#             loss.backward()
#             torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
#             opt.step()
#             total_loss += loss.item()
#         avg = total_loss / len(loader)
#         print(f"Epoch {ep}/{epochs} - loss: {avg:.5f}")
#     return model


# def save_model(model: nn.Module, scaler: StandardScaler, path: str):
#     os.makedirs(path, exist_ok=True)
#     torch.save(model.state_dict(), os.path.join(path, "model.pt"))
#     joblib.dump(scaler, os.path.join(path, "scaler.pkl"))


# def load_model(path: str) -> tuple[LSTMModel, StandardScaler]:
#     model = LSTMModel()
#     model.load_state_dict(torch.load(os.path.join(path, "model.pt"), map_location=DEVICE))
#     scaler = joblib.load(os.path.join(path, "scaler.pkl"))
#     model.to(DEVICE)
#     return model, scaler

# # -----------------------------
# # Inference & Evaluation
# # -----------------------------

# def predict_one_step_and_week(model: nn.Module, df: pd.DataFrame, scaler: StandardScaler) -> Dict:
#     model.eval()
#     vals = scaler.transform(df[["Open", "High", "Low", "Close", "Volume"]]).astype("float32")
#     X = torch.tensor(vals[-CONTEXT_LEN:]).unsqueeze(0).to(DEVICE)

#     with torch.no_grad():
#         pred = model(X).squeeze(0).cpu().numpy()
#     pred = scaler.inverse_transform(pred)

#     next_day_open = float(pred[0, 0])
#     next_day_close = float(pred[0, 3])
#     next_week_high = float(np.max(pred[:, 1]))
#     next_week_low = float(np.min(pred[:, 2]))

#     last_date = pd.to_datetime(df["date"].iloc[-1])
#     future_dates = pd.bdate_range(last_date + pd.Timedelta(days=1), periods=PRED_LEN)

#     payload = {
#         "last_date": str(last_date.date()),
#         "future_window_days": int(PRED_LEN),
#         "predictions": {
#             "next_day_open": next_day_open,
#             "next_day_close": next_day_close,
#             "next_week_high": next_week_high,
#             "next_week_low": next_week_low,
#         },
#         "daily_trend": [
#             {
#                 "date": str(d.date()),
#                 "open": float(pred[i, 0]),
#                 "high": float(pred[i, 1]),
#                 "low": float(pred[i, 2]),
#                 "close": float(pred[i, 3]),
#             }
#             for i, d in enumerate(future_dates)
#         ],
#     }
#     return payload


# def evaluate_model(model: nn.Module, df: pd.DataFrame, scaler: StandardScaler, out_dir: str, ticker: str) -> Dict:
#     model.eval()
#     vals = scaler.transform(df[["Open", "High", "Low", "Close", "Volume"]]).astype("float32")
#     X, Y = [], []
#     for t in range(CONTEXT_LEN, len(vals) - PRED_LEN):
#         X.append(vals[t - CONTEXT_LEN:t])
#         Y.append(vals[t:t + PRED_LEN])
#     X, Y = np.array(X), np.array(Y)

#     if len(X) == 0:
#         return {}

#     X_t = torch.tensor(X).to(DEVICE)
#     with torch.no_grad():
#         preds = model(X_t).cpu().numpy()

#     mse = mean_squared_error(Y.reshape(-1, INPUT_SIZE), preds.reshape(-1, INPUT_SIZE))
#     rmse = np.sqrt(mse)
#     r2 = r2_score(Y.reshape(-1, INPUT_SIZE), preds.reshape(-1, INPUT_SIZE))

#     metrics = {"MSE": mse, "RMSE": rmse, "R2": r2}
#     with open(os.path.join(out_dir, f"{ticker}_metrics.json"), "w") as f:
#         json.dump(metrics, f, indent=2)
#     print(f"{ticker} → MSE: {mse:.5f}, RMSE: {rmse:.5f}, R²: {r2:.5f}")
#     return metrics


# def save_json(payload: Dict, path: str):
#     os.makedirs(os.path.dirname(path), exist_ok=True)
#     with open(path, "w") as f:
#         json.dump(payload, f, indent=2)
#     return path


# def plot_outputs(df: pd.DataFrame, payload: Dict, out_dir: str, ticker: str):
#     os.makedirs(out_dir, exist_ok=True)
#     plt.figure(figsize=(12, 5))
#     plt.plot(df["date"], df["Close"], label="History")
#     ndc = payload["predictions"]["next_day_close"]
#     whi = payload["predictions"]["next_week_high"]
#     wlo = payload["predictions"]["next_week_low"]
#     plt.axhline(ndc, color="r", linestyle="--", label="Next-day close")
#     plt.axhline(whi, color="g", linestyle=":", label="Next-week high")
#     plt.axhline(wlo, color="b", linestyle=":", label="Next-week low")
#     plt.legend()
#     plt.title(f"{ticker} Close + Forecast")
#     plt.savefig(os.path.join(out_dir, f"{ticker}_history_forecast.png"))
#     plt.close()

# # -----------------------------
# # Public functions
# # -----------------------------

# def train_parent(parent_ticker="^GSPC", start="2000-01-01", epochs=8, out_dir="outputs/parent") -> str:
#     df = fetch_ohlcv(parent_ticker, start=start)
#     scaler = StandardScaler().fit(df[["Open", "High", "Low", "Close", "Volume"]])
#     dataset = StockDataset(df, scaler)
#     loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

#     model = LSTMModel()
#     model = fit_model(model, loader, epochs=epochs, lr=1e-3)
#     save_model(model, scaler, out_dir)
#     evaluate_model(model, df, scaler, out_dir, parent_ticker.replace("^", ""))
#     return out_dir


# def train_child(child_ticker: str, start="2000-01-01", epochs=4, parent_dir="outputs/parent", workdir="outputs") -> Dict:
#     df = fetch_ohlcv(child_ticker, start=start)
#     parent_model, _ = load_model(parent_dir)

#     # Freeze lower LSTM layers for transfer learning
#     for name, param in parent_model.named_parameters():
#         if "lstm" in name:
#             param.requires_grad = False

#     scaler = StandardScaler().fit(df[["Open", "High", "Low", "Close", "Volume"]])
#     dataset = StockDataset(df, scaler)
#     loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

#     child_model = fit_model(parent_model, loader, epochs=epochs, lr=3e-4)
#     child_dir = os.path.join(workdir, child_ticker)
#     save_model(child_model, scaler, child_dir)

#     payload = predict_one_step_and_week(child_model, df, scaler)
#     json_path = os.path.join(child_dir, f"{child_ticker}_forecast.json")
#     save_json(payload, json_path)
#     plot_outputs(df, payload, child_dir, child_ticker)
#     evaluate_model(child_model, df, scaler, child_dir, child_ticker)
#     return {"checkpoint": child_dir, "json": json_path}


# def predict_child(child_ticker: str, parent_dir="outputs/parent", workdir="outputs") -> Dict:
#     child_dir = os.path.join(workdir, child_ticker)
#     df = fetch_ohlcv(child_ticker, start="2000-01-01")
#     model, scaler = load_model(child_dir)
#     payload = predict_one_step_and_week(model, df, scaler)
#     return payload


# # -----------------------------
# # Main execution
# # -----------------------------

# if __name__ == '__main__':
#     # Configuration
#     PARENT_TICKER = "^GSPC"  # S&P 500 as parent
#     CHILD_TICKERS = ["NVDA", "AAPL", "TSLA", "MSFT"]  # Example child stocks
#     START_DATE = "2000-01-01"
#     PARENT_EPOCHS = 8
#     CHILD_EPOCHS = 4
    
#     print("Starting LSTM Transfer Learning Pipeline")
#     print("=" * 50)
    
#     # Step 1: Train parent model on S&P 500
#     print(f"\n1. Training parent model on {PARENT_TICKER}...")
#     parent_dir = train_parent(
#         parent_ticker=PARENT_TICKER,
#         start=START_DATE,
#         epochs=PARENT_EPOCHS,
#         out_dir="outputs/parent"
#     )
#     print(f"Parent model saved to: {parent_dir}")
#     print(f"Parent metrics saved to: {parent_dir}/{PARENT_TICKER.replace('^', '')}_metrics.json")
    
#     # Step 2: Train child models with transfer learning
#     results = {}
#     for ticker in CHILD_TICKERS:
#         print(f"\n2. Training child model for {ticker}...")
#         try:
#             summary = train_child(
#                 child_ticker=ticker,
#                 start=START_DATE,
#                 epochs=CHILD_EPOCHS,
#                 parent_dir=parent_dir,
#                 workdir="outputs"
#             )
#             results[ticker] = summary
#             print(f"✓ {ticker} model trained and saved to: {summary['checkpoint']}")
#             print(f"✓ Predictions saved to: {summary['json']}")
#             print(f"✓ Metrics saved to: {summary['checkpoint']}/{ticker}_metrics.json")
            
#         except Exception as e:
#             print(f"✗ Error training {ticker}: {e}")
#             continue
    
#     # Step 3: Generate fresh predictions
#     print(f"\n3. Generating fresh predictions...")
#     for ticker in CHILD_TICKERS:
#         if ticker in results:
#             try:
#                 preds = predict_child(ticker, parent_dir=parent_dir, workdir="outputs")
#                 print(f"✓ {ticker} predictions:")
#                 print(f"  Next-day open: ${preds['predictions']['next_day_open']:.2f}")
#                 print(f"  Next-day close: ${preds['predictions']['next_day_close']:.2f}")
#                 print(f"  Next-week high: ${preds['predictions']['next_week_high']:.2f}")
#                 print(f"  Next-week low: ${preds['predictions']['next_week_low']:.2f}")
                
#             except Exception as e:
#                 print(f"✗ Error predicting {ticker}: {e}")
    
#     print(f"\n" + "=" * 50)
#     print("Pipeline completed! Check 'outputs/' directory for:")
#     print("- Model checkpoints (model.pt files)")
#     print("- Scalers (scaler.pkl files)")  
#     print("- Prediction JSONs (*_forecast.json)")
#     print("- Performance metrics (*_metrics.json)")
#     print("- Forecast plots (*_history_forecast.png)")
#     print("\nFile structure:")
#     print("outputs/")
#     print("├── parent/")
#     print(f"│   ├── model.pt")
#     print(f"│   ├── scaler.pkl")
#     print(f"│   └── {PARENT_TICKER.replace('^', '')}_metrics.json")
#     for ticker in CHILD_TICKERS:
#         if ticker in results:
#             print(f"├── {ticker}/")
#             print(f"│   ├── model.pt")
#             print(f"│   ├── scaler.pkl") 
#             print(f"│   ├── {ticker}_forecast.json")
#             print(f"│   ├── {ticker}_metrics.json")
#             print(f"│   └── {ticker}_history_forecast.png")

Starting LSTM Transfer Learning Pipeline

1. Training parent model on ^GSPC...
Epoch 1/8 - loss: 0.18583
Epoch 2/8 - loss: 0.04805
Epoch 3/8 - loss: 0.04558
Epoch 4/8 - loss: 0.04449
Epoch 5/8 - loss: 0.04377
Epoch 6/8 - loss: 0.04343
Epoch 7/8 - loss: 0.04308
Epoch 8/8 - loss: 0.04258
GSPC → MSE: 0.04380, RMSE: 0.20928, R²: 0.95564
Parent model saved to: outputs/parent
Parent metrics saved to: outputs/parent/GSPC_metrics.json

2. Training child model for NVDA...
Epoch 1/4 - loss: 0.15079
Epoch 2/4 - loss: 0.13966
Epoch 3/4 - loss: 0.13824
Epoch 4/4 - loss: 0.13724
NVDA → MSE: 0.13639, RMSE: 0.36931, R²: 0.86319
✓ NVDA model trained and saved to: outputs/NVDA
✓ Predictions saved to: outputs/NVDA/NVDA_forecast.json
✓ Metrics saved to: outputs/NVDA/NVDA_metrics.json

2. Training child model for AAPL...
Epoch 1/4 - loss: 0.09208
Epoch 2/4 - loss: 0.07641
Epoch 3/4 - loss: 0.07476
Epoch 4/4 - loss: 0.07445
AAPL → MSE: 0.07428, RMSE: 0.27254, R²: 0.92609
✓ AAPL model trained and saved to: o

### Each day

In [None]:
import os
import json
import joblib
from typing import Dict
from dotenv import load_dotenv

import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import ta

from comet_ml import Experiment
from comet_ml.integration.pytorch import log_model

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
CONTEXT_LEN = 60  # lookback days
PRED_LEN = 1     # forecast horizon (days)
INPUT_SIZE = 5    # OHLCV
BATCH_SIZE = 32

# Set environment variables
load_dotenv()
os.environ["COMET_API_KEY"] = os.getenv("COMET_API_KEY")

# -----------------------------
# Data utilities
# -----------------------------

def fetch_ohlcv(ticker: str, start: str = "2000-01-01", end: str | None = None) -> pd.DataFrame:
    df = yf.download(ticker, start=start, end=end, interval="1d", auto_adjust=True, progress=False)
    df = df.reset_index().rename(columns={"Date": "date"})
    df = df[["date", "Open", "High", "Low", "Close", "Volume"]].dropna()
    return df

class StockDataset(Dataset):
    def __init__(self, df: pd.DataFrame, scaler: StandardScaler, context_len=CONTEXT_LEN, pred_len=PRED_LEN):
        vals = scaler.transform(df[["Open", "High", "Low", "Close", "Volume"]])
        vals = vals.astype("float32")
        self.samples = []
        for t in range(context_len, len(df) - pred_len):
            past = vals[t - context_len:t]
            fut = vals[t:t + pred_len]
            self.samples.append((past, fut))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        past, fut = self.samples[idx]
        return torch.tensor(past), torch.tensor(fut)

# -----------------------------
# LSTM Model
# -----------------------------

class LSTMModel(nn.Module):
    def __init__(self, input_size=INPUT_SIZE, hidden_size=128, num_layers=3, pred_len=PRED_LEN, dropout=0.2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, input_size * pred_len)
        self.pred_len = pred_len
        self.input_size = input_size

    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # Last hidden state
        out = self.fc(out)
        out = out.view(-1, self.pred_len, self.input_size)
        return out

# -----------------------------
# Training
# -----------------------------

def fit_model(model: nn.Module, loader: DataLoader, val_loader: DataLoader, epochs=8, lr=1e-3, experiment: Experiment = None):
    model.to(DEVICE)
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    criterion = nn.MSELoss()
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=3)
    best_val_loss = float('inf')
    patience = 5
    counter = 0

    for ep in range(1, epochs + 1):
        model.train()
        total_loss = 0.0
        for X, Y in loader:
            X, Y = X.to(DEVICE), Y.to(DEVICE)
            opt.zero_grad()
            pred = model(X)
            loss = criterion(pred, Y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            opt.step()
            total_loss += loss.item()
        avg_train_loss = total_loss / len(loader)
        print(f"Epoch {ep}/{epochs} - Train Loss: {avg_train_loss:.5f}")

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X, Y in val_loader:
                X, Y = X.to(DEVICE), Y.to(DEVICE)
                pred = model(X)
                val_loss += criterion(pred, Y).item()
        avg_val_loss = val_loss / len(val_loader)
        print(f"Epoch {ep}/{epochs} - Val Loss: {avg_val_loss:.5f}")

        if experiment:
            experiment.log_metric("train_loss", avg_train_loss, epoch=ep)
            experiment.log_metric("val_loss", avg_val_loss, epoch=ep)

        scheduler.step(avg_val_loss)

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print("Early stopping triggered")
                break

    return model

def save_model(model: nn.Module, scaler: StandardScaler, path: str, experiment: Experiment = None, model_type: str = "parent", ticker: str = None):
    os.makedirs(path, exist_ok=True)
    # Save locally
    torch.save(model.state_dict(), os.path.join(path, "model.pt"))
    scaler_filename = "parent_scaler.pkl" if model_type == "parent" else f"{ticker}_child_scaler.pkl"
    joblib.dump(scaler, os.path.join(path, scaler_filename))
    
    if experiment:
        model_name = "lstm_parent_model_checkpoint" if model_type == "parent" else f"lstm_child_model_checkpoint_{ticker}"
        checkpoint = {"model_state_dict": model.state_dict()}
        log_model(
            experiment=experiment,
            model=checkpoint,
            model_name=model_name,
            metadata={
                "input_size": INPUT_SIZE,
                "context_len": CONTEXT_LEN,
                "pred_len": PRED_LEN,
                "model_type": model_type,
                "ticker": ticker or "SP500",
                "timestamp": pd.Timestamp.now().isoformat()
            }
        )
        experiment.log_asset(os.path.join(path, scaler_filename), file_name=scaler_filename)

def load_model(path: str = None, experiment_key: str = None, model_type: str = "parent", ticker: str = None) -> tuple[LSTMModel, StandardScaler]:
    if model_type == "child" and not ticker:
        raise ValueError("Ticker must be provided for child model")
    
    model = LSTMModel().to(DEVICE)
    scaler = None
    model_name = "lstm_parent_model_checkpoint" if model_type == "parent" else f"lstm_child_model_checkpoint_{ticker}"
    scaler_filename = "parent_scaler.pkl" if model_type == "parent" else f"{ticker}_child_scaler.pkl"
    project_name = "S&P-500-parent-model" if model_type == "parent" else "child-model"

    if experiment_key:
        experiment = Experiment(api_key=os.getenv("COMET_API_KEY"), project_name=project_name)
        experiment.set_experiment_key(experiment_key)
        
        checkpoint = load_model(f"experiment://{experiment_key}/{model_name}")
        model.load_state_dict(checkpoint["model_state_dict"])
        
        asset_list = experiment.get_asset_list()
        scaler_asset_id = None
        for asset in asset_list:
            if asset["fileName"] == scaler_filename:
                scaler_asset_id = asset["assetId"]
                break
        if scaler_asset_id:
            scaler_data = experiment.get_asset(scaler_asset_id, return_type="binary")
            with open(f"temp_{scaler_filename}", "wb") as f:
                f.write(scaler_data)
            scaler = joblib.load(f"temp_{scaler_filename}")
            os.remove(f"temp_{scaler_filename}")
        else:
            raise ValueError(f"Scaler asset '{scaler_filename}' not found in Comet ML experiment")
        
        experiment.end()
    elif path:
        model.load_state_dict(torch.load(os.path.join(path, "model.pt"), map_location=DEVICE))
        scaler = joblib.load(os.path.join(path, scaler_filename))
    else:
        raise ValueError("Must provide either path or experiment_key")

    model.eval()
    return model, scaler

# -----------------------------
# Inference & Evaluation
# -----------------------------

def predict_one_step_and_week(model: nn.Module, df: pd.DataFrame, scaler: StandardScaler) -> Dict:
    model.eval()
    vals = scaler.transform(df[["Open", "High", "Low", "Close", "Volume"]]).astype("float32")
    X = torch.tensor(vals[-CONTEXT_LEN:]).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        pred = model(X).squeeze(0).cpu().numpy()
    pred = scaler.inverse_transform(pred)

    next_day_open = float(pred[0, 0])
    next_day_high = float(pred[0, 1])
    next_day_low = float(pred[0, 2])
    next_day_close = float(pred[0, 3])
    
    next_week_high = float(np.max(pred[:, 1]))
    next_week_low = float(np.min(pred[:, 2]))

    last_date = pd.to_datetime(df["date"].iloc[-1])
    next_business_day = pd.bdate_range(last_date + pd.Timedelta(days=1), periods=1)[0]

    payload = {
        "last_date": str(last_date.date()),
        "next_business_day": str(next_business_day.date()),
        "future_window_days": int(PRED_LEN),
        "predictions": {
            "next_day_open": next_day_open,
            "next_day_high": next_day_high,
            "next_day_low": next_day_low,
            "next_day_close": next_day_close,
            "next_week_high": next_week_high,
            "next_week_low": next_week_low,
        }
    }
    return payload

def evaluate_model(model: nn.Module, df: pd.DataFrame, scaler: StandardScaler, out_dir: str, ticker: str, experiment: Experiment = None) -> Dict:
    model.eval()
    vals = scaler.transform(df[["Open", "High", "Low", "Close", "Volume"]]).astype("float32")
    X, Y = [], []
    for t in range(CONTEXT_LEN, len(vals) - PRED_LEN):
        X.append(vals[t - CONTEXT_LEN:t])
        Y.append(vals[t:t + PRED_LEN])
    X, Y = np.array(X), np.array(Y)

    if len(X) == 0:
        return {}

    X_t = torch.tensor(X).to(DEVICE)
    with torch.no_grad():
        preds = model(X_t).cpu().numpy()

    mse = mean_squared_error(Y.reshape(-1, INPUT_SIZE), preds.reshape(-1, INPUT_SIZE))
    rmse = np.sqrt(mse)
    r2 = r2_score(Y.reshape(-1, INPUT_SIZE), preds.reshape(-1, INPUT_SIZE))

    metrics = {"MSE": mse, "RMSE": rmse, "R2": r2}
    metrics_filename = f"{ticker}_parent_metrics.json" if "parent" in out_dir else f"{ticker}_child_metrics.json"
    with open(os.path.join(out_dir, metrics_filename), "w") as f:
        json.dump(metrics, f, indent=2)
    print(f"{ticker} → MSE: {mse:.5f}, RMSE: {rmse:.5f}, R²: {r2:.5f}")
    if experiment:
        experiment.log_metrics({"mse": mse, "rmse": rmse, "r2": r2})
        experiment.log_asset(os.path.join(out_dir, metrics_filename), file_name=metrics_filename)
    return metrics

def save_json(payload: Dict, path: str):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "w") as f:
        json.dump(payload, f, indent=2)
    return path

def plot_outputs(df: pd.DataFrame, payload: Dict, out_dir: str, ticker: str, experiment: Experiment = None):
    os.makedirs(out_dir, exist_ok=True)
    plt.figure(figsize=(12, 5))
    plt.plot(df["date"], df["Close"], label="History")
    
    ndo = payload["predictions"]["next_day_open"]
    ndc = payload["predictions"]["next_day_close"]
    ndh = payload["predictions"]["next_day_high"]
    ndl = payload["predictions"]["next_day_low"]
    whi = payload["predictions"]["next_week_high"]
    wlo = payload["predictions"]["next_week_low"]
    
    plt.axhline(ndo, color="orange", linestyle="-", alpha=0.7, label="Next-day open")
    plt.axhline(ndc, color="r", linestyle="--", label="Next-day close")
    plt.axhline(ndh, color="darkgreen", linestyle="-", alpha=0.7, label="Next-day high")
    plt.axhline(ndl, color="darkred", linestyle="-", alpha=0.7, label="Next-day low")
    plt.axhline(whi, color="g", linestyle=":", label="Next-week high")
    plt.axhline(wlo, color="b", linestyle=":", label="Next-week low")
    plt.legend()
    plt.title(f"{ticker} Close + Next Day & Week Forecast")
    plot_filename = f"{ticker}_parent_history_forecast.png" if "parent" in out_dir else f"{ticker}_child_history_forecast.png"
    plot_path = os.path.join(out_dir, plot_filename)
    plt.savefig(plot_path)
    plt.close()
    if experiment:
        experiment.log_image(plot_path, name=plot_filename)

# -----------------------------
# Public functions
# -----------------------------

def train_parent(parent_ticker="^GSPC", start="2000-01-01", epochs=20, out_dir="outputs/parent"):
    experiment = Experiment(project_name="S&P-500-parent-model", auto_metric_logging=False)
    experiment.set_name(f"parent_{parent_ticker.replace('^', '')}")
    experiment.add_tag("parent")
    experiment.log_parameters({
        "ticker": parent_ticker,
        "start": start,
        "epochs": epochs,
        "lr": 1e-3,
        "hidden_size": 128,
        "num_layers": 3,
        "dropout": 0.2,
        "context_len": CONTEXT_LEN,
        "pred_len": PRED_LEN,
        "batch_size": BATCH_SIZE,
        "input_size": INPUT_SIZE
    })

    df = fetch_ohlcv(parent_ticker, start=start)
    scaler = StandardScaler().fit(df[["Open", "High", "Low", "Close", "Volume"]])
    dataset = StockDataset(df, scaler)
    train_size = int(0.8 * len(dataset))
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    model = LSTMModel()
    model = fit_model(model, train_loader, val_loader, epochs=epochs, lr=1e-3, experiment=experiment)
    save_model(model, scaler, out_dir, experiment=experiment, model_type="parent", ticker=parent_ticker)
    evaluate_model(model, df, scaler, out_dir, parent_ticker.replace("^", ""), experiment=experiment)
    experiment_key = experiment.get_key()
    experiment.end()
    return {"checkpoint": out_dir, "experiment_key": experiment_key}

def train_child(child_ticker: str, start="2000-01-01", epochs=4, parent_dir="outputs/parent", workdir="outputs") -> Dict:
    experiment = Experiment(project_name="child-model", auto_metric_logging=False)
    experiment.set_name(f"child_{child_ticker}")
    experiment.add_tag(f"child-{child_ticker}")
    experiment.log_parameters({
        "ticker": child_ticker,
        "start": start,
        "epochs": epochs,
        "lr": 3e-4,
        "hidden_size": 64,
        "num_layers": 2,
        "context_len": CONTEXT_LEN,
        "pred_len": PRED_LEN,
        "batch_size": BATCH_SIZE,
        "input_size": INPUT_SIZE,
        "parent_dir": parent_dir
    })

    df = fetch_ohlcv(child_ticker, start=start)
    parent_model, _ = load_model(path=parent_dir, model_type="parent")

    for name, param in parent_model.named_parameters():
        if "lstm" in name:
            param.requires_grad = False

    scaler = StandardScaler().fit(df[["Open", "High", "Low", "Close", "Volume"]])
    dataset = StockDataset(df, scaler)
    train_size = int(0.8 * len(dataset))
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    child_model = fit_model(parent_model, train_loader, val_loader, epochs=epochs, lr=3e-4, experiment=experiment)
    child_dir = os.path.join(workdir, child_ticker)
    save_model(child_model, scaler, child_dir, experiment=experiment, model_type="child", ticker=child_ticker)

    payload = predict_one_step_and_week(child_model, df, scaler)
    json_filename = f"{child_ticker}_child_forecast.json"
    json_path = os.path.join(child_dir, json_filename)
    save_json(payload, json_path)
    if experiment:
        experiment.log_asset(json_path, file_name=json_filename)
        experiment.log_metrics(payload["predictions"])
    plot_outputs(df, payload, child_dir, child_ticker, experiment=experiment)
    evaluate_model(child_model, df, scaler, child_dir, child_ticker, experiment=experiment)
    experiment_key = experiment.get_key()
    experiment.end()
    return {"checkpoint": child_dir, "json": json_path, "experiment_key": experiment_key}

def predict_child(child_ticker: str, parent_dir="outputs/parent", workdir="outputs", experiment_key: str = None) -> Dict:
    child_dir = os.path.join(workdir, child_ticker)
    df = fetch_ohlcv(child_ticker, start="2000-01-01")
    model, scaler = load_model(path=child_dir, experiment_key=experiment_key, model_type="child", ticker=child_ticker)
    payload = predict_one_step_and_week(model, df, scaler)
    return payload


In [6]:
# Configuration
PARENT_TICKER = "^GSPC"  # S&P 500 as parent
CHILD_TICKERS = ["GOOG", "AMZN", "META", "AXP"]  # Example child stocks
START_DATE = "2000-01-01"
PARENT_EPOCHS = 20
CHILD_EPOCHS = 10

In [5]:

if __name__ == "__main__":
    # Example usage
    PARENT_TICKER = "^GSPC"
    CHILD_TICKERS = ["NVDA", "AAPL"]
    START_DATE = "2000-01-01"
    CHILD_EPOCHS = 4

    # Step 1: Train parent model
    print("1. Training parent model for S&P 500...")
    parent_summary = train_parent(PARENT_TICKER, start=START_DATE, epochs=8)
    parent_dir = parent_summary["checkpoint"]
    print(f"✓ Parent model trained and saved to: {parent_dir}")

    



1. Training parent model for S&P 500...


[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/karan-shingde/s-p-500-parent-model/e9f64eb655194739abeb81f0fbe2a005

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/Users/karan/Documents/machine-learning/everything-mlops/mlops-from-scrstch' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.


Epoch 1/8 - Train Loss: 0.13326
Epoch 1/8 - Val Loss: 0.05154
Epoch 2/8 - Train Loss: 0.04729
Epoch 2/8 - Val Loss: 0.04179
Epoch 3/8 - Train Loss: 0.04145
Epoch 3/8 - Val Loss: 0.04001
Epoch 4/8 - Train Loss: 0.04096
Epoch 4/8 - Val Loss: 0.04411
Epoch 5/8 - Train Loss: 0.03804
Epoch 5/8 - Val Loss: 0.03645
Epoch 6/8 - Train Loss: 0.03770
Epoch 6/8 - Val Loss: 0.03966
Epoch 7/8 - Train Loss: 0.03752
Epoch 7/8 - Val Loss: 0.03913
Epoch 8/8 - Train Loss: 0.03660
Epoch 8/8 - Val Loss: 0.03649


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : parent_GSPC
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/karan-shingde/s-p-500-parent-model/e9f64eb655194739abeb81f0fbe2a005
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     mse            : 0.035174526274204254
[1;38;5;39mCOMET INFO:[0m     r2             : 0.9644085764884949
[1;38;5;39mCOMET INFO:[0m     rmse           : 0.18754873039880662
[1;38;5;39mCOMET INFO:[0m     train_loss [8] : (0.0365980292117456, 0.1332612611935474)
[1;38;5;39mCOMET INFO:[0m     val_loss [8]   : 

GSPC → MSE: 0.03517, RMSE: 0.18755, R²: 0.96441




✓ Parent model trained and saved to: outputs/parent


In [None]:
 # Step 2: Train child models with transfer learning
results = {}
for ticker in CHILD_TICKERS:
    print(f"\n2. Training child model for {ticker}...")
    try:
        summary = train_child(
            child_ticker=ticker,
            start=START_DATE,
            epochs=CHILD_EPOCHS,
            parent_dir=parent_dir,
            workdir="outputs"
        )
        results[ticker] = summary
        print(f"✓ {ticker} model trained and saved to: {summary['checkpoint']}")
        print(f"✓ Predictions saved to: {summary['json']}")
        print(f"✓ Metrics saved to: {summary['checkpoint']}/{ticker}_metrics.json")
            
    except Exception as e:
        print(f"✗ Error training {ticker}: {e}")
        continue
    
    # # # Step 3: Generate fresh predictions
    print(f"\n3. Generating fresh predictions...")
    for ticker in CHILD_TICKERS:
        if ticker in results:
            try:
                preds = predict_child(ticker, parent_dir=parent_dir, workdir="outputs")
                print(f"✓ {ticker} predictions for {preds['next_business_day']}:")
                print(f"  Next-day open: ${preds['predictions']['next_day_open']:.2f}")
                print(f"  Next-day high: ${preds['predictions']['next_day_high']:.2f}")
                print(f"  Next-day low: ${preds['predictions']['next_day_low']:.2f}")
                print(f"  Next-day close: ${preds['predictions']['next_day_close']:.2f}")
                print(f"  Next-week high: ${preds['predictions']['next_week_high']:.2f}")
                print(f"  Next-week low: ${preds['predictions']['next_week_low']:.2f}")
                
            except Exception as e:
                print(f"✗ Error predicting {ticker}: {e}")
    
    print(f"\n" + "=" * 50)
    print("Pipeline completed! Check 'outputs/' directory for:")
    print("- Model checkpoints (model.pt files)")
    print("- Scalers (scaler.pkl files)")  
    print("- Prediction JSONs (*_forecast.json)")
    print("- Performance metrics (*_metrics.json)")
    print("- Forecast plots (*_history_forecast.png)")
    print("\nFile structure:")
    print("outputs/")
    print("├── parent/")
    print(f"│   ├── model.pt")
    print(f"│   ├── scaler.pkl")
    print(f"│   └── {PARENT_TICKER.replace('^', '')}_metrics.json")
    for ticker in CHILD_TICKERS:
        if ticker in results:
            print(f"├── {ticker}/")
            print(f"│   ├── model.pt")
            print(f"│   ├── scaler.pkl") 
            print(f"│   ├── {ticker}_forecast.json")
            print(f"│   ├── {ticker}_metrics.json")
            print(f"│   └── {ticker}_history_forecast.png")




2. Training child model for GOOG...


[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/karan-shingde/child-model/10af895e8f26430fbc662a2445492868

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/Users/karan/Documents/machine-learning/everything-mlops/mlops-from-scrstch' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.


Epoch 1/10 - Train Loss: 0.05583
Epoch 1/10 - Val Loss: 0.04103
Epoch 2/10 - Train Loss: 0.04944
Epoch 2/10 - Val Loss: 0.03999
Epoch 3/10 - Train Loss: 0.04934
Epoch 3/10 - Val Loss: 0.03963
Epoch 4/10 - Train Loss: 0.04823
Epoch 4/10 - Val Loss: 0.03938
Epoch 5/10 - Train Loss: 0.04800
Epoch 5/10 - Val Loss: 0.03938
Epoch 6/10 - Train Loss: 0.04869
Epoch 6/10 - Val Loss: 0.03933
Epoch 7/10 - Train Loss: 0.04922
Epoch 7/10 - Val Loss: 0.03935
Epoch 8/10 - Train Loss: 0.04788
Epoch 8/10 - Val Loss: 0.03944
Epoch 9/10 - Train Loss: 0.04817
Epoch 9/10 - Val Loss: 0.03916
Epoch 10/10 - Train Loss: 0.04859
Epoch 10/10 - Val Loss: 0.03936


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : child_GOOG
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/karan-shingde/child-model/10af895e8f26430fbc662a2445492868
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     mse             : 0.045351333916187286
[1;38;5;39mCOMET INFO:[0m     next_day_close  : 196.6043701171875
[1;38;5;39mCOMET INFO:[0m     next_day_high   : 198.56590270996094
[1;38;5;39mCOMET INFO:[0m     next_day_low    : 194.614990234375
[1;38;5;39mCOMET INFO:[0m     next_day_open   : 196.99160766601562
[1;38;5;39m

GOOG → MSE: 0.04535, RMSE: 0.21296, R²: 0.95162


[1;38;5;39mCOMET INFO:[0m Please wait for metadata to finish uploading (timeout is 3600 seconds)
[1;38;5;39mCOMET INFO:[0m Uploading 2 metrics, params and output messages


✓ GOOG model trained and saved to: outputs/GOOG
✓ Predictions saved to: outputs/GOOG/GOOG_child_forecast.json
✓ Metrics saved to: outputs/GOOG/GOOG_metrics.json

3. Generating fresh predictions...




✓ GOOG predictions for 2025-08-26:
  Next-day open: $196.99
  Next-day high: $198.57
  Next-day low: $194.61
  Next-day close: $196.60
  Next-week high: $198.57
  Next-week low: $194.61

Pipeline completed! Check 'outputs/' directory for:
- Model checkpoints (model.pt files)
- Scalers (scaler.pkl files)
- Prediction JSONs (*_forecast.json)
- Performance metrics (*_metrics.json)
- Forecast plots (*_history_forecast.png)

File structure:
outputs/
├── parent/
│   ├── model.pt
│   ├── scaler.pkl
│   └── GSPC_metrics.json
├── GOOG/
│   ├── model.pt
│   ├── scaler.pkl
│   ├── GOOG_forecast.json
│   ├── GOOG_metrics.json
│   └── GOOG_history_forecast.png

2. Training child model for AMZN...


[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/karan-shingde/child-model/a1dd6fc8eaaf426d9c130e6a750b7f1f

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/Users/karan/Documents/machine-learning/everything-mlops/mlops-from-scrstch' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.


Epoch 1/10 - Train Loss: 0.12021
Epoch 1/10 - Val Loss: 0.11875
Epoch 2/10 - Train Loss: 0.11583
Epoch 2/10 - Val Loss: 0.11769
Epoch 3/10 - Train Loss: 0.11627
Epoch 3/10 - Val Loss: 0.11755
Epoch 4/10 - Train Loss: 0.11606
Epoch 4/10 - Val Loss: 0.11738
Epoch 5/10 - Train Loss: 0.11589
Epoch 5/10 - Val Loss: 0.11687
Epoch 6/10 - Train Loss: 0.11571
Epoch 6/10 - Val Loss: 0.11690
Epoch 7/10 - Train Loss: 0.11541
Epoch 7/10 - Val Loss: 0.11670
Epoch 8/10 - Train Loss: 0.11557
Epoch 8/10 - Val Loss: 0.11682
Epoch 9/10 - Train Loss: 0.11546
Epoch 9/10 - Val Loss: 0.11691
Epoch 10/10 - Train Loss: 0.11499
Epoch 10/10 - Val Loss: 0.11702


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : child_AMZN
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/karan-shingde/child-model/a1dd6fc8eaaf426d9c130e6a750b7f1f
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     mse             : 0.1143484115600586
[1;38;5;39mCOMET INFO:[0m     next_day_close  : 221.8444061279297
[1;38;5;39mCOMET INFO:[0m     next_day_high   : 224.2028045654297
[1;38;5;39mCOMET INFO:[0m     next_day_low    : 219.7496337890625
[1;38;5;39mCOMET INFO:[0m     next_day_open   : 221.9628143310547
[1;38;5;39mCOM

AMZN → MSE: 0.11435, RMSE: 0.33815, R²: 0.88338




✓ AMZN model trained and saved to: outputs/AMZN
✓ Predictions saved to: outputs/AMZN/AMZN_child_forecast.json
✓ Metrics saved to: outputs/AMZN/AMZN_metrics.json

3. Generating fresh predictions...
✓ GOOG predictions for 2025-08-26:
  Next-day open: $196.99
  Next-day high: $198.57
  Next-day low: $194.61
  Next-day close: $196.60
  Next-week high: $198.57
  Next-week low: $194.61




✓ AMZN predictions for 2025-08-26:
  Next-day open: $221.96
  Next-day high: $224.20
  Next-day low: $219.75
  Next-day close: $221.84
  Next-week high: $224.20
  Next-week low: $219.75

Pipeline completed! Check 'outputs/' directory for:
- Model checkpoints (model.pt files)
- Scalers (scaler.pkl files)
- Prediction JSONs (*_forecast.json)
- Performance metrics (*_metrics.json)
- Forecast plots (*_history_forecast.png)

File structure:
outputs/
├── parent/
│   ├── model.pt
│   ├── scaler.pkl
│   └── GSPC_metrics.json
├── GOOG/
│   ├── model.pt
│   ├── scaler.pkl
│   ├── GOOG_forecast.json
│   ├── GOOG_metrics.json
│   └── GOOG_history_forecast.png
├── AMZN/
│   ├── model.pt
│   ├── scaler.pkl
│   ├── AMZN_forecast.json
│   ├── AMZN_metrics.json
│   └── AMZN_history_forecast.png

2. Training child model for META...


[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/karan-shingde/child-model/612b5c5ab3dd4e53b9168eb5b249fa4c

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/Users/karan/Documents/machine-learning/everything-mlops/mlops-from-scrstch' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.


Epoch 1/10 - Train Loss: 0.09138
Epoch 1/10 - Val Loss: 0.06091
Epoch 2/10 - Train Loss: 0.08499
Epoch 2/10 - Val Loss: 0.05920
Epoch 3/10 - Train Loss: 0.08453
Epoch 3/10 - Val Loss: 0.05897
Epoch 4/10 - Train Loss: 0.08430
Epoch 4/10 - Val Loss: 0.05833
Epoch 5/10 - Train Loss: 0.08346
Epoch 5/10 - Val Loss: 0.05792
Epoch 6/10 - Train Loss: 0.08385
Epoch 6/10 - Val Loss: 0.05817
Epoch 7/10 - Train Loss: 0.08362
Epoch 7/10 - Val Loss: 0.05739
Epoch 8/10 - Train Loss: 0.08375
Epoch 8/10 - Val Loss: 0.05731
Epoch 9/10 - Train Loss: 0.08239
Epoch 9/10 - Val Loss: 0.05727
Epoch 10/10 - Train Loss: 0.08270
Epoch 10/10 - Val Loss: 0.05733


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : child_META
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/karan-shingde/child-model/612b5c5ab3dd4e53b9168eb5b249fa4c
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     mse             : 0.07710161805152893
[1;38;5;39mCOMET INFO:[0m     next_day_close  : 726.6943359375
[1;38;5;39mCOMET INFO:[0m     next_day_high   : 736.3756713867188
[1;38;5;39mCOMET INFO:[0m     next_day_low    : 717.3019409179688
[1;38;5;39mCOMET INFO:[0m     next_day_open   : 728.9359741210938
[1;38;5;39mCOMET

META → MSE: 0.07710, RMSE: 0.27767, R²: 0.90977


[1;38;5;39mCOMET INFO:[0m Please wait for assets to finish uploading (timeout is 10800 seconds)
[1;38;5;39mCOMET INFO:[0m All assets have been sent, waiting for delivery confirmation


✓ META model trained and saved to: outputs/META
✓ Predictions saved to: outputs/META/META_child_forecast.json
✓ Metrics saved to: outputs/META/META_metrics.json

3. Generating fresh predictions...
✓ GOOG predictions for 2025-08-26:
  Next-day open: $196.99
  Next-day high: $198.57
  Next-day low: $194.61
  Next-day close: $196.60
  Next-week high: $198.57
  Next-week low: $194.61
✓ AMZN predictions for 2025-08-26:
  Next-day open: $221.96
  Next-day high: $224.20
  Next-day low: $219.75
  Next-day close: $221.84
  Next-week high: $224.20
  Next-week low: $219.75




✓ META predictions for 2025-08-26:
  Next-day open: $728.94
  Next-day high: $736.38
  Next-day low: $717.30
  Next-day close: $726.69
  Next-week high: $736.38
  Next-week low: $717.30

Pipeline completed! Check 'outputs/' directory for:
- Model checkpoints (model.pt files)
- Scalers (scaler.pkl files)
- Prediction JSONs (*_forecast.json)
- Performance metrics (*_metrics.json)
- Forecast plots (*_history_forecast.png)

File structure:
outputs/
├── parent/
│   ├── model.pt
│   ├── scaler.pkl
│   └── GSPC_metrics.json
├── GOOG/
│   ├── model.pt
│   ├── scaler.pkl
│   ├── GOOG_forecast.json
│   ├── GOOG_metrics.json
│   └── GOOG_history_forecast.png
├── AMZN/
│   ├── model.pt
│   ├── scaler.pkl
│   ├── AMZN_forecast.json
│   ├── AMZN_metrics.json
│   └── AMZN_history_forecast.png
├── META/
│   ├── model.pt
│   ├── scaler.pkl
│   ├── META_forecast.json
│   ├── META_metrics.json
│   └── META_history_forecast.png

2. Training child model for AXP...


[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/karan-shingde/child-model/92e512fe92de4fed81044bcdf96d7949

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/Users/karan/Documents/machine-learning/everything-mlops/mlops-from-scrstch' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.


Epoch 1/10 - Train Loss: 0.07360
Epoch 1/10 - Val Loss: 0.06863
Epoch 2/10 - Train Loss: 0.06723
Epoch 2/10 - Val Loss: 0.06711
Epoch 3/10 - Train Loss: 0.06662
Epoch 3/10 - Val Loss: 0.06632
Epoch 4/10 - Train Loss: 0.06605
Epoch 4/10 - Val Loss: 0.06602
Epoch 5/10 - Train Loss: 0.06579
Epoch 5/10 - Val Loss: 0.06594
Epoch 6/10 - Train Loss: 0.06591
Epoch 6/10 - Val Loss: 0.06575
Epoch 7/10 - Train Loss: 0.06518
Epoch 7/10 - Val Loss: 0.06558
Epoch 8/10 - Train Loss: 0.06542
Epoch 8/10 - Val Loss: 0.06549
Epoch 9/10 - Train Loss: 0.06558
Epoch 9/10 - Val Loss: 0.06536
Epoch 10/10 - Train Loss: 0.06548
Epoch 10/10 - Val Loss: 0.06548


[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : child_AXP
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/karan-shingde/child-model/92e512fe92de4fed81044bcdf96d7949
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     mse             : 0.06392543017864227
[1;38;5;39mCOMET INFO:[0m     next_day_close  : 300.9759826660156
[1;38;5;39mCOMET INFO:[0m     next_day_high   : 303.3474426269531
[1;38;5;39mCOMET INFO:[0m     next_day_low    : 296.7518615722656
[1;38;5;39mCOMET INFO:[0m     next_day_open   : 300.0487060546875
[1;38;5;39mCOM

AXP → MSE: 0.06393, RMSE: 0.25283, R²: 0.93619


[1;38;5;39mCOMET INFO:[0m Uploading 2 metrics, params and output messages


✓ AXP model trained and saved to: outputs/AXP
✓ Predictions saved to: outputs/AXP/AXP_child_forecast.json
✓ Metrics saved to: outputs/AXP/AXP_metrics.json

3. Generating fresh predictions...
✓ GOOG predictions for 2025-08-26:
  Next-day open: $196.99
  Next-day high: $198.57
  Next-day low: $194.61
  Next-day close: $196.60
  Next-week high: $198.57
  Next-week low: $194.61
✓ AMZN predictions for 2025-08-26:
  Next-day open: $221.96
  Next-day high: $224.20
  Next-day low: $219.75
  Next-day close: $221.84
  Next-week high: $224.20
  Next-week low: $219.75
✓ META predictions for 2025-08-26:
  Next-day open: $728.94
  Next-day high: $736.38
  Next-day low: $717.30
  Next-day close: $726.69
  Next-week high: $736.38
  Next-week low: $717.30
✓ AXP predictions for 2025-08-26:
  Next-day open: $300.05
  Next-day high: $303.35
  Next-day low: $296.75
  Next-day close: $300.98
  Next-week high: $303.35
  Next-week low: $296.75

Pipeline completed! Check 'outputs/' directory for:
- Model check

NameError: name 'Experiment' is not defined

In [9]:
result = perform_eda(PARENT_TICKER, start="2020-01-01")

ValueError: Data must be 1-dimensional, got ndarray of shape (1419, 1) instead