In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
# =========================================
# Fast TFT — Regression on next return (MPS‑ready, macOS-safe DataLoader)
# =========================================
import os, math, warnings, random
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix
)

import matplotlib.pyplot as plt
import seaborn as sns

# -----------------------------
# Repro
# -----------------------------
def set_seed(seed=1337):
    random.seed(seed); np.random.seed(seed)
    torch.manual_seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
set_seed(1337)

# Mejora en MPS
try:
    torch.set_float32_matmul_precision("high")
except Exception:
    pass

# -----------------------------
# Feature engineering (TFT full)
# -----------------------------
def add_calendar_features(df):
    df["Date"] = pd.to_datetime(df["Date"])
    df["DoW"] = df["Date"].dt.dayofweek
    df["Month"] = df["Date"].dt.month
    df["Quarter"] = df["Date"].dt.quarter
    df["Day"] = df["Date"].dt.day
    # Cyclical encodings
    df["DoW_sin"] = np.sin(2*np.pi*df["DoW"]/7);    df["DoW_cos"] = np.cos(2*np.pi*df["DoW"]/7)
    df["Mon_sin"] = np.sin(2*np.pi*df["Month"]/12); df["Mon_cos"] = np.cos(2*np.pi*df["Month"]/12)
    df["Q_sin"]   = np.sin(2*np.pi*df["Quarter"]/4);df["Q_cos"]  = np.cos(2*np.pi*df["Quarter"]/4)
    df["DOM_sin"] = np.sin(2*np.pi*df["Day"]/31);   df["DOM_cos"] = np.cos(2*np.pi*df["Day"]/31)
    return df

def engineer_features(df):
    df = df.copy()

    # Base returns / vol
    df["Returns"] = df["Close"].pct_change()
    df["Volatility"] = df["Returns"].expanding(min_periods=20).std()

    # RSI (14,21)
    for period in [14, 21]:
        delta = df["Close"].diff()
        gain = (delta.where(delta > 0, 0)).rolling(period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(period).mean()
        rs = gain / (loss.replace(0, np.nan))
        df[f"RSI_{period}"] = 100 - (100 / (1 + rs))

    # MACD + signal + histogram
    ema12 = df["Close"].ewm(span=12).mean()
    ema26 = df["Close"].ewm(span=26).mean()
    df["MACD"] = ema12 - ema26
    df["MACD_Signal"] = df["MACD"].ewm(span=9).mean()
    df["MACD_Histogram"] = df["MACD"] - df["MACD_Signal"]

    # Bollinger (20,50): pos & width
    for period in [20, 50]:
        m = df["Close"].rolling(period).mean()
        s = df["Close"].rolling(period).std()
        upper, lower = m + 2*s, m - 2*s
        df[f"BollingerPos_{period}"]   = (df["Close"] - lower) / (upper - lower)
        df[f"BollingerWidth_{period}"] = (upper - lower) / m

    # Volume features
    df["VolumeMA"] = df["Volume"].rolling(20).mean()
    df["VolumeRatio"] = df["Volume"] / df["VolumeMA"]
    df["VolumePriceCorr"] = df["Volume"].rolling(20).corr(df["Close"])

    # Price ratios
    df["HighLowRatio"] = df["High"] / df["Low"]
    df["CloseOpenRatio"] = df["Close"] / df["Open"]
    df["HighCloseRatio"] = df["High"] / df["Close"]
    df["LowCloseRatio"]  = df["Low"]  / df["Close"]

    # Multi-horizon returns
    for p in [1,3,5,10,20]:
        df[f"Ret_{p}"] = df["Close"].pct_change(p)

    # ATR (14,21)
    hl = df["High"] - df["Low"]
    hc = (df["High"] - df["Close"].shift()).abs()
    lc = (df["Low"]  - df["Close"].shift()).abs()
    tr = np.maximum(hl, np.maximum(hc, lc))
    for p in [14,21]:
        df[f"ATR_{p}"] = tr.rolling(p).mean()

    # EMA deviations (retorno relativo al EMA)
    for span in [5,12,26,50]:
        ema = df["Close"].ewm(span=span, adjust=False).mean()
        df[f"EMAdev_{span}"] = df["Close"] / ema - 1.0

    # Momentum / niveles
    df["Momentum_10"] = df["Close"] / df["Close"].shift(10) - 1
    df["Momentum_20"] = df["Close"] / df["Close"].shift(20) - 1
    df["HighMax_20"]  = df["High"].rolling(20).max()
    df["LowMin_20"]   = df["Low"].rolling(20).min()
    df["PricePosition"] = (df["Close"] - df["LowMin_20"]) / (df["HighMax_20"] - df["LowMin_20"])

    # Short lags for returns
    df["Ret1"] = df["Returns"].shift(1)
    df["Ret5"] = df["Close"].pct_change(5)

    df = add_calendar_features(df)
    return df

# -----------------------------
# Windows / Splits (TARGET = next return)
# -----------------------------
def make_sequences_by_dates(df, lookback=60, horizon=1):
    """
    Objetivo: Return_next = log(C_{t+h}/C_t). Métricas de dirección con diff() sobre retornos.
    """
    df = df.copy()
    df["Target_Return"] = np.log(df["Close"].shift(-horizon) / df["Close"])

    feature_cols = [
        "Open","High","Low","Close","Volume",
        "RSI_14","RSI_21","MACD","MACD_Signal","MACD_Histogram",
        "Volatility","BollingerPos_20","BollingerPos_50","BollingerWidth_20","BollingerWidth_50",
        "VolumeRatio","VolumePriceCorr","HighLowRatio","CloseOpenRatio","HighCloseRatio","LowCloseRatio",
        "Ret_1","Ret_3","Ret_5","Ret_10","Ret_20",
        "ATR_14","ATR_21","EMAdev_5","EMAdev_12","EMAdev_26","EMAdev_50",
        "Momentum_10","Momentum_20","PricePosition",
        "DoW_sin","DoW_cos","Mon_sin","Mon_cos","Q_sin","Q_cos","DOM_sin","DOM_cos",
        "Ret1","Ret5"
    ]
    feature_cols = [c for c in feature_cols if c in df.columns]

    df = df.dropna(subset=feature_cols + ["Target_Return"])

    X, y, tgt_dates = [], [], []
    V = df[feature_cols].values
    yv = df["Target_Return"].values
    dates = df["Date"].values

    for t in range(lookback-1, len(df)-horizon):
        X.append(V[t-(lookback-1):t+1, :])
        y.append(float(yv[t]))
        tgt_dates.append(pd.to_datetime(dates[t+horizon]))
    return np.asarray(X), np.asarray(y), np.array(tgt_dates), feature_cols

def split_train_val_test_by_date(
    tgt_dates,
    start_train="2020-01-01", end_trainval="2023-12-31",
    test_start="2024-01-01",  test_end="2024-12-31",
    val_ratio_within_train=0.20
):
    d = pd.to_datetime(tgt_dates)
    trv = np.where((d>=pd.Timestamp(start_train))&(d<=pd.Timestamp(end_trainval)))[0]
    te  = np.where((d>=pd.Timestamp(test_start)) &(d<=pd.Timestamp(test_end)) )[0]
    if len(trv)==0 or len(te)==0: return None, None, None
    trv = np.sort(trv)
    split = int(len(trv)*(1-val_ratio_within_train))
    tr, va = trv[:split], trv[split:]
    te = np.sort(te)
    if len(tr)==0 or len(va)==0 or len(te)==0: return None, None, None
    return tr, va, te

# -----------------------------
# Dataset
# -----------------------------
class ReturnDataset(Dataset):
    def __init__(self, X, y, dates=None):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        self.dates = dates
    def __len__(self): return len(self.X)
    def __getitem__(self, idx): return self.X[idx], self.y[idx]

def prepare_datasets_for_ticker(df_ticker, lookback=60, horizon=1):
    df_feat = engineer_features(df_ticker.copy()).dropna()
    X, y, tgt_dates, feat_cols = make_sequences_by_dates(df_feat, lookback, horizon)

    split = split_train_val_test_by_date(tgt_dates)
    if split is None: return [None]*8
    tr, va, te = split

    scaler_x = StandardScaler()
    Xtr_2d = X[tr].reshape(-1, X.shape[-1])
    scaler_x.fit(Xtr_2d)

    def transform_windows(Xw):
        S = Xw.shape
        X2 = Xw.reshape(-1, S[-1])
        X2 = scaler_x.transform(X2)
        return X2.reshape(S)

    Xtr = transform_windows(X[tr]); ytr = y[tr]
    Xva = transform_windows(X[va]); yva = y[va]
    Xte = transform_windows(X[te]); yte = y[te]

    return ReturnDataset(Xtr, ytr), ReturnDataset(Xva, yva), ReturnDataset(Xte, yte, dates=tgt_dates[te]), scaler_x, None, tgt_dates[te], feat_cols, y[te]

# -----------------------------
# TFT ligero — regresión
# -----------------------------
class EnhancedVariableSelectionNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.nets = nn.ModuleList([
            nn.Sequential(
                nn.Linear(1, hidden_size),
                nn.ELU(),
                nn.Dropout(dropout),
                nn.Linear(hidden_size, hidden_size),
                nn.ELU()
            ) for _ in range(input_size)
        ])
        self.selector = nn.Sequential(
            nn.Linear(hidden_size*input_size, hidden_size),
            nn.ELU(),
            nn.Linear(hidden_size, input_size),
            nn.Softmax(dim=-1)
        )
        self.drop = nn.Dropout(dropout)

    def forward(self, x):
        B,T,F = x.shape
        outs = [net(x[:,:,i:i+1]) for i,net in enumerate(self.nets)]
        concat = torch.cat(outs, dim=-1)
        w = self.selector(concat.view(B*T, -1)).view(B,T,F)
        fused = torch.zeros(B,T,self.hidden_size, device=x.device)
        for i,o in enumerate(outs):
            fused += w[:,:,i:i+1]*o
        return self.drop(fused)

class InterpretableMultiHeadAttention(nn.Module):
    def __init__(self, d_model, n_heads=6, dropout=0.15):
        super().__init__()
        assert d_model % n_heads == 0
        self.h = n_heads; self.dk = d_model // n_heads
        self.Wq = nn.Linear(d_model, d_model, bias=False)
        self.Wk = nn.Linear(d_model, d_model, bias=False)
        self.Wv = nn.Linear(d_model, d_model, bias=False)
        self.Wo = nn.Linear(d_model, d_model)
        self.norm = nn.LayerNorm(d_model); self.drop = nn.Dropout(dropout)

    def forward(self, q,k,v):
        B,T,H = q.shape
        Q = self.Wq(q).view(B,T,self.h,self.dk).transpose(1,2)
        K = self.Wk(k).view(B,T,self.h,self.dk).transpose(1,2)
        V = self.Wv(v).view(B,T,self.h,self.dk).transpose(1,2)
        scores = torch.matmul(Q, K.transpose(-2,-1)) / math.sqrt(self.dk)
        attn = torch.softmax(scores, dim=-1)
        ctx = torch.matmul(attn, V).transpose(1,2).contiguous().view(B,T,H)
        out = self.norm(q + self.drop(self.Wo(ctx)))
        return out

class TFTRegressor(nn.Module):
    def __init__(self, input_size, hidden_size=96, num_layers=2, num_heads=6, dropout=0.15):
        super().__init__()
        self.vsn = EnhancedVariableSelectionNetwork(input_size, hidden_size, dropout)
        self.lstm = nn.LSTM(hidden_size, hidden_size//2, num_layers=num_layers,
                            batch_first=True, bidirectional=True, dropout=dropout if num_layers>1 else 0.0)
        self.attn = InterpretableMultiHeadAttention(d_model=hidden_size, n_heads=num_heads, dropout=dropout)
        self.grn  = nn.Sequential(
            nn.Linear(hidden_size, hidden_size*2),
            nn.GLU(dim=-1), nn.ELU(), nn.Dropout(dropout),
            nn.Linear(hidden_size, hidden_size)
        )
        self.norm1 = nn.LayerNorm(hidden_size); self.norm2 = nn.LayerNorm(hidden_size)
        self.head = nn.Sequential(
            nn.Linear(hidden_size*3, hidden_size),
            nn.ELU(), nn.Dropout(dropout),
            nn.Linear(hidden_size, 1)
        )

    def forward(self, x):
        x = self.vsn(x)
        enc,_ = self.lstm(x)
        att = self.attn(enc, enc, enc)
        x = self.norm1(enc + att)
        x = self.norm2(x + self.grn(x))
        pooled = torch.cat([x.mean(1), x.max(1)[0], x[:,-1,:]], dim=-1)
        return self.head(pooled).squeeze(-1)

# -----------------------------
# Trainer (Huber + OneCycleLR) con autocast MPS
# -----------------------------
class Trainer:
    def __init__(self, model, device='cpu', autocast_enabled=False):
        self.model = model.to(device); self.device = device
        self.loss_fn = nn.SmoothL1Loss(beta=0.01)
        self.train_losses = []; self.val_losses = []
        self.autocast_enabled = autocast_enabled

    def _run_epoch(self, loader, opt=None, train=True):
        if train: self.model.train()
        else:     self.model.eval()
        total=0.0
        with torch.set_grad_enabled(train):
            for xb,yb in loader:
                xb = xb.to(self.device); yb = yb.to(self.device)
                if train: opt.zero_grad(set_to_none=True)
                with torch.autocast(device_type=self.device.type, dtype=torch.float16, enabled=self.autocast_enabled):
                    preds = self.model(xb); loss = self.loss_fn(preds, yb)
                if train:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.8)
                    opt.step()
                total += loss.item()
        return total/len(loader)

    def fit(self, train_loader, val_loader, epochs=80, lr=3e-4):
        opt = optim.AdamW(self.model.parameters(), lr=lr, weight_decay=0.03, betas=(0.9,0.95))
        sch = optim.lr_scheduler.OneCycleLR(opt, max_lr=lr, steps_per_epoch=max(1,len(train_loader)),
                                            epochs=epochs, pct_start=0.2, anneal_strategy='cos',
                                            div_factor=25.0, final_div_factor=100.0)
        best = float('inf'); patience=15; wait=0

        for ep in range(1, epochs+1):
            tr = self._run_epoch(train_loader, opt, train=True); sch.step()
            va = self._run_epoch(val_loader, train=False)
            self.train_losses.append(tr); self.val_losses.append(va)

            if va < best: best, wait = va, 0; torch.save(self.model.state_dict(), "best_tft_return.pth")
            else: wait += 1

            if ep % 10 == 0:
                print(f"Epoch {ep:03d} | Train {tr:.5f} | Val {va:.5f}")
            if wait >= patience and ep >= 40:
                print(f"⏹️ Early stopping at epoch {ep}"); break

    def predict(self, loader):
        self.model.eval()
        preds=[]; targets=[]
        with torch.no_grad():
            for xb,yb in loader:
                xb = xb.to(self.device)
                out = self.model(xb).cpu().numpy()
                preds.append(out); targets.append(yb.numpy())
        return np.concatenate(preds), np.concatenate(targets)

# -----------------------------
# Evaluación (dirección desde retornos) y plots
# -----------------------------
def evaluate_direction_from_returns(y_pred, y_true, title="Confusion Matrix (returns)"):
    y_pred_bin = (pd.Series(y_pred).diff() >= 0).astype(int).iloc[1:]
    y_true_bin = (pd.Series(y_true).diff() >= 0).astype(int).iloc[1:]

    acc  = accuracy_score(y_true_bin, y_pred_bin)
    prec = precision_score(y_true_bin, y_pred_bin, zero_division=0)
    rec  = recall_score(y_true_bin, y_pred_bin, zero_division=0)
    f1   = f1_score(y_true_bin, y_pred_bin, zero_division=0)
    rmse = np.sqrt(np.mean((y_true - y_pred)**2))

    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"F1-Score: {f1:.4f}")
    print(f"RMSE: {rmse:.6f}")

    cm = confusion_matrix(y_true_bin, y_pred_bin, labels=[0,1])
    plt.figure(figsize=(6,4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=['Down/0','Up/1'], yticklabels=['Down/0','Up/1'])
    plt.title(title); plt.xlabel('Predicted'); plt.ylabel('Actual')
    plt.tight_layout(); plt.show()

    return acc, prec, rec, f1, rmse

def plot_return_curve(dates, preds, targets, ticker):
    dt = pd.to_datetime(dates)
    plt.figure(figsize=(15,5))
    plt.plot(dt, targets, label="Actual next return", linewidth=1.2)
    plt.plot(dt, preds,   label="Predicted next return", linewidth=1.2, alpha=0.9)
    plt.title(f"{ticker} — Next-return prediction (test 2024)")
    plt.ylabel("Return"); plt.xlabel("Date"); plt.grid(alpha=0.3); plt.legend(); plt.tight_layout(); plt.show()

def plot_losses(trainer, ticker):
    plt.figure(figsize=(12,4))
    plt.plot(trainer.train_losses, label='Train')
    plt.plot(trainer.val_losses,   label='Val')
    plt.title(f'{ticker} — Loss history'); plt.grid(alpha=0.3); plt.legend(); plt.tight_layout(); plt.show()

# -----------------------------
# Device setup (MPS > CUDA > CPU)
# -----------------------------
if torch.backends.mps.is_available():
    device = torch.device("mps")
    autocast_enabled = True
elif torch.cuda.is_available():
    device = torch.device("cuda")
    autocast_enabled = False
else:
    device = torch.device("cpu")
    autocast_enabled = False
print("Using device:", device)

# -----------------------------
# MAIN
# -----------------------------
if __name__ == "__main__":
    lookback_window = 60
    forecast_horizon = 1
    hidden_size = 192
    num_layers = 2
    num_heads = 8           # 192 % 8 == 0
    dropout = 0.15
    epochs = 80
    batch_size = 48
    max_tickers = 101
    min_rows_required = 350

    # Carga CSV multi‑índice (formato Kaggle)
    try_paths = [
        "/kaggle/input/nasdaq100-stock-data/nasdaq100_stock_data.csv",
        "nasdaq100_stock_data.csv"
    ]
    path = next((p for p in try_paths if os.path.exists(p)), try_paths[-1])
    df = pd.read_csv(path, header=[0,1])
    df = df.iloc[1:].copy()
    cols = df.columns.tolist(); cols[0] = ('Date',''); df.columns = pd.MultiIndex.from_tuples(cols)

    tickers = df.columns.get_level_values(0).unique()
    tickers = [t for t in tickers if t != 'Date']

    print(f"🖥️ Device: {device} | File: {path} | Tickers: {len(tickers)}")

    # 👇 Evita multiprocessing para no “picklear” clases de __main__ en macOS/py3.13
    loader_kwargs_train = dict(batch_size=batch_size, shuffle=True,  num_workers=0, persistent_workers=False)
    loader_kwargs_eval  = dict(batch_size=batch_size, shuffle=False, num_workers=0, persistent_workers=False)

    # Acumuladores por ticker
    accuracies, precisions, recalls, f1_scores, RMSES, tick_list = [], [], [], [], [], []

    processed = 0
    for t in tickers:
        if processed >= max_tickers: break
        print(f"\n[{processed+1}/{max_tickers}] 🎯 {t}")
        try:
            dft = df[t].copy().rename_axis(None, axis=1)
            dft["Date"] = pd.to_datetime(df[('Date','')].values, errors='coerce')
            dft = dft.dropna(subset=["Date","Open","High","Low","Close","Volume"])
            dft = dft[["Date","Open","High","Low","Close","Volume"]].reset_index(drop=True)
            if len(dft) < min_rows_required:
                print(f"⚠️  Insufficient rows after cleaning: {len(dft)}"); continue

            # Datasets
            res = prepare_datasets_for_ticker(dft, lookback_window, forecast_horizon)
            if res[0] is None:
                print("⚠️ Could not build datasets with required date windows."); continue
            train_ds, val_ds, test_ds, scaler_x, _, test_dates, feat_cols, y_test_returns = res

            train_loader = DataLoader(train_ds, **loader_kwargs_train)
            val_loader   = DataLoader(val_ds,   **loader_kwargs_eval)
            test_loader  = DataLoader(test_ds,  **loader_kwargs_eval)

            # Modelo + trainer
            model = TFTRegressor(input_size=len(feat_cols), hidden_size=hidden_size,
                                 num_layers=num_layers, num_heads=num_heads, dropout=dropout)
            trainer = Trainer(model, device=device, autocast_enabled=autocast_enabled)
            trainer.fit(train_loader, val_loader, epochs=epochs, lr=3e-4)

            # Carga mejor checkpoint y predice
            model.load_state_dict(torch.load("best_tft_return.pth", map_location=device))
            preds, targets = trainer.predict(test_loader)

            # Outputs
            plot_return_curve(test_dates, preds, targets, t)
            plot_losses(trainer, t)
            acc, prec, rec, f1, rmse = evaluate_direction_from_returns(preds, targets)

            accuracies.append(acc); precisions.append(prec); recalls.append(rec)
            f1_scores.append(f1); RMSES.append(rmse); tick_list.append(t)

            processed += 1

        except Exception as e:
            print(f"❌ Error in {t}: {e}")
            continue

    # Guardado de métricas por ticker
    if processed > 0:
        results_df = pd.DataFrame({
            'Ticker': tick_list,
            'Accuracy': accuracies,
            'Precision': precisions,
            'Recall': recalls,
            'F1-Score': f1_scores,
            'RMSE': RMSES
        })
        print("\nResultados por ticker:")
        print(results_df.to_string(index=False, float_format="%.4f"))
        results_df.to_csv("tft_return_evaluation_results.csv", index=False)
        print("\n💾 Saved -> tft_return_evaluation_results.csv")
    else:
        print("❌ No se completó ningún ticker.")

Using device: mps
🖥️ Device: mps | File: nasdaq100_stock_data.csv | Tickers: 101

[1/101] 🎯 FANG


KeyboardInterrupt: 

In [None]:
results_df.to_csv("tft_evaluation_results_sc.csv")