### Sequential Training 

In [None]:
# lightning >=2.2
import torch, pytorch_lightning as pl
from torch.utils.data import IterableDataset, DataLoader
import pandas as pd
import numpy as np

# ---------- 1.  Build an iterable Dataset that streams in time order ----------
class RollingWindowDataset(IterableDataset):
    """
    Stream (X_window, y_target) pairs in chronological order.

    For every step ``t`` the input is rows ``[t - window, t)`` of the feature
    matrix and the target is ``y[t + horizon - 1]``, i.e. the value that lies
    ``horizon`` rows after the last row of the window.

    Assumes df already sorted by timestamp ascending.

    Args:
        df: Frame holding the feature and target columns.
        feature_cols: Column labels used as model inputs.
        target_col: Column label of the regression target.
        window: Number of consecutive rows per input window (>= 1).
        horizon: Distance, in rows, from the end of the window to the
            target (>= 1).

    Raises:
        ValueError: If ``window`` or ``horizon`` is smaller than 1.
    """
    def __init__(self, df: pd.DataFrame, feature_cols, target_col,
                 window: int = 24, horizon: int = 1):
        if window < 1 or horizon < 1:
            raise ValueError("window and horizon must both be >= 1")
        self.X = df[feature_cols].values.astype(np.float32)
        self.y = df[target_col].values.astype(np.float32)
        self.window, self.horizon = window, horizon

    def __iter__(self):
        # The largest usable t satisfies t + horizon - 1 == len(y) - 1, so the
        # exclusive upper bound is len - horizon + 1.  A bound of
        # len - horizon would silently drop the most recent sample.
        stop = len(self.X) - self.horizon + 1
        for t in range(self.window, stop):
            X_win = self.X[t - self.window:t]          # rows t-window .. t-1
            y_tar = self.y[t + self.horizon - 1]       # scalar regression target
            yield torch.from_numpy(X_win), torch.tensor(y_tar)

# ---------- 2.  LightningDataModule ----------
class TSDataModule(pl.LightningDataModule):
    """
    Chronological train/validation split served through Lightning.

    The leading ``1 - val_size`` fraction of ``df`` becomes the training set
    and the remaining rows the validation set.  Both loaders keep row order
    (``shuffle=False``).

    Args:
        df: Source frame; expected to be sorted by time already.
        feature_cols: Column labels used as model inputs.
        target_col: Column label of the regression target.
        window: Rows per input window, forwarded to RollingWindowDataset.
        horizon: Forecast distance, forwarded to RollingWindowDataset.
        val_size: Fraction of rows (taken from the end) used for validation.
        batch_size: Batch size for both loaders.
    """
    def __init__(self, df, feature_cols, target_col, window=24, horizon=1,
                 val_size=0.05, batch_size=32):
        super().__init__()
        # Keep the DataFrame out of hparams: hyper-parameters are meant to be
        # small, serialisable settings, not the data set itself.
        self.save_hyperparameters(ignore=["df"])
        self.df = df

    def setup(self, stage=None):
        """Create the train/validation datasets from the chronological split."""
        df = self.df
        split_idx = int(len(df) * (1 - self.hparams.val_size))
        self.train_ds = RollingWindowDataset(df.iloc[:split_idx],
                                             self.hparams.feature_cols,
                                             self.hparams.target_col,
                                             self.hparams.window,
                                             self.hparams.horizon)
        self.val_ds   = RollingWindowDataset(df.iloc[split_idx:],
                                             self.hparams.feature_cols,
                                             self.hparams.target_col,
                                             self.hparams.window,
                                             self.hparams.horizon)

    def train_dataloader(self):
        """Return the training loader; order is preserved on purpose."""
        return DataLoader(self.train_ds,
                          batch_size=self.hparams.batch_size,
                          shuffle=False,  # critical!
                          drop_last=False)

    def val_dataloader(self):
        """Return the validation loader (also unshuffled)."""
        return DataLoader(self.val_ds,
                          batch_size=self.hparams.batch_size,
                          shuffle=False)

# ---------- 3.  LightningModule ----------
import torch.nn as nn
import torch.nn.functional as F

class SimpleTSRegressor(pl.LightningModule):
    """
    Baseline regressor: the input window is flattened and passed through a
    two-layer MLP that emits one scalar per sample.

    Swap the network for an LSTM/Transformer for better TS handling.
    """
    def __init__(self, n_features, window, lr=1e-3, hidden=128):
        super().__init__()
        self.save_hyperparameters()
        layers = [
            nn.Flatten(),                                   # (B, window*n_feat)
            nn.Linear(n_features * window, hidden),
            nn.ReLU(),
            nn.LayerNorm(hidden),
            nn.Linear(hidden, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        # Drop the trailing singleton dimension so the output is (B,).
        out = self.net(x)
        return out.squeeze(-1)

    def _mse_on_batch(self, batch):
        """Mean-squared error of the model's predictions on one batch."""
        inputs, targets = batch
        return F.mse_loss(self(inputs), targets)

    def training_step(self, batch, batch_idx):
        train_mse = self._mse_on_batch(batch)
        self.log("train_loss", train_mse, prog_bar=True)
        return train_mse

    def validation_step(self, batch, batch_idx):
        self.log("val_loss", self._mse_on_batch(batch), prog_bar=True)

    def configure_optimizers(self):
        """AdamW plus a StepLR schedule (factor 0.5, step_size=10)."""
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.hparams.lr)
        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, step_size=10, gamma=0.5)
        return [optimizer], [scheduler]

# ---------- 4.  Putting it together ----------
def run_training(df, feature_cols, target_col):
    """
    Train a SimpleTSRegressor on ``df`` for up to 50 epochs.

    Uses a 24-step window with a one-step horizon, keeps the three
    checkpoints with the lowest ``val_loss`` and clips gradients at 1.0.
    """
    window = 24          # last 24 timesteps → predict t+1
    datamodule = TSDataModule(
        df, feature_cols, target_col,
        window=window, horizon=1, val_size=0.05, batch_size=64,
    )
    regressor = SimpleTSRegressor(n_features=len(feature_cols), window=window)

    top3_checkpoints = pl.callbacks.ModelCheckpoint(
        monitor="val_loss", save_top_k=3, mode="min")
    trainer = pl.Trainer(
        max_epochs=50,
        gradient_clip_val=1.0,
        callbacks=[top3_checkpoints],
    )
    trainer.fit(regressor, datamodule)

### Loss Logger

In [None]:
# Attach a TensorBoard logger so the "train_loss"/"val_loss" values logged by
# the module can be inspected as curves.
# NOTE(review): `model` and `dm` must already exist in the notebook namespace;
# in the cell above they are only created as locals inside run_training().
from pytorch_lightning.loggers import TensorBoardLogger
# First argument is the save directory, `name` the experiment name.
logger = TensorBoardLogger("lightning_logs", name="seq_ts")

trainer = pl.Trainer(max_epochs=50,
                     logger=logger,              # ⬅️ add logger
                     callbacks=[
                         # keep the 3 checkpoints with the lowest val_loss
                         pl.callbacks.ModelCheckpoint(
                             monitor="val_loss", mode="min", save_top_k=3),
                         pl.callbacks.RichProgressBar()  # nicer bar (optional)
                     ])
trainer.fit(model, datamodule=dm)

### Choosing different mechanisms 

In [None]:
import torch
import torch.nn as nn
import math

def build_backbone(arch: str,
                   n_features: int,
                   window: int,
                   hidden: int = 128,
                   n_layers: int = 2,
                   n_heads: int = 4):
    """
    Construct the network selected by ``arch``.

    Every returned ``nn.Module`` maps (B, window, n_features) ➜ (B, 1).

    Supported ``arch`` values (matched case-insensitively):
      * "mlp"         - Flatten → Linear → ReLU → LayerNorm → Linear
      * "lstm"        - stacked LSTM; the last time step feeds a Linear head
      * "gru"         - stacked GRU; the last time step feeds a Linear head
      * "cnn"         - two dilated Conv1d layers (symmetric padding, so the
                        sequence length is kept) → global average pool → Linear
      * "transformer" - Linear projection + learned positional embedding →
                        TransformerEncoder; the last token feeds a Linear head

    ``n_layers`` is used by the lstm/gru/transformer variants and ``n_heads``
    only by the transformer.

    Raises:
        ValueError: If ``arch`` is not one of the names above.
    """
    arch = arch.lower()

    def _make_mlp():
        # Plain MLP baseline operating on the flattened window.
        return nn.Sequential(
            nn.Flatten(),
            nn.Linear(n_features * window, hidden),
            nn.ReLU(),
            nn.LayerNorm(hidden),
            nn.Linear(hidden, 1),
        )

    def _make_lstm():
        class LSTMHead(nn.Module):
            def __init__(self):
                super().__init__()
                self.lstm = nn.LSTM(input_size=n_features,
                                    hidden_size=hidden,
                                    num_layers=n_layers,
                                    batch_first=True)
                self.fc = nn.Linear(hidden, 1)

            def forward(self, x):             # x: (B, w, n_feat)
                seq_out, _state = self.lstm(x)
                final_step = seq_out[:, -1]   # output at the last time step
                return self.fc(final_step)

        return LSTMHead()

    def _make_gru():
        class GRUHead(nn.Module):
            def __init__(self):
                super().__init__()
                self.gru = nn.GRU(input_size=n_features,
                                  hidden_size=hidden,
                                  num_layers=n_layers,
                                  batch_first=True)
                self.fc = nn.Linear(hidden, 1)

            def forward(self, x):
                seq_out, _state = self.gru(x)
                final_step = seq_out[:, -1]
                return self.fc(final_step)

        return GRUHead()

    def _make_cnn():
        class CNNHead(nn.Module):
            def __init__(self):
                super().__init__()
                first_conv = nn.Conv1d(n_features, hidden,
                                       kernel_size=3, padding=2, dilation=2)
                second_conv = nn.Conv1d(hidden, hidden,
                                        kernel_size=3, padding=4, dilation=4)
                self.conv = nn.Sequential(
                    first_conv,
                    nn.ReLU(),
                    second_conv,
                    nn.ReLU(),
                    nn.AdaptiveAvgPool1d(1),
                )
                self.fc = nn.Linear(hidden, 1)

            def forward(self, x):
                channels_first = x.transpose(1, 2)        # (B, n_feat, window)
                pooled = self.conv(channels_first)        # (B, hidden, 1)
                return self.fc(pooled.squeeze(-1))

        return CNNHead()

    def _make_transformer():
        class TransEncHead(nn.Module):
            def __init__(self):
                super().__init__()
                self.input_proj = nn.Linear(n_features, hidden)
                enc_layer = nn.TransformerEncoderLayer(
                    d_model=hidden, nhead=n_heads,
                    batch_first=True, norm_first=True)
                self.encoder = nn.TransformerEncoder(enc_layer,
                                                     num_layers=n_layers)
                # learned positional embedding, one vector per window slot
                self.pos = nn.Parameter(torch.zeros(window, hidden))
                nn.init.uniform_(self.pos, -0.02, 0.02)
                self.fc = nn.Linear(hidden, 1)

            def forward(self, x):
                tokens = self.input_proj(x) + self.pos
                encoded = self.encoder(tokens)
                return self.fc(encoded[:, -1])            # last token only

        return TransEncHead()

    factories = {
        "mlp": _make_mlp,
        "lstm": _make_lstm,
        "gru": _make_gru,
        "cnn": _make_cnn,
        "transformer": _make_transformer,
    }
    if arch not in factories:
        raise ValueError(f"Unknown architecture: {arch}")
    return factories[arch]()


    ### Calling Function ###
    
import pytorch_lightning as pl
import torch.nn.functional as F

class FlexibleTSRegressor(pl.LightningModule):
    """
    Lightning regressor whose network comes from ``build_backbone`` and whose
    loss is selected by name.

    All constructor arguments are keyword-only.

    Args:
        n_features: Number of input features per time step.
        window: Number of time steps per input window.
        backbone: Architecture name handed to ``build_backbone``.
        hidden: Hidden width handed to ``build_backbone``.
        loss_name: "mse", "mae", "huber", "rmse" or "quantile"
            (case-insensitive).
        lr: Learning rate for AdamW.
        n_layers: Depth handed to ``build_backbone``.
        n_heads: Attention-head count handed to ``build_backbone``.
        **loss_kw: Optional loss settings: ``huber_beta`` (default 1.0) and
            ``quantile`` (default 0.9).

    Raises:
        ValueError: If ``loss_name`` is not one of the supported names.
    """
    _SUPPORTED_LOSSES = ("mse", "mae", "huber", "rmse", "quantile")

    def __init__(self, *,
                 n_features: int,
                 window: int,
                 backbone: str = "mlp",
                 hidden: int = 128,
                 loss_name: str = "mse",
                 lr: float = 1e-3,
                 n_layers: int = 2,
                 n_heads: int = 4,
                 **loss_kw):
        super().__init__()
        self.save_hyperparameters()

        ln = loss_name.lower()
        if ln not in self._SUPPORTED_LOSSES:
            # Fail at construction time rather than at the first training step.
            raise ValueError(
                f"Unknown loss_name: {loss_name!r}; "
                f"expected one of {self._SUPPORTED_LOSSES}")

        # build chosen architecture; n_layers / n_heads are named parameters
        # so they reach the backbone instead of being absorbed by **loss_kw.
        self.backbone = build_backbone(backbone,
                                       n_features, window,
                                       hidden=hidden,
                                       n_layers=n_layers,
                                       n_heads=n_heads)

        # ----- loss selection -------------------------------------------
        if ln == "mse":
            self.criterion = nn.MSELoss()
        elif ln == "mae":
            self.criterion = nn.L1Loss()
        elif ln == "huber":
            beta = loss_kw.get("huber_beta", 1.0)
            self.criterion = nn.SmoothL1Loss(beta=beta)
        else:
            self.criterion = None             # rmse / quantile handled below
        self.loss_name = ln
        self.quantile = loss_kw.get("quantile", 0.9)

    # ----- forward & loss ------------------------------------------------
    def forward(self, x):
        # The backbone returns (B, 1); squeeze to (B,) to match the targets.
        return self.backbone(x).squeeze(-1)

    def _compute_loss(self, y_hat, y):
        """Return the scalar loss selected by ``self.loss_name``."""
        if self.loss_name in ("mse", "mae", "huber"):
            return self.criterion(y_hat, y)
        if self.loss_name == "rmse":
            return F.mse_loss(y_hat, y).sqrt()
        if self.loss_name == "quantile":
            # Pinball loss: with err = y - y_hat, under-prediction (err > 0)
            # costs q * err and over-prediction costs (1 - q) * |err|, so the
            # minimiser is the q-th conditional quantile.  Using y_hat - y
            # here would target the (1 - q)-th quantile instead.
            q = self.quantile
            err = y - y_hat
            return torch.maximum(q * err, (q - 1) * err).mean()
        raise RuntimeError(f"Unsupported loss: {self.loss_name}")

    # ----- training & validation ----------------------------------------
    def training_step(self, batch, _):
        x, y = batch
        loss = self._compute_loss(self(x), y)
        self.log("train_loss", loss, prog_bar=True, on_epoch=True)
        return loss

    def validation_step(self, batch, _):
        x, y = batch
        loss = self._compute_loss(self(x), y)
        self.log("val_loss", loss, prog_bar=True, on_epoch=True)

    def configure_optimizers(self):
        """AdamW plus a StepLR schedule (factor 0.5, step_size=10)."""
        opt = torch.optim.AdamW(self.parameters(),
                                lr=self.hparams.lr)
        sch = torch.optim.lr_scheduler.StepLR(opt, 10, gamma=0.5)
        return [opt], [sch]

# keep your existing RollingWindowDataset and TSDataModule

# Example instantiations, one per backbone.  Every assignment below rebinds
# `model`, so only the last one survives if the cell is run as a whole;
# keep just the variant you want to train.
# `feature_cols` must already exist in the notebook namespace.
n_feat  = len(feature_cols)
window  = 24

# 1) MLP (baseline)
model = FlexibleTSRegressor(n_features=n_feat,
                            window=window,
                            backbone="mlp")

# 2) LSTM
model = FlexibleTSRegressor(n_features=n_feat,
                            window=window,
                            backbone="lstm",
                            hidden=256, n_layers=2)

# 3) GRU
model = FlexibleTSRegressor(n_features=n_feat,
                            window=window,
                            backbone="gru",
                            hidden=256, n_layers=3)

# 4) Temporal CNN
model = FlexibleTSRegressor(n_features=n_feat,
                            window=window,
                            backbone="cnn",
                            hidden=128)

# 5) Transformer encoder trained with the quantile loss (q = 0.95)
model = FlexibleTSRegressor(n_features=n_feat,
                            window=window,
                            backbone="transformer",
                            hidden=128,
                            n_layers=2,
                            n_heads=4,
                            loss_name="quantile",
                            quantile=0.95)

In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# 1. Imports
# ──────────────────────────────────────────────────────────────────────────────
import os, torch, pytorch_lightning as pl
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from pytorch_lightning.loggers import TensorBoardLogger

# ──────────────────────────────────────────────────────────────────────────────
# 2. Rolling-window Dataset  (indexable so Lightning knows its length)
# ──────────────────────────────────────────────────────────────────────────────
class RollingWindowDataset(Dataset):
    """
    Map-style rolling-window dataset.

    Item ``i`` corresponds to step ``t = window + i``: the input is rows
    ``[t - window, t)`` of the feature matrix and the target is
    ``y[t + horizon - 1]``.  Having ``__len__`` lets Lightning show a
    progress total.

    Args:
        df: Frame holding the feature and target columns (time-ordered).
        feature_cols: Column labels used as model inputs.
        target_col: Column label of the regression target.
        window: Number of consecutive rows per input window (>= 1).
        horizon: Distance, in rows, from the end of the window to the
            target (>= 1).

    Raises:
        ValueError: If ``window`` or ``horizon`` is smaller than 1.
    """
    def __init__(self, df: pd.DataFrame,
                 feature_cols, target_col,
                 window: int = 24, horizon: int = 1):
        if window < 1 or horizon < 1:
            raise ValueError("window and horizon must both be >= 1")
        self.X  = df[feature_cols].values.astype("float32")
        self.y  = df[target_col].values.astype("float32")
        self.w  = window
        self.h  = horizon
        # The largest usable t satisfies t + horizon - 1 == len(df) - 1, hence
        # the exclusive bound len - horizon + 1.  A bound of len - horizon
        # would drop the most recent sample.  Too-short frames give an empty
        # range and therefore a dataset of length 0.
        self.indices = range(window, len(df) - horizon + 1)

    def __len__(self):               # Lightning can now draw 0/??? bars
        return len(self.indices)

    def __getitem__(self, idx):
        t = self.indices[idx]
        X_win = self.X[t-self.w : t]              # rows t-window .. t-1
        y_tar = self.y[t + self.h - 1]            # scalar
        return torch.from_numpy(X_win), torch.tensor(y_tar)

# ──────────────────────────────────────────────────────────────────────────────
# 3. DataModule (keeps order, no shuffle!)
# ──────────────────────────────────────────────────────────────────────────────
class TSDataModule(pl.LightningDataModule):
    """
    Chronological train/validation split served through Lightning.

    The leading ``1 - val_size`` fraction of ``df`` is used for training and
    the remaining rows for validation; neither loader shuffles.

    Args:
        df: Source frame (copied on construction).
        feature_cols: Column labels used as model inputs.
        target_col: Column label of the regression target.
        window: Rows per input window, forwarded to RollingWindowDataset.
        horizon: Forecast distance, forwarded to RollingWindowDataset.
        val_size: Fraction of rows (taken from the end) used for validation.
        batch_size: Batch size for both loaders.
        num_workers: DataLoader worker processes.  Defaults to half the CPU
            count, or 0 when the count cannot be determined.
    """
    def __init__(self, df, feature_cols, target_col,
                 window=24, horizon=1,
                 val_size=0.05, batch_size=64,
                 # os.cpu_count() may return None; fall back to 0 workers.
                 num_workers=(os.cpu_count() or 0) // 2):
        super().__init__()
        self.save_hyperparameters(ignore=["df"])
        self.df = df.copy()

    def setup(self, stage=None):
        """Create the train/validation datasets from the chronological split."""
        split = int(len(self.df) * (1 - self.hparams.val_size))
        train_df = self.df.iloc[:split]
        val_df   = self.df.iloc[split:]

        self.train_ds = RollingWindowDataset(train_df,
                                             self.hparams.feature_cols,
                                             self.hparams.target_col,
                                             self.hparams.window,
                                             self.hparams.horizon)
        self.val_ds   = RollingWindowDataset(val_df,
                                             self.hparams.feature_cols,
                                             self.hparams.target_col,
                                             self.hparams.window,
                                             self.hparams.horizon)

    def _make_loader(self, dataset, **extra):
        """Build an unshuffled DataLoader using this module's settings."""
        n_workers = self.hparams.num_workers
        return DataLoader(dataset,
                          batch_size=self.hparams.batch_size,
                          shuffle=False,
                          num_workers=n_workers,
                          # DataLoader rejects persistent_workers=True when
                          # there are no worker processes (e.g. 1-core hosts).
                          persistent_workers=n_workers > 0,
                          **extra)

    def train_dataloader(self):
        return self._make_loader(self.train_ds, drop_last=False)

    def val_dataloader(self):
        return self._make_loader(self.val_ds)

# ──────────────────────────────────────────────────────────────────────────────
# 4. Simple MLP regressor (swap with LSTM/GRU later if you like)
# ──────────────────────────────────────────────────────────────────────────────
class SimpleTSRegressor(pl.LightningModule):
    """
    Baseline regressor: flatten the input window, run it through a two-layer
    MLP and emit one scalar per sample.  Trained with MSE.
    """
    def __init__(self, n_features, window, hidden=128, lr=1e-3):
        super().__init__()
        self.save_hyperparameters()
        flat_dim = window * n_features
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_dim, hidden),
            nn.ReLU(),
            nn.LayerNorm(hidden),
            nn.Linear(hidden, 1),
        )

    def forward(self, x):
        # (B, 1) -> (B,)
        scores = self.net(x)
        return scores.squeeze(-1)

    def _mse_on_batch(self, batch):
        """Mean-squared error of the model's predictions on one batch."""
        inputs, targets = batch
        return F.mse_loss(self(inputs), targets)

    def training_step(self, batch, _):
        train_mse = self._mse_on_batch(batch)
        self.log("train_loss", train_mse, prog_bar=True, on_epoch=True)
        return train_mse

    def validation_step(self, batch, _):
        val_mse = self._mse_on_batch(batch)
        self.log("val_loss", val_mse, prog_bar=True, on_epoch=True)

    def configure_optimizers(self):
        """AdamW plus a StepLR schedule (factor 0.5, step_size=10)."""
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.hparams.lr)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 10, gamma=0.5)
        return [optimizer], [scheduler]

# ──────────────────────────────────────────────────────────────────────────────
# 5. One-shot training + return best checkpoint path
# ──────────────────────────────────────────────────────────────────────────────
def train_and_get_best_ckpt(df, feature_cols, target_col,
                            window=24, horizon=1):
    """
    Fit a SimpleTSRegressor on ``df`` and return the best checkpoint's path.

    Training runs for at most 50 epochs, logs to TensorBoard under
    "lightning_logs"/"seq_ts" and keeps only the checkpoint with the lowest
    ``val_loss``.

    Returns:
        The ``best_model_path`` reported by the ModelCheckpoint callback.
    """
    datamodule = TSDataModule(df, feature_cols, target_col,
                              window=window, horizon=horizon)
    regressor = SimpleTSRegressor(n_features=len(feature_cols), window=window)

    best_ckpt = pl.callbacks.ModelCheckpoint(
        monitor="val_loss", mode="min", save_top_k=1)
    tb_logger = TensorBoardLogger("lightning_logs", name="seq_ts")

    trainer_options = dict(
        max_epochs=50,
        accelerator="auto",
        devices="auto",
        logger=tb_logger,
        callbacks=[best_ckpt, pl.callbacks.RichProgressBar()],
    )
    pl.Trainer(**trainer_options).fit(regressor, datamodule)

    return best_ckpt.best_model_path

# ──────────────────────────────────────────────────────────────────────────────
# 6. ==== RUN TRAINING =========================================================
# df, feature_cols, target_col must already exist in your interpreter
# ──────────────────────────────────────────────────────────────────────────────
ckpt_path = train_and_get_best_ckpt(df, feature_cols, target_col)
print("Best model checkpoint ➜", ckpt_path)

# ──────────────────────────────────────────────────────────────────────────────
# 7. Load best model & make predictions on (for example) the validation slice
# ──────────────────────────────────────────────────────────────────────────────
best_model = SimpleTSRegressor.load_from_checkpoint(
                 ckpt_path,
                 n_features=len(feature_cols),
                 window=24).eval()

# Same tail slice and window/horizon that were used for validation above.
val_ds = RollingWindowDataset(df.iloc[int(len(df)*0.95):],
                              feature_cols, target_col,
                              window=24, horizon=1)
val_dl = DataLoader(val_ds, batch_size=64, shuffle=False)

preds = []
with torch.no_grad():
    for x, _ in val_dl:
        # Training used accelerator="auto", so the restored model may live on
        # an accelerator while DataLoader batches are on the CPU; move the
        # inputs to the model's device before the forward pass.
        x = x.to(best_model.device)
        preds.append(best_model(x).cpu())
preds = torch.cat(preds).numpy()
print("Inference done. 1st five predictions:", preds[:5])

# ──────────────────────────────────────────────────────────────────────────────
# 8. Launch TensorBoard (run this **in a terminal**, not inside Python)
# ──────────────────────────────────────────────────────────────────────────────
#   tensorboard --logdir lightning_logs
# Then open the printed URL in your browser to see train/val loss curves.


In [None]:
tensorboard --logdir lightning_logs

### New NN layers 

In [None]:
# binary_classifier.py

import yaml
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

class BinaryClassifier:
    """
    Small feed-forward binary classifier with date-based train/val/test splits.

    Typical workflow: ``load_data`` → ``train`` → ``evaluate`` / ``predict``
    → ``plot_history``.
    """

    def __init__(self, config: dict):
        """
        config should contain:
          - 'train_end', 'val_end' (YYYY-MM-DD strings)
          - optional 'test_end'
          - 'batch_size', 'epochs', 'learning_rate'
          - 'hidden_size', 'pos_weight' (float)
          - optional 'threshold': probability cut-off used by ``evaluate``
            (default 0.5)
        """
        self.cfg = config
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = None
        self.scaler = StandardScaler()
        self.history = {'train_loss': [], 'val_loss': []}
        self.feature_cols = None  # set by load_data; used by predict

    def load_data(self, df: pd.DataFrame, target_col: str):
        """
        Split ``df`` by date, scale the features and build the DataLoaders.

        Rows with index <= 'train_end' form the training set, rows in
        ('train_end', 'val_end'] the validation set, and later rows (up to
        'test_end' when configured) the test set.  The scaler is fitted on
        the training rows only.

        Raises:
            ValueError: If the index is not a DatetimeIndex or the training
                split is empty.
        """
        # ensure datetime index
        if not isinstance(df.index, pd.DatetimeIndex):
            raise ValueError("DataFrame must have a DatetimeIndex")

        # Split by date using boolean masks.  Unlike label slicing with a
        # "+ 1 day" offset, this neither drops intraday rows that fall
        # between train_end and train_end + 1 day nor needs a sorted index.
        t0 = pd.to_datetime(self.cfg['train_end'])
        v0 = pd.to_datetime(self.cfg['val_end'])
        idx = df.index
        train_mask = idx <= t0
        val_mask = (idx > t0) & (idx <= v0)
        test_mask = idx > v0
        if 'test_end' in self.cfg:
            t1 = pd.to_datetime(self.cfg['test_end'])
            test_mask = test_mask & (idx <= t1)
        df_train, df_val, df_test = df[train_mask], df[val_mask], df[test_mask]

        if len(df_train) == 0:
            raise ValueError("No rows on or before 'train_end'; cannot fit the model")

        # Remember the feature columns so predict() can select them by name.
        self.feature_cols = [c for c in df.columns if c != target_col]

        # separate X/y
        def split(df_):
            X = df_.drop(columns=[target_col]).values.astype(float)
            y = df_[target_col].values.astype(float)
            return X, y

        X_train, y_train = split(df_train)
        X_val,   y_val   = split(df_val)
        X_test,  y_test  = split(df_test)

        # scale features (fit on train only); empty splits are passed through
        # untouched so they become empty loaders instead of a scaler error
        X_train = self.scaler.fit_transform(X_train)
        if len(X_val):
            X_val = self.scaler.transform(X_val)
        if len(X_test):
            X_test = self.scaler.transform(X_test)

        # wrap in DataLoader
        bs = self.cfg.get('batch_size', 64)

        def make_loader(X, y, shuffle):
            tX = torch.tensor(X, dtype=torch.float32)
            ty = torch.tensor(y, dtype=torch.float32).unsqueeze(1)
            ds = TensorDataset(tX, ty)
            return DataLoader(ds, batch_size=bs, shuffle=shuffle)

        self.train_loader = make_loader(X_train, y_train, shuffle=True)
        self.val_loader   = make_loader(X_val,   y_val,   shuffle=False)
        self.test_loader  = make_loader(X_test,  y_test,  shuffle=False)

    def _build_model(self, input_dim: int):
        """Create the network, the weighted BCE criterion and the optimizer."""
        hs = self.cfg.get('hidden_size', 32)
        model = nn.Sequential(
            nn.Linear(input_dim, hs),
            nn.ReLU(),
            nn.Linear(hs, 1),
            nn.Sigmoid()
        ).to(self.device)
        self.model = model

        # weighted BCE
        pw = self.cfg.get('pos_weight', 1.0)
        weight = torch.tensor([pw], dtype=torch.float32, device=self.device)
        self.criterion = nn.BCEWithLogitsLoss(pos_weight=weight)
        self.optimizer = torch.optim.Adam(model.parameters(), lr=self.cfg.get('learning_rate', 1e-3))

    def _logits(self, X):
        """
        Return pre-sigmoid outputs for ``X``.

        ``self.model`` ends with ``nn.Sigmoid`` while the criterion is
        ``BCEWithLogitsLoss`` (which applies its own sigmoid), so every layer
        except the last one is run here.
        """
        return self.model[:-1](X)

    def train(self):
        """
        Train for ``cfg['epochs']`` epochs (default 10) and record the mean
        train/validation loss per epoch in ``self.history``.

        When the validation split is empty, the validation loss is recorded
        as NaN.

        Raises:
            RuntimeError: If ``load_data`` has not been called.
        """
        if getattr(self, 'train_loader', None) is None:
            raise RuntimeError("load_data must be called before train")
        if self.model is None:
            input_dim = next(iter(self.train_loader))[0].shape[1]
            self._build_model(input_dim)

        epochs = self.cfg.get('epochs', 10)
        n_train = len(self.train_loader.dataset)
        n_val = len(self.val_loader.dataset)
        for epoch in range(1, epochs + 1):
            self.model.train()
            total_loss = 0.0
            for Xb, yb in self.train_loader:
                Xb, yb = Xb.to(self.device), yb.to(self.device)
                self.optimizer.zero_grad()
                loss = self.criterion(self._logits(Xb), yb)
                loss.backward()
                self.optimizer.step()
                # weight by batch size so the epoch mean is per-sample
                total_loss += loss.item() * Xb.size(0)
            avg_train = total_loss / n_train

            # validation
            self.model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for Xb, yb in self.val_loader:
                    Xb, yb = Xb.to(self.device), yb.to(self.device)
                    val_loss += self.criterion(self._logits(Xb), yb).item() * Xb.size(0)
            avg_val = val_loss / n_val if n_val else float('nan')

            self.history['train_loss'].append(avg_train)
            self.history['val_loss'].append(avg_val)
            print(f"Epoch {epoch}/{epochs} — train_loss: {avg_train:.4f}, val_loss: {avg_val:.4f}")

    def evaluate(self):
        """
        Score the model on the test split.

        Returns:
            dict with 'loss' (mean weighted BCE), 'accuracy' (at the
            configured threshold), 'preds' (predicted probabilities) and
            'true' (labels).  'loss' and 'accuracy' are NaN when the test
            split is empty.

        Raises:
            RuntimeError: If the model has not been trained yet.
        """
        if self.model is None:
            raise RuntimeError("Model has not been trained yet")
        threshold = self.cfg.get('threshold', 0.5)
        self.model.eval()
        total_loss = 0.0
        correct = 0
        total = 0
        preds_list, true_list = [], []
        with torch.no_grad():
            for Xb, yb in self.test_loader:
                Xb, yb = Xb.to(self.device), yb.to(self.device)
                logits = self._logits(Xb)
                loss = self.criterion(logits, yb)
                total_loss += loss.item() * Xb.size(0)
                probs = torch.sigmoid(logits)
                preds = (probs > threshold).float()
                correct += (preds == yb).sum().item()
                total += Xb.size(0)
                preds_list.extend(probs.cpu().numpy().flatten())
                true_list.extend(yb.cpu().numpy().flatten())

        if total:
            avg_loss = total_loss / total
            accuracy = correct / total
        else:
            avg_loss = accuracy = float('nan')
        print(f"Test loss: {avg_loss:.4f}, Accuracy: {accuracy:.4%}")
        return {'loss': avg_loss, 'accuracy': accuracy, 'preds': preds_list, 'true': true_list}

    def predict(self, df_new: pd.DataFrame) -> pd.Series:
        """
        Return predicted probabilities for ``df_new``, indexed like ``df_new``.

        When the feature columns seen by ``load_data`` are all present in
        ``df_new`` they are selected by name (so extra columns or a different
        column order are harmless); otherwise all columns are used
        positionally.

        Raises:
            RuntimeError: If the model has not been trained yet.
        """
        if self.model is None:
            raise RuntimeError("Model has not been trained yet")
        cols = getattr(self, 'feature_cols', None)
        if cols and all(c in df_new.columns for c in cols):
            raw = df_new[cols].values
        else:
            raw = df_new.values
        X = self.scaler.transform(raw)
        tX = torch.tensor(X, dtype=torch.float32).to(self.device)
        self.model.eval()
        with torch.no_grad():
            probs = torch.sigmoid(self._logits(tX)).cpu().numpy().flatten()
        return pd.Series(probs, index=df_new.index, name='predicted_proba')

    def plot_history(self):
        """Plot the per-epoch train and validation loss curves."""
        plt.figure()
        plt.plot(self.history['train_loss'], label='train loss')
        plt.plot(self.history['val_loss'],   label='val loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.title('Training History')
        plt.show()


# Example usage script
# Example usage: train on a CSV, evaluate on the held-out dates, score a
# second CSV and plot the loss curves.  File names and the target column are
# placeholders to be replaced.
if __name__ == "__main__":
    import pandas as pd

    # 1) Define your config as a Python dict (keys are read by
    #    BinaryClassifier via self.cfg):
    config = {
        'train_end'    : '2020-12-31',
        'val_end'      : '2021-06-30',
        # 'test_end'   : '2021-12-31',  # optional
        'batch_size'   : 128,
        'epochs'       : 20,
        'learning_rate': 1e-3,
        'hidden_size'  : 64,
        'pos_weight'   : 5.0,
    }

    # 2) Load your dataframe (first column is parsed as dates and used as
    #    the index)
    df = pd.read_csv('your_data.csv', parse_dates=[0], index_col=0)

    # 3) Instantiate, load/split, train and evaluate:
    clf = BinaryClassifier(config)
    clf.load_data(df, target_col='your_target_column')
    clf.train()
    test_results = clf.evaluate()
    print(test_results)
    
    # 4) Predict on new unseen data (no target column):
    new_df = pd.read_csv('new_data.csv', parse_dates=[0], index_col=0)
    preds = clf.predict(new_df)
    print(preds.head())

    # 5) Visualize training history:
    clf.plot_history()


In [None]:
# binary_classifier.py

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt


class BinaryClassifier:
    """Config-driven feed-forward binary classifier for date-indexed tabular data.

    Bundles the whole workflow: chronological train/val/test split,
    feature standardisation, ``DataLoader`` creation, MLP construction,
    training, evaluation, prediction and loss-curve plotting.
    """

    def __init__(self, config: dict):
        """
        config keys:
          - 'train_end', 'val_end' (YYYY-MM-DD strings, each day inclusive)
          - optional 'test_end' (YYYY-MM-DD, inclusive; open-ended if absent)
          - 'batch_size', 'epochs', 'learning_rate'
          - 'hidden_sizes': list of ints for each hidden layer
            (a single int under 'hidden_size' is accepted as a fallback)
          - 'dropout': float probability
          - 'activation': 'relu' or 'tanh' (anything else falls back to ReLU)
          - 'pos_weight': float
          - 'optimizer': 'adam' or 'sgd' (anything else falls back to Adam)
          - 'weight_decay': float
          - optional 'momentum' (for SGD)
          - 'threshold': float in [0,1] for classification
        """
        self.config = config
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.scaler = StandardScaler()
        self.feature_cols = []
        self.history = {'train_loss': [], 'val_loss': []}
        self.threshold = config.get('threshold', 0.5)
        # Declared up front so "not loaded yet" / "not trained yet" are plain
        # None checks instead of hasattr() probes.
        self.model = None
        self.train_loader = None
        self.val_loader = None
        self.test_loader = None

    # ------------------------------------------------------------------ data
    @staticmethod
    def _day_after(date_like, tz=None):
        """Return midnight of the day following *date_like* (an exclusive bound)."""
        ts = pd.to_datetime(date_like)
        bound = ts.normalize() + pd.Timedelta(days=1)
        # A naive boundary cannot be compared with a tz-aware index; interpret
        # it as wall-clock time in the index's own timezone.
        if tz is not None and bound.tzinfo is None:
            bound = bound.tz_localize(tz)
        return bound

    def _split_by_date(self, df: pd.DataFrame):
        """Split *df* chronologically into (train, val, test) frames.

        Boundaries are whole days: 'train_end' / 'val_end' / 'test_end' are
        inclusive, so every row stamped on that calendar day belongs to the
        split that ends there.  Half-open masks are used instead of
        ``df.loc[a:b]`` with Timestamp labels, because an exact-label slice
        ending at midnight drops intraday rows on the boundary day from
        every split.
        """
        idx = df.index
        tz = idx.tz
        train_stop = self._day_after(self.config['train_end'], tz)
        val_stop = self._day_after(self.config['val_end'], tz)

        train_mask = idx < train_stop
        val_mask = (idx >= train_stop) & (idx < val_stop)
        test_mask = idx >= val_stop
        if 'test_end' in self.config:
            test_mask = test_mask & (idx < self._day_after(self.config['test_end'], tz))

        return df.loc[train_mask], df.loc[val_mask], df.loc[test_mask]

    def _to_arrays(self, frame: pd.DataFrame):
        """Return (X, y) float arrays for *frame*; y has shape (n_rows, 1)."""
        X = frame[self.feature_cols].values.astype(float)
        y = frame[self.target_col].values.astype(float).reshape(-1, 1)
        return X, y

    def _scale(self, X):
        """Apply the fitted scaler, passing empty arrays through untouched."""
        # StandardScaler.transform rejects arrays with zero samples.
        return self.scaler.transform(X) if len(X) else X

    def _make_loader(self, X, y, shuffle: bool) -> DataLoader:
        """Wrap numpy arrays in a float32 TensorDataset / DataLoader."""
        dataset = TensorDataset(
            torch.tensor(X, dtype=torch.float32),
            torch.tensor(y, dtype=torch.float32),
        )
        return DataLoader(dataset,
                          batch_size=self.config.get('batch_size', 64),
                          shuffle=shuffle)

    def load_data(self, df: pd.DataFrame, target_col: str):
        """Split, scale and wrap *df* into train/val/test DataLoaders.

        Rows containing NaN are dropped.  Every column other than
        *target_col* is used as a feature.  The scaler is fitted on the
        training split only and then applied to the other splits.

        Raises:
            ValueError: if the index is not a DatetimeIndex, or if no rows
                fall into the training split.
        """
        if not isinstance(df.index, pd.DatetimeIndex):
            raise ValueError("DataFrame index must be a pd.DatetimeIndex for splitting.")
        df = df.sort_index().dropna()

        df_train, df_val, df_test = self._split_by_date(df)
        if df_train.empty:
            raise ValueError(
                "Training split is empty; check 'train_end' against the data range."
            )

        # save columns
        self.target_col = target_col
        self.feature_cols = [c for c in df.columns if c != target_col]

        X_train, y_train = self._to_arrays(df_train)
        X_val, y_val = self._to_arrays(df_val)
        X_test, y_test = self._to_arrays(df_test)

        # Fit on train only so no statistics leak from later periods.
        X_train = self.scaler.fit_transform(X_train)
        X_val = self._scale(X_val)
        X_test = self._scale(X_test)

        self.train_loader = self._make_loader(X_train, y_train, shuffle=True)
        self.val_loader = self._make_loader(X_val, y_val, shuffle=False)
        self.test_loader = self._make_loader(X_test, y_test, shuffle=False)

    # ----------------------------------------------------------------- model
    def _build_model(self, input_dim: int):
        """Create the MLP, the loss and the optimizer from ``self.config``."""
        hs_list = self.config.get('hidden_sizes', [self.config.get('hidden_size', 32)])
        dropout_p = self.config.get('dropout', 0.5)
        act = self.config.get('activation', 'relu').lower()

        layers = []
        in_dim = input_dim
        for hs in hs_list:
            layers.append(nn.Linear(in_dim, hs))
            layers.append(nn.Tanh() if act == 'tanh' else nn.ReLU())
            layers.append(nn.Dropout(dropout_p))
            in_dim = hs
        layers.append(nn.Linear(in_dim, 1))  # output logits

        self.model = nn.Sequential(*layers).to(self.device)

        # Explicit float dtype: an integer 'pos_weight' in the config would
        # otherwise produce an int64 tensor.
        pw = torch.tensor([self.config.get('pos_weight', 1.0)],
                          dtype=torch.float32, device=self.device)
        self.criterion = nn.BCEWithLogitsLoss(pos_weight=pw)

        opt_name = self.config.get('optimizer', 'adam').lower()
        lr = self.config.get('learning_rate', 1e-3)
        wd = self.config.get('weight_decay', 0.0)
        if opt_name == 'sgd':
            momentum = self.config.get('momentum', 0.9)
            self.optimizer = torch.optim.SGD(self.model.parameters(), lr=lr,
                                             momentum=momentum, weight_decay=wd)
        else:
            self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr,
                                              weight_decay=wd)

    def _epoch_loss(self, loader, optimize: bool) -> float:
        """Run one pass over *loader* and return the sample-weighted mean loss.

        With ``optimize=True`` the model is put in train mode and updated
        after every batch; otherwise it is put in eval mode and gradients
        are disabled.  Returns NaN when the loader's dataset is empty.
        """
        self.model.train(optimize)
        n_samples = len(loader.dataset)
        if n_samples == 0:
            return float('nan')

        running = 0.0
        with torch.set_grad_enabled(optimize):
            for Xb, yb in loader:
                Xb, yb = Xb.to(self.device), yb.to(self.device)
                if optimize:
                    self.optimizer.zero_grad()
                loss = self.criterion(self.model(Xb), yb)
                if optimize:
                    loss.backward()
                    self.optimizer.step()
                # Weight by batch size so a short final batch is not over-counted.
                running += loss.item() * Xb.size(0)
        return running / n_samples

    def train(self):
        """Train for ``config['epochs']`` epochs, recording losses in ``self.history``.

        The model is built on the first call; later calls continue training
        the existing model and keep appending to the history.

        Raises:
            RuntimeError: if ``load_data()`` has not been called.
        """
        if getattr(self, 'train_loader', None) is None:
            raise RuntimeError("No data loaded. Call load_data() first.")
        if getattr(self, 'model', None) is None:
            # Peek at one batch to learn the feature dimension.
            inp_dim = next(iter(self.train_loader))[0].shape[1]
            self._build_model(inp_dim)

        epochs = self.config.get('epochs', 10)
        for epoch in range(1, epochs + 1):
            train_loss = self._epoch_loss(self.train_loader, optimize=True)
            val_loss = self._epoch_loss(self.val_loader, optimize=False)

            self.history['train_loss'].append(train_loss)
            self.history['val_loss'].append(val_loss)
            print(f"Epoch {epoch}/{epochs} - train_loss: {train_loss:.4f}, val_loss: {val_loss:.4f}")

    def evaluate(self) -> dict:
        """Score the model on the test split.

        Returns:
            dict with 'loss' (mean loss), 'accuracy' (at ``self.threshold``),
            'predictions' (predicted probabilities) and 'true' (labels).
            'loss' and 'accuracy' are NaN when the test split is empty.

        Raises:
            RuntimeError: if the model has not been trained or no data is loaded.
        """
        if getattr(self, 'model', None) is None:
            raise RuntimeError("Model is not trained yet.")
        if getattr(self, 'test_loader', None) is None:
            raise RuntimeError("No data loaded. Call load_data() first.")

        self.model.eval()
        total_loss, total_correct, total = 0.0, 0, 0
        all_preds, all_true = [], []
        with torch.no_grad():
            for Xb, yb in self.test_loader:
                Xb, yb = Xb.to(self.device), yb.to(self.device)
                logits = self.model(Xb)
                total_loss += self.criterion(logits, yb).item() * Xb.size(0)
                probs = torch.sigmoid(logits)
                preds = (probs >= self.threshold).float()
                total_correct += (preds == yb).sum().item()
                total += Xb.size(0)
                all_preds.extend(probs.cpu().numpy().flatten())
                all_true.extend(yb.cpu().numpy().flatten())

        # An open-ended test split can legitimately contain no rows.
        avg_loss = total_loss / total if total else float('nan')
        accuracy = total_correct / total if total else float('nan')
        print(f"Test_loss: {avg_loss:.4f}, Accuracy: {accuracy:.4%}")
        return {
            'loss': avg_loss,
            'accuracy': accuracy,
            'predictions': np.array(all_preds),
            'true': np.array(all_true)
        }

    def predict(self, df_new: pd.DataFrame) -> pd.Series:
        """Return predicted positive-class probabilities for *df_new*.

        *df_new* must contain every column in ``self.feature_cols``; the
        result is a Series named 'prediction' aligned to *df_new*'s index
        (converted to datetime if it is not one already).

        Raises:
            RuntimeError: if the model has not been trained.
        """
        if getattr(self, 'model', None) is None:
            raise RuntimeError("Model is not trained yet.")
        if not isinstance(df_new.index, pd.DatetimeIndex):
            df_new = df_new.copy()  # do not mutate the caller's frame
            df_new.index = pd.to_datetime(df_new.index)
        X = df_new[self.feature_cols].values.astype(float)
        if len(X) == 0:
            # Nothing to score; the scaler would reject an empty array.
            return pd.Series([], index=df_new.index, dtype='float32', name='prediction')
        X = self.scaler.transform(X)
        tX = torch.tensor(X, dtype=torch.float32).to(self.device)
        self.model.eval()
        with torch.no_grad():
            probs = torch.sigmoid(self.model(tX)).cpu().numpy().flatten()
        return pd.Series(probs, index=df_new.index, name='prediction')

    def plot_history(self):
        """Plot the train and validation loss recorded by ``train()``.

        Raises:
            RuntimeError: if no training history has been recorded.
        """
        if not self.history['train_loss']:
            raise RuntimeError("No training history. Call train() first.")
        plt.figure()
        plt.plot(self.history['train_loss'], label='train_loss')
        plt.plot(self.history['val_loss'],   label='val_loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.title('Training vs Validation Loss')
        plt.show()


# Example usage
if __name__ == "__main__":
    # Split boundaries, optimisation settings and network shape in one dict.
    config = dict(
        train_end='2020-12-31',
        val_end='2021-06-30',
        # test_end='2021-12-31',        # optional
        batch_size=128,
        epochs=20,
        learning_rate=1e-3,
        weight_decay=1e-4,
        optimizer='adam',
        hidden_sizes=[64, 32, 16],
        dropout=0.5,
        activation='relu',
        pos_weight=5.0,
        threshold=0.5,
    )

    # Training data: the first CSV column is parsed as dates and used as the index.
    df = pd.read_csv('your_data.csv', index_col=0, parse_dates=[0])

    # Full workflow: construct -> load/split -> train -> evaluate.
    clf = BinaryClassifier(config)
    clf.load_data(df, target_col='target')
    clf.train()
    results = clf.evaluate()
    print(results)

    # Score a second file of new rows.
    new_df = pd.read_csv('new_data.csv', index_col=0, parse_dates=[0])
    preds = clf.predict(new_df)
    print(preds.head())

    # Show the train/validation loss curves.
    clf.plot_history()
