In [1]:
import sys
from pathlib import Path

ROOT = Path.cwd().parent
sys.path.append(str(ROOT))

import mlflow
import yaml
from torch.utils.data import DataLoader

from src.data.pipeline import IngestionPipeline
from src.datasets.dual_input import DualInputSequenceDataset
from src.models.gru import GRUModel, EnsembleGRU
from src.utils.utils import collate_with_macro, TrainConfig

In [2]:
with open("../config/model_config.yml") as stream:
        config=yaml.safe_load(stream)
        config=TrainConfig(**config)

macro_paths = ["../" + path for path in config.macro_data]

pipeline = IngestionPipeline(
    company_path = "../" + config.firm_data,
    macro_paths = macro_paths,
    company_col = config.company_col,
    bankruptcy_col = config.bankruptcy_col
)

pipeline.run()
X, M, y = pipeline.get_tensors()

dataset = DualInputSequenceDataset(firm_tensor=X, macro_tensor=M, labels=y)

INFO:src.data.loaders:Reading file: ../data/demo_data.xlsx
INFO:src.data.loaders:Dropping high-revenue outliers...
INFO:src.data.loaders:Loading 3 macroeconomic series...
  df["Date"]=pd.to_datetime(df["Date"], errors="coerce")
  df["Date"]=pd.to_datetime(df["Date"], errors="coerce")
  df["Date"]=pd.to_datetime(df["Date"], errors="coerce")
INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /var/folders/h1/hrjhnsw55w3fh7wq8fc7_bcm0000gn/T/tmpjakkf4gv/ywpx4dfs.json
DEBUG:cmdstanpy:input tempfile: /var/folders/h1/hrjhnsw55w3fh7wq8fc7_bcm0000gn/T/tmpjakkf4gv/5mi40v0c.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/Users/guillaumedecina-halmi/miniforge3/lib/python3.12/site-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=56512', 'data

In [10]:
dataset.firm_tensor.dim()

3

In [3]:
device="mps"
dataset.to_device(device)

Data sent to device: mps


In [4]:
dataset.input_dims()

(torch.Size([6296, 3, 4]), torch.Size([3, 36]))

AttributeError: 'EnsembleGRU' object has no attribute 'weights'

In [5]:
import mlflow
import mlflow.pytorch
import torch
from src.models.gru import GRUModel

mlflow.set_tracking_uri("http://127.0.0.1:8080")
model_uri = "runs:/ab1759a5e70b495ab95660a139a635aa/model_2025-07-23 22:11:49.460279"

model = mlflow.pytorch.load_model(model_uri)

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

In [13]:
M.T.shape

torch.Size([36, 3])

In [19]:
X[0].unsqueeze(0).shape

torch.Size([1, 3, 4])

In [62]:
model2 = EnsembleGRU(models=model.models, hidden_sizes=[32, 16],
                    threshold=model.threshold, dropout=0.2)

In [71]:
model2.load_state_dict(state_dict)

<All keys matched successfully>

In [72]:
model2

EnsembleGRU(
  (mlp): Sequential(
    (0): Linear(in_features=5, out_features=32, bias=True)
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU(approximate='none')
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=32, out_features=16, bias=True)
    (5): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): GELU(approximate='none')
    (7): Dropout(p=0.2, inplace=False)
    (8): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [70]:
state_dict = dict(torch.load("../models/model_2025-07-24 09:03:44.021601.pth"))

In [60]:
model

EnsembleGRU(
  (mlp): Sequential(
    (0): Linear(in_features=5, out_features=32, bias=True)
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU(approximate='none')
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=32, out_features=16, bias=True)
    (5): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): GELU(approximate='none')
    (7): Dropout(p=0.2, inplace=False)
    (8): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [34]:
model

EnsembleGRU(
  (mlp): Sequential(
    (0): Linear(in_features=5, out_features=32, bias=True)
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU(approximate='none')
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=32, out_features=16, bias=True)
    (5): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): GELU(approximate='none')
    (7): Dropout(p=0.2, inplace=False)
    (8): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [53]:
model.state_dict().items()

odict_items([('mlp.0.weight', tensor([[ 0.2907, -0.1019, -0.2359,  0.0024,  0.4878],
        [ 0.7834,  0.1607, -0.1966, -0.2057, -0.3746],
        [-0.2993,  0.4070,  0.2314, -0.0563, -0.0835],
        [-0.4575, -0.2892,  0.6885, -0.1054, -0.3468],
        [-0.7886,  0.5006, -0.0492,  0.0852, -0.1190],
        [ 0.2763,  0.2465, -0.5119,  0.1890, -0.5665],
        [ 0.0242, -0.0562, -0.0910,  0.7594, -0.1328],
        [-0.0890,  0.2369,  0.1490, -0.0857, -0.7911],
        [-0.5069,  0.0232, -0.3157,  0.4576,  0.1612],
        [ 0.3682,  0.4130, -0.4107, -0.2102, -0.5238],
        [-0.0534,  0.0021,  0.1241,  0.8764,  0.0791],
        [-0.2333,  0.1001, -0.3299,  0.5708,  0.1567],
        [ 0.2389, -0.2877, -0.3486,  0.5910,  0.4528],
        [ 0.0195, -0.3562,  0.1867,  0.4644,  0.3978],
        [-0.2698, -0.0608,  0.0355,  0.8782,  0.1496],
        [ 0.4117, -0.2977,  0.0456, -0.5023,  0.3168],
        [ 0.3175,  0.3767,  0.4090, -0.3336,  0.2096],
        [ 0.5913, -0.2702, -0.2036,

In [38]:
print(X[4].unsqueeze(0))

tensor([[[  4.7836,  -6.9714,  -8.3270,  -4.3388],
         [  5.0638, -15.1471, -11.9996,  -9.9014],
         [  3.4292, -12.9364, -14.3504,  -9.3069]]], device='mps:0')


In [39]:
print(M.unsqueeze(0))

tensor([[[-73.0000, -75.0000, -75.0000, -74.0000, -68.0000, -61.0000, -57.0000,
          -59.0000, -52.0000, -59.0000, -58.0000, -59.0000, -60.0000, -65.0000,
          -59.0000, -68.0000, -67.0000, -72.0000, -78.0000, -74.0000, -77.0000,
          -80.0000, -81.0000, -84.0000, -81.0000, -83.0000, -84.0000, -84.0000,
          -83.0000, -77.0000, -79.0000, -76.0000, -80.0000, -82.0000, -81.0000,
          -81.0000],
         [-27.0000, -27.0000, -24.0000, -20.0000, -12.0000, -14.0000, -13.0000,
          -11.0000, -10.0000,   8.0000,  -7.0000,  -9.0000,  -5.0000, -12.0000,
           41.0000,  23.0000,  12.0000,   2.0000,  -1.0000, -11.0000,  -2.0000,
           -1.0000,  -6.0000,  -3.0000, -15.0000, -13.0000,  -8.0000, -27.0000,
          -44.0000, -56.0000, -63.0000, -49.0000, -47.0000, -45.0000, -49.0000,
          -48.0000],
         [107.5600, 107.0900, 107.5700, 107.7400, 106.6200, 107.4800, 107.6600,
          107.8000, 108.7700, 109.0300, 109.2100, 109.3800, 109.7800, 109.9900

In [78]:
X.shape

torch.Size([6296, 3, 4])

In [77]:
M.shape

torch.Size([3, 36])

In [75]:
model2 = model2.to(device)

In [81]:
X = X.to(device)
M = M.to(device)

model2.predict_one(X[4].unsqueeze(0), M.T.unsqueeze(0))

ValueError: Expected more than 1 value per channel when training, got input size torch.Size([1, 32])

In [82]:
mlflow.set_tracking_uri("http://127.0.0.1:8080")
device="mps"

loader = DataLoader(dataset, batch_size = 1, shuffle = True, collate_fn = collate_with_macro)

preds = []

In [None]:
model_uri = "runs:/ed24e67a8e5b439caf7825229ed3cbe0/model_2025-07-24 09:03:35.937812"
model = mlflow.pytorch.load_model(model_uri=model_uri)

device="mps"

Downloading artifacts:   0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
from mlflow.client import MlflowClient

def get_best_models(n_models: int, metric: str = "val_matthews"):
    mlflow.set_tracking_uri("http://127.0.0.1:8080")
    client = MlflowClient()
    
    raw_list = mlflow.search_runs(experiment_names=["bankruptcy-predictions"])
    runs = raw_list.sort_values(by = "metrics." + metric, ascending = False)
    runs.reset_index(inplace=True)
    top_runs = runs[:n_models]["run_id"]
    print(top_runs)
    model_paths = []
    
    for run in top_runs:
        artifacts = client.list_artifacts(run)
        
        for artifact in artifacts:
            if artifact.is_dir and artifact.path.startswith("GRUModel_"):
                model_name = artifact.path
                model_uri = f"runs:/{run}/{model_name}"
                model_paths.append(model_uri)

    print(model_paths)
    models = []

    for model_uri in model_paths:
        model = mlflow.pytorch.load_model(model_uri=model_uri)
        models.append(model)
        
    return models

In [None]:
def get_best_models(n_models: int, metric: str = "val_matthews"):
    mlflow.set_tracking_uri("http://127.0.0.1:8080")
    client = MlflowClient()
    
    raw_list = mlflow.search_runs(experiment_names=["bankruptcy-predictions"])
    runs = raw_list.sort_values(by = "metrics." + metric, ascending = False)
    runs.reset_index(inplace=True)
    run_ids = runs["run_id"]
    
    models = []
    current_pool = 0
    
    for run in run_ids:
        try:
            artifacts = client.list_artifacts(run)
            for artifact in artifacts:
                if artifact.is_dir and artifact.path.startswith("GRUModel_"):
                    model_name = artifact.path
                    model_uri = f"runs:/{run}/{model_name}"
                    print(f"Loading model: {model_uri}")
                    model = mlflow.pytorch.load_model(model_uri=model_uri)
                    models.append(model)
                    
                    current_pool += 1
                    if current_pool == n_models:
                        print(f"Retrieved {len(models)} models.")
                        return models
                    
        except Exception as e:
            print(f"Skipping run {run_id} due to error: {e}")
    
    print(f"Retrieved {len(models)} models.")
    return models

In [None]:
model.models

[GRUModel(
   (firm_gru): GRU(4, 32, num_layers=2, batch_first=True)
   (macro_gru): GRU(3, 32, num_layers=2, batch_first=True)
   (fc): Sequential(
     (0): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
     (1): Dropout(p=0, inplace=False)
     (2): Linear(in_features=32, out_features=32, bias=True)
     (3): ReLU()
     (4): Linear(in_features=32, out_features=1, bias=True)
   )
   (firm_bn): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
   (macro_bn): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
 ),
 GRUModel(
   (firm_gru): GRU(4, 32, num_layers=2, batch_first=True)
   (macro_gru): GRU(3, 32, num_layers=2, batch_first=True)
   (fc): Sequential(
     (0): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
     (1): Dropout(p=0, inplace=False)
     (2): Linear(in_features=32, out_features=32, bias=True)
     (3): ReLU()
     (4): Linear(in_features=32, out_features=1, bias=True)
   )
   (firm_bn): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
   (macr

In [None]:
from torch.nn import BCEWithLogitsLoss
from torchmetrics.classification import BinaryF1Score, BinaryMatthewsCorrCoef
from train.gru import evaluate_one_epoch

loss_fn = BCEWithLogitsLoss()

evaluate_one_epoch(
    model, loader, loss_fn, device="mps", metrics={
        "F1": BinaryF1Score().to(device), 
        "MCC": BinaryMatthewsCorrCoef().to(device)}
)

{'F1': 0.0, 'MCC': 0.04332416132092476, 'loss': nan}

In [None]:
import torch.nn as nn

class DynamicNorm(nn.Module):
    """
    A safe replacement for LayerNorm that dynamically uses BatchNorm1d
    with the correct feature size on the first forward call.
    """
    def __init__(self):
        super().__init__()
        self.bn = None

    def forward(self, x):
        if self.bn is None:
            feature_dim = x.size(1)
            self.bn = nn.BatchNorm1d(feature_dim).to(x.device)
        return self.bn(x)

def replace_ln_with_dn(module: nn.Module) -> None:
    for name, child in module.named_children():
        if isinstance(child, nn.LayerNorm):
            setattr(module, name, DynamicNorm())
        else:
            replace_ln_with_dn(child)
            
for submodel in model.models:
    replace_ln_with_dn(submodel.fc)

replace_ln_with_dn(model)