In [2]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Setup
PROJECT_ROOT = Path.cwd().parent if 'notebooks' in str(Path.cwd()) else Path.cwd()
sys.path.insert(0, str(PROJECT_ROOT))

# Test in notebook cell
from src.methods.all_methods_runner import run_dataset
from src.utils.config_reader import load_config

# Load config to get defaults
config = load_config()

# Common parameters
common_params = {
    'test_size': config['split']['test_size'],
    'val_size': config['split']['val_size'],
    'cv_splits': 1,
    'seed': config['split']['seed'],
    'row_limit': 100,
    'max_epoch': config['training']['max_epochs'],
    'batch_size': config['training']['batch_size'],
    'tune': True,
    'n_trials': 5,
    'early_stopping': True,
    'early_stopping_patience': 5,
    'verbose': False,
}

def print_results_summary(results, task_name):
    """Print results excluding large arrays"""
    print(f"\n{'='*80}")
    print(f"Results for {task_name}")
    print(f"{'='*80}\n")
    
    for method, method_results in results.items():
        print(f"\nMethod: {method}")
        print(f"{'-'*60}")
        
        for fold_id, fold_results in method_results.items():
            print(f"  Fold {fold_id}:")
            
            for key, value in fold_results.items():
                if key not in ['y_true', 'y_pred', 'y_prob', 'info']:
                    print(f"    {key}: {value}")
            
            if 'info' in fold_results:
                info = fold_results['info']
                print(f"    info: {{n_samples: {info.get('n_samples')}, "
                      f"n_num_features: {info.get('n_num_features')}, "
                      f"n_cat_features: {info.get('n_cat_features')}}}")

# Test 1: LGD
print("TEST 1: Running 0001.heloc (LGD)")
results_lgd = run_dataset(task='lgd', dataset='0001.heloc', **common_params)
print_results_summary(results_lgd, "0001.heloc (LGD)")

# Test 2: PD
print("\n\nTEST 2: Running 0001.gmsc (PD)")
results_pd = run_dataset(task='pd', dataset='0001.gmsc', **common_params)
print_results_summary(results_pd, "0001.gmsc (PD)")

# Summary
print(f"\n\n{'='*80}")
print("SUMMARY")
print(f"{'='*80}")
print(f"LGD methods: {list(results_lgd.keys())}")
print(f"PD methods: {list(results_pd.keys())}")

TEST 1: Running 0001.heloc (LGD)

Results for 0001.heloc (LGD)


Method: catboost
------------------------------------------------------------
  Fold 1:
    metrics: (0.022836718222129064, 0.9948062708005561, 0.03028741770112027)
    metric_names: ['MAE', 'R2', 'RMSE']
    primary_metric: MAE
    val_loss: None
    train_time: 0.3539266586303711
    method: catboost
    dataset: 0001.heloc
    task: lgd
    fold_id: 1
    used_hpo: True
    info: {n_samples: None, n_num_features: 9, n_cat_features: 0}

Method: knn
------------------------------------------------------------
  Fold 1:
    metrics: (0.15435069252248676, 0.6511398645264058, 0.2482265714127561)
    metric_names: ['MAE', 'R2', 'RMSE']
    primary_metric: MAE
    val_loss: None
    train_time: 0.0
    method: knn
    dataset: 0001.heloc
    task: lgd
    fold_id: 1
    used_hpo: True
    info: {n_samples: None, n_num_features: 9, n_cat_features: 0}

Method: lightgbm
-----------------------------------------------------------

In [3]:
import sys
from pathlib import Path


# Setup
PROJECT_ROOT = Path.cwd().parent if 'notebooks' in str(Path.cwd()) else Path.cwd()
sys.path.insert(0, str(PROJECT_ROOT))

from src.methods.HPO_runner import run_hpo_comparison
from src.utils.config_reader import load_config

config = load_config()

results = run_hpo_comparison(
    task='pd',
    dataset='0001.gmsc',
    test_size=config['split']['test_size'],
    val_size=config['split']['val_size'],
    cv_splits=2,
    seed=config['split']['seed'],
    row_limit=100,
    max_epoch=config['training']['max_epochs'],
    batch_size=config['training']['batch_size'],
    n_trials=5,
    early_stopping=config['training']['early_stopping'],
    early_stopping_patience=config['training']['early_stopping_patience'],
    verbose=False,
)
print(results)
# Access results
no_hpo_xgb_fold1 = results['NO_HPO']['xgboost'][1]
hpo_xgb_fold1 = results['HPO']['xgboost'][1]

# For methods that don't support HPO (like tabpfn), both are the same
no_hpo_tabpfn = results['NO_HPO']['tabpfn']
hpo_tabpfn = results['HPO']['tabpfn']  # Same as above

{'NO_HPO': {'catboost': {1: {'y_true': array([1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0], dtype=int64), 'y_pred': array([[0.55374604, 0.44625396],
       [0.84995201, 0.15004799],
       [0.82491861, 0.17508139],
       [0.8636893 , 0.1363107 ],
       [0.81918302, 0.18081698],
       [0.9060905 , 0.0939095 ],
       [0.88002336, 0.11997664],
       [0.6877918 , 0.3122082 ],
       [0.87281384, 0.12718616],
       [0.89899078, 0.10100922],
       [0.91883942, 0.08116058],
       [0.88279753, 0.11720247],
       [0.89117474, 0.10882526],
       [0.75782338, 0.24217662],
       [0.81297495, 0.18702505],
       [0.89765115, 0.10234885],
       [0.87276778, 0.12723222],
       [0.88899788, 0.11100212],
       [0.93781631, 0.06218369],
       [0.90488025, 0.09511975],
       [0.74596696, 0.25403304],
       [0.8838217 , 0.1161783 ],
       [0.88651395, 0.11348605],
      