# Multitask NN experiment

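Trains the custom tabular multitask neural network with 5-fold cross-validation, reports the cross-validated AUC for each target (`h1n1_vaccine`, `seasonal_vaccine`) together with their mean, and saves the out-of-fold predictions to `runs/nn/oof_nn.csv`.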

In [1]:
from pathlib import Path
import sys

import pandas as pd

ROOT = Path("..").resolve()
sys.path.insert(0, str(ROOT))

from src.data import load_training
from src.models.nn import NNConfig, train_cv_multitask_nn

# Input and output directories, both anchored at the repository root.
DATA_DIR, RUNS_DIR = ROOT / "data", ROOT / "runs"

# Training set as returned by the project loader
# (presumably a feature table and the target columns -- confirm in src.data).
X, y = load_training(DATA_DIR)

# Experiment configuration, grouped by concern.
cfg = NNConfig(
    # cross-validation / reproducibility
    seed=42,
    n_splits=5,
    # network architecture
    hidden_sizes=[128, 64],
    dropout=0.2,
    # optimisation
    lr=0.001,
    weight_decay=0.0001,
    batch_size=256,
    # training schedule
    epochs=30,
    early_stopping_rounds=5,
)

# Run the cross-validated training; `oof` is kept for saving in the next cell
# and `scores` is echoed as the cell output.
oof, scores = train_cv_multitask_nn(X, y, cfg)
scores

{'h1n1_vaccine': 0.8322424740644143,
 'seasonal_vaccine': 0.8516934161901979,
 'mean_auc': 0.8419679451273061}

In [2]:
# Write the `oof` frame to runs/nn/oof_nn.csv, creating the folder
# (and any missing parents) first; re-running the cell is safe because an
# existing directory is not treated as an error.
out_dir = RUNS_DIR.joinpath("nn")
out_dir.mkdir(exist_ok=True, parents=True)
oof.to_csv(
    out_dir.joinpath("oof_nn.csv"),
    index_label="respondent_id",  # header used for the index column
)
# Echo the CV scores again so they appear next to the saved artefact.
scores

{'h1n1_vaccine': 0.8322424740644143,
 'seasonal_vaccine': 0.8516934161901979,
 'mean_auc': 0.8419679451273061}