In [8]:
import pickle
import time
import datetime

import pandas as pd
import numpy as np
import optuna

import torch
from ray import tune

from nam.data import NAMDataset, FoldedDataset, load_sklearn_housing_data, load_breast_data
from nam.config import defaults
from nam.models import NAM
from nam.models import get_num_units
from nam.trainer import LitNAM, Trainer
from nam.types import Config
from nam.utils import parse_args
from nam.utils import plot_mean_feature_importance
from nam.utils import plot_nams
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint

In [2]:
# Load the pre-cleaned dataset: a pickled dict holding the target under 'y'
# and the feature table under 'X'.
# NOTE(review): pickle.load can execute arbitrary code -- only use this with
# a trusted, locally produced file.
with open('../data/clean_data.pickle', 'rb') as pickle_file:
    data_dict = pickle.load(pickle_file)

y, X = data_dict['y'], data_dict['X']

In [None]:
# Number of entries in X (presumably the number of rows of the feature table).
len(X)

In [None]:
# Histogram of the target values, to eyeball their distribution.
y.hist()

In [3]:
# Start from the nam library's default configuration; the bare `config`
# expression displays every setting so later overrides are easy to compare.
config = defaults()
config

namespace(device='cpu',
          seed=2021,
          data_path='data/GALLUP.csv',
          experiment_name='NAM',
          regression=False,
          num_epochs=1,
          lr=0.0003,
          batch_size=1024,
          logdir='output',
          wandb=True,
          hidden_sizes=[64, 32],
          activation='exu',
          optimizer='adam',
          dropout=0.5,
          feature_dropout=0.5,
          decay_rate=0.995,
          l2_regularization=0.5,
          output_regularization=0.5,
          num_basis_functions=1000,
          units_multiplier=2,
          shuffle=True,
          cross_val=False,
          num_folds=5,
          num_splits=3,
          fold_num=1,
          num_models=1,
          num_workers=16,
          save_model_frequency=2,
          save_top_k=3,
          use_dnn=False,
          early_stopping_patience=50)

In [4]:
# Feature columns are every column of X except the target. Dropping
# 'Distress' explicitly keeps this cell idempotent: once the target has been
# attached below, a re-run would otherwise treat it as a feature (leakage).
feature_cols = X.columns.drop('Distress', errors='ignore')
# The dataset classes read features and target from one table, so the target
# is attached to X as a float column.
X['Distress'] = y.astype(float)

config.logdir = 'NAM_testing'
config.wandb = False
# Cross-validation is switched off here (only for the test dataset);
# FoldedDataset is the class to use when cross-validation is wanted.
config.cross_val = False
config.num_epochs = 50

In [11]:
# Build a timestamped log directory name so separate runs do not share a
# directory; the bare expression displays the chosen name.
run_timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
logdir = f"NAM_run_{run_timestamp}"
logdir

'NAM_run_2023-09-07_00-05-19'

In [5]:
# Train a Neural Additive Model (NAM): one training run per
# (train, validation) loader pair yielded by the dataset.
config.logdir = logdir
nam_dataset = NAMDataset(
    config,
    data_path=X,
    features_columns=feature_cols,
    targets_column='Distress',
)
data_loaders = nam_dataset.train_dataloaders()
for fold, (train_loader, val_loader) in enumerate(data_loaders):
    # Build a fresh model for every fold so weights trained on one fold never
    # carry over into the next.
    nam_model = NAM(
        config=config,
        name='Testing_NAM',
        num_inputs=len(nam_dataset[0][0]),
        num_units=get_num_units(config, nam_dataset.features),
    )
    tb_logger = TensorBoardLogger(
        save_dir=config.logdir,
        name=f'{nam_model.name}',
        version=f'fold_{fold + 1}')

    # The target directory goes in `dirpath`; `filename` is only the
    # file-name template. Checkpoints land in this fold's log directory.
    checkpoint_callback = ModelCheckpoint(
        dirpath=tb_logger.log_dir,
        filename="{epoch:02d}-{val_loss:.4f}",
        monitor='val_loss',
        save_top_k=config.save_top_k,
        mode='min'
    )
    litmodel = LitNAM(config, nam_model)
    trainer = pl.Trainer(
        logger=tb_logger,
        max_epochs=config.num_epochs,
        callbacks=[checkpoint_callback],
    )
    trainer.fit(
        litmodel,
        train_dataloaders=train_loader,
        val_dataloaders=val_loader)
    # Final logged metrics for this fold (train/val loss, AUROC, ...).
    print('-------------------------------------------------------------------------')
    print('metrics', trainer.callback_metrics)
    print('-------------------------------------------------------------------------')

  return _no_grad_trunc_normal_(tensor, mean, std, a, b)
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

  | Name  | Type | Params
-------------------------------
0 | model | NAM  | 629 K 
-------------------------------
629 K     Trainable params
0         Non-trainable params
629 K     Total params
2.516     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.


-------------------------------------------------------------------------
metrics {'train_loss': tensor(226.7653), 'train_loss_step': tensor(225.8266), 'AUROC_metric': tensor(0.4992), 'AUROC_metric_step': tensor(0.5174), 'val_loss': tensor(225.6996), 'val_loss_epoch': tensor(225.6996), 'AUROC_metric_epoch': tensor(0.4992), 'train_loss_epoch': tensor(226.7653)}
-------------------------------------------------------------------------


In [6]:
# Evaluate on the dataset's test dataloaders, using the `trainer` and
# `litmodel` left over from the last iteration of the training loop.
trainer.test(litmodel, dataloaders=nam_dataset.test_dataloaders())

  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Runningstage.testing metric      DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   AUROC_metric_epoch       0.5220816135406494
     test_loss_epoch         225.7032012939453
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss_epoch': 225.7032012939453,
  'AUROC_metric_epoch': 0.5220816135406494}]

In [None]:
# Inspect the metrics currently held by the trainer's callbacks.
trainer.callback_metrics

In [None]:
# X may already contain the 'Distress' target column (it is attached to X
# in place), so exclude it explicitly; otherwise the target would be fed to
# the model as an input feature.
feature_cols = X.columns.drop('Distress', errors='ignore')
X['Distress'] = y.astype(float)
config.logdir = 'NAM_testing'
# FoldedDataset is needed below because it performs the cross-validation.
config.cross_val = True

def training(cfg):
    """Train a NAM with cross-validation for one hyperparameter sample.

    Args:
        cfg: Mapping of hyperparameter names to values. It is applied on top
            of the notebook-level ``config`` via ``config.update(**cfg)``.

    Returns:
        dict: ``{"loss": mean_val_loss}``, where ``mean_val_loss`` is the
        average over folds of the final ``val_loss`` found in
        ``trainer.callback_metrics``. NaN if the dataset yields no folds.
    """
    config.update(**cfg)
    nam_dataset = FoldedDataset(
        config,
        data_path=X,
        features_columns=feature_cols,
        targets_column='Distress',
    )

    fold_losses = []
    for fold, (train_loader, val_loader) in enumerate(nam_dataset.train_dataloaders()):
        # A fresh model per fold: reusing one model would let weights fitted
        # on an earlier fold's training data be validated on that same data.
        nam_model = NAM(
            config=config,
            name='Testing_NAM',
            num_inputs=len(nam_dataset[0][0]),
            num_units=get_num_units(config, nam_dataset.features),
        )
        tb_logger = TensorBoardLogger(
            save_dir=config.logdir,
            name=f'{nam_model.name}',
            version=f'fold_{fold + 1}')

        # The directory goes in `dirpath`; `filename` is only the template.
        checkpoint_callback = ModelCheckpoint(
            dirpath=tb_logger.log_dir,
            filename="{epoch:02d}-{val_loss:.4f}",
            monitor='val_loss',
            save_top_k=config.save_top_k,
            mode='min'
        )
        litmodel = LitNAM(config, nam_model)
        trainer = pl.Trainer(
            logger=tb_logger,
            max_epochs=config.num_epochs,
            callbacks=[checkpoint_callback],
        )
        trainer.fit(
            litmodel,
            train_dataloaders=train_loader,
            val_dataloaders=val_loader)
        fold_losses.append(float(trainer.callback_metrics['val_loss']))

    # Report the metric the hyperparameter search optimises ("loss").
    mean_loss = sum(fold_losses) / len(fold_losses) if fold_losses else float('nan')
    return {"loss": mean_loss}

In [None]:
# Hyperparameter search with Ray Tune.
# The search space goes in `param_space`, the search settings in
# `tune.TuneConfig`, and the search only runs once `.fit()` is called.
# `training` must report a "loss" metric for metric="loss" / mode="min"
# to have something to minimise.
# NOTE(review): the dropout ranges reach up to 1.0, where (almost) every unit
# is dropped -- consider a tighter upper bound.
tuner = tune.Tuner(
    training,
    param_space={
        "lr": tune.loguniform(1e-4, 1e-1),
        "l2_regularization": tune.loguniform(0.01, 1.0),
        "output_regularization": tune.loguniform(0.01, 1.0),
        "dropout": tune.loguniform(0.01, 1.0),
        "feature_dropout": tune.loguniform(0.01, 1.0),
        "batch_size": tune.choice([128, 512, 1024]),
        "hidden_sizes": tune.choice([[], [32], [64, 32]]),
    },
    tune_config=tune.TuneConfig(
        num_samples=1,
        metric="loss",
        mode="min",
    ),
)
result = tuner.fit()

In [None]:
# Plot the mean importance of each feature for the trained NAM held by
# `litmodel`, using the notebook-level `nam_dataset`.
fig = plot_mean_feature_importance(litmodel.model, nam_dataset)

In [None]:
# Plot the NAM's per-feature plots for the trained model, three per row.
fig = plot_nams(litmodel.model, nam_dataset, num_cols=3)