In [None]:
import pickle

                                                                           
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import joblib

import sklearn.metrics as skmetrics

from nam.trainer import Trainer
from nam.data import NAMDataset
from nam.config import defaults
from nam.data import FoldedDataset
from nam.models import NAM
from nam.models import get_num_units
from nam.trainer import LitNAM
from nam.types import Config
from nam.utils import parse_args
from nam.utils import plot_mean_feature_importance
from nam.utils import plot_nams

import torch
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint

# Prepare Dataset

In [None]:
# Load the raw data bundle and build the modelling frame `X` (features + target).
# SECURITY NOTE: pickle.load can execute arbitrary code while deserialising;
# only point this at a trusted, locally produced file.
with open('../data/all_data.pickle', 'rb') as file:
    all_data = pickle.load(file)

orig_characteristics = all_data['OrigCharacteristics.dta']
orig_characteristics_columns = [
    'type',
    'CutoffLTV',
    'CutoffDSCR',
    'CutoffCpn',
    'log_bal',
    'fixed',
    'buildingage',
    'CutoffOcc',
    'quarter_type',
    'AmortType',
    'Size',
    'OVER_w',
    'past_over',
    'high_overstatement2',
    'Distress'
]
target_col = 'Distress'
orig_data = orig_characteristics[orig_characteristics_columns]

# One-hot encode the two categorical columns.
orig_data_with_dummies = pd.get_dummies(
    orig_data,
    columns=[
        'AmortType',
        'type',
    ]
)

# Keep only rows without any missing value. The explicit copy makes
# `clean_data` an independent frame, so the dtype conversion below never
# writes into a slice of `orig_data_with_dummies` (SettingWithCopyWarning).
complete_rows = orig_data_with_dummies.notna().all(axis=1)
clean_data = orig_data_with_dummies.loc[complete_rows].copy()

# Convert every boolean column (the get_dummies indicators, plus any column
# that was already boolean) to 0/1 integers in a single pass.
dummy_cols = [col for col, dtype in clean_data.dtypes.items() if dtype == bool]
clean_data = clean_data.astype({col: 'int64' for col in dummy_cols})

# Fraction (0..1, not a percentage) of rows that survived the NaN filter.
print('percentage of clean data and all data ', len(clean_data) / len(orig_data_with_dummies))

y = clean_data[target_col]
X = clean_data.drop(columns=target_col)
feature_cols = X.columns
# Re-attach the target as the last column; `feature_cols` was captured before
# this step and therefore excludes it.
X[target_col] = y
sample_size = len(y)
print('sample size ', sample_size)

# Load hyperparameters for NAM

In [None]:
# Read the stored tuning result and keep only its 'params' entry.
with open('../models/best_params_nam_maximize.joblib', 'rb') as params_file:
    tuning_result = joblib.load(params_file)
hyper_params = tuning_result['params']

# Start from the library defaults, apply the fixed notebook overrides, then
# apply the tuned hyperparameters last.
# NOTE(review): presumably config.update overwrites same-named fields - confirm.
config = defaults()
for option, value in (
    ('val_size', 0.0),
    ('test_size', 0.0),
    ('num_workers', 4),
    ('wandb', False),
):
    setattr(config, option, value)
config.update(**hyper_params)

# NAM training

In [None]:
# Build the dataset, the NAM and its Lightning wrapper from the prepared frame.
nam_dataset = NAMDataset(
    config,
    data_path=X,
    features_columns=feature_cols,
    targets_column='Distress',
)
nam_model = NAM(
    config=config,
    name='Final_NAM',
    num_inputs=len(nam_dataset[0][0]),
    num_units=get_num_units(config, nam_dataset.features)
)
litnam = LitNAM(config, nam_model)

# One fit per (train, validation) loader pair yielded by the dataset. The same
# `litnam` instance is passed to every fit call.
# NOTE(review): this means later runs continue from the weights of earlier
# runs rather than starting fresh - confirm this is intended.
data_loaders = nam_dataset.train_dataloaders()
for run, (train_loader, val_loader) in enumerate(data_loaders):
    print('run ', run)
    tb_logger = TensorBoardLogger(
        save_dir=config.logdir,
        name=f'{nam_model.name}',
        version=f'run{run + 1}')

    # 'val_loss' is a loss, so the best checkpoints are the ones with the
    # LOWEST value; mode='max' would retain the worst-performing epochs.
    checkpoint_callback = ModelCheckpoint(
        filename=tb_logger.log_dir + "/{epoch:02d}-{val_loss:.4f}",
        monitor='val_loss',
        save_top_k=config.save_top_k,
        mode='min'
    )
    # A fresh Trainer per run so each run gets its own logger and checkpoints.
    trainer = pl.Trainer(
        logger=tb_logger,
        max_epochs=config.num_epochs,
        callbacks=[checkpoint_callback],
        log_every_n_steps=5
    )
    trainer.fit(
        litnam,
        train_dataloaders=train_loader,
        val_dataloaders=val_loader)

# Evaluation

In [None]:
# Score the trained NAM on the whole cleaned sample and draw its ROC curve.
model = litnam.model
model.eval()

# Rebind instead of resetting in place, so the frame that was handed to
# NAMDataset is not mutated and re-running this cell stays idempotent.
X = X.reset_index(drop=True)
sample_size = len(X)

# Plain column selection covers every row. The former `.loc[:sample_size]`
# was a label-based, end-inclusive slice that only selected all rows because
# the index had just been reset.
with torch.no_grad():
    features = torch.tensor(X[feature_cols].to_numpy(), dtype=torch.double)
    logits, fnns = model(features)
targets = torch.tensor(X['Distress'].to_numpy(), dtype=torch.double)

y = targets.numpy()
pred = logits.numpy()

# The raw model outputs are used directly as ranking scores for the ROC curve.
fpr, tpr, thresholds = skmetrics.roc_curve(y, pred)
roc_auc = skmetrics.auc(fpr, tpr)
# Named `roc_display` so IPython's built-in `display` is not shadowed.
roc_display = skmetrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                                        estimator_name='ROC NAM')
sns.set_style('darkgrid')
roc_display.plot()
roc_display.line_.set_color('#F09135')
fig = roc_display.figure_
ax = roc_display.ax_
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
ax.plot([0, 1], [0, 1], linewidth=.8, linestyle='--', color='#00007B')

ax.set_facecolor('#E5EDF6')
fig.savefig('../plots/NAM_ROC.png', format='png', bbox_inches='tight')

In [None]:
# Build the mean-feature-importance figure for the trained model and store it
# as a PNG next to the other plots.
importance_png = '../plots/NAM_FeatureImportance.png'
fig = plot_mean_feature_importance(litnam.model, nam_dataset, width=0.4)
fig.savefig(importance_png, format='png')

In [None]:
# Build the per-feature NAM plots (laid out with num_cols=4) for the trained
# model and store the figure as a PNG next to the other plots.
contributions_png = '../plots/NAM_FeatureContributions.png'
fig = plot_nams(litnam.model, nam_dataset, num_cols=4)
fig.savefig(contributions_png, format='png')