In [1]:
from run import *
from tint.metrics import mse, mae
import tint, gc, os
from tqdm import tqdm
import pandas as pd
from exp.config import FeatureFiles
from utils.interpreter import *

from tint.attr import (
    AugmentedOcclusion,
    DynaMask,
    Occlusion, 
    FeatureAblation
)

In [4]:
# Parse a fixed argument list (rather than sys.argv) so the notebook always
# runs with the same configuration.
parser = get_parser()
argv = [
    '--model', 'Autoformer',
    '--use_gpu',
    '--result_path', 'scratch',
    '--data_path', 'Top_20.csv',
]
args = parser.parse_args(argv)

# Feature count = number of distinct static, observed and target columns.
# The encoder input, decoder input and output sizes are all set to that count.
input_columns = set(
    DataConfig.static_reals + DataConfig.observed_reals + DataConfig.targets
)
args.n_features = len(input_columns)
args.enc_in = args.n_features
args.dec_in = args.n_features
args.c_out = args.n_features
args.n_targets = len(DataConfig.targets)

In [5]:
# Seed the random number generators before the experiment is constructed.
set_random_seed(args.seed)
# Captum can hit a cuDNN RNN backward error on GPU; see
# https://captum.ai/docs/faq#how-can-i-resolve-cudnn-rnn-backward-error-for-rnn-or-lstm-network
# Fallback if that error appears: uncomment the next line to turn GPU use off.
# args.use_gpu = False

# Experiment class instantiated in the next cell.
Exp = Exp_Forecast

# Setting string derived from the parsed args; passed to the experiment
# constructor together with args.
setting = stringify_setting(args)

In [6]:
# Build the experiment from the parsed args and the setting string.
exp = Exp(args, setting)  # set experiments
# Restore saved weights; the log below reports loading the best checkpoint.
exp.load_model()
# Folder used for this experiment's outputs (later cells write results there).
result_folder = exp.output_folder

Use GPU: cuda:0

Train samples 12740, validation samples 560, test samples 560
637 days of training, 14 days of validation data, 14 days of test data.

Fitting scalers on train data
Loading dataset from ./dataset/processed\Top_20\train.pt
Loading dataset from ./dataset/processed\Top_20\val.pt
Loading dataset from ./dataset/processed\Top_20\test.pt
loading best model from scratch\Autoformer_Top_20\checkpoint.pth


In [7]:
# Switch the trained network to evaluation mode and hand it to the explainer.
# nn.Module.eval() returns the module itself, so the call can be chained.
model = exp.model.eval()
explainer = FeatureAblation(model)

In [8]:
# Data split to explain; also reused later as the prefix of saved file names.
flag = 'train'
# _get_data returns a pair; only the second item (the dataloader) is used here.
_, dataloader = exp._get_data(flag)
# Compute attributions over the dataloader with the chosen explainer.
# NOTE(review): the later alignment against the Date/FIPS-sorted frame
# presumably relies on the loader's iteration order — confirm that the
# 'train' loader is not shuffled.
attr = batch_compute_attr(dataloader, exp, explainer)

100%|██████████| 48/48 [01:35<00:00,  1.99s/it]


In [20]:
# Keep only the identifier columns of the selected split and order them by
# date, then county (FIPS). sort_values returns a new frame here: sorting a
# column subset of exp.data_map[flag] in place is the pattern that can
# trigger pandas' SettingWithCopyWarning, and the non-mutating chain keeps
# this cell safe to re-run.
df = exp.data_map[flag][['Date', 'FIPS']].sort_values(by=['Date', 'FIPS'])
df.head(3)

Unnamed: 0,Date,FIPS
746,2020-03-01,2261
1782,2020-03-01,4013
2818,2020-03-01,6037


In [None]:
# Move the attributions to host memory and store them in the experiment's
# output folder, named after the split and the explainer.
attr_numpy = attr.detach().cpu().numpy()
attr_path = os.path.join(
    exp.output_folder, f'{flag}_{explainer.get_name()}.npy'
)
np.save(attr_path, attr_numpy)

# Feature names, static first and then observed, used to label the
# attribution scores when they are aligned with the input rows in df.
features = exp.age_data.static_reals + exp.age_data.observed_reals
group_agg_scores_df = align_interpretation(df, attr_numpy, features)

print(group_agg_scores_df.describe())

In [68]:
# Ground truth exists only for the static (age group) features, so the
# evaluation is restricted to attributed features that are also static.
# Sorting gives a reproducible order; iterating a set of strings can yield a
# different order in every kernel session.
common_features = sorted(set(features) & set(exp.age_data.static_reals))
print(f'Common static features {common_features}')
if len(common_features) == 0:
    print('Ground truth available only for age group features.\nReturning...\n')
    # A bare `raise` outside an `except` block only produces
    # "RuntimeError: No active exception to reraise". Raise the same
    # exception type with a message that explains why the notebook stops.
    raise RuntimeError(
        'No attributed feature matches the static age group features; '
        'interpretation cannot be evaluated against the ground truth.'
    )

Common static features ['AGE1829', 'AGE3039', 'AGE4049', 'AGE75PLUS', 'AGE5064', 'UNDER5', 'AGE517', 'AGE6574']


In [69]:
# Read the ground-truth case counts per age group and parse the week-ending
# column into datetimes so it can be compared with the attribution dates.
group_cases_path = os.path.join(
    FeatureFiles.root_folder, 'Cases by age groups.csv'
)
group_cases = pd.read_csv(group_cases_path)
group_cases['end_of_week'] = pd.to_datetime(group_cases['end_of_week'])

# First date shared by the ground truth and the attribution scores; used as
# the starting point for the weekly aggregation.
score_dates = group_agg_scores_df['Date'].values
first_common_date = find_first_common_date(group_cases, score_dates)

Found first common date 2020-03-21T00:00:00.000000000.


In [74]:
# The age group ground truth is aggregated per week, so aggregate the
# attribution scores the same way before comparing them.
weekly_agg_scores_df = aggregate_importance_by_window(
    group_agg_scores_df, common_features, first_common_date
)
result_df = evaluate_interpretation(
    group_cases, weekly_agg_scores_df, common_features
)

# Store the evaluation metrics in the experiment's output folder.
metrics_path = os.path.join(exp.output_folder, f'{flag}_int_metrics.csv')
result_df.to_csv(metrics_path, index=False)


        Rank mae: 0.29319, rmse: 0.37306, ndcg: 0.8857

        Normalized mae: 0.061193, rmse: 0.071974, ndcg: 0.81659
    
