In [15]:
import numpy as np
import pandas as pd
import pickle
from darts.models import RegressionModel
from darts.explainability.shap_explainer import ShapExplainer
from pprint import pprint

In [16]:
# Name of the target series this notebook explains; it also selects the
# sub-directory the trained artifacts are read from.
TARGET_BM = 'Muscle Mass'
# Kept as a plain string with a trailing slash because later cells build file
# paths by appending a file name to it.
WEIGHTS_DIR = f'weights/{TARGET_BM}/'

In [17]:
# Conversion factor used to express a horizon given in months as a number of weeks.
WEEKS_PER_MONTH = 4.2
# Horizons for 1..5 months, each rounded up to a whole number of weeks.
# `.tolist()` converts the numpy integers back to plain Python ints.
horizons = np.ceil(WEEKS_PER_MONTH * np.arange(1, 6)).astype(int).tolist()

In [18]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read, instead of being left for the garbage collector.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# NOTE(security): unpickling can execute arbitrary code. Only point WEIGHTS_DIR
# at artifacts that were produced by a trusted training run.
model = RegressionModel.load(WEIGHTS_DIR + "model.pkl")
preprocess_pipeline = _load_pickle(WEIGHTS_DIR + 'preprocessor.pkl')
scaler = _load_pickle(WEIGHTS_DIR + 'scaler.pkl')
target = _load_pickle(WEIGHTS_DIR + 'target.pkl')
past_cov = _load_pickle(WEIGHTS_DIR + 'past_cov.pkl')

# Explainability

In [19]:
# Build the explainer for the loaded model, using the loaded target and
# past-covariate series as its background data.
shap_explainer = ShapExplainer(
    model,
    background_series=target,
    background_past_covariates=past_cov,
)

In [20]:
def get_shap_values(shap_explainer, target, past_cov, horizons):
    """Summarise an explainer's output as one row per horizon, one column per feature.

    For every horizon the last row of the explanation frame is taken, features
    that belong to the target itself are removed, and the remaining per-lag
    columns (names containing ``'lag-'``) are averaged into a single column
    named after everything before the final ``'_'`` of the lag column name.

    Parameters
    ----------
    shap_explainer
        Object exposing ``explain(target, past_cov, horizons=...)``. The result
        must provide ``available_components``, ``get_feature_values(h)`` and
        ``get_explanation(h)``.
    target, past_cov
        Passed through unchanged to ``shap_explainer.explain``.
    horizons
        Non-empty sequence of horizons to explain.

    Returns
    -------
    pandas.DataFrame
        Indexed by horizon. Columns are the non-target features that had no
        lag suffix, followed by one averaged column per lagged feature in order
        of first appearance.
    """
    horizons = list(horizons)
    explainability_res = shap_explainer.explain(target, past_cov, horizons=horizons)

    # Features whose name starts with one of the target component names are the
    # target's own features; they are excluded from the summary.
    target_prefixes = tuple(explainability_res.available_components)
    feature_names = explainability_res.get_feature_values(horizons[0]).components.to_list()
    target_features = {name for name in feature_names if name.startswith(target_prefixes)}

    # Collect the last explanation row of every horizon and join them in one
    # step. `keys=` labels the columns, and the result is always a DataFrame,
    # so a single horizon needs no special handling.
    last_rows = [
        explainability_res.get_explanation(horizon).pd_dataframe().iloc[-1]
        for horizon in horizons
    ]
    importances_df = pd.concat(last_rows, axis=1, keys=horizons)

    # Filter on the index directly; this keeps the values numeric instead of
    # passing through an object-dtype frame.
    importances_df = importances_df.loc[~importances_df.index.isin(target_features)]

    # Rows become horizons, columns become features. The columns axis is named
    # 'Horizon' so the label appears in the corner of the rendered table.
    importances_df = importances_df.T.rename_axis(columns='Horizon')

    # Group lag columns by their exact prefix. Exact matching stops a feature
    # whose name merely starts with another feature's prefix from being
    # averaged into it, and the dict keeps first-appearance order so the output
    # column order is the same on every run.
    lag_groups = {}
    for col in importances_df.columns:
        if 'lag-' in col:
            lag_groups.setdefault(col.rsplit('_', 1)[0], []).append(col)

    for prefix, lag_cols in lag_groups.items():
        importances_df[prefix] = importances_df[lag_cols].mean(axis=1)

    lag_columns = [col for lag_cols in lag_groups.values() for col in lag_cols]
    importances_df = importances_df.drop(columns=lag_columns)
    return importances_df

In [21]:
%%capture
# Explain only the first entry of the loaded target and past-covariate
# collections, at every configured horizon.
importances_df = get_shap_values(
    shap_explainer=shap_explainer,
    target=target[0],
    past_cov=past_cov[0],
    horizons=horizons,
)

In [22]:
# Nested mapping of feature name -> {horizon: value}, pretty-printed.
importances_by_feature = importances_df.to_dict()
pprint(importances_by_feature)

{'Age_statcov_target_Muscle Mass': {5: 0.00011069872604193194,
                                    9: 0.00014040235855990643,
                                    13: 0.00011432947319086247,
                                    17: 0.00012583146123264685,
                                    21: 0.00010252676948056354},
 'Gender_statcov_target_Muscle Mass': {5: -1.2954035773837802e-05,
                                       9: -0.0005297894030809047,
                                       13: 2.0775424782219314e-05,
                                       17: 0.001693629771470991,
                                       21: 0.003312669992446874},
 'avg_calories_per_workout_pastcov': {5: 5.5506726101624736e-05,
                                      9: -0.00017984933582810346,
                                      13: -2.2135856614887334e-05,
                                      17: 0.00015828982578236498,
                                      21: 0.00021686791824513147},
 'avg_cardio_workou