# Feature Residuals Template

The `interpretability_overview.ipynb` recipe provides a basic introduction to the Howso Engine's interpretability capabilities. This recipe is a barebones template for retrieving Feature Residuals.


In [1]:
import pandas as pd
from pmlb import fetch_data

from howso.engine import Trainee
from howso.utilities import infer_feature_attributes

# Section 1: Train, Analyze, and React

We will be using the `Adult` dataset, where the Action Feature (`target`) is a binary indicator of whether a person makes over $50k/year.

### Step 1: Load Data

In [2]:
# Fetch the Adult dataset through PMLB, then keep a fixed-seed sample of
# 1,000 rows (with a fresh 0..n-1 index) so the example runs quickly.
df = (
    fetch_data('adult', local_cache_dir="../../../data/adult")
    .sample(n=1000, random_state=0)
    .reset_index(drop=True)
)

df

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,target
0,41.0,4,151856.0,11,9.0,2,11,0,4,1,0.0,0.0,40.0,39,1
1,57.0,6,87584.0,10,16.0,0,10,1,4,0,0.0,0.0,25.0,39,1
2,31.0,2,220669.0,9,13.0,4,10,3,4,0,6849.0,0.0,40.0,39,1
3,55.0,4,171355.0,8,11.0,2,7,0,4,1,0.0,0.0,20.0,39,1
4,59.0,6,148626.0,0,6.0,2,5,0,4,1,0.0,0.0,40.0,39,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,29.0,4,166220.0,9,13.0,4,12,3,4,0,0.0,0.0,50.0,39,1
996,60.0,0,204486.0,9,13.0,2,0,0,4,1,0.0,0.0,8.0,39,0
997,48.0,2,93449.0,14,15.0,2,10,0,1,1,99999.0,0.0,40.0,30,0
998,25.0,4,114838.0,14,15.0,4,10,1,4,0,0.0,0.0,8.0,22,1


### Step 1a: Define Test Case to Determine Local Metrics

In [3]:
# Hold out the last row as a one-case test set. The `target` column is removed
# so the held-out case carries only the remaining feature values.
df_test = df.iloc[[-1]].drop(columns='target')

# Keep every other row in `df` for training. `drop` returns a new frame, so
# `df` is rebound here rather than mutated in place.
# NOTE: re-running this cell without re-running the load cell removes one more row.
df = df.drop(index=df.index[-1])

### Step 2: Train

In [4]:
# The feature to predict; every other feature is used as context
action_features = ['target']

# Infer per-feature attributes from the training data
features = infer_feature_attributes(df)
context_features = features.get_names(without=action_features)

# Create the Trainee from the inferred attributes (overwrite_existing=True)
t = Trainee(features=features, overwrite_existing=True)

# Train the Trainee on the training cases
t.train(df)

# Targeted analysis: context features -> action feature
t.analyze(
    action_features=action_features,
    context_features=context_features,
)

# Local Metrics

To determine local feature residuals, call `react` on specific cases with the `feature_residuals` detail enabled.

In [5]:
# Details requested from the Trainee alongside the prediction; these two
# flags ask for the (robust) feature residuals used for interpretability.
details = {'feature_residuals': True, 'robust_residuals': True}

# React to the held-out test case
results = t.react(
    df_test,
    action_features=action_features,
    context_features=context_features,
    details=details,
)

# Local Feature Residuals: one dict per reacted case. `df_test` holds a single
# case, so the first entry is used and laid out as (feature, residual) rows.
feature_residuals_dicts = results['details']['feature_residuals']
residual_pairs = [*feature_residuals_dicts[0].items()]
feature_residuals = pd.DataFrame(residual_pairs)
feature_residuals

Unnamed: 0,0,1
0,sex,0.068394
1,education-num,1.133302
2,hours-per-week,4.966019
3,age,8.450222
4,relationship,0.178286
5,fnlwgt,41389.955222
6,target,0.361109
7,capital-gain,445.543748
8,marital-status,0.19028
9,native-country,0.088438


# Global Metrics

To get the global feature residuals, use `react_into_trainee` to compute and cache them in the Trainee, then retrieve them with `get_prediction_stats`.

In [6]:
# React on the cases already held by the Trainee; react_into_trainee caches
# the resulting residuals inside the Trainee for later retrieval.
t.react_into_trainee(
    action_feature=action_features[0],
    context_features=context_features,
    residuals=True,
)

In [7]:
# Global Feature Residuals: request the 'mae' statistic for each feature,
# computed with respect to the action feature.
global_feature_residuals = t.get_prediction_stats(
    stats=['mae'],
    action_feature=action_features[0],
)
global_feature_residuals

Unnamed: 0,sex,education-num,hours-per-week,age,relationship,fnlwgt,capital-gain,marital-status,native-country,race,education,capital-loss,occupation,workclass
mae,0.278391,1.589751,7.525315,8.947327,0.377181,73836.17051,1578.462246,0.31548,0.159334,0.200264,0.609505,239.405959,0.835817,0.479066
