In [53]:
import sys
sys.path.append("../")
from cfmining.action_set import ActionSet
from cfmining.predictors import GeneralClassifier_Shap
from cfmining.algorithms import MAPOFCEM
from cfmining.utils import get_data_model, DeepPipeExplainer

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
X_train, Y_train, model, outlier_detection, individuals = get_data_model("german", "LGBMClassifier")

In [15]:
_, _, model_MLP, _, individuals_mlp = get_data_model("german", "MLPClassifier")

In [54]:
deep_pipe_explainer = DeepPipeExplainer(model_MLP, X_train.sample(100))

## ActionSet

In [10]:
not_mutable_features = ['Age', 'OwnsHouse', 'isMale', 'JobClassIsSkilled', 'Single', 'ForeignWorker', 'RentsHouse']
mutable_features = [feat for feat in X_train.columns if feat not in not_mutable_features]
action_set = ActionSet(X = X_train, default_step_size = 0.01, mutable_features = mutable_features)

In [11]:
grid_size = [[feat.name, len(feat.grid) if feat.mutable else 0] for feat in action_set]
grid_size

[['ForeignWorker', 0],
 ['Single', 0],
 ['Age', 0],
 ['LoanDuration', 55],
 ['LoanAmount', 101],
 ['LoanRateAsPercentOfIncome', 4],
 ['YearsAtCurrentHome', 4],
 ['NumberOfOtherLoansAtBank', 3],
 ['NumberOfLiableIndividuals', 2],
 ['HasTelephone', 2],
 ['CheckingAccountBalance_geq_0', 2],
 ['CheckingAccountBalance_geq_200', 2],
 ['SavingsAccountBalance_geq_100', 2],
 ['SavingsAccountBalance_geq_500', 2],
 ['MissedPayments', 2],
 ['NoCurrentLoan', 2],
 ['CriticalAccountOrLoansElsewhere', 2],
 ['OtherLoansAtBank', 2],
 ['HasCoapplicant', 2],
 ['HasGuarantor', 2],
 ['OwnsHouse', 0],
 ['RentsHouse', 0],
 ['Unemployed', 2],
 ['YearsAtCurrentJob_lt_1', 2],
 ['YearsAtCurrentJob_geq_4', 2],
 ['JobClassIsSkilled', 0],
 ['is_male', 2]]

In [12]:
not_mutable_features = ['Age', 'OwnsHouse', 'isMale', 'JobClassIsSkilled', 'Single', 'ForeignWorker', 'RentsHouse']
mutable_features = [feat for feat in X_train.columns if feat not in not_mutable_features]
action_set = ActionSet(X = X_train, default_step_size = 0.1, mutable_features = mutable_features)

In [13]:
grid_size = [[feat.name, len(feat.grid) if feat.mutable else 0] for feat in action_set]
grid_size

[['ForeignWorker', 0],
 ['Single', 0],
 ['Age', 0],
 ['LoanDuration', 10],
 ['LoanAmount', 11],
 ['LoanRateAsPercentOfIncome', 4],
 ['YearsAtCurrentHome', 4],
 ['NumberOfOtherLoansAtBank', 3],
 ['NumberOfLiableIndividuals', 2],
 ['HasTelephone', 2],
 ['CheckingAccountBalance_geq_0', 2],
 ['CheckingAccountBalance_geq_200', 2],
 ['SavingsAccountBalance_geq_100', 2],
 ['SavingsAccountBalance_geq_500', 2],
 ['MissedPayments', 2],
 ['NoCurrentLoan', 2],
 ['CriticalAccountOrLoansElsewhere', 2],
 ['OtherLoansAtBank', 2],
 ['HasCoapplicant', 2],
 ['HasGuarantor', 2],
 ['OwnsHouse', 0],
 ['RentsHouse', 0],
 ['Unemployed', 2],
 ['YearsAtCurrentJob_lt_1', 2],
 ['YearsAtCurrentJob_geq_4', 2],
 ['JobClassIsSkilled', 0],
 ['is_male', 2]]

## Predictor

In [14]:
clf_shap = GeneralClassifier_Shap(
    model,
    outlier_detection,
    X_train
)

print(clf_shap.predict(X_train.iloc[0].values))
print(clf_shap.predict_proba(X_train.iloc[0].values))

open_vars = [5, 6, 7]
cfe = X_train.iloc[0].values.copy()
cfe[4] = X_train.iloc[:, 4].sample().values[0]
cfe[9] = X_train.iloc[:, 9].sample().values[0]

print(clf_shap.predict(cfe))
print(clf_shap.predict_proba(cfe))
print(clf_shap.predict_max(cfe, open_vars))
print(clf_shap.predict_max(cfe, open_vars, n_changes = 1))

True
0.6144198772264504
True
0.936764776301494
1.146659026566456
1.0121137989938012


In [24]:
clf_shap = GeneralClassifier_Shap(
    model,
    outlier_detection,
    X_train,
    shap_explainer = "tree"
)

print(clf_shap.predict(X_train.iloc[0].values))
print(clf_shap.predict_proba(X_train.iloc[0].values))

open_vars = [5, 6, 7]
cfe = X_train.iloc[0].values.copy()
cfe[4] = X_train.iloc[:, 4].sample().values[0]
cfe[9] = X_train.iloc[:, 9].sample().values[0]

print(clf_shap.predict(cfe))
print(clf_shap.predict_proba(cfe))
print(clf_shap.predict_max(cfe, open_vars))
print(clf_shap.predict_max(cfe, open_vars, n_changes = 1))

True
0.6144198772264504
True
0.9117424463154873
1.1327963560310752
0.9869582996364833


In [25]:
clf_shap = GeneralClassifier_Shap(
    model,
    outlier_detection,
    X_train,
    shap_explainer = "permutation"
)

print(clf_shap.predict(X_train.iloc[0].values))
print(clf_shap.predict_proba(X_train.iloc[0].values))

open_vars = [5, 6, 7]
cfe = X_train.iloc[0].values.copy()
cfe[4] = X_train.iloc[:, 4].sample().values[0]
cfe[9] = X_train.iloc[:, 9].sample().values[0]

print(clf_shap.predict(cfe))
print(clf_shap.predict_proba(cfe))
print(clf_shap.predict_max(cfe, open_vars))
print(clf_shap.predict_max(cfe, open_vars, n_changes = 1))

True
0.6144198772264504
True
0.8441658076445185
1.074862352044524
0.9357814732820714


### Predictor with MLP

In [52]:
clf_shap = GeneralClassifier_Shap(
    model,
    outlier_detection,
    X_train,
    shap_explainer = "custom",
    explainer = deep_pipe_explainer
)

print(clf_shap.predict(X_train.iloc[0].values))
print(clf_shap.predict_proba(X_train.iloc[0].values))

open_vars = [5, 6, 7]
cfe = X_train.iloc[0].values.copy()
cfe[4] = X_train.iloc[:, 4].sample().values[0]
cfe[9] = X_train.iloc[:, 9].sample().values[0]

print(clf_shap.predict(cfe))
print(clf_shap.predict_proba(cfe))
print(clf_shap.predict_max(cfe, open_vars))
print(clf_shap.predict_max(cfe, open_vars, n_changes = 1))

True
0.6144198772264504
True
0.8776223649352376
0.9343401694519032
0.8776223649352376




## MAPOFCEM

In [67]:
m = MAPOFCEM(
    action_set,
    clf_shap
)

m.mutable_features

[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 22, 23, 24, 26]

In [71]:
for i in range(X_train.shape[0]):
    if not clf_shap.predict(X_train.iloc[i].values):
        m.fit(X_train.iloc[i].values)
        print(f"Line {i} found {len(m.solutions)} solutions")

Line 6 found 1 solutions
Line 10 found 1 solutions
Line 11 found 1 solutions
Line 24 found 1 solutions
Line 29 found 1 solutions
Line 31 found 1 solutions
Line 36 found 1 solutions
Line 37 found 1 solutions
Line 44 found 1 solutions
Line 46 found 1 solutions
Line 52 found 1 solutions
Line 61 found 1 solutions
Line 69 found 1 solutions
Line 71 found 1 solutions
Line 72 found 1 solutions
Line 74 found 1 solutions
Line 76 found 1 solutions
Line 78 found 1 solutions
Line 85 found 1 solutions
Line 86 found 1 solutions
Line 89 found 1 solutions
Line 94 found 1 solutions
Line 97 found 1 solutions
Line 106 found 1 solutions
Line 110 found 1 solutions
Line 120 found 1 solutions
Line 130 found 1 solutions
Line 133 found 1 solutions
Line 137 found 1 solutions
Line 141 found 1 solutions
Line 147 found 1 solutions
Line 152 found 1 solutions
Line 157 found 1 solutions
Line 159 found 1 solutions
Line 161 found 1 solutions
Line 176 found 1 solutions
Line 184 found 1 solutions
Line 192 found 1 solution

In [72]:
m = MAPOFCEM(
    action_set,
    clf_shap,
    compare = "percentile_change"
)

m.mutable_features

[3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 22, 23, 24, 26]

In [73]:
for i in range(X_train.shape[0]):
    if not clf_shap.predict(X_train.iloc[i].values):
        m.fit(X_train.iloc[i].values)
        print(f"Line {i} found {len(m.solutions)} solutions")

Line 6 found 2 solutions
Line 10 found 3 solutions
Line 11 found 1 solutions
Line 24 found 1 solutions
Line 29 found 2 solutions
Line 31 found 1 solutions
Line 36 found 3 solutions
Line 37 found 2 solutions
Line 44 found 1 solutions
Line 46 found 1 solutions
Line 52 found 1 solutions
Line 61 found 2 solutions
Line 69 found 2 solutions
Line 71 found 2 solutions
Line 72 found 1 solutions
Line 74 found 3 solutions
Line 76 found 2 solutions
Line 78 found 1 solutions
Line 85 found 1 solutions
Line 86 found 1 solutions
Line 89 found 1 solutions
Line 94 found 3 solutions
Line 97 found 2 solutions
Line 106 found 1 solutions
Line 110 found 1 solutions
Line 120 found 2 solutions
Line 130 found 1 solutions
Line 133 found 1 solutions
Line 137 found 1 solutions
Line 141 found 2 solutions
Line 147 found 2 solutions
Line 152 found 2 solutions
Line 157 found 1 solutions
Line 159 found 2 solutions
Line 161 found 2 solutions
Line 176 found 1 solutions
Line 184 found 1 solutions
Line 192 found 2 solution