# Understand your dataset with prototypes and criticisms

## Configuration and imports

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [58]:
import pandas as pd
import numpy as np
from aix360.algorithms.protodash import ProtodashExplainer

In [3]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

## Load dataset

In [4]:
data = pd.read_feather("data/heloc/heloc_preprocessed.feather")
data.head(20).transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
RiskPerformance,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
ExternalRiskEstimate,55.0,61.0,67.0,66.0,81.0,59.0,54.0,68.0,59.0,61.0,82.0,50.0,74.0,69.0,63.0,63.0,61.0,62.0,89.0,65.0
MSinceOldestTradeOpen,144.0,58.0,66.0,169.0,333.0,137.0,88.0,148.0,324.0,79.0,96.0,311.0,274.0,256.0,262.0,184.0,256.0,131.0,244.0,43.0
MSinceMostRecentTradeOpen,4.0,15.0,5.0,1.0,27.0,11.0,7.0,7.0,2.0,4.0,5.0,25.0,2.0,6.0,26.0,3.0,19.0,4.0,9.0,2.0
AverageMInFile,84.0,41.0,24.0,73.0,132.0,78.0,37.0,65.0,138.0,36.0,47.0,76.0,66.0,117.0,95.0,77.0,68.0,59.0,147.0,26.0
NumSatisfactoryTrades,20.0,2.0,9.0,28.0,12.0,31.0,25.0,17.0,24.0,19.0,16.0,12.0,23.0,11.0,3.0,34.0,14.0,16.0,10.0,15.0
NumTrades60Ever2DerogPubRec,3.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,4.0,0.0,1.0
NumTrades90Ever2DerogPubRec,0.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,0.0,0.0,2.0,0.0,1.0
PercentTradesNeverDelq,83.0,100.0,100.0,93.0,100.0,91.0,92.0,83.0,85.0,95.0,100.0,60.0,100.0,100.0,75.0,97.0,87.0,79.0,100.0,100.0
MSinceMostRecentDelq,2.0,15.0,15.0,76.0,15.0,1.0,9.0,31.0,5.0,5.0,15.0,2.0,15.0,15.0,27.0,32.0,0.0,23.0,15.0,15.0


## Prototypes for original dataset

### Prototypes for good risk performance applicants

In [71]:
data_good = data[data.RiskPerformance == 0].reset_index(drop=True)

X = data_good.values[1:]
y = data_good.values[0].reshape((1, data_good.shape[1]))

print(f"Shape of X: {X.shape}")
print(f"Shape of Y: {y.shape}")

exp = ProtodashExplainer()
(weights, protos, obj_fn_vals) = exp.explain(y, X, m=5)

proto_df = data_good.iloc[protos]
proto_df["Weight"] = np.around(weights / np.sum(weights), 4)
res_df = proto_df.append(data_good.iloc[0])
res_df.transpose()

Shape of X: (4734, 36)
Shape of Y: (1, 36)


  if 'x' in initvals:
  if 's' in initvals:
  if 'y' in initvals:
  if 'z' in initvals:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


Unnamed: 0,3362,1543,3206,965,569,0
RiskPerformance,0.0,0.0,0.0,0.0,0.0,0.0
ExternalRiskEstimate,74.0,61.0,76.0,69.0,77.0,54.0
MSinceOldestTradeOpen,195.0,279.0,163.0,190.0,152.0,88.0
MSinceMostRecentTradeOpen,10.0,4.0,7.0,6.0,2.0,7.0
AverageMInFile,75.0,86.0,72.0,66.0,67.0,37.0
NumSatisfactoryTrades,34.0,45.0,27.0,30.0,12.0,25.0
NumTrades60Ever2DerogPubRec,0.0,0.0,0.0,1.0,3.0,0.0
NumTrades90Ever2DerogPubRec,0.0,0.0,0.0,0.0,3.0,0.0
PercentTradesNeverDelq,100.0,98.0,97.0,91.0,92.0,92.0
MSinceMostRecentDelq,15.0,69.0,25.0,17.0,73.0,9.0


### Prototypes for bad risk performance applicants

In [72]:
data_bad = data[data.RiskPerformance == 1].reset_index(drop=True)

X = data_bad.values[1:]
y = data_bad.values[0].reshape((1, data_bad.shape[1]))

print(f"Shape of X: {X.shape}")
print(f"Shape of Y: {y.shape}")

exp = ProtodashExplainer()
(weights, protos, obj_fn_vals) = exp.explain(y, X, m=5)

proto_df = data_bad.iloc[protos]
proto_df["Weight"] = np.around(weights / np.sum(weights), 4)
res_df = proto_df.append(data_bad.iloc[0])
res_df.transpose()

Shape of X: (5135, 36)
Shape of Y: (1, 36)


  if 'x' in initvals:
  if 's' in initvals:
  if 'y' in initvals:
  if 'z' in initvals:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


Unnamed: 0,4606,1310,4981,4718,1723,0
RiskPerformance,1.0,1.0,1.0,1.0,1.0,1.0
ExternalRiskEstimate,68.0,61.0,59.0,71.0,65.0,55.0
MSinceOldestTradeOpen,126.0,255.0,207.0,132.0,141.0,144.0
MSinceMostRecentTradeOpen,8.0,15.0,6.0,9.0,3.0,4.0
AverageMInFile,58.0,125.0,68.0,63.0,25.0,84.0
NumSatisfactoryTrades,15.0,17.0,16.0,29.0,12.0,20.0
NumTrades60Ever2DerogPubRec,0.0,3.0,0.0,0.0,3.0,3.0
NumTrades90Ever2DerogPubRec,0.0,2.0,0.0,0.0,3.0,0.0
PercentTradesNeverDelq,63.0,71.0,82.0,90.0,100.0,83.0
MSinceMostRecentDelq,16.0,12.0,2.0,2.0,15.0,2.0


## Prototypes for predictions from trained model

### Prototypes for good risk performance applicants

### Prototypes for bad risk performance applicants
