In [8]:
import notebookjs
import glob
from notebookjs import execute_js
import random
from sklearn.calibration import calibration_curve
import sklearn
import shap
import pandas as pd
import numpy as np
import sys
from sklearn.neural_network import MLPClassifier

sys.path.insert(0, '../core/')
from calibrate import Calibrate

## User Simulation

In [9]:
X,y = shap.datasets.adult(display=True)

le = sklearn.preprocessing.LabelEncoder()
X['Sex'] = le.fit_transform(X['Sex'])
X = pd.get_dummies(X, columns=["Workclass", "Marital Status", "Occupation", "Relationship", "Race", "Country"])

scaler = sklearn.preprocessing.StandardScaler()
train_col_sacle = X[['Age','Education-Num','Hours per week']]
train_scaler_col = scaler.fit_transform(train_col_sacle)
train_scaler_col = pd.DataFrame(train_scaler_col,columns=train_col_sacle.columns)
X['Age'] = train_scaler_col['Age']
X['Education-Num'] = train_scaler_col['Education-Num']
X['Hours per week'] = train_scaler_col['Hours per week']

X_train, X_valid, y_train, y_valid = sklearn.model_selection.train_test_split(X, y, test_size=0.2, random_state=42)

clf = MLPClassifier(random_state=42, max_iter=100).fit(X_train, y_train)
y_preds = clf.predict_proba(X_valid)

# OHE labels
ohe = sklearn.preprocessing.OneHotEncoder()

y_valid_labels = ohe.fit_transform(y_valid.reshape(-1,1)).toarray()

In [10]:
c = Calibrate(data=X_valid[['Age', 'Education-Num', 'Sex', 'Capital Gain', 'Capital Loss',
       'Hours per week', 'Workclass_ ?']], predictions=y_preds, labels=y_valid_labels).calibrate()

## tests

In [None]:
test ={ 
'Age': {'name': "Age", 'start': 1.093, 'end': 2.361},
'Education-Num':{ 'name': "Education-Num", 'start': -1.197, 'end': 0.2}
}

In [78]:
y_valid_labels
filter_by_feature_range(X_valid[['Age', 'Education-Num', 'Sex', 'Capital Gain', 'Capital Loss',
       'Hours per week', 'Workclass_ ?']], y_preds, y_valid_labels, test )

{'tableheader': ['Age',
  'Education-Num',
  'Sex',
  'Capital Gain',
  'Capital Loss',
  'Hours per week',
  'Workclass_ ?'],
 'tablebody': [[-0.85, -0.03, 0.0, 0.0, 0.0, -0.2, 0.0],
  [0.47, -0.42, 0.0, 0.0, 0.0, -0.04, 0.0],
  [-0.7, -0.03, 1.0, 2202.0, 0.0, 0.77, 0.0],
  [1.42, -0.42, 1.0, 15024.0, 0.0, 1.58, 0.0],
  [-1.22, -0.42, 0.0, 0.0, 0.0, -0.04, 0.0],
  [0.84, -0.42, 1.0, 3103.0, 0.0, -0.04, 0.0],
  [-0.12, -0.42, 1.0, 0.0, 0.0, -0.04, 0.0],
  [-0.41, -0.03, 0.0, 0.0, 0.0, -0.44, 0.0],
  [-0.56, -0.42, 1.0, 0.0, 0.0, -0.04, 0.0],
  [-1.14, -0.42, 0.0, 0.0, 0.0, -0.85, 0.0],
  [-0.7, -0.03, 1.0, 0.0, 0.0, 0.61, 0.0],
  [1.5, -0.42, 1.0, 0.0, 0.0, -0.04, 0.0],
  [-0.41, -0.03, 1.0, 0.0, 0.0, 0.77, 0.0],
  [-1.51, -0.42, 0.0, 0.0, 0.0, -0.6, 0.0],
  [-0.48, -0.03, 1.0, 0.0, 0.0, -0.04, 0.0],
  [-1.14, -0.42, 1.0, 0.0, 0.0, -0.04, 0.0],
  [-1.0, -0.03, 0.0, 0.0, 0.0, -0.85, 0.0],
  [1.13, -0.42, 0.0, 0.0, 0.0, -1.66, 0.0],
  [-0.12, -0.03, 1.0, 0.0, 0.0, -0.04, 0.0],
  [2.45, -

In [76]:
def filter_by_feature_range(data, preds, labels, filters, classindex=0):
    
    filteredPreds = pd.DataFrame()
    filteredLabels = pd.DataFrame()
    filteredData = pd.DataFrame()

    for key in filters:

        if(filteredData.shape[0] == 0):
            
            currentconds = ((data[filters[key]['name']] >= filters[key]['start']) & (data[filters[key]['name']] <= filters[key]['end']))
            
            filteredData = data[ currentconds ]
            filteredPreds = preds[:, classindex][ currentconds ] 
            filteredLabels = labels[:, classindex][ currentconds ]
            
        else:
            
            currentconds = (filteredData[filters[key]['name']] >= filters[key]['start']) &  (filteredData[filters[key]['name']] <= filters[key]['end'])
            
            filteredData = filteredData[ currentconds ]
            filteredPreds = filteredPreds[ currentconds ] 
            filteredLabels = filteredLabels[ currentconds ]
            
            
    return {
        'tableheader': filteredData.columns.tolist(),
        'tablebody': np.around(filteredData.values, decimals=2).tolist(),
        'classifications': filteredLabels.tolist()
    }