In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from recourse.builder import RecourseBuilder
from recourse.builder import ActionSet

In [2]:
data_file = os.path.join('loan_test.csv')
## load and process data
german_df = pd.read_csv(data_file).reset_index(drop=True)
german_df = (german_df
             .assign(isMale=lambda df: (df['Gender']=='Male').astype(int))
             .drop(['Gender'], axis=1)
            )

y = german_df['Credit_History']
X = german_df.drop('Credit_History', axis=1)

In [3]:
action_set = ActionSet(X = X)
action_set['Married'].mutable = False
action_set['Education'].mutable = False
action_set['Self_Employed'].mutable = False

In [4]:
action_set

+------------------+----------------+------------+------------+----------------+----------------+-----------+-----------+-----------+--------+---------+
|             name |  variable type | actionable | compatible | step direction | flip direction | grid size | step type | step size |     lb |      ub |
+------------------+----------------+------------+------------+----------------+----------------+-----------+-----------+-----------+--------+---------+
|          Married | <class 'bool'> |       True |        nan |              0 |            nan |         2 |  absolute |       1.0 |    0.0 |     1.0 |
|        Education | <class 'bool'> |       True |        nan |              0 |            nan |         2 |  absolute |       1.0 |    0.0 |     1.0 |
|    Self_Employed | <class 'bool'> |       True |        nan |              0 |            nan |         2 |  absolute |       1.0 |    0.0 |     1.0 |
|  ApplicantIncome |  <class 'int'> |       True |        nan |              0 |  

In [5]:
action_set['ApplicantIncome']

'ApplicantIncome': (1115.0, 15219.0)

In [6]:
action_set['Loan_Amount_Term'].bounds = (1, 500)

In [7]:
## grid search
clf = LogisticRegression(max_iter=1000, solver='lbfgs')
grid = GridSearchCV(
  clf, param_grid={'C': np.logspace(-4, 3)},
  cv=10,
  scoring='roc_auc',
  return_train_score=True
)
grid.fit(X, y)
clf = grid.best_estimator_

In [8]:
coefficients = clf.coef_[0]
intercept = clf.intercept_[0]

In [9]:
action_set.align(coefficients=coefficients)

AttributeError: 'ActionSet' object has no attribute 'align'

In [None]:
pd.Series(coefficients, index=X.columns).to_frame('Coefficients')

Unnamed: 0,Coefficients
Married,0.000486
Education,0.000175
Self_Employed,0.00025
ApplicantIncome,0.000277
LoanAmount,-0.006072
Loan_Amount_Term,-0.001569
isMale,0.000119


In [None]:
action_set['isMale'].step_direction = -1

In [None]:
pd.Series(coefficients, index=X.columns).to_frame('Coefficients')

Unnamed: 0,Coefficients
Married,0.000486
Education,0.000175
Self_Employed,0.00025
ApplicantIncome,0.000277
LoanAmount,-0.006072
Loan_Amount_Term,-0.001569
isMale,0.000119


In [10]:
scores = pd.Series(clf.predict_proba(X)[:, 1])

In [11]:
scores.loc[lambda s: s<.8].head()

4     0.740023
7     0.783547
18    0.750841
19    0.615580
24    0.794243
dtype: float64

In [12]:
scores.loc[lambda s: s<.8].shape

(65,)

In [13]:
denied_individuals = scores.loc[lambda s: s < .8].index

In [14]:
x = X.values[denied_individuals[0]]

p = .8
rb = RecourseBuilder(
      optimizer="cplex",
      coefficients=coefficients,
      intercept=intercept- (np.log(p / (1. - p))),
      action_set=action_set,
      x=x
)

In [15]:
output_1 = rb.fit()
output_1

{'cost': 0.1367708694724742,
 'feasible': True,
 'status': 'integer optimal solution',
 'costs': array([ 0.        , -0.        ,  0.        ,  0.13677087,  0.13405588,
         0.11404178,  0.        ]),
 'actions': array([  0.,  -0.,   0., 796., -18.,  -7.,   0.]),
 'upperbound': 0.13682611973707492,
 'lowerbound': 0.13682611973707495,
 'gap': 0.0,
 'iterations': 38,
 'nodes_processed': 0,
 'nodes_remaining': 0,
 'runtime': 0.0}

In [16]:
pd.Series(output_1['actions'], index=X.columns).to_frame('Actions')

Unnamed: 0,Actions
Married,0.0
Education,-0.0
Self_Employed,0.0
ApplicantIncome,796.0
LoanAmount,-18.0
Loan_Amount_Term,-7.0
isMale,0.0


In [17]:
clf.predict_proba([X.loc[denied_individuals[0]] + pd.Series(output_1['actions'], index=X.columns)])[:, 1]



array([0.8000485])