# Scenario - Credit Card Approval

In [36]:
import os
import sys
from sklearn.metrics import accuracy_score
sys.path.insert(1, "../")
try:
    from helpers import save_solution
except Exception as e:
    print(e)

In [37]:
from factsheet import Factsheet
factsheet = Factsheet()

factsheet.set_question_fairness(3)
factsheet.set_protected_feature("Group")
factsheet.set_protected_group("lambda x: x[protected_feature] == 1")
factsheet.set_target_column("Target")
factsheet.set_favorable_outcome("lambda x: x[target_column] == 1")




In [3]:
# Load all necessary packages
#import os
#import sys
#sys.path.insert(1, "../")  
  
import numpy as np
np.random.seed(0)

import matplotlib.pyplot as plt

import pandas as pd
import sklearn
import pickle
from aif360.datasets import GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.preprocessing import Reweighing

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from IPython.display import Markdown, display

In [4]:
data = pd.read_csv("./data/data.csv", nrows=60000)
data.head(5)

Unnamed: 0,Num_Children,Group,Income,Own_Car,Own_Housing,Target
0,4,0,36151,0,0,0
1,0,0,36095,1,0,0
2,2,1,62110,1,1,0
3,0,1,73644,1,0,0
4,3,0,99146,0,0,1


In [5]:
print("Number of samples: {}".format(len(data)))

Number of samples: 60000


In [6]:
target_column = "Target"

In [7]:
X = data.loc[:, data.columns != target_column]
X.head(5)

Unnamed: 0,Num_Children,Group,Income,Own_Car,Own_Housing
0,4,0,36151,0,0
1,0,0,36095,1,0
2,2,1,62110,1,1
3,0,1,73644,1,0
4,3,0,99146,0,0


In [8]:
y = data.loc[:, data.columns == target_column]
y.head(5)

Unnamed: 0,Target
0,0
1,0
2,0
3,0
4,1


In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
train_data = pd.concat([X_train, y_train], axis=1)
test_data = pd.concat([X_test, y_test], axis=1)

### Statistical Parity Difference

In [24]:
import operator

target_column = "Target"
favorable_outome = lambda x: x[target_column]==1

protected_feature = "Group"
protected = lambda x: x[protected_feature]==0
protected_indices = data.apply(protected, axis=1)

In [25]:
minority = data[protected_indices]
minority_size = len(minority)
favored_minority = minority[minority.apply(favorable_outome, axis=1)]
favored_minority_size = len(favored_minority)
favored_minority_ratio = favored_minority_size/minority_size
print("{0}/{1} = {2}".format(favored_minority_size, minority_size, favored_minority_ratio))

49389/249675 = 0.1978131571042355


In [26]:
majority = data[~protected_indices]
majority_size = len(majority)
favored_majority = majority[majority.apply(favorable_outome, axis=1)]
favored_majority_size = len(favored_majority)
favored_majority_ratio = favored_majority_size/majority_size
print("{0}/{1} = {2}".format(favored_majority_size, majority_size, favored_majority_ratio))

143924/250325 = 0.574948566863078


In [27]:
statistical_parity_difference = favored_minority_ratio - favored_majority_ratio
print(statistical_parity_difference)

-0.3771354097588425


### 1. SVM

In [45]:
from sklearn import svm
support_vector_machine_01 = svm.SVC(verbose=1, probability=True)
support_vector_machine_01.fit(X_train, y_train.values.ravel())

[LibSVM].......
*
optimization finished, #iter = 7748
obj = -7336.730041, rho = -1.542540
nSV = 7372, nBSV = 7370
Total nSV = 7372
.......
*
optimization finished, #iter = 7822
obj = -7511.042528, rho = -1.679441
nSV = 7546, nBSV = 7544
Total nSV = 7546
.......
*.
*
optimization finished, #iter = 7875
obj = -7485.481246, rho = -2.001766
nSV = 7522, nBSV = 7520
Total nSV = 7522
........
*.
*
optimization finished, #iter = 8083
obj = -7514.587675, rho = -2.179099
nSV = 7552, nBSV = 7550
Total nSV = 7552
.......
*.
*
optimization finished, #iter = 7838
obj = -7491.907466, rho = -1.884757
nSV = 7528, nBSV = 7526
Total nSV = 7528
.........
*
optimization finished, #iter = 9622
obj = -9326.399502, rho = 2.318357
nSV = 9366, nBSV = 9364
Total nSV = 9366


SVC(probability=True, verbose=1)

In [46]:
y_pred = support_vector_machine_01.predict(X_test)

In [47]:
accuracy_score(y_test, y_pred)

0.9158333333333334

In [48]:
save_solution("credit_card_approval", "jans_support_vector_machine_01", support_vector_machine_01, train_data, test_data, factsheet, to_webapp=True)

base directory /Users/jankreischer/Desktop/Masterproject/Shared/scenarios/credit_card_approval/../../webapp/scenarios/credit_card_approval/solutions/jans_support_vector_machine_01


In [49]:
support_vector_machine_02 = svm.SVC(verbose=1, probability=True)
support_vector_machine_02.fit(X_train, y_train.values.ravel())

[LibSVM].......
*
optimization finished, #iter = 7891
obj = -7503.632622, rho = -1.419943
nSV = 7538, nBSV = 7536
Total nSV = 7538
.......
*.
*
optimization finished, #iter = 7794
obj = -7449.764649, rho = -2.206482
nSV = 7488, nBSV = 7486
Total nSV = 7488
.......
*
optimization finished, #iter = 7736
obj = -7458.689983, rho = -2.368770
nSV = 7498, nBSV = 7496
Total nSV = 7498
.......
*
optimization finished, #iter = 7807
obj = -7472.208202, rho = -1.615098
nSV = 7506, nBSV = 7503
Total nSV = 7506
.......
**.
*
optimization finished, #iter = 7966
obj = -7455.821821, rho = -1.653242
nSV = 7490, nBSV = 7488
Total nSV = 7490
.........
*
optimization finished, #iter = 9622
obj = -9326.399502, rho = 2.318357
nSV = 9366, nBSV = 9364
Total nSV = 9366


SVC(probability=True, verbose=1)

In [44]:
save_solution("credit_card_approval", "jans_support_vector_machine_02", support_vector_machine_02, train_data, test_data, factsheet, to_webapp=True)

base directory /Users/jankreischer/Desktop/Masterproject/Shared/scenarios/credit_card_approval/../../webapp/scenarios/credit_card_approval/solutions/jans_support_vector_machine_02


### 2. k-Nearest Neighbors

In [9]:
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(X_train, y_train.values.ravel());

In [10]:
y_pred = clf.predict(X_test)

In [11]:
print(confusion_matrix(y_test,y_pred))

[[58107  3120]
 [ 3371 35402]]


In [12]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.95      0.95      0.95     61227
           1       0.92      0.91      0.92     38773

    accuracy                           0.94    100000
   macro avg       0.93      0.93      0.93    100000
weighted avg       0.94      0.94      0.94    100000



In [38]:
error_rate = []

for i in range(1,11):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train,y_train.values.ravel())
    y_pred = knn.predict(X_test)
    error_rate.append(np.mean(y_pred != y_test.values.ravel()))

KeyboardInterrupt: 

In [None]:
plt.figure(figsize=(10,6))
plt.plot(range(1,11),error_rate,color='blue', linestyle='dashed', marker='o',
 markerfacecolor='red', markersize=10)
plt.title('Error Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Error Rate')

In [13]:
# NOW WITH K=20
clf_knn_01 = KNeighborsClassifier(n_neighbors=20)
clf_knn_01.fit(X_train,y_train.values.ravel())
pred = clf_knn_01.predict(X_test)

print(confusion_matrix(y_test,pred))

print(classification_report(y_test,pred))

[[57813  3414]
 [ 4666 34107]]
              precision    recall  f1-score   support

           0       0.93      0.94      0.93     61227
           1       0.91      0.88      0.89     38773

    accuracy                           0.92    100000
   macro avg       0.92      0.91      0.91    100000
weighted avg       0.92      0.92      0.92    100000



In [14]:
save_solution("credit_card_approval", "jans_knn_classifier_01", clf_knn_01, train_data, test_data, factsheet, to_webapp=True)

base directory /Users/jankreischer/Desktop/Masterproject/Shared/scenarios/credit_card_approval/../../webapp/scenarios/credit_card_approval/solutions/jans_knn_classifier_01


### Testing Lambda Expressions

In [33]:
var = 3
inc_by_one = eval("lambda x: x+var")
numbers = [1,2,3,4]
result = list(map(inc_by_one, numbers))
print(result)

[4, 5, 6, 7]


In [185]:
import inspect

protected_feature = "Group"
protected = "lambda x: x[protected_feature]==1"
print(protected)
protected = eval(protected, {"protected_feature": protected_feature})
print(protected)

lambda x: x[protected_feature]==1
<function <lambda> at 0x7fc865aead30>


In [186]:
minority = data[data.apply(protected, axis=1)]

In [187]:
minority.head(5)

Unnamed: 0,Num_Children,Group,Income,Own_Car,Own_Housing,Target
2,2,1,62110,1,1,0
3,0,1,73644,1,0,0
5,0,1,114086,1,0,1
7,2,1,83130,1,1,1
8,2,1,81965,1,0,1


In [19]:
factsheet = {
    "general": {}
}
factsheet["general"]["target_column"] = "target"
print(factsheet)


{'general': {'target_column': 'target'}}


In [21]:
keys = ["general", "target_column"]

for key in key 
factsheet.get(key)

TypeError: unhashable type: 'list'

In [24]:
question_fairness = 0
question_fairness in list(range(1,5))

False

### 3. Random Forest Classifier

In [25]:
from sklearn.ensemble import RandomForestClassifier

In [26]:
random_forest_classifier_01 = RandomForestClassifier(max_depth=2, random_state=0)
random_forest_classifier_01.fit(X_train, y_train.values.ravel())

RandomForestClassifier(max_depth=2, random_state=0)

In [27]:
acc = random_forest_classifier_01.score(X_test, y_test.values.ravel())
print(acc)

0.9405833333333333


In [28]:
save_solution("credit_card_approval", "jans_random_forest_classifier_01", random_forest_classifier_01, train_data, test_data, factsheet, to_webapp=True)

base directory /Users/jankreischer/Desktop/Masterproject/Shared/scenarios/credit_card_approval/../../webapp/scenarios/credit_card_approval/solutions/jans_random_forest_classifier_01


In [29]:
random_forest_classifier_02 = RandomForestClassifier(max_depth=5, random_state=0)
random_forest_classifier_02.fit(X_train, y_train.values.ravel())

RandomForestClassifier(max_depth=5, random_state=0)

In [30]:
acc = random_forest_classifier_02.score(X_test, y_test.values.ravel())
print(acc)

0.9700833333333333


In [31]:
save_solution("credit_card_approval", "jans_random_forest_classifier_02", random_forest_classifier_02, train_data, test_data, factsheet, to_webapp=True)

base directory /Users/jankreischer/Desktop/Masterproject/Shared/scenarios/credit_card_approval/../../webapp/scenarios/credit_card_approval/solutions/jans_random_forest_classifier_02
