# Introduction

Dataset Information
This dataset contains information on default payments, demographic factors, credit data, history of payment, and bill statements of credit card clients in Taiwan from April 2005 to September 2005. (<a href="https://www.kaggle.com/uciml/default-of-credit-card-clients-dataset" target="blank_">Source</a>)

Content
There are 25 variables:

    ID: ID of each client
    LIMIT_BAL: Amount of given credit in NT dollars (includes individual and family/supplementary credit
    SEX: Gender (1=male, 2=female)
    EDUCATION: (1=graduate school, 2=university, 3=high school, 4=others, 5=unknown, 6=unknown)
    MARRIAGE: Marital status (1=married, 2=single, 3=others)
    AGE: Age in years
    PAY_0: Repayment status in September, 2005 (-1=pay duly, 1=payment delay for one month, 2=payment delay for two months, … 8=payment delay for eight months, 9=payment delay for nine months and above)
    PAY_2: Repayment status in August, 2005 (scale same as above)
    PAY_3: Repayment status in July, 2005 (scale same as above)
    PAY_4: Repayment status in June, 2005 (scale same as above)
    PAY_5: Repayment status in May, 2005 (scale same as above)
    PAY_6: Repayment status in April, 2005 (scale same as above)
    BILL_AMT1: Amount of bill statement in September, 2005 (NT dollar)
    BILL_AMT2: Amount of bill statement in August, 2005 (NT dollar)
    BILL_AMT3: Amount of bill statement in July, 2005 (NT dollar)
    BILL_AMT4: Amount of bill statement in June, 2005 (NT dollar)
    BILL_AMT5: Amount of bill statement in May, 2005 (NT dollar)
    BILL_AMT6: Amount of bill statement in April, 2005 (NT dollar)
    PAY_AMT1: Amount of previous payment in September, 2005 (NT dollar)
    PAY_AMT2: Amount of previous payment in August, 2005 (NT dollar)
    PAY_AMT3: Amount of previous payment in July, 2005 (NT dollar)
    PAY_AMT4: Amount of previous payment in June, 2005 (NT dollar)
    PAY_AMT5: Amount of previous payment in May, 2005 (NT dollar)
    PAY_AMT6: Amount of previous payment in April, 2005 (NT dollar)
    default.payment.next.month: Default payment (1=yes, 0=no)

In [145]:
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold

from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# 1. Import Dataset

In [183]:
df = pd.read_csv('dataset/UCI_Credit_Card.csv')
df.columns = [i.lower().replace('.','_') for i in df.columns]
df['default_payment_next_month'] = [0 if i==1 else 1 for i in df['default_payment_next_month']]
df = df.drop(columns=['id'])

print(df.info())
print(df.shape)
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 24 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   limit_bal                   30000 non-null  float64
 1   sex                         30000 non-null  int64  
 2   education                   30000 non-null  int64  
 3   marriage                    30000 non-null  int64  
 4   age                         30000 non-null  int64  
 5   pay_0                       30000 non-null  int64  
 6   pay_2                       30000 non-null  int64  
 7   pay_3                       30000 non-null  int64  
 8   pay_4                       30000 non-null  int64  
 9   pay_5                       30000 non-null  int64  
 10  pay_6                       30000 non-null  int64  
 11  bill_amt1                   30000 non-null  float64
 12  bill_amt2                   30000 non-null  float64
 13  bill_amt3                   300

Unnamed: 0,limit_bal,sex,education,marriage,age,pay_0,pay_2,pay_3,pay_4,pay_5,...,bill_amt4,bill_amt5,bill_amt6,pay_amt1,pay_amt2,pay_amt3,pay_amt4,pay_amt5,pay_amt6,default_payment_next_month
0,20000.0,2,2,1,24,2,2,-1,-1,-2,...,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,0
1,120000.0,2,2,2,26,-1,2,0,0,0,...,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,0
2,90000.0,2,2,2,34,0,0,0,0,0,...,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,1
3,50000.0,2,2,1,37,0,0,0,0,0,...,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,1
4,50000.0,1,2,1,57,-1,0,-1,0,0,...,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,1


# 2. Preprocessing

The steps in this process attempt to split each column in the dataset into bins. Then, each bin is assigned with Weight of Evidence (WOE) value using the proportion of good and bad applicants. WOE value is then used to develop Information Value (IV) so that variable importance could be performed.

In [184]:
all_cols = [i for i in df.columns if i != 'default_payment_next_month']
df_woe = df.copy()
bin_names = []
woe = []
iv = []

for i in all_cols:
    print(i)
    
    var_name = str(i)
    var_bins = var_name + '_bin'
    bin_names.append(var_bins)

    df_woe[var_bins] = pd.qcut(df[i], q=4, duplicates='drop')

    index = df_woe.groupby(var_bins).count().index
    count = df_woe.groupby(var_bins).count()['default_payment_next_month'].values
    good = df_woe.groupby(var_bins).sum()['default_payment_next_month'].values
    bad = count - good

    woe_temp = pd.DataFrame()
    woe_temp['group'] = index
    woe_temp['count'] = count
    woe_temp['good'] = good
    woe_temp['bad'] = bad
    woe_temp['total_distri'] = woe_temp['count']/sum(woe_temp['count'])
    woe_temp['good_distri'] = woe_temp['good']/sum(woe_temp['good'])
    woe_temp['bad_distri'] = woe_temp['bad']/sum(woe_temp['bad'])
    woe_temp['WOE'] = np.log(woe_temp['good_distri'] / woe_temp['bad_distri'])
    woe_temp['WOE%'] = woe_temp['WOE'] * 100
    
    for j in range(len(woe_temp['group'])):
        woe.append({'feature': var_bins, 'bin': woe_temp['group'][j], 'woe_val': woe_temp['WOE%'][j]})
    
    iv_val = sum((woe_temp['good_distri'] - woe_temp['bad_distri']) * woe_temp['WOE'])
    iv.append({'feature': var_name, 'IV': iv_val})
        
iv = pd.DataFrame(iv)
woe = pd.DataFrame(woe)

limit_bal
sex
education
marriage
age
pay_0
pay_2
pay_3
pay_4
pay_5
pay_6
bill_amt1
bill_amt2
bill_amt3
bill_amt4
bill_amt5
bill_amt6
pay_amt1
pay_amt2
pay_amt3
pay_amt4
pay_amt5
pay_amt6


List of WOE value for each bin in each feature

In [185]:
woe[:20]

Unnamed: 0,feature,bin,woe_val
0,limit_bal_bin,"(9999.999, 50000.0]",-49.51272
1,limit_bal_bin,"(50000.0, 140000.0]",-14.495754
2,limit_bal_bin,"(140000.0, 240000.0]",30.239052
3,limit_bal_bin,"(240000.0, 1000000.0]",55.822579
4,sex_bin,"(0.999, 2.0]",0.0
5,education_bin,"(-0.001, 1.0]",17.777649
6,education_bin,"(1.0, 2.0]",-9.141557
7,education_bin,"(2.0, 6.0]",-8.647306
8,marriage_bin,"(-0.001, 1.0]",-7.37037
9,marriage_bin,"(1.0, 2.0]",7.056338


In [186]:
cols_to_drop = [i for i in df_woe.columns if i[-4:] != '_bin']
df_woe = df_woe.drop(columns=cols_to_drop)
df_woe

Unnamed: 0,limit_bal_bin,sex_bin,education_bin,marriage_bin,age_bin,pay_0_bin,pay_2_bin,pay_3_bin,pay_4_bin,pay_5_bin,...,bill_amt3_bin,bill_amt4_bin,bill_amt5_bin,bill_amt6_bin,pay_amt1_bin,pay_amt2_bin,pay_amt3_bin,pay_amt4_bin,pay_amt5_bin,pay_amt6_bin
0,"(9999.999, 50000.0]","(0.999, 2.0]","(1.0, 2.0]","(-0.001, 1.0]","(20.999, 28.0]","(0.0, 8.0]","(0.0, 8.0]","(-2.001, -1.0]","(-2.001, -1.0]","(-2.001, -1.0]",...,"(-157264.001, 2666.25]","(-170000.001, 2326.75]","(-81334.001, 1763.0]","(-339603.001, 1256.0]","(-0.001, 1000.0]","(-0.001, 833.0]","(-0.001, 390.0]","(-0.001, 296.0]","(-0.001, 252.5]","(-0.001, 117.75]"
1,"(50000.0, 140000.0]","(0.999, 2.0]","(1.0, 2.0]","(1.0, 2.0]","(20.999, 28.0]","(-2.001, -1.0]","(0.0, 8.0]","(-1.0, 0.0]","(-1.0, 0.0]","(-1.0, 0.0]",...,"(2666.25, 20088.5]","(2326.75, 19052.0]","(1763.0, 18104.5]","(1256.0, 17071.0]","(-0.001, 1000.0]","(833.0, 2009.0]","(390.0, 1800.0]","(296.0, 1500.0]","(-0.001, 252.5]","(1500.0, 4000.0]"
2,"(50000.0, 140000.0]","(0.999, 2.0]","(1.0, 2.0]","(1.0, 2.0]","(28.0, 34.0]","(-1.0, 0.0]","(-1.0, 0.0]","(-1.0, 0.0]","(-1.0, 0.0]","(-1.0, 0.0]",...,"(2666.25, 20088.5]","(2326.75, 19052.0]","(1763.0, 18104.5]","(1256.0, 17071.0]","(1000.0, 2100.0]","(833.0, 2009.0]","(390.0, 1800.0]","(296.0, 1500.0]","(252.5, 1500.0]","(4000.0, 528666.0]"
3,"(9999.999, 50000.0]","(0.999, 2.0]","(1.0, 2.0]","(-0.001, 1.0]","(34.0, 41.0]","(-1.0, 0.0]","(-1.0, 0.0]","(-1.0, 0.0]","(-1.0, 0.0]","(-1.0, 0.0]",...,"(20088.5, 60164.75]","(19052.0, 54506.0]","(18104.5, 50190.5]","(17071.0, 49198.25]","(1000.0, 2100.0]","(2009.0, 5000.0]","(390.0, 1800.0]","(296.0, 1500.0]","(252.5, 1500.0]","(117.75, 1500.0]"
4,"(9999.999, 50000.0]","(0.999, 2.0]","(1.0, 2.0]","(-0.001, 1.0]","(41.0, 79.0]","(-2.001, -1.0]","(-1.0, 0.0]","(-2.001, -1.0]","(-1.0, 0.0]","(-1.0, 0.0]",...,"(20088.5, 60164.75]","(19052.0, 54506.0]","(18104.5, 50190.5]","(17071.0, 49198.25]","(1000.0, 2100.0]","(5000.0, 1684259.0]","(4505.0, 896040.0]","(4013.25, 621000.0]","(252.5, 1500.0]","(117.75, 1500.0]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,"(140000.0, 240000.0]","(0.999, 2.0]","(2.0, 6.0]","(-0.001, 1.0]","(34.0, 41.0]","(-1.0, 0.0]","(-1.0, 0.0]","(-1.0, 0.0]","(-1.0, 0.0]","(-1.0, 0.0]",...,"(60164.75, 1664089.0]","(54506.0, 891586.0]","(18104.5, 50190.5]","(1256.0, 17071.0]","(5006.0, 873552.0]","(5000.0, 1684259.0]","(4505.0, 896040.0]","(1500.0, 4013.25]","(4031.5, 426529.0]","(117.75, 1500.0]"
29996,"(140000.0, 240000.0]","(0.999, 2.0]","(2.0, 6.0]","(1.0, 2.0]","(41.0, 79.0]","(-2.001, -1.0]","(-2.001, -1.0]","(-2.001, -1.0]","(-2.001, -1.0]","(-1.0, 0.0]",...,"(2666.25, 20088.5]","(2326.75, 19052.0]","(1763.0, 18104.5]","(-339603.001, 1256.0]","(1000.0, 2100.0]","(2009.0, 5000.0]","(4505.0, 896040.0]","(-0.001, 296.0]","(-0.001, 252.5]","(-0.001, 117.75]"
29997,"(9999.999, 50000.0]","(0.999, 2.0]","(1.0, 2.0]","(1.0, 2.0]","(34.0, 41.0]","(0.0, 8.0]","(0.0, 8.0]","(0.0, 8.0]","(-2.001, -1.0]","(-1.0, 0.0]",...,"(2666.25, 20088.5]","(19052.0, 54506.0]","(18104.5, 50190.5]","(17071.0, 49198.25]","(-0.001, 1000.0]","(-0.001, 833.0]","(4505.0, 896040.0]","(4013.25, 621000.0]","(1500.0, 4031.5]","(1500.0, 4000.0]"
29998,"(50000.0, 140000.0]","(0.999, 2.0]","(2.0, 6.0]","(-0.001, 1.0]","(34.0, 41.0]","(0.0, 8.0]","(-2.001, -1.0]","(-1.0, 0.0]","(-1.0, 0.0]","(-1.0, 0.0]",...,"(60164.75, 1664089.0]","(19052.0, 54506.0]","(1763.0, 18104.5]","(17071.0, 49198.25]","(5006.0, 873552.0]","(2009.0, 5000.0]","(390.0, 1800.0]","(1500.0, 4013.25]","(4031.5, 426529.0]","(1500.0, 4000.0]"


List of IV for each feature, sorted from the highest to lowest. High IV means high predictiveness and low IV indicates low predictive power. The table below shows how IV value and its corresponding predictive power.

<img src="https://miro.medium.com/max/1156/1*5S_5aAHWe0_knDGZUK3W8w.png" />

In this case, only features with IV > 0.02 are kept and used to fit the logistic regression model.

In [187]:
iv.sort_values('IV', ascending=False)

Unnamed: 0,feature,IV
5,pay_0,0.692613
6,pay_2,0.541747
7,pay_3,0.409001
8,pay_4,0.355175
9,pay_5,0.329335
10,pay_6,0.281748
0,limit_bal,0.160107
17,pay_amt1,0.142889
18,pay_amt2,0.128998
19,pay_amt3,0.113012


In [188]:
important_vars = iv[iv.sort_values('IV', ascending=False)['IV'] > 0.02]['feature'].values
important_vars

  important_vars = iv[iv.sort_values('IV', ascending=False)['IV'] > 0.02]['feature'].values


array(['limit_bal', 'pay_0', 'pay_2', 'pay_3', 'pay_4', 'pay_5', 'pay_6',
       'pay_amt1', 'pay_amt2', 'pay_amt3', 'pay_amt4', 'pay_amt5',
       'pay_amt6'], dtype=object)

In [189]:
%%time

df_woe_val = pd.DataFrame()

for i in important_vars:
    i = i + '_bin'
    temp_col = []
    for j in df_woe[i]:     
        temp_col.append(woe[(woe['feature']==i) & (woe['bin']==j)]['woe_val'].values[0])

    df_woe_val[i] = temp_col
    
df_woe_val

Wall time: 6min 20s


Unnamed: 0,limit_bal_bin,pay_0_bin,pay_2_bin,pay_3_bin,pay_4_bin,pay_5_bin,pay_6_bin,pay_amt1_bin,pay_amt2_bin,pay_amt3_bin,pay_amt4_bin,pay_amt5_bin,pay_amt6_bin
0,-49.512720,-127.042078,-149.136700,34.179830,30.238570,27.339247,23.131774,-49.052646,-44.693836,-43.356328,-38.144277,-33.201313,-35.031618
1,-14.495754,42.818919,-149.136700,29.529737,23.554247,20.091018,-135.164118,-49.052646,-7.304752,-4.903270,-9.509035,-33.201313,8.435462
2,-14.495754,65.906092,40.608318,29.529737,23.554247,20.091018,20.146510,0.152897,-7.304752,-4.903270,-9.509035,-9.358715,48.796588
3,-49.512720,65.906092,40.608318,29.529737,23.554247,20.091018,20.146510,0.152897,10.215988,-4.903270,-9.509035,-9.358715,-11.019515
4,-49.512720,42.818919,40.608318,34.179830,23.554247,20.091018,20.146510,0.152897,58.341509,51.821611,47.069483,-9.358715,-11.019515
...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,30.239052,65.906092,40.608318,29.529737,23.554247,20.091018,20.146510,57.397183,58.341509,51.821611,11.776175,43.875232,-11.019515
29996,30.239052,42.818919,33.738497,34.179830,30.238570,20.091018,20.146510,0.152897,10.215988,51.821611,-38.144277,-33.201313,-35.031618
29997,-49.512720,-127.042078,-149.136700,-134.942093,30.238570,20.091018,20.146510,-49.052646,-44.693836,51.821611,47.069483,7.543310,8.435462
29998,-14.495754,-127.042078,33.738497,29.529737,23.554247,20.091018,23.131774,57.397183,10.215988,-4.903270,11.776175,43.875232,8.435462


# 3. Fitting a Model

A logistic regression model is used so that the intercept and coefficient of each features can be obtained to develop scorecard.

In [190]:
df_woe_val['default_payment_next_month'] = df['default_payment_next_month']
df_woe_val = df_woe_val.astype('category')

df_woe_val2 = pd.get_dummies(df_woe_val.drop(columns=['default_payment_next_month']))
df_woe_val2['default_payment_next_month'] = df['default_payment_next_month']
df_woe_val2

Unnamed: 0,limit_bal_bin_-49.51271951073218,limit_bal_bin_-14.495753504712733,limit_bal_bin_30.23905163030277,limit_bal_bin_55.82257861509897,pay_0_bin_-127.04207847057339,pay_0_bin_42.81891895986019,pay_0_bin_65.90609157281699,pay_2_bin_-149.13669975290432,pay_2_bin_33.738497448902635,pay_2_bin_40.60831787444083,...,pay_amt4_bin_47.06948330099579,pay_amt5_bin_-33.20131283212806,pay_amt5_bin_-9.358714706772377,pay_amt5_bin_7.543309511635318,pay_amt5_bin_43.875232073641016,pay_amt6_bin_-35.031618292856756,pay_amt6_bin_-11.019515211653868,pay_amt6_bin_8.435462204718565,pay_amt6_bin_48.79658775404443,default_payment_next_month
0,1,0,0,0,1,0,0,1,0,0,...,0,1,0,0,0,1,0,0,0,0
1,0,1,0,0,0,1,0,1,0,0,...,0,1,0,0,0,0,0,1,0,0
2,0,1,0,0,0,0,1,0,0,1,...,0,0,1,0,0,0,0,0,1,1
3,1,0,0,0,0,0,1,0,0,1,...,0,0,1,0,0,0,1,0,0,1
4,1,0,0,0,0,1,0,0,0,1,...,1,0,1,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,0,0,1,0,0,0,1,0,0,1,...,0,0,0,0,1,0,1,0,0,1
29996,0,0,1,0,0,1,0,0,1,0,...,0,1,0,0,0,1,0,0,0,1
29997,1,0,0,0,1,0,0,1,0,0,...,1,0,0,1,0,0,0,1,0,0
29998,0,1,0,0,1,0,0,0,1,0,...,0,0,0,0,1,0,0,1,0,0


In [191]:
X = df_woe_val2.drop('default_payment_next_month', axis=1)
y = df_woe_val2['default_payment_next_month']

X_train, X_test, y_train, y_test = train_test_split(
    X, 
    y, 
    test_size=0.2, 
    random_state=21, 
    stratify=y
)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

LogisticRegression(max_iter=1000)

In [192]:
prediction = model.predict(X_test)
print('Accuracy: ', accuracy_score(y_test, prediction))

Accuracy:  0.8065


# 4. Building a Credit Scorecard

In order to develop scorecard, each score for all features is calculated using a formula with several parameters such as:

    score_i = (b_i * woe_i + (a/n)) * factor + (offset/n)

    a = logistic regression intercept
    b_i = logistic regression coefficient for variable x_i
    woe_i = WOE of variable x_i
    n = number of variables used to fit the model
    cut_off = cut off va|lue that determines whether applicant's credit should be granted or not
    factor & offset = scaling parameter, where

    factor = pdo/ln(2)
    offset = cut_off - (factor * ln(50)) 
    
Pdo means to double the odds. In this case, the cut_off value or target score is set to 600 with odds of 50:1 of good applicants to bad applicants.

In [193]:
a = model.intercept_[0]
b = model.coef_[0]
n = len(X.columns)
factor = 20/np.log(2)
cut_off = 600
offset = cut_off - factor * np.log(50)

print('factor', round(factor, 2))
print('offset', round(offset, 2))

factor 28.85
offset 487.12


In [194]:
scorecard = pd.DataFrame()

filtered_woe = woe[woe['feature'].isin([i + '_bin' for i in important_vars])].reset_index(drop=True)

scorecard['variable'] = [i[:-4] for i in filtered_woe['feature']]
scorecard['variable_name'] = coefficients['feature']
scorecard['bin'] = filtered_woe['bin']

point = []
for i in range(len(filtered_woe)):
    woe_i = filtered_woe['woe_val'][i]
    score_i = (b[i] * woe_i + (a/n)) * factor + (offset/n)
    point.append(score_i)

scorecard['scorecard_point'] = point
scorecard

Unnamed: 0,variable,variable_name,bin,scorecard_point
0,limit_bal,limit_bal_bin_-55.82257861509898,"(9999.999, 50000.0]",270.11494
1,limit_bal,limit_bal_bin_-30.23905163030277,"(50000.0, 140000.0]",53.733871
2,limit_bal,limit_bal_bin_14.495753504712738,"(140000.0, 240000.0]",90.240231
3,limit_bal,limit_bal_bin_49.51271951073219,"(240000.0, 1000000.0]",322.189915
4,pay_0,pay_0_bin_-65.90609157281699,"(-2.001, -1.0]",-1145.104935
5,pay_0,pay_0_bin_-42.818918959860184,"(-1.0, 0.0]",-151.098195
6,pay_0,pay_0_bin_127.04207847057339,"(0.0, 8.0]",-3735.38753
7,pay_2,pay_2_bin_-40.60831787444082,"(-2.001, -1.0]",10.565014
8,pay_2,pay_2_bin_-33.73849744890261,"(-1.0, 0.0]",512.46841
9,pay_2,pay_2_bin_149.13669975290432,"(0.0, 8.0]",1845.6935


Example #1: Given a set of data from a new customer, determine whether the credit application should be granted or rejected.

In [195]:
random.seed(10)

r = random.randrange(0,len(df))
new_customer = df.iloc[r]

customer_score = []
for i in range(len(new_customer)):
    for j in range(len(scorecard)):
        if new_customer.index.values[i] == scorecard['variable'][j] and new_customer.values[i] in scorecard['bin'][j]:
            customer_score.append({
                'variable': new_customer.index.values[i], 
                'value': new_customer.values[i], 
                'bin': scorecard['bin'][j], 
                'score': scorecard['scorecard_point'][j]
            })

customer_score = pd.DataFrame(customer_score)
total_score = sum(customer_score['score'])

if total_score >= cut_off:
    print('Score is', round(total_score, 2), '| Decision: Grant Credit')
else:
    print('Score is', round(total_score, 2), '| Decision: Reject Credit')
    
customer_score

Score is 1173.85 | Decision: Grant Credit


Unnamed: 0,variable,value,bin,score
0,limit_bal,160000.0,"(140000.0, 240000.0]",90.240231
1,pay_0,0.0,"(-1.0, 0.0]",-151.098195
2,pay_2,0.0,"(-1.0, 0.0]",512.46841
3,pay_3,0.0,"(-1.0, 0.0]",44.051487
4,pay_4,0.0,"(-1.0, 0.0]",29.635535
5,pay_5,-1.0,"(-2.001, -1.0]",-161.243344
6,pay_6,0.0,"(-1.0, 0.0]",122.508158
7,pay_amt1,100000.0,"(5006.0, 873552.0]",212.031324
8,pay_amt2,10000.0,"(5000.0, 1684259.0]",231.015122
9,pay_amt3,6749.0,"(4505.0, 896040.0]",158.598461


Example #2: Given a set of data from a new customer, determine whether the credit application should be granted or rejected.

In [203]:
random.seed(20)

r = random.randrange(0,len(df))
new_customer = df.iloc[r]

customer_score = []
for i in range(len(new_customer)):
    for j in range(len(scorecard)):
        if new_customer.index.values[i] == scorecard['variable'][j] and new_customer.values[i] in scorecard['bin'][j]:
            customer_score.append({
                'variable': new_customer.index.values[i], 
                'value': new_customer.values[i], 
                'bin': scorecard['bin'][j], 
                'score': scorecard['scorecard_point'][j]
            })

customer_score = pd.DataFrame(customer_score)
total_score = sum(customer_score['score'])

if total_score >= cut_off:
    print('Score is', round(total_score, 2), '| Decision: Grant Credit')
else:
    print('Score is', round(total_score, 2), '| Decision: Reject Credit')

customer_score

Score is -1032.9 | Decision: Reject Credit


Unnamed: 0,variable,value,bin,score
0,limit_bal,50000.0,"(9999.999, 50000.0]",270.11494
1,pay_0,-1.0,"(-2.001, -1.0]",-1145.104935
2,pay_2,-1.0,"(-2.001, -1.0]",10.565014
3,pay_3,-1.0,"(-2.001, -1.0]",-256.560216
4,pay_4,0.0,"(-1.0, 0.0]",29.635535
5,pay_5,-1.0,"(-2.001, -1.0]",-161.243344
6,pay_6,-1.0,"(-2.001, -1.0]",-182.253985
7,pay_amt1,0.0,"(-0.001, 1000.0]",98.256166
8,pay_amt2,46257.0,"(5000.0, 1684259.0]",231.015122
9,pay_amt3,2200.0,"(1800.0, 4505.0]",24.754289


Source
<ul>
    <li><a href="https://towardsdatascience.com/intro-to-credit-scorecard-9afeaaa3725f" target="blank_">Intro to Credit Scorecard</a></li>
</ul>