# Import Library

In [50]:
from scorecard import Scorecard
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression

# Import Data

In [51]:
data = pd.read_csv('data.csv')
data = data.drop(columns=['Unnamed: 0'])
data['submitDate'] = pd.to_datetime(data['submitDate'], errors='coerce', utc=True)
data

Unnamed: 0,id,submitDate,flag,Xnum_1,Xnum_2,Xnum_3,Xnum_4,Xnum_5,Xcat_1
0,1,2025-01-01 00:00:00+00:00,0.0,338.450624,1.0,0.750000,28.0,,yes
1,2,2025-01-01 00:00:00+00:00,0.0,295.273222,1.0,1.000000,0.0,,yes
2,3,2025-01-01 00:00:00+00:00,0.0,,1.0,0.750000,877.0,-999.0,
3,4,2025-01-01 00:00:00+00:00,0.0,328.080625,1.0,0.250000,-99.0,0.0,yes
4,5,2025-01-01 00:00:00+00:00,0.0,287.117858,1.0,0.250000,71.0,,yes
...,...,...,...,...,...,...,...,...,...
29995,29996,2025-01-01 00:00:00+00:00,0.0,259.727997,1.0,1.000000,0.0,3.0,yes
29996,29997,2025-02-01 00:00:00+00:00,0.0,331.296620,2.0,1.000000,0.0,,checking
29997,29998,2025-01-01 00:00:00+00:00,0.0,291.039826,1.0,0.916667,5.0,,yes
29998,29999,2025-01-01 00:00:00+00:00,0.0,282.396999,1.0,1.000000,-99.0,-999.0,yes


In [52]:
# flag distribution
data.flag.value_counts(dropna=False)

0.0    27871
1.0     2129
Name: flag, dtype: int64

In [53]:
data = data.copy()
data.Xnum_1.iloc[2]=None
data.Xcat_1.iloc[2]=None

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


# Specify Model Features / Independent Variables

In [54]:
selected_features = [
    #numerical
    'Xnum_1',
    'Xnum_2',
    'Xnum_3',
    'Xnum_4',
    'Xnum_5',
    
    #category
    'Xcat_1'
]

# Intime-OOT Split

In [55]:
sc = Scorecard(data.reset_index(drop=True), 
                    pd.to_datetime('2025-01-01', utc=True),
                    pd.to_datetime('2025-01-31', utc=True),
                    pd.to_datetime('2025-02-01', utc=True),
                    pd.to_datetime('2025-02-28', utc=True),
                    date_column='submitDate')

# Feature Binning (Optimal or Quantile Method)

In [56]:

feat_bin = []

for feat in selected_features:
    each_feat_row = (feat, 'optimal') # optimal binning
#     each_feat_row = (feat, 'quantile') # quantile binning
    feat_bin.append(each_feat_row)
    
for f in feat_bin:
    print(f)
    try:
        display(sc.transform_binning_and_pivot_woe(f[0], binning_method = f[1]))
    except Exception as e:
        print(e)
        pass

('Xnum_1', 'optimal')


flag,0.0,1.0,TOTAL,%BAD,DIST_BAD,DIST_GOOD,WOE,IV
Xnum_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"(-inf, -999.0]",2,0,2,0.0,0.0,0.000103,-4.639582,0.000476
"(-999.0, -99.0]",1293,79,1372,0.05758,0.053091,0.066267,-0.221672,0.002921
"(-99.0, -1.0]",0,0,0,,0.0,0.0,0.0,0.0
"(-1.0, 0.0]",0,0,0,,0.0,0.0,0.0,0.0
"(0.0, 271.837]",1796,343,2139,0.160355,0.230511,0.092046,0.918003,0.127111
"(271.837, 280.005]",1430,193,1623,0.118916,0.129704,0.073288,0.570851,0.032205
"(280.005, 284.241]",1090,132,1222,0.10802,0.08871,0.055863,0.462459,0.01519
"(284.241, 290.458]",1668,176,1844,0.095445,0.11828,0.085486,0.324697,0.010648
"(290.458, 294.472]",1245,114,1359,0.083885,0.076613,0.063807,0.182902,0.002342
"(294.472, 297.845]",987,76,1063,0.071496,0.051075,0.050584,0.00966,5e-06


('Xnum_2', 'optimal')


flag,0.0,1.0,TOTAL,%BAD,DIST_BAD,DIST_GOOD,WOE,IV
Xnum_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"(-inf, -999.0]",0,0,0,,0.0,0.0,0.0,0.0
"(-999.0, -99.0]",1307,81,1388,0.058357,0.054435,0.066984,-0.20744,0.002603
"(-99.0, -49.5]",0,0,0,,0.0,0.0,0.0,0.0
"(-49.5, -1.0]",0,0,0,,0.0,0.0,0.0,0.0
"(-1.0, 0.0]",2691,152,2843,0.053465,0.102151,0.137915,-0.300188,0.010736
"(0.0, 0.5]",0,0,0,,0.0,0.0,0.0,0.0
"(0.5, 1.5]",11756,852,12608,0.067576,0.572581,0.602501,-0.050936,0.001524
"(1.5, inf]",3758,403,4161,0.096852,0.270833,0.192599,0.34089,0.026669


('Xnum_3', 'optimal')


flag,0.0,1.0,TOTAL,%BAD,DIST_BAD,DIST_GOOD,WOE,IV
Xnum_3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"(-inf, -999.0]",4308,369,4677,0.078897,0.247984,0.220787,0.116164,0.003159
"(-999.0, -549.0]",0,0,0,,0.0,0.0,0.0,0.0
"(-549.0, -99.0]",121,20,141,0.141844,0.013441,0.006201,0.773452,0.005599
"(-99.0, -1.0]",0,0,0,,0.0,0.0,0.0,0.0
"(-1.0, 0.0]",461,64,525,0.121905,0.043011,0.023626,0.599063,0.011612
"(0.0, 0.268]",582,101,683,0.147877,0.067876,0.029828,0.822228,0.031285
"(0.268, 0.913]",1541,203,1744,0.116399,0.136425,0.078977,0.546611,0.031402
"(0.913, inf]",12499,731,13230,0.055253,0.491263,0.64058,-0.265393,0.039628


('Xnum_4', 'optimal')


flag,0.0,1.0,TOTAL,%BAD,DIST_BAD,DIST_GOOD,WOE,IV
Xnum_4,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"(-inf, -999.0]",4308,369,4677,0.078897,0.247984,0.220787,0.116164,0.003159
"(-999.0, -549.0]",0,0,0,,0.0,0.0,0.0,0.0
"(-549.0, -99.0]",2916,226,3142,0.071929,0.151882,0.149446,0.016164,3.9e-05
"(-99.0, -49.5]",0,0,0,,0.0,0.0,0.0,0.0
"(-49.5, -1.0]",0,0,0,,0.0,0.0,0.0,0.0
"(-1.0, 0.0]",9715,575,10290,0.055879,0.386425,0.497899,-0.253459,0.028254
"(0.0, 27.5]",571,36,607,0.059308,0.024194,0.029264,-0.190266,0.000965
"(27.5, inf]",2002,282,2284,0.123468,0.189516,0.102604,0.613597,0.053329


('Xnum_5', 'optimal')


flag,0.0,1.0,TOTAL,%BAD,DIST_BAD,DIST_GOOD,WOE,IV
Xnum_5,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"(-inf, -999.0]",11183,958,12141,0.078906,0.643817,0.573134,0.116294,0.00822
"(-999.0, -499.5]",0,0,0,,0.0,0.0,0.0,0.0
"(-499.5, -99.0]",0,0,0,,0.0,0.0,0.0,0.0
"(-99.0, -1.0]",0,0,0,,0.0,0.0,0.0,0.0
"(-1.0, 0.0]",4062,192,4254,0.045134,0.129032,0.20818,-0.478336,0.037859
"(0.0, 0.5]",0,0,0,,0.0,0.0,0.0,0.0
"(0.5, 1.5]",1993,113,2106,0.053656,0.075941,0.102142,-0.296408,0.007766
"(1.5, 2.5]",1043,73,1116,0.065412,0.049059,0.053454,-0.085799,0.000377
"(2.5, inf]",1231,152,1383,0.109906,0.102151,0.063089,0.481889,0.018823


('Xcat_1', 'optimal')


flag,0.0,1.0,TOTAL,%BAD,DIST_BAD,DIST_GOOD,WOE,IV
Xcat_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-99,4.0,0.0,4.0,0.0,0.0,0.000205,-5.327886,0.001092
-999,22.0,4.0,26.0,0.153846,0.002688,0.001128,0.868334,0.001355
-9999999999,1.0,0.0,1.0,0.0,0.0,5.1e-05,-3.95605,0.000203
checking,147.0,10.0,157.0,0.063694,0.00672,0.007534,-0.114235,9.3e-05
no,1789.0,172.0,1961.0,0.08771,0.115591,0.091687,0.231677,0.005538
yes,17549.0,1302.0,18851.0,0.069068,0.875,0.899395,-0.027499,0.000671


# Binning Adjustment

## Continuous Features

In [57]:
sc.set_binning('Xnum_1',[-np.inf,-99,270, 290,295,310,320,335,np.inf], evaluate_woe=True, binning_method='optimal')

Xnum_1 bin set into: [-inf, -99, 270, 290, 295, 310, 320, 335, inf]


flag,0.0,1.0,TOTAL,%BAD,DIST_BAD,DIST_GOOD,WOE,IV
Xnum_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"(-inf, -999.0]",2,0,2,0.0,0.0,0.000103,-4.639582,0.000476
"(-999.0, -99.0]",1293,79,1372,0.05758,0.053091,0.066267,-0.221672,0.002921
"(-99.0, -1.0]",0,0,0,,0.0,0.0,0.0,0.0
"(-1.0, 0.0]",0,0,0,,0.0,0.0,0.0,0.0
"(0.0, 271.837]",1796,343,2139,0.160355,0.230511,0.092046,0.918003,0.127111
"(271.837, 280.005]",1430,193,1623,0.118916,0.129704,0.073288,0.570851,0.032205
"(280.005, 284.241]",1090,132,1222,0.10802,0.08871,0.055863,0.462459,0.01519
"(284.241, 290.458]",1668,176,1844,0.095445,0.11828,0.085486,0.324697,0.010648
"(290.458, 294.472]",1245,114,1359,0.083885,0.076613,0.063807,0.182902,0.002342
"(294.472, 297.845]",987,76,1063,0.071496,0.051075,0.050584,0.00966,5e-06




flag,0.0,1.0,TOTAL,%BAD,DIST_BAD,DIST_GOOD,WOE,IV
Xnum_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"(-inf, -99.0]",1293,79,1372,0.05758,0.053091,0.066274,-0.221775,0.002924
"(-99.0, 270.0]",1584,300,1884,0.159236,0.201613,0.081189,0.909561,0.109533
"(270.0, 290.0]",4283,521,4804,0.108451,0.350134,0.219528,0.466834,0.060971
"(290.0, 295.0]",1538,146,1684,0.086698,0.098118,0.078831,0.21886,0.004221
"(295.0, 310.0]",6090,320,6410,0.049922,0.215054,0.312148,-0.372587,0.036176
"(310.0, 320.0]",1769,68,1837,0.037017,0.045699,0.090671,-0.685157,0.030813
"(320.0, 335.0]",1673,38,1711,0.022209,0.025538,0.085751,-1.211266,0.072934
"(335.0, inf]",1280,16,1296,0.012346,0.010753,0.065607,-1.808455,0.099202


In [58]:
sc.set_binning('Xnum_2',[-np.inf,-99,0,1,2,np.inf], evaluate_woe=False, binning_method='optimal')

Xnum_2 bin set into: [-inf, -99, 0, 1, 2, inf]


In [59]:
sc.set_binning('Xnum_3',[-np.inf,-99,0.5,0.9,0.99,np.inf], evaluate_woe=False, binning_method='optimal')

Xnum_3 bin set into: [-inf, -99, 0.5, 0.9, 0.99, inf]


In [60]:
sc.set_binning('Xnum_4',[-np.inf,-999,-99,0,185,np.inf], evaluate_woe=False, binning_method='optimal')

Xnum_4 bin set into: [-inf, -999, -99, 0, 185, inf]


In [61]:
sc.set_binning('Xnum_5',[-np.inf,-999,0,1,2,np.inf], evaluate_woe=False, binning_method='optimal')

Xnum_5 bin set into: [-inf, -999, 0, 1, 2, inf]


## Categorical Features

In [62]:
sc.set_binning('Xcat_1',{'yes': 'yes',
   '-9999999999': 'no',
   'no': 'no',
   'checking': 'no',
   '-99': 'no',
   '-999': 'no'}, evaluate_woe=True, binning_method='optimal')

Xcat_1 bin set into: {'yes': 'yes', '-9999999999': 'no', 'no': 'no', 'checking': 'no', '-99': 'no', '-999': 'no'}


flag,0.0,1.0,TOTAL,%BAD,DIST_BAD,DIST_GOOD,WOE,IV
Xcat_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
-99,4.0,0.0,4.0,0.0,0.0,0.000205,-5.327886,0.001092
-999,22.0,4.0,26.0,0.153846,0.002688,0.001128,0.868334,0.001355
-9999999999,1.0,0.0,1.0,0.0,0.0,5.1e-05,-3.95605,0.000203
checking,147.0,10.0,157.0,0.063694,0.00672,0.007534,-0.114235,9.3e-05
no,1789.0,172.0,1961.0,0.08771,0.115591,0.091687,0.231677,0.005538
yes,17549.0,1302.0,18851.0,0.069068,0.875,0.899395,-0.027499,0.000671




flag,0.0,1.0,TOTAL,%BAD,DIST_BAD,DIST_GOOD,WOE,IV
Xcat_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
UNBINNED,1.0,0.0,1.0,0.0,0.0,5.1e-05,-3.95605,0.000203
no,1962.0,186.0,2148.0,0.086592,0.125,0.100554,0.217622,0.00532
yes,17549.0,1302.0,18851.0,0.069068,0.875,0.899395,-0.027499,0.000671


# Transform into WoE

In [63]:
sc.woe_transform(selected_features)

Transforming 1/6 into WoE: Xnum_1
Transforming 2/6 into WoE: Xnum_2
Transforming 3/6 into WoE: Xnum_3
Transforming 4/6 into WoE: Xnum_4
Transforming 5/6 into WoE: Xnum_5
Transforming 6/6 into WoE: Xcat_1


# Model Training and Evaluation

In [64]:
sc.train(LogisticRegression())

Cross-Validation Results (Intime Data)
------------------------------------------------------------
 Fold 1: 0.7046
 Fold 2: 0.7174
 Fold 3: 0.7404
 Fold 4: 0.7047
 Fold 5: 0.7365
------------------------------------------------------------
 Mean AUC: 0.7207
Xnum_1


flag,Xnum_1,Xnum_1_WOE,TOTAL_intime,%BAD_intime,TOTAL_oot,%BAD_oot,SCORE
0,"(-inf, -99.0]",-0.223218,1374,0.057496,616,0.058442,12.0
1,"(-99.0, 270.0]",0.909663,1884,0.159236,796,0.157035,-50.0
2,"(270.0, 290.0]",0.466936,4804,0.108451,2118,0.108121,-25.0
3,"(290.0, 295.0]",0.218963,1684,0.086698,744,0.083333,-12.0
4,"(295.0, 310.0]",-0.372484,6410,0.049922,2593,0.050906,20.0
5,"(310.0, 320.0]",-0.685054,1837,0.037017,854,0.039813,37.0
6,"(320.0, 335.0]",-1.211163,1711,0.022209,753,0.01992,66.0
7,"(335.0, inf]",-1.808352,1296,0.012346,526,0.015209,99.0


Xnum_2


flag,Xnum_2,Xnum_2_WOE,TOTAL_intime,%BAD_intime,TOTAL_oot,%BAD_oot,SCORE
0,"(-inf, -99.0]",-0.20744,1388,0.058357,627,0.060606,5.0
1,"(-99.0, 0.0]",-0.300188,2843,0.053465,1176,0.062925,7.0
2,"(0.0, 1.0]",-0.050936,12608,0.067576,5432,0.067194,1.0
3,"(1.0, 2.0]",0.237202,3233,0.088153,1415,0.084806,-6.0
4,"(2.0, inf]",0.647236,928,0.127155,350,0.125714,-15.0


Xnum_3


flag,Xnum_3,Xnum_3_WOE,TOTAL_intime,%BAD_intime,TOTAL_oot,%BAD_oot,SCORE
0,"(-inf, -99.0]",0.141246,4818,0.080739,2021,0.08758,-7.0
1,"(-99.0, 0.5]",0.670317,1873,0.129738,817,0.132191,-31.0
2,"(0.5, 0.9]",0.536356,1075,0.115349,465,0.105376,-25.0
3,"(0.9, 0.99]",-0.224515,505,0.057426,213,0.079812,10.0
4,"(0.99, inf]",-0.265872,12729,0.055228,5484,0.052881,12.0


Xnum_4


flag,Xnum_4,Xnum_4_WOE,TOTAL_intime,%BAD_intime,TOTAL_oot,%BAD_oot,SCORE
0,"(-inf, -999.0]",0.116164,4677,0.078897,1956,0.086401,-3.0
1,"(-999.0, -99.0]",0.016164,3142,0.071929,1383,0.070137,-0.0
2,"(-99.0, 0.0]",-0.253459,10290,0.055879,4413,0.054385,7.0
3,"(0.0, 185.0]",0.243426,1331,0.088655,553,0.092224,-6.0
4,"(185.0, inf]",0.656667,1560,0.128205,695,0.120863,-17.0


Xnum_5


flag,Xnum_5,Xnum_5_WOE,TOTAL_intime,%BAD_intime,TOTAL_oot,%BAD_oot,SCORE
0,"(-inf, -999.0]",0.116294,12141,0.078906,5288,0.081884,-6.0
1,"(-999.0, 0.0]",-0.478336,4254,0.045134,1752,0.034247,25.0
2,"(0.0, 1.0]",-0.296408,2106,0.053656,912,0.057018,16.0
3,"(1.0, 2.0]",-0.085799,1116,0.065412,465,0.070968,5.0
4,"(2.0, inf]",0.481889,1383,0.109906,583,0.108062,-25.0


Xcat_1


flag,Xcat_1,Xcat_1_WOE,TOTAL_intime,%BAD_intime,TOTAL_oot,%BAD_oot,SCORE
0,no,0.217112,2149,0.086552,978,0.088957,-10.0
1,yes,-0.027499,18851,0.069068,8022,0.06906,1.0


Unnamed: 0,Metric,Value
0,Model Base Point,422.768
1,AUC (Intime),0.7163
2,AUC (Cross-Val),0.7207
3,AUC (OOT),0.7195


# Model Scorecard

In [67]:
sc.scorecard_json

{'Xnum_1': {Interval(-inf, -99.0, closed='right'): 12.0,
  Interval(-99.0, 270.0, closed='right'): -50.0,
  Interval(270.0, 290.0, closed='right'): -25.0,
  Interval(290.0, 295.0, closed='right'): -12.0,
  Interval(295.0, 310.0, closed='right'): 20.0,
  Interval(310.0, 320.0, closed='right'): 37.0,
  Interval(320.0, 335.0, closed='right'): 66.0,
  Interval(335.0, inf, closed='right'): 99.0},
 'Xnum_2': {Interval(-inf, -99.0, closed='right'): 5.0,
  Interval(-99.0, 0.0, closed='right'): 7.0,
  Interval(0.0, 1.0, closed='right'): 1.0,
  Interval(1.0, 2.0, closed='right'): -6.0,
  Interval(2.0, inf, closed='right'): -15.0},
 'Xnum_3': {Interval(-inf, -99.0, closed='right'): -7.0,
  Interval(-99.0, 0.5, closed='right'): -31.0,
  Interval(0.5, 0.9, closed='right'): -25.0,
  Interval(0.9, 0.99, closed='right'): 10.0,
  Interval(0.99, inf, closed='right'): 12.0},
 'Xnum_4': {Interval(-inf, -999.0, closed='right'): -3.0,
  Interval(-999.0, -99.0, closed='right'): -0.0,
  Interval(-99.0, 0.0, c