## **5. Scaling**
---

Now, we'll:
- Construct the scorecards.
- Create the dictionary for the points map.
- Estimate the credit score based on an input.

### **5.1 Create Scorecards**
---

Assign points to each attribute using the following reference values:
- A PDO (points to double the odds of good) of 20.
- Odds of good of 30:1 at a score of 300 points.

Thus, we can calculate the offset and factor:
- $\text{Factor}=\text{PDO}/ \ln(2)$
- $\text{Offset} = \text{Score} - \text{Factor} \times \ln(\text{Odds of good})$

In [1]:
# Import library
import pandas as pd
import numpy as np

# Load configuration
import src.utils as utils

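Before running the next cells, add the reference values (`pdo`, `score_ref`, `odds_ref`) and the output path of the scorecards (`scorecards_path`) to the configuration file, then reload it.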

In [2]:
# Reload the configuration after editing the config file, so that the keys
# read by scaling() (e.g. 'pdo', 'score_ref', 'odds_ref', 'scorecards_path')
# are available in CONFIG_DATA.
CONFIG_DATA = utils.load_config()
# Display the loaded configuration as the cell output.
CONFIG_DATA

{'raw_data_path': 'data/raw/credit_dataset.csv',
 'data_path': 'data/output/data.pkl',
 'predictors_set_path': 'data/output/predictors.pkl',
 'target_set_path': 'data/output/target.pkl',
 'train_path': ['data/output/X_train.pkl', 'data/output/y_train.pkl'],
 'test_path': ['data/output/X_test.pkl', 'data/output/y_test.pkl'],
 'data_train_path': 'data/output/data_train.pkl',
 'data_train_binned_path': 'data/output/data_train_binned.pkl',
 'crosstab_list_path': 'data/output/crosstab_list.pkl',
 'WOE_table_path': 'data/output/WOE_table.pkl',
 'IV_table_path': 'data/output/IV_table.pkl',
 'WOE_map_dict_path': 'data/output/WOE_map_dict.pkl',
 'X_train_woe_path': 'data/output/X_train_woe.pkl',
 'target_variable': 'Credit_Score',
 'test_size': 0.3,
 'num_columns': ['Age',
  'Annual_Income',
  'Num_of_Loan',
  'Num_of_Delayed_Payment',
  'Outstanding_Debt',
  'Monthly_Inhand_Salary',
  'Num_Credit_Inquiries',
  'Credit_Utilization_Ratio',
  'Total_EMI_per_month',
  'Num_Bank_Accounts',
  'Num_C

In [11]:
# The function that transforms the model's output into points
def scaling():
    """Build the scorecards table by converting WOE values into points.

    Reads the reference values (``pdo``, ``score_ref``, ``odds_ref``) and the
    artefact paths from the module-level ``CONFIG_DATA``, then computes::

        Factor = pdo / ln(2)
        Offset = score_ref - Factor * ln(odds_ref)
        Points = round(Offset / n - Factor * (b0 / n + beta * WOE))

    where ``n`` is the number of rows of the model summary minus one,
    ``b0`` is ``best_model.intercept_[0]``, ``beta`` is the 'Estimate'
    column and ``WOE`` is the 'WOE' column of the merged table.

    The result is printed (shape only), dumped to
    ``CONFIG_DATA['scorecards_path']`` and returned.

    Returns
    -------
    pandas.DataFrame
        The WOE table left-merged with the model summary on
        'Characteristic', with an extra 'Points' column. Rows that have no
        matching 'Estimate' after the merge get NaN points; in that case the
        column stays floating point, otherwise it is cast to integer.

    Raises
    ------
    ValueError
        If the model summary yields ``n <= 0`` characteristics, which would
        otherwise silently produce infinite/NaN points.
    """
    # Reference values (pdo, score, and odds).
    pdo = CONFIG_DATA['pdo']
    score = CONFIG_DATA['score_ref']
    odds = CONFIG_DATA['odds_ref']

    # Load the fitted model, the WOE table and the model's estimates table.
    best_model = utils.load_pickle(CONFIG_DATA['best_model_path'])
    WOE_table = utils.load_pickle(CONFIG_DATA['WOE_table_path'])
    best_model_summary = utils.load_pickle(CONFIG_DATA['best_model_summary_path'])

    # Calculate Factor and Offset
    factor = pdo / np.log(2)
    offset = score - (factor * np.log(odds))

    print('===================================================')
    print(f"Odds of good of {odds}:1 at {score} points score.")
    print(f"{pdo} PDO (points to double the odds of good).")
    print(f"Offset = {offset:.2f}")
    print(f"Factor = {factor:.2f}")
    print('===================================================')

    # n = number of characteristics.
    # NOTE(review): the "- 1" presumably excludes a single intercept row of
    # the summary table - confirm against the code that builds the summary.
    n = best_model_summary.shape[0] - 1
    if n <= 0:
        raise ValueError(
            "The model summary must contain at least one characteristic "
            f"besides the intercept (got n = {n})."
        )

    # Intercept of the fitted model.
    b0 = best_model.intercept_[0]

    # Numerical characteristics are stored in the WOE table under their
    # binned name ('<column>_bin'), so rename them in the summary before the
    # merge. Only the 'Characteristic' column is touched, and the frame is
    # not mutated while it is being iterated.
    num_cols = set(CONFIG_DATA['num_columns'])
    best_model_summary = best_model_summary.copy()
    best_model_summary['Characteristic'] = [
        name + '_bin' if name in num_cols else name
        for name in best_model_summary['Characteristic']
    ]

    # Attach the parameter estimate (beta) of each characteristic to every
    # attribute row of the WOE table.
    scorecards = pd.merge(left=WOE_table,
                          right=best_model_summary,
                          how='left',
                          on=['Characteristic'])

    beta = scorecards['Estimate']
    WOE = scorecards['WOE']

    # Point value of every attribute, rounded to the nearest whole point.
    points = ((offset / n) - factor * ((b0 / n) + (beta * WOE))).round()

    # An integer column cannot hold NaN/inf, so only cast when every row has
    # a finite value; otherwise keep the float column (NaN marks rows without
    # an estimate).
    if np.isfinite(points).all():
        points = points.astype('int')
    scorecards['Points'] = points

    # Validation
    print('Scorecards table shape : ', scorecards.shape)

    # Dump the scorecards
    utils.dump_pickle(scorecards, CONFIG_DATA['scorecards_path'])

    return scorecards

In [16]:
# Build the scorecards, dump them to CONFIG_DATA['scorecards_path'] and
# display the returned table as the cell output.
scaling()

Odds of good of 30:1 at 300 points score.
20 PDO (points to double the odds of good).
Offset = 201.86
Factor = 28.85
Scorecards table shape :  (112, 5)


Unnamed: 0,Characteristic,Attribute,WOE,Estimate,Points
0,Age_bin,"(13.999, 24.0]",-0.294877,0.266541,21.0
1,Age_bin,"(24.0, 33.0]",-0.120788,0.266541,19.0
2,Age_bin,"(33.0, 41.0]",-0.157248,0.266541,20.0
3,Age_bin,"(41.0, 55.0]",0.594385,0.266541,14.0
4,Age_bin,Missing,0.123833,0.266541,18.0
5,Annual_Income_bin,"(7005.929, 18921.78]",-0.701343,0.110433,21.0
6,Annual_Income_bin,"(18921.78, 36346.13]",0.096058,0.110433,18.0
7,Annual_Income_bin,"(36346.13, 70112.78]",-0.149729,0.110433,19.0
8,Annual_Income_bin,"(70112.78, 23658189.0]",0.674363,0.110433,16.0
9,Num_of_Loan_bin,"(0.999, 2.0]",0.676677,0.285725,13.0


### **5.2 Predict the Credit Score**
---

To predict the credit score from an input, we first need a points map dictionary that maps every attribute of each characteristic to its points.

To dump the credit score and the points map dictionary, update the configuration file.

In [17]:
# Reload the configuration after editing the config file, so that the keys
# read by the next cells (e.g. 'points_map_dict_path') are available in
# CONFIG_DATA.
CONFIG_DATA = utils.load_config()
# Display the loaded configuration as the cell output.
CONFIG_DATA

{'raw_data_path': 'data/raw/credit_dataset.csv',
 'data_path': 'data/output/data.pkl',
 'predictors_set_path': 'data/output/predictors.pkl',
 'target_set_path': 'data/output/target.pkl',
 'train_path': ['data/output/X_train.pkl', 'data/output/y_train.pkl'],
 'test_path': ['data/output/X_test.pkl', 'data/output/y_test.pkl'],
 'data_train_path': 'data/output/data_train.pkl',
 'data_train_binned_path': 'data/output/data_train_binned.pkl',
 'crosstab_list_path': 'data/output/crosstab_list.pkl',
 'WOE_table_path': 'data/output/WOE_table.pkl',
 'IV_table_path': 'data/output/IV_table.pkl',
 'WOE_map_dict_path': 'data/output/WOE_map_dict.pkl',
 'X_train_woe_path': 'data/output/X_train_woe.pkl',
 'target_variable': 'Credit_Score',
 'test_size': 0.3,
 'num_columns': ['Age',
  'Annual_Income',
  'Num_of_Loan',
  'Num_of_Delayed_Payment',
  'Outstanding_Debt',
  'Monthly_Inhand_Salary',
  'Num_Credit_Inquiries',
  'Credit_Utilization_Ratio',
  'Total_EMI_per_month',
  'Num_Bank_Accounts',
  'Num_C

In [18]:
# Create the dict function for the Points map.
def get_points_map_dict():
    """Build, dump and return the attribute -> points lookup dictionary.

    The scorecards table is loaded from ``CONFIG_DATA['scorecards_path']``
    and turned into a nested dictionary:

    * ``points_map_dict[characteristic][attribute]`` holds the points of
      every attribute other than ``'Missing'``;
    * ``points_map_dict['Missing'][characteristic]`` holds the points of the
      ``'Missing'`` attribute of that characteristic, or NaN when the
      characteristic has no ``'Missing'`` row.

    The dictionary is dumped to ``CONFIG_DATA['points_map_dict_path']``.

    Returns
    -------
    dict
        The points map described above. Keys follow the row order of the
        scorecards table.
    """
    # Load the Scorecards table
    scorecards = utils.load_pickle(CONFIG_DATA['scorecards_path'])

    # 'Missing' gets its own sub-dictionary keyed by characteristic.
    missing_points = {}
    points_map_dict = {'Missing': missing_points}

    rows = zip(scorecards['Characteristic'],
               scorecards['Attribute'],
               scorecards['Points'])
    for char, attribute, points in rows:
        # Every characteristic gets an entry, even if it only has a
        # 'Missing' row.
        char_map = points_map_dict.setdefault(char, {})

        if attribute == 'Missing':
            missing_points[char] = points
        else:
            char_map[attribute] = points
            # Default to NaN only when no 'Missing' score has been recorded:
            # a real 'Missing' score must never be overwritten, whatever the
            # order of the rows in the scorecards table.
            missing_points.setdefault(char, np.nan)

    # Validation data
    print('Number of key : ', len(points_map_dict.keys()))

    # Dump
    utils.dump_pickle(points_map_dict, CONFIG_DATA['points_map_dict_path'])

    return points_map_dict
    

In [19]:
# Build the points map, dump it to CONFIG_DATA['points_map_dict_path'] and
# display the returned dictionary as the cell output.
get_points_map_dict()

Number of key :  23


{'Missing': {'Amount_invested_monthly_bin': nan,
  'Payment_Behaviour': nan,
  'Monthly_Balance_bin': nan,
  'Type_of_Loan': nan,
  'Outstanding_Debt_bin': nan,
  'Credit_Utilization_Ratio_bin': nan,
  'Payment_of_Min_Amount': nan,
  'Num_Credit_Inquiries_bin': nan,
  'Interest_Rate_bin': 16.0,
  'Monthly_Inhand_Salary_bin': nan,
  'Annual_Income_bin': nan,
  'Age_bin': 18.0,
  'Num_Bank_Accounts_bin': 15.0,
  'Total_EMI_per_month_bin': nan,
  'Num_of_Delayed_Payment_bin': 15.0,
  'Credit_History_Age_bin': nan,
  'Occupation': nan,
  'Num_Credit_Card_bin': 18.0,
  'Num_of_Loan_bin': 20.0,
  'Changed_Credit_Limit_bin': nan,
  'Delay_from_due_date_bin': nan,
  'Credit_Mix': nan},
 'Amount_invested_monthly_bin': {Interval(-0.001, 71.415, closed='right'): nan,
  Interval(71.415, 129.42, closed='right'): nan,
  Interval(129.42, 247.885, closed='right'): nan,
  Interval(247.885, 10000.0, closed='right'): nan},
 'Payment_Behaviour': {'!@9#%8': nan,
  'High_spent_Large_value_payments': nan,
  

Next, convert the unprocessed input data into points for scoring.

In [20]:
def transform_points(raw_data=None, type=None, CONFIG_DATA=None):
    """Replace every raw value of a data set with its scorecard points.

    Parameters
    ----------
    raw_data : pandas.DataFrame, optional
        Raw predictors to transform. Ignored when ``type`` is given.
    type : str, optional
        Name of a saved data set (e.g. ``'train'``). When given, the
        predictors are loaded from the first path in
        ``CONFIG_DATA[f'{type}_path']`` and the result is dumped to
        ``CONFIG_DATA[f'X_{type}_points_path']``. (The name shadows the
        ``type`` builtin but is kept because callers pass it by keyword.)
    CONFIG_DATA : dict
        Configuration holding ``'num_columns'``, ``'points_map_dict_path'``
        and the data paths mentioned above.

    Returns
    -------
    pandas.DataFrame
        A copy of the input in which each column holds points. Values with
        no entry in the points map (missing or unseen) receive the
        characteristic's ``'Missing'`` points, which may be NaN.

    Raises
    ------
    ValueError
        If ``CONFIG_DATA`` is not given, or if neither ``raw_data`` nor
        ``type`` is given.
    """
    if CONFIG_DATA is None:
        raise ValueError("CONFIG_DATA must be provided.")
    if raw_data is None and type is None:
        raise ValueError("Provide either raw_data or type.")

    # Numerical columns are keyed as '<column>_bin' in the points map.
    num_cols = set(CONFIG_DATA['num_columns'])

    # Load the points_map_dict
    points_map_dict = utils.load_pickle(CONFIG_DATA['points_map_dict_path'])
    missing_points = points_map_dict['Missing']

    # Load the saved data if type is not None
    if type is not None:
        raw_data = utils.load_pickle(CONFIG_DATA[f'{type}_path'][0])

    # Work on a copy so the caller's data is left untouched.
    points_data = raw_data.copy()
    for col in points_data.columns:
        map_col = col + '_bin' if col in num_cols else col

        # Map each value to its points, then give the 'Missing' points to
        # whatever could not be mapped (missing or out-of-range values).
        # NOTE(review): numerical values are presumably resolved against the
        # Interval keys of the map by pandas - confirm with the binning step.
        mapped = points_data[col].map(points_map_dict[map_col])
        points_data[col] = mapped.fillna(value=missing_points[map_col])

    # Dump data
    if type is not None:
        utils.dump_pickle(points_data, CONFIG_DATA[f'X_{type}_points_path'])

    return points_data

In [None]:
# Transform the saved training predictors into points; with type='train' the
# result is also dumped to CONFIG_DATA['X_train_points_path'].
X_train_points = transform_points(type='train', CONFIG_DATA=CONFIG_DATA)

# Display the transformed training data as the cell output.
X_train_points

Next, include a function that determines the credit score.

To dump the credit score, update the configuration file.

In [22]:
# Reload the configuration after editing the config file, so that the keys
# read by predict_score() (e.g. 'score_path') are available in CONFIG_DATA.
CONFIG_DATA = utils.load_config()
# Display the loaded configuration as the cell output.
CONFIG_DATA

{'raw_data_path': 'data/raw/credit_dataset.csv',
 'data_path': 'data/output/data.pkl',
 'predictors_set_path': 'data/output/predictors.pkl',
 'target_set_path': 'data/output/target.pkl',
 'train_path': ['data/output/X_train.pkl', 'data/output/y_train.pkl'],
 'test_path': ['data/output/X_test.pkl', 'data/output/y_test.pkl'],
 'data_train_path': 'data/output/data_train.pkl',
 'data_train_binned_path': 'data/output/data_train_binned.pkl',
 'crosstab_list_path': 'data/output/crosstab_list.pkl',
 'WOE_table_path': 'data/output/WOE_table.pkl',
 'IV_table_path': 'data/output/IV_table.pkl',
 'WOE_map_dict_path': 'data/output/WOE_map_dict.pkl',
 'X_train_woe_path': 'data/output/X_train_woe.pkl',
 'target_variable': 'Credit_Score',
 'test_size': 0.3,
 'num_columns': ['Age',
  'Annual_Income',
  'Num_of_Loan',
  'Num_of_Delayed_Payment',
  'Outstanding_Debt',
  'Monthly_Inhand_Salary',
  'Num_Credit_Inquiries',
  'Credit_Utilization_Ratio',
  'Total_EMI_per_month',
  'Num_Bank_Accounts',
  'Num_C

In [23]:
# Predictive function for credit score
def predict_score(raw_data, CONFIG_DATA):
    """Predict the credit score of a single applicant.

    The raw input is converted to points with ``transform_points`` and the
    points of all characteristics are summed. The score is dumped to
    ``CONFIG_DATA['score_path']`` and returned.

    Parameters
    ----------
    raw_data : pandas.DataFrame
        Exactly one row of raw predictors.
    CONFIG_DATA : dict
        Configuration passed on to ``transform_points``; must also contain
        ``'score_path'``.

    Returns
    -------
    int
        The credit score (sum of points, truncated to an integer).

    Raises
    ------
    ValueError
        If ``raw_data`` does not contain exactly one row.
    """
    points = transform_points(raw_data=raw_data,
                              type=None,
                              CONFIG_DATA=CONFIG_DATA)

    # Row-wise total; pandas skips NaN points by default, so characteristics
    # without points contribute nothing to the score.
    row_totals = points.sum(axis=1)
    if len(row_totals) != 1:
        raise ValueError(
            f"predict_score expects exactly one row, got {len(row_totals)}."
        )

    # Take the scalar explicitly: calling int() on a Series is deprecated.
    score = int(row_totals.iloc[0])

    # NOTE: no approve/reject recommendation is made here; comparing the
    # score with a cutoff (e.g. CONFIG_DATA['cutoff_score']) is left to the
    # caller.

    utils.dump_pickle(score, CONFIG_DATA['score_path'])

    return score


In [24]:
# Sample raw input for a single applicant, used to try out the scoring function.
tes_input = dict(
    # Numerical characteristics
    Age=23,
    Annual_Income=19000,
    Num_of_Loan=4,
    Num_of_Delayed_Payment=7,
    Outstanding_Debt=810,
    Monthly_Inhand_Salary=1825,
    Num_Credit_Inquiries=4,
    Credit_Utilization_Ratio=26.8,
    Total_EMI_per_month=49.5,
    Num_Bank_Accounts=3,
    Num_Credit_Card=4,
    Interest_Rate=3,
    Delay_from_due_date=3,
    Amount_invested_monthly=80,
    Monthly_Balance=312,
    Changed_Credit_Limit=11.27,
    Credit_History_Age=23,
    # Categorical characteristics
    Occupation='Scientist',
    Type_of_Loan='Auto Loan',
    Credit_Mix='Good',
    Payment_of_Min_Amount='No',
    Payment_Behaviour='High_spent_Small_value_payments',
)

# One-row DataFrame (index 0) built from the sample above.
tes = pd.DataFrame(data=tes_input, index=[0])

# Display the sample as the cell output.
tes

Unnamed: 0,Age,Annual_Income,Num_of_Loan,Num_of_Delayed_Payment,Outstanding_Debt,Monthly_Inhand_Salary,Num_Credit_Inquiries,Credit_Utilization_Ratio,Total_EMI_per_month,Num_Bank_Accounts,...,Delay_from_due_date,Amount_invested_monthly,Monthly_Balance,Changed_Credit_Limit,Credit_History_Age,Occupation,Type_of_Loan,Credit_Mix,Payment_of_Min_Amount,Payment_Behaviour
0,23,19000,4,7,810,1825,4,26.8,49.5,3,...,3,80,312,11.27,23,Scientist,Auto Loan,Good,No,High_spent_Small_value_payments


In [25]:
# Predict the credit score of the sample input; the score is also dumped to
# CONFIG_DATA['score_path'] and shown as the cell output.
predict_score(raw_data=tes, CONFIG_DATA=CONFIG_DATA)

147