# Model Building 

* Using Custom Transformer(s)
* Open Source libraries
* Pipeline

In [1]:
def go_up_from_current_directory(*, go_up: int = 1) -> None:
    """Put an ancestor of the current working directory first on ``sys.path``.

    This makes modules that live ``go_up`` levels above the directory the
    notebook is run from importable.

    Args:
    -----
    go_up: int, default=1
        Number of directory levels to climb from the current working
        directory. ``0`` selects the current working directory itself.

    Returns:
    --------
    None
        The resolved absolute path is printed and placed at ``sys.path[0]``.

    Raises:
    -------
    ValueError
        If ``go_up`` is negative.
    """
    import os
    import sys

    if go_up < 0:
        raise ValueError(f"go_up must be >= 0, got {go_up}")

    # Anchor explicitly on the working directory. (``__name__`` is a module
    # name such as "__main__", not a file path, so taking its dirname only
    # ever produced an empty string.)
    parent_hops = [os.pardir] * go_up
    target_dir = os.path.abspath(os.path.join(os.getcwd(), *parent_hops))

    # Drop any earlier copy so re-running the cell does not pile up
    # duplicates, then give the directory the highest import priority.
    sys.path[:] = [entry for entry in sys.path if entry != target_dir]
    sys.path.insert(0, target_dir)
    print(target_dir)

In [2]:
# Standard imports
import numpy as np
import pandas as pd


# for saving the pipeline
import joblib

# from Scikit-learn
from sklearn import set_config
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Binarizer

# from feature-engine
from feature_engine.imputation import (
    AddMissingIndicator,
    MeanMedianImputer,
    CategoricalImputer,
)

from feature_engine.encoding import (
    RareLabelEncoder,
    OrdinalEncoder,
)

from feature_engine.transformation import (
    LogTransformer,
    YeoJohnsonTransformer,
)

from feature_engine.discretisation import EqualFrequencyDiscretiser

from feature_engine.selection import DropFeatures
from feature_engine.wrappers import SklearnTransformerWrapper

# Custom Module(s)
# The call below pushes the parent directory onto ``sys.path`` and must run
# before the imports that follow.
# NOTE(review): presumably ``preprocessor``, ``utilities`` and ``lending_data``
# live in that parent directory — confirm against the project layout.
go_up_from_current_directory(go_up=1)
import preprocessor as pp
import utilities
from lending_data import LendingData

# Built-in library
import itertools
import re
import json
import typing as tp

# pandas settings: raise the display limits so long/wide frames and long
# cell values are rendered instead of being truncated
pd.set_option("display.max_rows", 1_000)
pd.set_option("display.max_columns", 1_000)
pd.set_option("display.max_colwidth", 600)

# Black code formatter (Optional)
# NOTE(review): presumably needs the ``lab_black`` extension installed in the
# kernel environment — confirm, or remove this line if it is not available.
%load_ext lab_black
# auto reload imports: mode 2 re-imports modules before each cell runs, so
# edits to the local project modules are picked up without a kernel restart
%load_ext autoreload
%autoreload 2

/Users/neidu/Desktop/Projects/Personal/tutorials/LendingClub Issued Loans


In [3]:
# Identifier / free-text columns that are removed straight after loading
columns_to_drop = ["id", "member_id", "desc", "policy_code"]
fp = "../data/lc_2016_2017.csv"  # Filepath

# Load the raw CSV and drop the unwanted columns in a single expression
orig_data = utilities.load_data(
    filepath=fp, format_="csv", low_memory=False
).drop(columns=columns_to_drop)

# Preview the first rows
orig_data.head(10)

The shape of the data: (759338, 72)



Unnamed: 0,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,emp_title,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,pymnt_plan,purpose,title,zip_code,addr_state,dti,delinq_2yrs,earliest_cr_line,inq_last_6mths,mths_since_last_delinq,mths_since_last_record,open_acc,pub_rec,revol_bal,revol_util,total_acc,initial_list_status,out_prncp,out_prncp_inv,total_pymnt,total_pymnt_inv,total_rec_prncp,total_rec_int,total_rec_late_fee,recoveries,collection_recovery_fee,last_pymnt_d,last_pymnt_amnt,next_pymnt_d,last_credit_pull_d,collections_12_mths_ex_med,mths_since_last_major_derog,application_type,annual_inc_joint,dti_joint,verification_status_joint,acc_now_delinq,tot_coll_amt,tot_cur_bal,open_acc_6m,open_il_12m,open_il_24m,mths_since_rcnt_il,total_bal_il,il_util,open_rv_12m,open_rv_24m,max_bal_bc,all_util,total_rev_hi_lim,inq_fi,total_cu_tl,inq_last_12m
0,2300,2300,2300.0,36 months,12.62,77.08,C,C1,,,OWN,10000.0,Not Verified,Jun-2017,Current,n,credit_card,Credit card refinancing,148xx,NY,21.61,0,Sep-1985,1.0,,,4,0,3911.0,55.1,6,w,2029.91,2029.91,382.99,382.99,270.09,112.9,0.0,0.0,0.0,Dec-2017,77.08,Jan-2018,Dec-2017,0,,Individual,,,,0,0.0,3911.0,0.0,0.0,0.0,,0.0,,1.0,2.0,2315.0,55.0,7100.0,1.0,0.0,2.0
1,16000,16000,16000.0,60 months,12.62,360.95,C,C1,teacher,10+ years,MORTGAGE,94000.0,Not Verified,Jun-2017,Current,n,debt_consolidation,Debt consolidation,021xx,MA,25.61,0,Jun-1992,0.0,,,9,0,33752.0,105.8,26,w,14813.07,14813.07,2060.09,2060.09,1186.93,873.16,0.0,0.0,0.0,Dec-2017,360.95,Jan-2018,Dec-2017,0,,Individual,,,,0,0.0,234457.0,2.0,0.0,0.0,26.0,11078.0,69.0,3.0,5.0,1962.0,94.0,31900.0,0.0,6.0,1.0
2,6025,6025,6025.0,36 months,15.05,209.01,C,C4,Front Office,7 years,MORTGAGE,46350.0,Not Verified,Jun-2017,Current,n,home_improvement,Home improvement,018xx,MA,8.88,0,Jun-2002,0.0,,,11,0,12425.0,44.9,27,w,5340.82,5340.82,1032.46,1032.46,684.18,348.28,0.0,0.0,0.0,Dec-2017,209.01,Jan-2018,Dec-2017,0,,Individual,,,,0,0.0,121311.0,1.0,1.0,2.0,10.0,0.0,,1.0,2.0,1950.0,45.0,27700.0,1.0,5.0,3.0
3,20400,20400,20400.0,36 months,9.44,652.91,B,B1,Manager,10+ years,RENT,44000.0,Source Verified,Jun-2017,Current,n,car,Car financing,913xx,CA,27.06,0,Jan-2007,1.0,,,15,0,8769.0,18.7,19,w,17898.81,17898.81,3237.8,3237.8,2501.19,736.61,0.0,0.0,0.0,Dec-2017,652.91,Jan-2018,Dec-2017,0,,Individual,,,,0,0.0,62335.0,1.0,1.0,1.0,6.0,53566.0,95.0,1.0,2.0,4240.0,60.0,46900.0,1.0,1.0,1.0
4,13000,13000,13000.0,36 months,11.99,431.73,B,B5,Paramedic,10+ years,MORTGAGE,85000.0,Source Verified,Jun-2017,Current,n,debt_consolidation,Debt consolidation,560xx,MN,6.79,1,Feb-2002,0.0,16.0,,5,0,6866.0,88.0,24,w,11460.66,11460.66,2137.0,2137.0,1539.34,597.66,0.0,0.0,0.0,Dec-2017,431.73,Jan-2018,Dec-2017,0,16.0,Individual,,,,0,0.0,15332.0,0.0,0.0,0.0,127.0,8466.0,72.0,0.0,1.0,2996.0,78.0,7800.0,0.0,0.0,0.0
5,12000,12000,12000.0,36 months,9.44,384.06,B,B1,Teacher,10+ years,MORTGAGE,70000.0,Not Verified,Jun-2017,Current,n,debt_consolidation,Debt consolidation,184xx,PA,18.34,0,Aug-1997,0.0,35.0,83.0,14,1,15025.0,58.2,30,w,10528.73,10528.73,1904.57,1904.57,1471.27,433.3,0.0,0.0,0.0,Dec-2017,384.06,Jan-2018,Dec-2017,0,,Individual,,,,0,0.0,211227.0,0.0,0.0,0.0,41.0,12438.0,40.0,2.0,3.0,5227.0,49.0,25800.0,0.0,0.0,2.0
6,6000,6000,6000.0,36 months,10.42,194.79,B,B3,Office Clerk,7 years,MORTGAGE,37107.19,Source Verified,Jun-2017,Current,n,medical,Medical expenses,773xx,TX,30.63,0,Mar-1992,0.0,70.0,71.0,10,1,9332.0,59.1,28,w,5274.05,5274.05,965.27,965.27,725.95,239.32,0.0,0.0,0.0,Dec-2017,194.79,Jan-2018,Dec-2017,0,,Individual,,,,0,275.0,107517.0,0.0,0.0,1.0,19.0,19983.0,,1.0,1.0,3990.0,59.0,15800.0,0.0,0.0,0.0
7,12000,12000,12000.0,60 months,15.05,285.8,C,C4,PROGRAM DIRECTOR,6 years,RENT,69800.0,Not Verified,Jun-2017,Current,n,debt_consolidation,Debt consolidation,953xx,CA,31.43,0,Feb-2006,1.0,,,10,0,15227.0,79.7,12,w,11162.31,11162.31,1624.51,1624.51,837.69,786.82,0.0,0.0,0.0,Dec-2017,285.8,Jan-2018,Dec-2017,0,,Individual,,,,0,0.0,51126.0,1.0,0.0,1.0,16.0,35899.0,54.0,1.0,2.0,4107.0,60.0,19100.0,1.0,0.0,4.0
8,11575,11575,11575.0,36 months,7.35,359.26,A,A4,Solutions Architect,6 years,OWN,153000.0,Not Verified,Jun-2017,Fully Paid,n,credit_card,Credit card refinancing,923xx,CA,16.99,0,Jul-1994,0.0,24.0,84.0,20,1,8550.0,22.7,46,w,0.0,0.0,11707.816236,11707.82,11575.0,132.82,0.0,0.0,0.0,Sep-2017,11360.38,,Sep-2017,0,24.0,Individual,,,,0,0.0,442731.0,1.0,0.0,0.0,27.0,92315.0,63.0,2.0,8.0,1581.0,36.0,37600.0,1.0,6.0,2.0
9,20400,20400,20400.0,60 months,7.97,413.35,A,A5,asst plant manager,10+ years,MORTGAGE,110000.0,Not Verified,Jun-2017,Current,n,major_purchase,Major purchase,286xx,NC,11.07,0,May-1987,1.0,40.0,,8,0,5294.0,8.1,21,w,18992.12,18992.12,2044.17,2044.17,1407.88,636.29,0.0,0.0,0.0,Dec-2017,413.35,Jan-2018,Dec-2017,0,,Individual,,,,0,0.0,101473.0,0.0,0.0,1.0,18.0,44985.0,,0.0,1.0,5133.0,8.0,65500.0,0.0,2.0,1.0


In [4]:
# Numerical columns excluded from modelling
num_vars_to_drop = [
    LendingData.FUNDED_AMNT,
    LendingData.FUNDED_AMNT_INV,
    LendingData.INSTALLMENT,
    LendingData.TOTAL_PYMNT_INV,
    LendingData.TOTAL_REC_PRNCP,
    LendingData.OUT_PRNCP_INV,
    LendingData.COLLECTION_RECOVERY_FEE,
    LendingData.OPEN_RV_24M,
]

# Categorical / date / free-text columns excluded from modelling
cat_vars_to_drop = [
    LendingData.EMP_TITLE,
    LendingData.ISSUE_D,
    LendingData.LAST_PYMNT_D,
    LendingData.NEXT_PYMNT_D,
    LendingData.LAST_CREDIT_PULL_D,
    LendingData.SUB_GRADE,
    LendingData.TITLE,
    LendingData.EARLIEST_CR_LINE,
    LendingData.PYMNT_PLAN,
]

# Drop both groups in one pass and rebind the result to the same name
orig_data = orig_data.drop(columns=[*num_vars_to_drop, *cat_vars_to_drop])

In [5]:
## ====== CONFIGS ======
TARGET: str = "loan_status"  # name of the label column in the data
TEST_SIZE: float = 0.1  # passed as ``test_size`` to ``train_test_split``
RANDOM_STATE: int = 123  # passed as ``random_state`` to ``train_test_split``

In [6]:
# These labels will be treated as default
default_list = [
    "Charged Off",
    "Late (31-120 days)",
    "Default",
    "Does not meet the credit policy. Status:Charged Off",
]

# Update the loan_status: 1 for the default labels above, 0 for everything else.
# Only recode while the column still holds the raw status labels. Once it is
# numeric, re-running this cell would find no label in ``default_list`` and
# silently set every row to 0.
if not pd.api.types.is_numeric_dtype(orig_data[TARGET]):
    orig_data[TARGET] = orig_data[TARGET].isin(default_list).astype("int64")

# Check the frequency table of the target
orig_data[TARGET].value_counts(normalize=True)

0    0.930746
1    0.069254
Name: loan_status, dtype: float64

In [7]:
# Separate the target column from the predictors
y = orig_data[TARGET]
X = orig_data.drop(columns=TARGET)

# Hold out TEST_SIZE of the rows; RANDOM_STATE seeds the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=RANDOM_STATE,
    test_size=TEST_SIZE,
)

## Pipeline Configs

In [8]:
# Numerical variables with NA in train set
NUMERICAL_VARS_WITH_NA = [
    LendingData.DTI,
    LendingData.INQ_LAST_6MTHS,
    LendingData.MTHS_SINCE_LAST_DELINQ,
    LendingData.MTHS_SINCE_LAST_RECORD,
    LendingData.REVOL_UTIL,
    LendingData.MTHS_SINCE_LAST_MAJOR_DEROG,
    LendingData.ANNUAL_INC_JOINT,
    LendingData.DTI_JOINT,
    LendingData.OPEN_ACC_6M,
    LendingData.OPEN_IL_12M,
    LendingData.OPEN_IL_24M,
    LendingData.MTHS_SINCE_RCNT_IL,
    LendingData.TOTAL_BAL_IL,
    LendingData.IL_UTIL,
    LendingData.OPEN_RV_12M,
    LendingData.MAX_BAL_BC,
    LendingData.ALL_UTIL,
    LendingData.INQ_FI,
    LendingData.TOTAL_CU_TL,
    LendingData.INQ_LAST_12M,
]

# Categorical variables with NA in train set, grouped by imputation strategy
CATEGORICAL_VARS_WITH_NA_MISSING = [LendingData.VERIFICATION_STATUS_JOINT]
CATEGORICAL_VARS_WITH_NA_FREQUENT = [LendingData.EMP_LENGTH, LendingData.ZIP_CODE]

# Numerical variables to replace with median in train set
REPL_VARS_WITH_MEDIAN = [
    LendingData.DTI,
    LendingData.INQ_LAST_6MTHS,
    LendingData.MTHS_SINCE_LAST_DELINQ,
    LendingData.MTHS_SINCE_LAST_MAJOR_DEROG,
    LendingData.ANNUAL_INC_JOINT,
    LendingData.ALL_UTIL,
]

# Numerical variables to replace with mean in train set: every numerical
# variable with NA that is not median-imputed (NUMERICAL_VARS_WITH_NA order)
REPL_VARS_WITH_MEAN = [
    var for var in NUMERICAL_VARS_WITH_NA if var not in REPL_VARS_WITH_MEDIAN
]

# Numerical variables to transform with log_transformation in train set
# (passed to the ``LogTransformer`` step of the pipeline)
LOG_TRANSFORMED_VARS = [LendingData.TOTAL_ACC, LendingData.INT_RATE]
# Numerical variables to transform with yeo_johnson_transformation in train set
# (passed to the ``YeoJohnsonTransformer`` step of the pipeline)
YEO_JOHNSON_LOG_TRANSFORMED_VARS = [
    LendingData.TOTAL_REV_HI_LIM,
    LendingData.OPEN_ACC,
    LendingData.MTHS_SINCE_RCNT_IL,
    LendingData.MTHS_SINCE_LAST_DELINQ,
    LendingData.IL_UTIL,
    LendingData.TOT_CUR_BAL,
    LendingData.TOTAL_PYMNT,
    LendingData.TOTAL_BAL_IL,
    LendingData.REVOL_BAL,
    LendingData.MTHS_SINCE_LAST_MAJOR_DEROG,
    LendingData.TOTAL_REC_INT,
    LendingData.MAX_BAL_BC,
    LendingData.LAST_PYMNT_AMNT,
]

# Numerical variables to discretize in train set
# NOTE(review): the pipeline cell in this notebook passes DISCRETE_VARS (not
# this list) to its discretiser step — confirm whether this list is used
# elsewhere or was meant to be wired in there.
NUMERICALS_TO_BIN = [
    LendingData.ANNUAL_INC,
    LendingData.ANNUAL_INC_JOINT,
    LendingData.PUB_REC,
    LendingData.MTHS_SINCE_LAST_RECORD,
    LendingData.DELINQ_2YRS,
    LendingData.OPEN_IL_24M,
    LendingData.OPEN_RV_12M,
    LendingData.TOT_COLL_AMT,
    LendingData.INQ_FI,
    LendingData.RECOVERIES,
    LendingData.INQ_LAST_12M,
    LendingData.TOTAL_CU_TL,
    LendingData.TOTAL_REC_LATE_FEE,
    LendingData.DTI,
    LendingData.DTI_JOINT,
    LendingData.OUT_PRNCP,
]

# Variable(s) to map
MAPPING_VARS = [LendingData.EMP_LENGTH]
# Raw employment-length label -> number of years, kept as a string.
# "10+ years" collapses to "10" and "< 1 year" to "0".
# NOTE(review): how labels absent from this dict are handled depends on
# ``pp.Mapper`` — confirm against its implementation.
EMP_LENGTH_MAPPINGS = {
    "10+ years": "10",
    "2 years": "2",
    "< 1 year": "0",
    "3 years": "3",
    "1 year": "1",
    "5 years": "5",
    "4 years": "4",
    "6 years": "6",
    "8 years": "8",
    "9 years": "9",
    "7 years": "7",
}

# All numerical predictors
NUMERICAL_VARS = [
    LendingData.LOAN_AMNT,
    LendingData.INT_RATE,
    LendingData.ANNUAL_INC,
    LendingData.DTI,
    LendingData.DELINQ_2YRS,
    LendingData.INQ_LAST_6MTHS,
    LendingData.MTHS_SINCE_LAST_DELINQ,
    LendingData.MTHS_SINCE_LAST_RECORD,
    LendingData.OPEN_ACC,
    LendingData.PUB_REC,
    LendingData.REVOL_BAL,
    LendingData.REVOL_UTIL,
    LendingData.TOTAL_ACC,
    LendingData.OUT_PRNCP,
    LendingData.TOTAL_PYMNT,
    LendingData.TOTAL_REC_INT,
    LendingData.TOTAL_REC_LATE_FEE,
    LendingData.RECOVERIES,
    LendingData.LAST_PYMNT_AMNT,
    LendingData.COLLECTIONS_12_MTHS_EX_MED,
    LendingData.MTHS_SINCE_LAST_MAJOR_DEROG,
    LendingData.ANNUAL_INC_JOINT,
    LendingData.DTI_JOINT,
    LendingData.ACC_NOW_DELINQ,
    LendingData.TOT_COLL_AMT,
    LendingData.TOT_CUR_BAL,
    LendingData.OPEN_ACC_6M,
    LendingData.OPEN_IL_12M,
    LendingData.OPEN_IL_24M,
    LendingData.MTHS_SINCE_RCNT_IL,
    LendingData.TOTAL_BAL_IL,
    LendingData.IL_UTIL,
    LendingData.OPEN_RV_12M,
    LendingData.MAX_BAL_BC,
    LendingData.ALL_UTIL,
    LendingData.TOTAL_REV_HI_LIM,
    LendingData.INQ_FI,
    LendingData.TOTAL_CU_TL,
    LendingData.INQ_LAST_12M,
]

# Numerical predictors that are treated as discrete
DISCRETE_VARS = [
    LendingData.INQ_LAST_6MTHS,
    LendingData.COLLECTIONS_12_MTHS_EX_MED,
    LendingData.ACC_NOW_DELINQ,
    LendingData.OPEN_ACC_6M,
    LendingData.OPEN_IL_12M,
]

# Continuous predictors: every numerical variable that is not discrete
# (kept in NUMERICAL_VARS order)
CONTINUOUS_VARS = [var for var in NUMERICAL_VARS if var not in DISCRETE_VARS]

# Categorical predictors. Together with DISCRETE_VARS these are the columns
# handed to the rare-label and ordinal-encoding steps of the pipeline.
CATEGORICAL_VARS = [
    LendingData.TERM,
    LendingData.GRADE,
    LendingData.EMP_LENGTH,
    LendingData.HOME_OWNERSHIP,
    LendingData.VERIFICATION_STATUS,
    LendingData.PURPOSE,
    LendingData.ZIP_CODE,
    LendingData.ADDR_STATE,
    LendingData.INITIAL_LIST_STATUS,
    LendingData.APPLICATION_TYPE,
    LendingData.VERIFICATION_STATUS_JOINT,
]

In [10]:
# Plain column names to be looked up as ``LendingData`` constants
sample_list = [
    "dti",
    "inq_last_6mths",
    "mths_since_last_delinq",
    "mths_since_last_major_derog",
    "annual_inc_joint",
    "all_util",
]

# Translate the names with the project helper and show what it returns
result = utilities.obtain_mapped_variables(
    name="LendingData",
    input_list=sample_list,
    data=orig_data,
)
print(result)

[LendingData.DTI, LendingData.INQ_LAST_6MTHS, LendingData.MTHS_SINCE_LAST_DELINQ, LendingData.MTHS_SINCE_LAST_MAJOR_DEROG, LendingData.ANNUAL_INC_JOINT, LendingData.ALL_UTIL]


### Workflow

* Handle missing data
    * Numerical data: Create a missing indicator and replace NaNs with mean/median.
    * Categorical data: Replace NaNs with missing/most frequent label.
* Transform numerical data: log and yeo_johnson transformations.
* Discretize numerical data (highly skewed data).
* Clean categorical data.
* Remove rare labels (categorical and discrete data).
* Encode data (categorical and discrete data).
* Scale data.

<br><hr>

## Build Pipeline For Feature Engineering

In [11]:
# Visualize Pipelines
# Render estimators (including the pipeline built below) as diagrams when they
# are displayed. ``set_config`` is imported with the other scikit-learn names
# in the imports cell so that all dependencies are declared in one place.
set_config(display="diagram")

In [12]:
# Feature-engineering pipeline. Steps run in the order listed, each one
# receiving the output of the previous step.
default_pipe = Pipeline(
    steps=[
        # ========== IMPUTATION ==========
        # add missing indicator
        # (runs before any imputation so the NaNs are still present)
        ("missing_indicator", AddMissingIndicator(variables=NUMERICAL_VARS_WITH_NA)),
        # Impute numerical variables with the median
        (
            "median_imputation",
            MeanMedianImputer(
                imputation_method="median", variables=REPL_VARS_WITH_MEDIAN
            ),
        ),
        # Impute numerical variables with the mean
        (
            "mean_imputation",
            MeanMedianImputer(imputation_method="mean", variables=REPL_VARS_WITH_MEAN),
        ),
        # Impute categorical variables with string missing
        (
            "missing_imputation",
            CategoricalImputer(
                imputation_method="missing", variables=CATEGORICAL_VARS_WITH_NA_MISSING
            ),
        ),
        # Impute categorical variables with the most frequent label
        (
            "frequent_imputation",
            CategoricalImputer(
                imputation_method="frequent",
                variables=CATEGORICAL_VARS_WITH_NA_FREQUENT,
            ),
        ),
        # ========== TRANSFORM NUMERICAL VARIABLES ==========
        (
            "log_transform",
            LogTransformer(variables=LOG_TRANSFORMED_VARS),
        ),
        (
            "yeo_johnson_transform",
            YeoJohnsonTransformer(variables=YEO_JOHNSON_LOG_TRANSFORMED_VARS),
        ),
        # ========== DISCRETIZE NUMERICAL VARIABLES ==========
        # NOTE(review): this bins DISCRETE_VARS; NUMERICALS_TO_BIN from the
        # config cell is not referenced anywhere in this pipeline — confirm
        # which list is intended.
        # NOTE(review): return_object=True presumably makes the binned columns
        # object-typed so the rare-label and ordinal encoders below accept
        # them — confirm before changing it.
        (
            "discretizer",
            EqualFrequencyDiscretiser(
                variables=DISCRETE_VARS, q=10, return_object=True
            ),
        ),
        # ========== CLEAN CATEGORICAL VARIABLES ==========
        # Custom transformer from the project's ``preprocessor`` module
        (
            "emp_length_mapper",
            pp.Mapper(variables=MAPPING_VARS, mappings=EMP_LENGTH_MAPPINGS),
        ),
        # ========== REMOVE RARE LABELS ==========
        (
            "rare_labels",
            RareLabelEncoder(
                tol=0.05, n_categories=5, variables=CATEGORICAL_VARS + DISCRETE_VARS
            ),
        ),
        # ========== ENCODE CATEGORICAL VARIABLES ==========
        # NOTE(review): "ordered" encoding presumably ranks labels using the
        # target, which is why the pipeline is fitted with y — confirm.
        (
            "categorical_encoder",
            OrdinalEncoder(
                encoding_method="ordered", variables=CATEGORICAL_VARS + DISCRETE_VARS
            ),
        ),
        # ========== SCALE VARIABLES ==========
        # No variable list is given, so the scaler receives every column that
        # reaches this step.
        (
            "scaler",
            StandardScaler(),
        ),
    ]
)
# Display the pipeline
default_pipe

In [13]:
# Train the pipeline
# Fitted on the training split only; the target is passed alongside the
# features so that steps which need it can use it.
default_pipe.fit(X_train, y_train)



In [14]:
# Apply the pipeline to train and test data
# NOTE: this rebinds X_train / X_test to the transformed output, so the raw
# splits are no longer available under these names. Re-run the split cell
# before re-running the fit or this cell, otherwise already-transformed data
# is fed back into the pipeline.
X_train = default_pipe.transform(X_train)
X_test = default_pipe.transform(X_test)

In [15]:
# Parameters are learnt and stored in each step of the pipeline
# Here: the per-variable fill values held by the "median_imputation" step.
default_pipe.named_steps["median_imputation"].imputer_dict_

{'dti': 18.18,
 'inq_last_6mths': 0.0,
 'mths_since_last_delinq': 30.0,
 'mths_since_last_major_derog': 44.0,
 'annual_inc_joint': 106000.0,
 'all_util': 60.0}