In [7]:
%reload_ext autoreload
%autoreload 2

import os
import pandas as pd
import numpy as np
from IPython.core.display import HTML, Javascript

# Identifier of the model being validated; it selects the configs to load and
# is interpolated into the model data paths further down in this cell.
MODEL_NAME = 'cul-ascore'
from mylibs.configs import load_configs

# Configuration for MODEL_NAME; passed to the table/test helpers used throughout the notebook.
configs = load_configs(MODEL_NAME)

from mylibs.table_display import (
    readModelInfo, 
    formatMultiIndexTable, 
    docTable,
    create_and_excecute_code_cell,
    getWaterfallTable,
    getFrontendPSI,
    computeFactorCA,
    computeMultiFactorCA,
    getStyledTable,
)

from mylibs.backend_tests import (
    calcAR,
    calcRiskRankingTable,
    getRiskRankingOutcome,
    getRiskRankingChart,
    calcSubsegmentRanking,
    calcMultiFactorAR,
    calcFactorAR,
)


# Project root comes from the HOME_DIR environment variable. Indexing os.environ
# (instead of .get) fails fast with a KeyError naming the missing variable rather
# than an opaque "NoneType + str" TypeError when it is not set.
HOME_DIR = os.environ['HOME_DIR']
MODEL_DATA_DIR = f'{HOME_DIR}/models/{MODEL_NAME}/data'

# Workbook holding the document/model metadata; one table is read per sheet.
MODEL_INFO_FILE = f'{MODEL_DATA_DIR}/validation_info.xlsx'

MODEL_INFO_ModelInfo = readModelInfo(MODEL_INFO_FILE, 'Model-Info')
MODEL_INFO_DocInfo = readModelInfo(MODEL_INFO_FILE, 'Doc-Info')
MODEL_INFO_DocSignoff = readModelInfo(MODEL_INFO_FILE, 'Doc-Signoff')
MODEL_INFO_DataSource = readModelInfo(MODEL_INFO_FILE, 'Data-Source')
MODEL_INFO_DataPeriod = readModelInfo(MODEL_INFO_FILE, 'Data-Period')
MODEL_INFO_KeyDefinitions = readModelInfo(MODEL_INFO_FILE, 'Key-Definitions')
MODEL_INFO_Chrono = readModelInfo(MODEL_INFO_FILE, 'Chrono')
MODEL_INFO_OviewOS = readModelInfo(MODEL_INFO_FILE, 'Port-Overview-OS')
MODEL_INFO_OviewCnt = readModelInfo(MODEL_INFO_FILE, 'Port-Overview-Cnt')


def _info_value(info, field):
    """Return the 'Value' of the first row of `info` whose 'Field' equals `field`.

    Raises IndexError when no row matches, as the inline lookups did before.
    """
    return info[info['Field'] == field]['Value'].values[0]


# Strings interpolated into the cover page and page headers of the report.
strDocname = _info_value(MODEL_INFO_DocInfo, 'Name')
strFilename = _info_value(MODEL_INFO_DocInfo, 'Filename')
strPortfolio = _info_value(MODEL_INFO_ModelInfo, 'Portfolio Name')
strModelname = _info_value(MODEL_INFO_ModelInfo, 'Model Name')

# Validation data set; 'cohort' is forced to str so pandas does not infer a numeric dtype for it.
dfData = pd.read_csv(f'{MODEL_DATA_DIR}/ascore_val_data.csv', dtype={'cohort': str})

# load custom css styling
def css_styling():
    """Build the HTML snippet that carries the report's custom CSS.

    Reads ``<HOME_DIR>/css/capp.css`` (HOME_DIR taken from the environment) and
    wraps its content in a ``<style>`` tag, followed by a hidden ``<header>``
    element containing the module-level ``strDocname``.

    Returns:
        The ``HTML(...)`` object wrapping that markup.
    """
    css_path = os.environ.get('HOME_DIR') + '/css/capp.css'
    # Context manager so the file handle is closed as soon as the stylesheet is read.
    with open(css_path, "r") as css_file:
        styles = css_file.read()
    return HTML('<style>' + styles + '</style>' +
                f'<header style="visibility: hidden;">{strDocname}</header>'
               )
css_styling()

<br><br><br><br><br>
<div style="font-size:24pt; text-align:center; margin: 20px">{{strDocname}}</div>
<div style="font-size:24pt; text-align:center; margin: 20px">{{strPortfolio}}</div>
<div style="font-size:24pt; text-align:center; margin: 20px">{{strModelname}}</div>
<br><br><br><br><br>

{{docTable(MODEL_INFO_ModelInfo, no_header=True, table_type='text-left')}}
<br><br><br><br><br>


<p class="pagebreak"></p>
<div style="font-size:18pt; text-align:center;">
{{strDocname}}<br>
{{strPortfolio}} -  {{strModelname}}<br>
</div>
<br><br><br><br>

**Confidentiality**
<br>
This document contains proprietary information that is confidential to our company. 
Disclosure of this document in full or part may result in material damage to the company.
Written permission must be obtained from our company prior to the disclosure of this document to a third party.
<br>
<br>
{{docTable(MODEL_INFO_DocInfo, no_header=True, table_type='text-left')}}
<br><br>

**Document Sign-off**
<br>
{{docTable(MODEL_INFO_DocSignoff,table_type='doc-signoff text-left')}}



<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Introduction" data-toc-modified-id="Introduction-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Introduction</a></span><ul class="toc-item"><li><span><a href="#Background" data-toc-modified-id="Background-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Background</a></span></li><li><span><a href="#Scope-of-Validation" data-toc-modified-id="Scope-of-Validation-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Scope of Validation</a></span></li></ul></li><li><span><a href="#Executive-Summary" data-toc-modified-id="Executive-Summary-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Executive Summary</a></span></li><li><span><a href="#Portfolio-Overview" data-toc-modified-id="Portfolio-Overview-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Portfolio Overview</a></span></li><li><span><a href="#Validation-Data-and-Key-Technical-Definitions" data-toc-modified-id="Validation-Data-and-Key-Technical-Definitions-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Validation Data and Key Technical Definitions</a></span><ul class="toc-item"><li><span><a href="#Data-Sources" data-toc-modified-id="Data-Sources-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>Data Sources</a></span></li><li><span><a href="#Data-Period" data-toc-modified-id="Data-Period-4.2"><span class="toc-item-num">4.2&nbsp;&nbsp;</span>Data Period</a></span></li><li><span><a href="#Key-Technical-Definitions" data-toc-modified-id="Key-Technical-Definitions-4.3"><span class="toc-item-num">4.3&nbsp;&nbsp;</span>Key Technical Definitions</a></span></li><li><span><a href="#Data-Quality-and-Completeness" data-toc-modified-id="Data-Quality-and-Completeness-4.4"><span class="toc-item-num">4.4&nbsp;&nbsp;</span>Data Quality and Completeness</a></span></li><li><span><a href="#Summary-of-Validation-Data" data-toc-modified-id="Summary-of-Validation-Data-4.5"><span class="toc-item-num">4.5&nbsp;&nbsp;</span>Summary of Validation 
Data</a></span></li></ul></li><li><span><a href="#Scorecard-Assessment" data-toc-modified-id="Scorecard-Assessment-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Scorecard Assessment</a></span><ul class="toc-item"><li><span><a href="#Front-end-Assessment" data-toc-modified-id="Front-end-Assessment-5.1"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Front-end Assessment</a></span><ul class="toc-item"><li><span><a href="#Population-Stability-Index-(PSI)" data-toc-modified-id="Population-Stability-Index-(PSI)-5.1.1"><span class="toc-item-num">5.1.1&nbsp;&nbsp;</span>Population Stability Index (PSI)</a></span></li><li><span><a href="#Factor-Charateristic-Analysis" data-toc-modified-id="Factor-Charateristic-Analysis-5.1.2"><span class="toc-item-num">5.1.2&nbsp;&nbsp;</span>Factor Charateristic Analysis</a></span></li></ul></li><li><span><a href="#Back-end-Assessment" data-toc-modified-id="Back-end-Assessment-5.2"><span class="toc-item-num">5.2&nbsp;&nbsp;</span>Back-end Assessment</a></span><ul class="toc-item"><li><span><a href="#Overall-Model-Performance" data-toc-modified-id="Overall-Model-Performance-5.2.1"><span class="toc-item-num">5.2.1&nbsp;&nbsp;</span>Overall Model Performance</a></span></li><li><span><a href="#Sub-segment-Level-Performance" data-toc-modified-id="Sub-segment-Level-Performance-5.2.2"><span class="toc-item-num">5.2.2&nbsp;&nbsp;</span>Sub-segment Level Performance</a></span></li><li><span><a href="#Factor-Level-Performance" data-toc-modified-id="Factor-Level-Performance-5.2.3"><span class="toc-item-num">5.2.3&nbsp;&nbsp;</span>Factor Level Performance</a></span></li></ul></li></ul></li><li><span><a href="#Conclusion" data-toc-modified-id="Conclusion-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Conclusion</a></span></li><li><span><a href="#Appendices" data-toc-modified-id="Appendices-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Appendices</a></span><ul class="toc-item"><li><span><a href="#Factor-Distribution" 
data-toc-modified-id="Factor-Distribution-7.1"><span class="toc-item-num">7.1&nbsp;&nbsp;</span>Factor Distribution</a></span></li><li><span><a href="#Chronology" data-toc-modified-id="Chronology-7.2"><span class="toc-item-num">7.2&nbsp;&nbsp;</span>Chronology</a></span></li></ul></li></ul></div>

# Introduction

## Background
To have a better tool to assess customer credit risk, the bank developed a model suite for the Consumer Unsecured Lending segment. It comprises an application scorecard, a behaviour scorecard and PD/EAD/LGD<span class="footnote">PD: Probability of Default just try a very long footnote to see how it go on the printing just hope everything ok<br> EAD: Exposure At Default<br> LGD: Loss Given Default</span> segmentation models.
<br>
<br>
The application scorecard was developed in 2014 and implemented in Oct 2015, after going through the independent validation process and obtaining approval for use from the Credit Risk Committee. Since then, the scorecard has been assessed annually and concluded to be fit for continued use.
<br>
<br>
The newly developed scorecards and segmentation models had achieved excellent risk ranking power, with AR of 71% for the Ascore, 85% for Bscore and 80% for PD models.

## Scope of Validation

<p>The scope of this review is to analyse the performance of the application scorecard for Consumer Unsecured Lending portfolios. The review is based on data from recent cohorts and assesses whether the scorecard is fit for continued use. This report documents the validation results and findings.</p>
<p>The performance of the scorecard is assessed against the standards set out in our Independent Validation Guidelines. These take into account the regulatory requirements. We employed the following color coding scheme in our assessment of the scorecard.</p>

<table class="ctable">
    <caption>Outcome Color Code</caption>
    <thead>
        <tr><th style="text-align: center">Color</th><th style="text-align: left">Outcome</th></tr>
    </thead>
    <tbody>
        <tr><td class="outcome-complied" style="text-align: center">Green</td><td style="text-align: left">Indicates generally complied with requirements/standards</td></tr>
        <tr><td class="outcome-moderate" style="text-align: center">Yellow</td><td style="text-align: left">Indicates moderate gaps to requirements/standards</td></tr>
        <tr><td class="outcome-significant" style="text-align: center">Red</td><td style="text-align: left">Indicates significant gaps to requirements/standards</td></tr>
    </tbody>
</table>

# Executive Summary

*Place holder for user comment*

# Portfolio Overview

The Consumer Unsecured Lending portfolio consists of 2 main products, Credit Card and Overdraft. The portfolio composition over the years is shown in the following tables.
<br>
{{docTable(MODEL_INFO_OviewOS, caption='Portfolio Overview (By Outstanding $mil)', numbers_format='{:,.0f}', table_type='text-center')}}
<br>
{{docTable(MODEL_INFO_OviewCnt,caption='Portfolio Overview (By Count)', numbers_format='{:,.0f}', table_type='text-center')}}


*Place-holder for analyst to write more stories*

# Validation Data and Key Technical Definitions

## Data Sources

The data sources for current review are summarised below.
{{docTable(MODEL_INFO_DataSource, table_type='text-left', caption='Data Sources')}}


## Data Period

{{docTable(MODEL_INFO_DataPeriod,table_type='data-period text-left', caption='Data Period')}}

## Key Technical Definitions

{{docTable(MODEL_INFO_KeyDefinitions, pre_wrap_data=True, table_type='text-left')}}
<div class="pagebreak"> </div>

## Data Quality and Completeness

For this exercise, a thorough check on data quality and completeness was conducted to ensure no significant impact to the assessment results.<br>
No issue was found. We concluded that data quality and completeness were fit and representative for validation purposes.

## Summary of Validation Data

The table below summarises the steps taken to arrive at the front-end and back-end validation samples.
<br>
{{docTable(getWaterfallTable(dfData,configs), show_index=True, numbers_format='{:,.0f}', table_type='text-center index-left', caption='Data Waterfall Analysis')}}

# Scorecard Assessment

## Front-end Assessment

### Population Stability Index (PSI)

In [8]:
# Front-end distribution table and PSI outcome table for the validation data.
dfFrontend, dfPSIOutcome = getFrontendPSI(dfData, configs)

# Number format for each first-level column group of dfFrontend
# (presumably matched to the group names by position -- confirm in formatMultiIndexTable).
frontend_group_formats = {
    'Count': '{:,.0f}',
    'Distribution': '{:,.1%}',
    'IV': '{:,.4f}',
}
formatFrontend = formatMultiIndexTable(
    cols=dfFrontend.columns,
    firstIndexNames=list(frontend_group_formats.keys()),
    formats=list(frontend_group_formats.values()),
)

{{docTable(dfFrontend, show_index=True, table_type='text-center', numbers_format=formatFrontend, caption='Frontend Distribution')}}
<br>
{{docTable(dfPSIOutcome, show_index=True, table_type='text-center', numbers_format='{:,.1%}', caption='PSI Outcome')}}

*Place holder for user comments*  
From the outcome table above, we conclude that, compared to the baseline, the populations of all 3 review cohorts are very stable, with PSI well below the threshold of 15%.  
The AHI of the review cohorts was also well below the threshold of 20%, indicating no concentration concern for the review data.

<div class="pagebreak"> </div>

### Factor Charateristic Analysis

In [9]:
# Characteristic analysis on the validation data. The first result is the outcome
# table rendered in the report; the second is the per-factor outcome collection
# that getStyledTable(factorsOutcome, rank=N) reads when rendering individual factors.
syledCAtable, factorsOutcome = computeMultiFactorCA(dfData, configs)
# Ad-hoc inspection snippets, intentionally left disabled:
# factorsOutcome[0]['styledTable']
# getStyledTable(factorsOutcome, rank=1)

The test outcomes of the Characteristic Analysis are shown in the table below.
{{syledCAtable}}

*User to enter comments here*

The tables below show the distribution and score changes for the first {{configs['CA_threshold']['top_factor']}} factors.
<br>
{{getStyledTable(factorsOutcome, rank=1)}}

*User to enter comments here*

{{getStyledTable(factorsOutcome, rank=2)}}

*User to enter comments here*

{{getStyledTable(factorsOutcome, rank=3)}}

*User to enter comments here*

{{getStyledTable(factorsOutcome, rank=4)}}

*User to enter comments here*

<div class="pagebreak"> </div>

## Back-end Assessment

### Overall Model Performance

In [10]:
# Keep only the records whose waterfall indicator marks them as back-end sample.
is_backend = dfData['waterfall_ind'] == 'Back-end'
dfBackend = dfData[is_backend]

dfOutcome, dfAR, styledOutcome, styledAR = calcRiskRankingTable(dfBackend, configs)
sSubsegOutcome = calcSubsegmentRanking(dfBackend, dfOutcome, configs)

# Extract bucket labels, counts, bads, distribution and bad rate from dfAR
# as arrays; these feed the risk-ranking charts.
capp_config = configs.get('capp_config')


def _ar_values(column):
    """Return the values stored under `column` in dfAR as an array."""
    return dfAR.loc[:, column].values


bucket = dfAR.index.astype(str).values
total = _ar_values('Count')
bad = _ar_values(capp_config['BAD_NAME'])
dist = _ar_values('Distribution')
badrate = _ar_values(capp_config['BADRATE_NAME'])


The tables below show the overall model performance test outcomes and the bad rate distribution.
<br>
{{styledOutcome}}
<br>
{{styledAR}}

*Place-holder for user comments*

<div class="pagebreak"> </div>

In [11]:
%%capture out
cohorts = dfAR.loc[:, 'Count'].columns.get_level_values(1)

figs = []
for idx, cohort in enumerate(cohorts):
    AR, AUC = calcAR(total[:,idx], bad[:,idx], bucket, ascending=True)
    figs.append(getRiskRankingChart(
        bucket, dist[:, idx], badrate[:,idx], 
        title=f"{cohort} cohort - Distribution & {capp_config['BADRATE_NAME']} (AR={AR:.0%})",
        configs=configs))
    
# Build the markdown/HTML template that arranges the charts in a table: the first
# chart gets a full-width row (colspan=2), the remaining charts go two per row.
# The doubled braces in the f-strings emit literal `{{figs[i]}}` placeholders.
strcell = ''
strrow = ''
for idx in range(len(figs)):
    if idx == 0:
        strcell += f'<td colspan="2">{{{{figs[{idx}]}}}}</td>'
    else:
        strcell += f'<td>{{{{figs[{idx}]}}}}</td>'

    # Close the row after the first chart and after every second chart that follows.
    if idx % 2 == 0:
        strrow += f'<tr>{strcell}</tr>'
        strcell = ''

# With an even number of charts the last cell is still pending at this point;
# emit it as a final row so that no chart is dropped from the table.
if strcell:
    strrow += f'<tr>{strcell}</tr>'
    strcell = ''

strhtml = f'<table class="text-center">{strrow}</table>'

# Note: this function creates a new cell every time it runs.
# Hence, delete the result cell below before re-running.
# create_and_excecute_code_cell('markdown', strhtml)


<table class="ctable text-center"><tr><td colspan="2">{{figs[0]}}</td></tr><tr><td>{{figs[1]}}</td><td>{{figs[2]}}</td></tr></table>

### Sub-segment Level Performance

The risk ranking tests were conducted on sub-segments. The results are shown in the table below.
<br>
{{sSubsegOutcome}}

*Place holder for user comments*
<br>
e.g. Most of the sub-segments remained strong in terms of risk ranking power, except for customers with OD (including customers with only OD<span class="footnote">Consumer Overdraft product</span>). This is due to the aggressive growth pre-2019, when the bank decided to tap into customers with lower income.

### Factor Level Performance

In [12]:
# Factor-level AR results on the back-end sample: the outcome table rendered in the
# report, plus an indexable collection of per-factor results whose 'factorDist'
# entries are displayed in this section and in the appendix.
sAROutcome, multiFactorARs = calcMultiFactorAR(dfBackend, configs)


{{sAROutcome}}

*Place holder for user comments*
<br>
e.g. From table 5.11, we see that 3 factors dropped in risk ranking power. Those are 'Address Type', 'Years of Service' and 'Ever 30+ dpd in last 12 months'.

{{multiFactorARs[0].get('factorDist')}}

*Place holder for user comments*<br>

{{multiFactorARs[4].get('factorDist')}}

*Place holder for user comments*<br>

{{multiFactorARs[8].get('factorDist')}}

*Place holder for user comments*<br>

# Conclusion

# Appendices

## Factor Distribution

The distribution and bad rate of each of the 11 factors are shown in this section.
<br>
{{multiFactorARs[0].get('factorDist')}}<br>
{{multiFactorARs[1].get('factorDist')}}<br>
{{multiFactorARs[2].get('factorDist')}}<br>
{{multiFactorARs[3].get('factorDist')}}<br>
{{multiFactorARs[4].get('factorDist')}}<br>
{{multiFactorARs[5].get('factorDist')}}<br>
{{multiFactorARs[6].get('factorDist')}}<br>
{{multiFactorARs[7].get('factorDist')}}<br>
{{multiFactorARs[8].get('factorDist')}}<br>
{{multiFactorARs[9].get('factorDist')}}<br>
{{multiFactorARs[10].get('factorDist')}}<br>

## Chronology

{{docTable(MODEL_INFO_Chrono, pre_wrap_data=True, table_type='text-left')}}


<div class="last_page"></div>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<br>
<div style="text-align: center">---- End of Document ----</div>
<div style="text-align: center">Thank you.</div>
