In [79]:
import boto3
import io
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

### set-up

In [80]:
# Build a boto3 session with no explicit arguments (credentials/region come
# from whatever the environment provides) and derive the S3 client that the
# data-intake cell uses to download the scored parquet file.
session = boto3.Session()
s3 = session.client('s3')

In [81]:
# S3 bucket that holds the model output read by the data-intake cell.
bucket_name = "cdo-ililapse-364524684987-bucket"
# Key prefix ("folder") inside the bucket for the lapse-model artefacts.
file_path = "x266754/lapse/"

### data intake

In [82]:
# Full S3 key of the scored LightGBM output. Built from `file_path` so the
# prefix is defined in exactly one place (it was previously repeated here).
file_name = file_path + "lgbm_results_out_12_22_2022.parquet"

In [83]:
%%time
obj = s3.get_object(Bucket = bucket_name, Key = file_name)
score = pd.read_parquet(io.BytesIO(obj['Body'].read())) 

CPU times: user 2.64 s, sys: 1.01 s, total: 3.65 s
Wall time: 2.44 s


In [84]:
# Print the shape first and leave `score.head()` as the cell's last
# expression: a notebook only renders the value of the final expression, so
# the preview was silently discarded when it came before the print.
print(score.shape)
score.head()

(1623276, 57)


### decile

In [85]:
# rank_method = 'prob'
def decile_table(agg_method, rank_method, data=None, target='3mo_ahead_Lapse', n_bins=10):
    """Bucket rows into quantile bins of a score column and tabulate events per bin.

    Side effect: two columns are written onto the frame being analysed --
    ``prob_bins`` (0-based quantile-bin code, NaN when the score is missing)
    and ``decile`` (``'D1'`` .. ``'D<n_bins>'`` from lowest to highest score,
    or ``'NoScore'`` when the score is missing).

    Parameters
    ----------
    agg_method : str
        Column whose non-null values are counted in every pivot table.
        Rows where this column is null are therefore not counted.
    rank_method : str
        Column holding the score used to form the quantile bins.
    data : pandas.DataFrame, optional
        Frame to analyse and annotate. Defaults to the notebook-level
        ``score`` frame, which keeps existing two-argument calls working.
    target : str, default '3mo_ahead_Lapse'
        Binary outcome column; 1 marks an event, 0 a non-event.
    n_bins : int, default 10
        Number of quantile bins requested. Fewer bins are produced when
        duplicate bin edges have to be dropped.

    Returns
    -------
    good : pandas.DataFrame
        Counts of non-events per decile (one row, indexed by ``target`` == 0).
    bad : pandas.DataFrame
        Counts of events per decile (one row, indexed by ``target`` == 1).
    bins : pandas.DataFrame
        Single-row frame with the quantile bin edges.
    d_table : pandas.DataFrame
        Counts per ``target`` value (rows) and decile (columns).
    lapse_rate : pandas.DataFrame
        Events as a percentage of all counted rows per decile, rounded to 2 dp.
    """
    if data is None:
        # Fall back to the notebook's global frame (original behaviour).
        data = score

    labels = [f'D{i}' for i in range(1, n_bins + 1)]

    # qcut already returns the integer bin code of every row, so a second
    # pd.cut over the same edges is unnecessary.
    codes, bins = pd.qcut(data[rank_method], n_bins, retbins=True,
                          labels=False, duplicates='drop')
    data['prob_bins'] = codes

    # Translate bin codes to labels; rows without a score keep 'NoScore'.
    code_values = codes.to_numpy(dtype=float)
    has_score = ~np.isnan(code_values)
    decile = np.full(len(code_values), 'NoScore', dtype=object)
    decile[has_score] = np.asarray(labels, dtype=object)[code_values[has_score].astype(int)]
    data['decile'] = decile

    def _count_by_decile(frame):
        # Count non-null `agg_method` values per (target, decile), with the
        # columns forced into D1..Dn order (pivot_table sorts them as text).
        table = pd.pivot_table(frame, values=agg_method, aggfunc="count",
                               index=[target], columns=['decile'])
        return table.reindex(labels, axis=1)

    # events and non-events
    good = _count_by_decile(data[data[target] == 0])
    bad = _count_by_decile(data[data[target] == 1])

    # bin cut-off values as a single-row table
    bins = pd.DataFrame(bins).T

    d_table = _count_by_decile(data)
    d_sum = d_table.sum()
    lapse_rate = (bad / d_sum * 100).round(2)

    return good, bad, bins, d_table, lapse_rate

In [86]:
# Rank on the raw model probability and count rows via `pfmc_cur_month`,
# then render the bin edges, the per-decile counts and the lapse rate (%).
good, bad, bins_table, d_table, lapse_rate = decile_table(
    agg_method='pfmc_cur_month',
    rank_method='prob',
)
display(bins_table, d_table, lapse_rate)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.0,0.000469,0.000915,0.001437,0.001585,0.001853,0.001947,0.002053,0.002189,0.002427,1.0


decile,D1,D2,D3,D4,D5,D6,D7,D8,D9,D10
3mo_ahead_Lapse,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,162273,162058,162284,162161,164790,159297,162019,178725,145558,159534
1,230,103,144,169,278,290,324,394,388,2257


decile,D1,D2,D3,D4,D5,D6,D7,D8,D9,D10
3mo_ahead_Lapse,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,0.14,0.06,0.09,0.1,0.17,0.18,0.2,0.22,0.27,1.4


### bucket

In [87]:
# Cash value: label each row by the dollar range `mod_mpt_total` falls into.
col = 'mod_mpt_total'
cash_value = score[col]

# (lower, upper] bounds of the middle buckets. The first bucket is closed on
# both ends ([0, 10k]) and the last one has no upper bound.
inner_bounds = [
    (10000, 25000),
    (25000, 50000),
    (50000, 100000),
    (100000, 250000),
    (250000, 500000),
    (500000, 750000),
    (750000, 1000000),
    (1000000, 3000000),
    (3000000, 5000000),
]

conditions = [cash_value.between(0, 10000)]
conditions += [(cash_value > lower) & (cash_value <= upper) for lower, upper in inner_bounds]
conditions.append(cash_value > 5000000)

choices = [
    '$0k-10k',
    '$10k-25k',
    '$25k-50k',
    '$50k-100k',
    '$100k-250k',
    '$250k-500k',
    '$500k-750k',
    '$750k-1M',
    '$1M-3M',
    '$3M-5M',
    '>$5M',
]

# Negative or missing amounts match no condition and are labelled 'missing'.
score["cash_val_bucket"] = np.select(conditions, choices, default='missing')

In [88]:
# Policy age: `policy_age` divided by 12 (one decimal), then 5-year bands.
score['policy_age_yr'] = score['policy_age'].div(12).round(1)
col = 'policy_age_yr'
age_years = score[col]

conditions = [
    age_years.between(0, 5),                # closed on both ends
    age_years.gt(5) & age_years.le(10),
    age_years.gt(10) & age_years.le(15),
    age_years.gt(15) & age_years.le(20),
    age_years.gt(20) & age_years.le(25),
    age_years.gt(25),                       # open-ended top band
]

choices = ['0-5yr', '5yr-10yr', '10yr-15yr', '15yr-20yr', '20yr-25yr', '25+yr']

# Negative or missing ages match no condition and are labelled 'missing'.
score["policy_age_bucket"] = np.select(conditions, choices, default='missing')

### Mapping and Formatting

In [89]:
# Combine the two bucket labels into one profile string, e.g. "0-5yr , $0k-10k".
score['campaign_profile'] = score["policy_age_bucket"].add(' , ').add(score["cash_val_bucket"])

In [90]:
# Turn Python None into NaN so the fills below catch every missing value.
score.replace(to_replace=[None], value=np.nan, inplace=True)

# Replace missing codes with a placeholder string: 'NA' for the three
# demographic codes and 'None' for the service-request category.
for column, placeholder in [
    ('mrtl_stat_cd', 'NA'),
    ('prmr_prsnc_chld_ind', 'NA'),
    ('prim_occup_cd', 'NA'),
    ('SR_TYPE_CATG', 'None'),
]:
    score[column] = score[column].fillna(placeholder)

In [91]:
# format
# Express the model probability as a percentage with two decimals.
# The untouched 0-1 probability is kept in `prob_raw` and the percentage is
# always derived from it, so re-running this cell no longer multiplies an
# already-scaled column by 100 again.
if 'prob_raw' not in score.columns:
    score['prob_raw'] = score['prob']
score['prob'] = (score['prob_raw']*100).round(2)

# Whole-number columns are stored as plain integers for display/export.
score['holder_age']= score['holder_age'].astype(int)
score['num_sr_catg']= score['num_sr_catg'].astype(int)
score['num_sr_6m']= score['num_sr_6m'].astype(int)

In [92]:
# Income range: code -> display label. The position in the list is the code.
income_labels = [
    '0',
    ' 0 ~ 14,999',
    ' 15,000 ~ 19,999',
    ' 20,000 ~ 29,999',
    ' 30,000 ~ 39,999',
    ' 40,000 ~ 49,999',
    ' 50,000 ~ 74,999',
    ' 75,000 ~ 99,999',
    ' 100,000 ~ 124,999',
    ' 125,000+',
]
income_map = dict(enumerate(income_labels))

# Translate the income-range code into its display label; codes that are not
# keys of income_map (including missing values) become NaN.
score['income_range'] = score['incm_rng_cd'] .map(income_map)

In [93]:
# occupation
# Keys are stored without padding. The letter codes used to be written with a
# trailing space ('A ', 'B ', ...) while the digit codes were not, so an
# unpadded letter code in the data silently mapped to NaN.
occupation_map = {
    '1': ' Professional/Technical',
    '2': ' Administration/Managerial',
    '3': ' Sales/Service',
    '4': ' Clerical/White Collar',
    '5': ' Craftsman/Blue Collar',
    '6': ' Student',
    '7': ' Homemaker',
    '8': ' Retired',
    '9': ' Farmer',
    'A': ' Military',
    'B': ' Religious',
    'C': ' Self Employed',
    'D': ' Self Employed - Professional/Technical',
    'E': ' Self Employed - Administration/Managerial',
    'F': ' Self Employed - Sales/Service',
    'G': ' Self Employed - Clerical/White Collar',
    'H': ' Self Employed - Craftsman/Blue Collar',
    'I': ' Self Employed - Student',
    'J': ' Self Employed - Homemaker',
    'K': ' Self Employed - Retired',
    'L': ' Self Employed - Other',
    'V': ' Educator',
    'W': ' Financial Professional',
    'X': ' Legal Professional',
    'Y': ' Medical Professional',
    'Z': ' Other',
}

# Look the code up in string form with surrounding whitespace removed, so both
# padded ('A ') and unpadded ('A') codes resolve. Codes that are not keys
# (e.g. the 'NA' placeholder or missing values) still become NaN.
score['prime_occup'] = score['prim_occup_cd'].astype(str).str.strip().map(occupation_map)

In [94]:
# networth
# Keys are stored without padding. 'A' and 'B' used to be written with a
# trailing space ('A ', 'B ') unlike the digit codes, so unpadded codes for
# the two highest net-worth bands silently mapped to NaN.
networth_map = {
    '1': ' Less than or equal to $0',
    '2': ' $1 - $4,999',
    '3': ' $5,000 - $9,999',
    '4': ' $10,000 - $24,999',
    '5': ' $25,000 - $49,999',
    '6': ' $50,000 - $99,999',
    '7': ' $100,000 - $249,999',
    '8': ' $250,000 - $499,999',
    '9': ' $500,000 - $999,999',
    'A': ' $1,000,000 - $1,999,999',
    'B': ' $2,000,000 +',
}

# Look the code up in string form with surrounding whitespace removed, so both
# padded ('A ') and unpadded ('A') codes resolve. Codes that are not keys
# (including missing values) still become NaN.
score['hh_networth'] = score['net_wrth_gold_cd'].astype(str).str.strip().map(networth_map)

In [95]:
# Marital status: code -> label, codes run 1..6 in list order.
# (The variable and output column keep their existing "martial" spelling
# because later cells refer to them by that name.)
martial_map = dict(
    enumerate(
        ['MARRIED', 'SINGLE', 'WIDOWED', 'DIVORCED', 'SEPARATED', 'ESTRANGED'],
        start=1,
    )
)

# Translate the marital-status code into its label. Values that are not keys
# of martial_map -- including the 'NA' placeholder written into mrtl_stat_cd
# by the fill step -- become NaN.
score['martial_status'] = score['mrtl_stat_cd'].map(martial_map)

In [96]:
# ixi_financial_cohort: cohort code -> wealth tier.
# Written tier-first (each tier lists its codes) and inverted into the
# code -> tier lookup used by the mapping below.
_cohort_codes_by_tier = {
    'Mass Market': (
        'A01 A02 A03 A04 B05 B06 B07 B08 C09 C10 C11 C12 D13 D14 D15 D16 '
        'M43 M44 M45 M46 M47 P54 P55 P56 P57'
    ),
    'Mass Affluent': (
        'E17 E18 E19 E20 F21 F22 F23 F24 G25 G26 G27 G28 H29 H30 H31 '
        'N48 N49 N50 N51 Q58 Q59 Q60'
    ),
    'Affluent': 'I32 J33 J34 J35 J36 K37 K38 K39 L40 L41 L42 O52 O53 R61',
    'Unknown': 'UNK',
}
financial_cohort_map = {
    code: tier
    for tier, codes in _cohort_codes_by_tier.items()
    for code in codes.split()
}

# Translate the cohort code into its wealth tier; codes that are not keys of
# financial_cohort_map (including missing values) become NaN.
score['ixi_financial_cohort'] = score['ixi_fin_cohort_cd'].map(financial_cohort_map)

In [97]:
# ixi_cohort_grp: cohort code -> descriptive group name.
# Kept as a "code|label" table (one pair per line) and parsed into a dict.
_cohort_group_table = """
A01|Getting By
A02|Financial Challenges
A03|Planners
A04|New Investors
B05|Financial Challenges
B06|Conservative Savers
B07|Credit Reliant
B08|Investors
C09|Financial Challenges
C10|Credit Reliant
C11|Conservative Savers
C12|Investors
D13|Minimal Assets
D14|Very Elderly
D15|Conservative Savers
D16|Mixed Investments
E17|Planners
E18|Credit Dependent
E19|Mixed Investments
E20|Thriving Investors
F21|Credit-Active Investors
F22|Retirement Planners
F23|Savers
F24|Investors
G25|Credit-Active Investors
G26|Savers
G27|Retirement Planners
G28|Investors
H29|Post-Retirement Mix
H30|Very Elderly
H31|Complex Investments
I32|Young Investors
J33|Planners
J34|Accumulators
J35|Investors
J36|High Worth
K37|Accumulators
K38|Investors
K39|Very Wealthy
L40|Well-Planned Retirement
L41|Very Elderly
L42|Wealthiest
M43|Meager Means
M44|Credit Reliant
M45|Planners
M46|Near Retirement
M47|Beginning to Invest
N48|Deposits and Credit
N49|Older Conservative
N50|Retirement Planners
N51|Older Investors
O52|Elderly and Prudent
O53|Wealthy Investors
P54|Financial Challenges
P55|Credit Reliant
P56|Conservative Planners
P57|Conservative Investors
Q58|Older Conservative
Q59|Retirement Planners
Q60|Older Investors
R61|Wealthy Investors
UNK|Unknown
"""
financial_cohort_grp_map = dict(
    row.split('|', 1) for row in _cohort_group_table.strip().splitlines()
)

# Translate the cohort code into its descriptive group name; codes that are
# not keys of financial_cohort_grp_map (including missing values) become NaN.
score['ixi_cohort_grp'] = score['ixi_fin_cohort_cd'].map(financial_cohort_grp_map)

In [98]:
# geo desc: cohort code -> type of location.
# Written location-first (each location lists its codes) and inverted into the
# code -> location lookup used by the mapping below.
_cohort_codes_by_location = {
    'Major Metro': (
        'A01 A02 A03 A04 B05 B06 B07 B08 C09 C10 C11 C12 D13 D14 D15 D16 '
        'E17 E18 E19 E20 F21 F22 F23 F24 G25 G26 G27 G28 H29 H30 H31 '
        'I32 J33 J34 J35 J36 K37 K38 K39 L40 L41 L42'
    ),
    'Small City': 'M43 M44 M45 M46 M47 N48 N49 N50 N51 O52 O53',
    'Small Town & Rural': 'P54 P55 P56 P57 Q58 Q59 Q60 R61',
    'Unknown': 'UNK',
}
geo_desc_map = {
    code: location
    for location, codes in _cohort_codes_by_location.items()
    for code in codes.split()
}

# Translate the cohort code into its location type; codes that are not keys
# of geo_desc_map (including missing values) become NaN.
score['geo_loc'] = score['ixi_fin_cohort_cd'].map(geo_desc_map)

In [99]:
# Orphan code: code -> description. The position in the list is the code.
orph_cd_map = dict(enumerate(['No Retail Prod', 'Agent', 'Orph']))

# Translate the orphan code into its description; codes that are not keys of
# orph_cd_map (including missing values) become NaN.
score['orph_desc'] = score['rtl_dstrb_orphn_cd'].map(orph_cd_map)

### Segmentation score

In [100]:
# Show every column when a frame is rendered.
pd.set_option('display.max_columns', None)

# Rows to export: lapse flag equal to 1 on the 3/31/2022 valuation date.
is_lapse = score['3mo_ahead_Lapse'] == 1
is_target_date = score['val_dt'] == '3/31/2022'

# Output columns, grouped by theme, in the order they appear in the export.
id_columns = ['policy_id', 'val_dt']
model_columns = ['prob', 'decile', 'campaign_profile']
demographic_columns = [
    'holder_age',
    'gndr_cd',
    'martial_status',
    'prmr_prsnc_chld_ind',
    'subj_state_cd',
    'geo_loc',
    'prime_occup',
    'income_range',
    'ixi_financial_cohort',
    'ixi_cohort_grp',
    'hh_networth',
]
policy_columns = [
    'num_policies_hh',
    'prod_grp',
    'issue_year',
    'policy_age_bucket',
    'base_face_amt',
    'mod_mpt_total',
    'cash_val_bucket',
    'accum_actual_prem_paid',
    'mom_accum_prem_paid',
    'orph_desc',
    'prem_mode',
    'prem_mode_change_flag',
]
# 'sr_create_month' is deliberately left out of the service-request group.
service_request_columns = ['SR_TYPE_CATG', 'num_sr_catg', 'num_sr_6m']

output_columns = (
    id_columns
    + model_columns
    + demographic_columns
    + policy_columns
    + service_request_columns
)

# Highest probability first.
sample = (
    score.loc[is_lapse & is_target_date, output_columns]
    .sort_values('prob', ascending=False)
)


In [101]:
# Report the size of the selection, write it (index included) to a CSV file in
# the working directory, and preview the first rows.
n_rows, n_cols = sample.shape
print((n_rows, n_cols))
sample.to_csv(path_or_buf="score_output.csv")

sample.head()

(771, 31)


Unnamed: 0,policy_id,val_dt,prob,decile,campaign_profile,holder_age,gndr_cd,martial_status,prmr_prsnc_chld_ind,subj_state_cd,geo_loc,prime_occup,income_range,ixi_financial_cohort,ixi_cohort_grp,hh_networth,num_policies_hh,prod_grp,issue_year,policy_age_bucket,base_face_amt,mod_mpt_total,cash_val_bucket,accum_actual_prem_paid,mom_accum_prem_paid,orph_desc,prem_mode,prem_mode_change_flag,SR_TYPE_CATG,num_sr_catg,num_sr_6m
261195,V2709982,2022-03-31,100.0,D10,"0-5yr , $0k-10k",56,M,,Y,NY,Major Metro,,"125,000+",Affluent,Very Wealthy,,3.0,VUL Protector,2020,0-5yr,313322.0,2376.0,$0k-10k,5029.58,0.0,Agent,Q,0,,0,0
1137772,V2648145,2022-03-31,100.0,D10,"0-5yr , $0k-10k",46,F,,Y,MI,Small Town & Rural,Homemaker,"75,000 ~ 99,999",Mass Affluent,Older Investors,"$500,000 - $999,999",1.0,VUL Protector,2020,0-5yr,500000.0,3861.0,$0k-10k,9108.0,0.0,No Retail Prod,A,0,ClientChg,1,6
795947,V3001468,2022-03-31,100.0,D10,"20yr-25yr , $0k-10k",67,M,MARRIED,,OH,Major Metro,,0,Mass Affluent,Post-Retirement Mix,"$1 - $4,999",1.0,Other VUL,1999,20yr-25yr,184000.0,3813.86,$0k-10k,84694.51,0.0,Orph,A,0,,0,1
1153934,V2241524,2022-03-31,100.0,D10,"5yr-10yr , $0k-10k",18,F,SINGLE,Y,NJ,Major Metro,Clerical/White Collar,"125,000+",Affluent,Accumulators,,2.0,Custom Premier II,2013,5yr-10yr,100000.0,131.0,$0k-10k,2199.15,0.0,Orph,Q,0,,0,0
1212401,V2600532,2022-03-31,100.0,D10,"0-5yr , $0k-10k",38,M,,Y,MD,Major Metro,Clerical/White Collar,"15,000 ~ 19,999",Mass Affluent,Retirement Planners,"$1 - $4,999",1.0,VUL Protector,2019,0-5yr,250000.0,2474.0,$0k-10k,4708.0,0.0,Orph,Q,0,,0,0
