# Preparing libranzas

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from datetime import datetime, timedelta 
from dateutil import relativedelta
import s3fs
import pyarrow.parquet as pq
import os
import calendar
import gc


pd.set_option('max_columns',None)
pd.set_option('max_rows',None)

%matplotlib inline

In [2]:
BASE_PATH_LIBRANZA = 's3://adl-refined-dev-popular/data_orig/libranzas/'
LIBRANZA_FILE_NAME = 'libranzas_cnl_M202007.csv'
libranza_file_path = BASE_PATH_LIBRANZA+LIBRANZA_FILE_NAME

BASE_PATH_OUT_OBJECTS_LIBRANZA = 's3://adl-refined-dev-popular/parquet/TC_adquisicion/'

In [3]:
month_number = ['0' + str(i) for i in range(1,10)] + ['10','11','12']
anio_mes_strs = ['2020-' + x for x in month_number if int(x) < 8]  ## Change for month number

In [4]:
import s3fs
import pyarrow.parquet as pq

fs = s3fs.S3FileSystem()

input_path = 's3://adl-refined-dev-popular/data_parquet/productos/libranzas/productos_libranzas_dwh_M202007'
input = input_path
dataset = pq.ParquetDataset(input, filesystem=fs)
lib = dataset.read()
lib = lib.to_pandas()

del dataset

In [5]:
# lib will be a global variable in this script
# lib = pd.read_csv(libranza_file_path,
#                   sep = ';',
#                   encoding = 'latin-1',
#                   na_values = '',
#                   dtype={'id_cliente': str}
#                  )

lib.rename(columns=lambda x: x.lower(), inplace=True)
lib_original_cols = lib.columns
print('Libranza original shape: ', lib.shape)

if lib['id_cliente'].isnull().sum() > 0:
    lib = lib.loc[lib['id_cliente'].notnull()]
    
lib['id_cliente'] =  lib['id_cliente'].astype(int)

Libranza original shape:  (3039505, 66)


In [6]:
lib.head()

Unnamed: 0,sk_cliente,id_cliente,sk_rc_libranza,id_rc_libranza,sk_fe_radicacion,sk_fe_aprobacion,sk_fe_desembolso,sk_fe_finalizacion,sk_fe_estado_actual,sk_convenio_libranza,sk_persona,dk_persona,sk_oficina,sk_producto_servicio,sk_clasificacion_tradicional,cd_oficina,cd_originador,ds_originador,cd_segmento_lib,ds_segmento_lib,cd_subsegmento_lib,ds_subsegmento_lib,no_solicitud,fe_solicitud,vl_monto_solicitado,mp_reestructurado,no_obligacion,fe_desembolso,vl_monto_aprobado,vl_recapitalizacion,vl_monto_desembolsado,no_cuotas,fe_finalizacion,no_meses_per_gracia,vl_total_cuota,fe_exigibilidad_cuota_1,cd_estado_actual,ds_estado_actual,fe_estado_actual,vl_tasa,vl_tasa_efectiva_anual,cd_modalidad_pag_int,ds_modalidad_pag_int,cd_periodicidad_pag_int,ds_periodicidad_pag_int,cd_base_liquidacion,cd_tipo_libranza,ds_tipo_libranza,cd_tipo_credito,ds_tipo_credito,cd_tipo_colocacion,ds_tipo_colocacion,no_obligacion_novada,vl_monto_novado,mp_compra_ind_cartera,mp_afecta_desp_nomina,cd_linea_credito,fe_carga,fe_actua,no_particion,cd_sector,cd_subsector,cd_tipo_pagaduria,ds_sector,ds_subsector,ds_tipo_pagaduria
273,60834,102652295584838601,1023924,1000003*11439693*LB*36403010026444,,,20040205,20060405,19000101,5063,662495,1000003*11439693,166.0,27,1,364,1,Banco Popular,1,Fuerzas Militares,4,Libranzas Ejercito Nacional,36403010028969,2004-02-05,2100000.0,0,36403010026444,2004-02-05,2100000.0,0.0,1555542.0,24,2006-04-05,0,113137.0,2004-04-05,2,Cancelada,1900-01-01,26.0,25.7097,V,Vencido,M,Mensual,360,1,Organica,0000000000000000000000000000000000000000000000...,Novacion,1,1-ORD HST 50 AÃO 364 DIA,0,382878.0,0,0,66,NaT,NaT,0,1,4,1,ORDEN NACIONAL,Mindefensa Ejercito Nacional Activos,NACIONAL CENTRALIZADA
274,60834,102652295584838601,280423,1000003*11439693*LB*9550301121187,,,20010612,20040705,19000101,19724,662495,1000003*11439693,,27,1,955,1,Banco Popular,1,Fuerzas Militares,4,Libranzas Ejercito Nacional,9550301121187,1900-01-01,3100000.0,0,9550301121187,2001-06-12,2960000.0,0.0,0.0,36,2004-07-05,0,128101.0,2001-07-05,2,Cancelada,1900-01-01,31.5,25.7097,V,Vencido,M,Mensual,360,1,Organica,0000000000000000000000000000000000000000000000...,Nueva,1,1-ORD HST 50 AÃO 364 DIA,0,0.0,0,0,66,NaT,NaT,0,1,4,1,ORDEN NACIONAL,Mindefensa Ejercito Nacional Activos,NACIONAL CENTRALIZADA
275,60951,102652297314584801,215366,1000003*23823293*LB*2500300206080,,,20001227,20040205,19000101,5354,1250337,1000003*23823293,151.0,27,13,250,1,Banco Popular,4,Educativo,1,Libranzas Educadores,2500300206080,2000-12-27,2940000.0,0,2500300206080,2000-12-27,2940000.0,0.0,0.0,36,2004-02-05,0,125614.0,2001-02-05,2,Cancelada,1900-01-01,30.5,25.7097,V,Vencido,M,Mensual,360,1,Organica,0000000000000000000000000000000000000000000000...,Nueva,1,1-ORD HST 50 AÃO 364 DIA,0,0.0,0,0,66,NaT,NaT,0,3,7,3,ENTES TERRITORIALES,Secretaría de Educacion,REGIONAL
276,60951,102652297314584801,390474,1000003*23823293*LB*25003010013115,,,20021003,20050705,19000101,5354,1250337,1000003*23823293,151.0,27,13,250,1,Banco Popular,4,Educativo,1,Libranzas Educadores,25003010013632,2002-09-27,4000000.0,0,25003010013115,2002-10-03,4000000.0,0.0,2104473.0,32,2005-07-05,0,175856.0,2002-11-05,2,Cancelada,1900-01-01,26.6,25.7097,V,Vencido,M,Mensual,360,1,Organica,0000000000000000000000000000000000000000000000...,Novacion,1,1-ORD HST 50 AÃO 364 DIA,0,1697710.0,0,0,66,NaT,NaT,0,3,7,3,ENTES TERRITORIALES,Secretaría de Educacion,REGIONAL
277,60977,102652303039490201,197538,1000003*57441197*LB*4000300114523,,,20010608,20040705,20160630,16862,2299336,1000003*57441197,103.0,27,19,400,1,Banco Popular,0,Libranzas Otras,1,Libranzas Otras,4000300114523,2001-06-08,5000000.0,0,4000300114523,2001-06-08,5000000.0,0.0,0.0,36,2004-07-05,0,216387.0,2001-07-05,3,Castigado,2003-04-30,31.5,31.9009,V,Vencido,M,Mensual,360,1,Organica,0000000000000000000000000000000000000000000000...,Nueva,1,1-ORD HST 50 AÃO 364 DIA,0,0.0,0,0,66,NaT,NaT,0,4,1,4,ENTES DESENTRALIZADOS,Salud,LOCAL


In [7]:
lib['ds_estado_actual'].value_counts()

Cancelada     2620846
En Cartera     356148
Castigado       55800
                 6438
Name: ds_estado_actual, dtype: int64

In [8]:
# This is the list of columns that we will drop in the first stage
lib_constant_cols = ['sk_producto_servicio', 'cd_modalidad_pag_int', 'ds_modalidad_pag_int',
                     'cd_periodicidad_pag_int', 'ds_periodicidad_pag_int',
                     'cd_base_liquidacion', 'no_obligacion_novada']

lib.drop(columns=lib_constant_cols, inplace=True)

In [9]:
lib = lib.loc[lib['ds_tipo_libranza'] == 'Organica']
lib = lib.loc[lib['ds_tipo_credito'].notnull()]

In [10]:
lib_date_cols = ['fe_solicitud', 'fe_desembolso', 'fe_finalizacion']
for col in lib_date_cols:
    lib[col] = pd.to_datetime(lib[col], format='%Y-%m-%d', errors='coerce')

In [11]:
## filtro tiempo
begining_training_period = '2006-01-01' # TODO: verify if this period is enough
ending_training_period = '2020-07-01'

In [12]:
period_filter = (lib['fe_solicitud'] >= begining_training_period) & (lib['fe_solicitud'] <= ending_training_period)
lib = lib.loc[period_filter]

In [13]:
lib_relevant_cols = ['id_cliente', 'sk_rc_libranza', 'fe_solicitud',
                     'vl_monto_solicitado', 'fe_desembolso', 'fe_finalizacion',
                     'vl_monto_aprobado', 'no_obligacion', 'vl_monto_desembolsado',
                     'no_cuotas', 'vl_total_cuota', 'ds_estado_actual',
                     'vl_tasa', 'ds_tipo_credito', 'ds_tipo_libranza', 'vl_monto_novado',
                    'ds_sector','ds_subsector','ds_tipo_pagaduria']

lib = lib[lib_relevant_cols]

In [14]:
lib.loc[lib['fe_solicitud'].isnull(), 'fe_solicitud'] = lib.loc[lib['fe_solicitud'].isnull(), 'fe_desembolso']

In [15]:
# def add_columns_to_lib():
hoy = pd.datetime.today()

lib['años_ult_lib'] = (hoy - lib['fe_desembolso'])/(np.timedelta64(1, 'D')*365)
lib['dias_procesamiento'] = (lib['fe_desembolso'] - lib['fe_solicitud'])/np.timedelta64(1, 'D')
# this is a necessary fix because there are many dates with fecha de solicitud in 1900
lib.loc[lib['dias_procesamiento'] > 360, 'dias_procesamiento'] = lib['dias_procesamiento'].median()
lib['prop_desemb_vs_aprobado'] = lib['vl_monto_desembolsado']/lib['vl_monto_aprobado']

In [16]:
lib.head()

Unnamed: 0,id_cliente,sk_rc_libranza,fe_solicitud,vl_monto_solicitado,fe_desembolso,fe_finalizacion,vl_monto_aprobado,no_obligacion,vl_monto_desembolsado,no_cuotas,vl_total_cuota,ds_estado_actual,vl_tasa,ds_tipo_credito,ds_tipo_libranza,vl_monto_novado,ds_sector,ds_subsector,ds_tipo_pagaduria,años_ult_lib,dias_procesamiento,prop_desemb_vs_aprobado
278,102652303931898101,1647562,2013-01-11,29500000.0,2013-01-14,2020-02-05,29500000.0,56503010074745,1499850.0,84,530269.0,Cancelada,12.6,Novacion,Organica,25926022.0,ORDEN NACIONAL,"OTROS POLICÍA, FONDO ROTATORIO ARMADA Y EJERCITO",NACIONAL CENTRALIZADA,7.596053,3.0,0.050842
279,102652303931898101,1433724,2013-05-15,32700000.0,2013-05-17,2020-06-05,32700000.0,50003010107799,1488409.0,84,587789.0,Cancelada,12.6,Novacion,Organica,28952184.0,ORDEN NACIONAL,"OTROS POLICÍA, FONDO ROTATORIO ARMADA Y EJERCITO",NACIONAL CENTRALIZADA,7.259066,2.0,0.045517
280,102652303931898101,2197892,2015-07-24,54500000.0,2015-07-30,2022-09-05,54500000.0,58703090012023,2180428.0,84,943608.0,En Cartera,10.68,Novacion,Organica,51640752.0,ORDEN NACIONAL,"OTROS POLICÍA, FONDO ROTATORIO ARMADA Y EJERCITO",NACIONAL CENTRALIZADA,5.056327,6.0,0.040008
281,102652303931898101,1052519,2006-09-11,7188264.0,2006-09-12,2011-11-05,7188000.0,47003010079423,1438836.0,60,192443.0,Cancelada,20.5,Novacion,Organica,5205064.0,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,13.941258,1.0,0.200172
284,102652303931898101,2006414,2013-12-24,38500000.0,2013-12-27,2021-02-05,38500000.0,58203010144269,1730882.0,84,718883.0,Cancelada,12.6,Novacion,Organica,36128628.0,ORDEN NACIONAL,"OTROS POLICÍA, FONDO ROTATORIO ARMADA Y EJERCITO",NACIONAL CENTRALIZADA,6.645368,3.0,0.044958


In [17]:
def prev_months_last_date(anio_mes_str):
    '''Gets the year and month of prediction and returns the previous month's last day's date'''
    anio, mes = anio_mes_str.split('-')
    anio = int(anio)
    mes = int(mes)
    if mes == 1:
        anio = int(anio - 1)
        mes = 12
    else:
        mes = mes - 1
    last_day_of_month = calendar.monthrange(anio, mes)[1]
    out = datetime(anio, mes, last_day_of_month)
    return out 

In [18]:
def get_grouped_df(anio_mes_str):
    '''Produces the extended lib_df and the grouping object that will be used
    as input to produce the final libranzas df'''
    cutoff_date = prev_months_last_date(anio_mes_str)
    ## considering records only before the predicting period, grouping by client
    lib_df = lib.loc[lib['fe_desembolso'] <= cutoff_date].sort_values(['id_cliente','fe_desembolso'], ascending=False).copy()
    gp = lib_df.groupby('id_cliente')
    return gp

In [19]:
def create_lib_df(gp):
    '''Gets the extended lib_df and the grouped object and returns the data frame ready for master table'''
    #producing the first dataframe
    lib_out = pd.DataFrame(index=gp.groups.keys()) # this is different from the actual prod code
    
    # number of records for each cliente
    lib_out['num_lib_solicitadas'] = gp.size()

    # these have a very close relation
    lib_out['prom_monto_solicitado'] = gp['vl_monto_solicitado'].mean()
    lib_out['prom_monto_aprobado'] = gp['vl_monto_aprobado'].mean()
    lib_out['prom_monto_desembolsado'] = gp['vl_monto_desembolsado'].mean()
    lib_out['prom_monto_novado'] = gp['vl_monto_novado'].mean()
    
    # Minimum years since las product
    lib_out['min_anos_ult_lib'] = gp['años_ult_lib'].min()

    # other averages
    lib_out['prom_prop_desembolso'] = gp['prop_desemb_vs_aprobado'].mean()
    lib_out['prom_n_cuotas'] = gp['no_cuotas'].mean()
    lib_out['prom_valor_cuota'] = gp['vl_total_cuota'].mean()
    lib_out['prom_tasa'] = gp['vl_tasa'].mean()

    # we have to choose one of these two...
    lib_out['prom_dias_procesamiento'] = gp['dias_procesamiento'].mean()
    lib_out['max_dias_procesamiento'] = gp['dias_procesamiento'].max()

    # counting novacion events within the historical period
    lib_out['n_novaciones'] = gp['ds_tipo_credito'].value_counts().unstack()['Novacion'].fillna(0).astype(int)
    # categorical columns
    
    lib_out['sector_ultlibranza'] = gp.apply(lambda x: x['ds_sector'].iloc[0])
    lib_out['subsector_ultlibranza'] = gp.apply(lambda x: x['ds_subsector'].iloc[0])
    lib_out['tipo_pagaduria_ultlibranza'] = gp.apply(lambda x: x['ds_tipo_pagaduria'].iloc[0])
    
    lib_out.index.rename('id_cliente', inplace=True)
    lib_out.reset_index(inplace = True)
    
    return lib_out

In [20]:
def get_lib_df(anio_mes_str):
    '''Glues the get_objects_for_lib_df and create_lib_df'''
    gp = get_grouped_df(anio_mes_str)
    out = create_lib_df(gp)
    return out

In [21]:
# producing the objects 
for ix, x in enumerate(anio_mes_strs):
    
    if ix == 0:
        lib_df = get_lib_df(x)
        lib_df['periodo'] = x
    else:
        lib_temp = get_lib_df(x)
        lib_temp['periodo'] = x
        lib_df = pd.concat([lib_df,lib_temp], ignore_index=True)
    
    print(x)

2020-01
2020-02
2020-03
2020-04
2020-05
2020-06
2020-07


In [22]:
lib.sort_values(['id_cliente','fe_solicitud'], ascending=False).head(20)

Unnamed: 0,id_cliente,sk_rc_libranza,fe_solicitud,vl_monto_solicitado,fe_desembolso,fe_finalizacion,vl_monto_aprobado,no_obligacion,vl_monto_desembolsado,no_cuotas,vl_total_cuota,ds_estado_actual,vl_tasa,ds_tipo_credito,ds_tipo_libranza,vl_monto_novado,ds_sector,ds_subsector,ds_tipo_pagaduria,años_ult_lib,dias_procesamiento,prop_desemb_vs_aprobado
3019202,999958035714320502,2719326,2018-08-08,97000000.0,2018-08-09,2026-11-05,97000000.0,25103260000664,96989280.0,96,1765068.0,Cancelada,13.08,Nueva,Organica,0.0,PENSIONADOS,Fiduprevisora,NACIONAL CENTRALIZADA,2.02619,1.0,0.999889
2889793,999955693775109001,841646,2008-04-16,16000000.0,2008-04-17,2013-05-05,16000000.0,18003010294628,15215387.0,60,476588.0,Cancelada,25.74,Nueva,Organica,0.0,PENSIONADOS,Caja De retiro de Las FFMM,NACIONAL CENTRALIZADA,12.343998,1.0,0.950962
2626796,999955266582332102,1423192,2011-02-03,34300000.0,2011-02-08,2018-01-08,34270000.0,1203010063300,10748344.0,84,780730.0,Cancelada,20.950001,Novacion,Organica,21462744.0,PENSIONADOS,I.S.S. Pensionados,NACIONAL CENTRALIZADA,9.530299,5.0,0.313637
2626799,999955266582332102,1645223,2009-07-14,27000000.0,2009-07-24,2014-07-05,27000000.0,1203010051965,21754520.0,60,785221.0,Cancelada,24.540001,Novacion,Organica,3872778.0,PENSIONADOS,I.S.S. Pensionados,NACIONAL CENTRALIZADA,11.075505,10.0,0.805723
2626797,999955266582332102,842517,2007-04-19,5570000.0,2007-04-30,2012-05-05,5570000.0,4203010024491,4517765.0,60,150061.0,Cancelada,20.799999,Novacion,Organica,766300.0,PENSIONADOS,I.S.S. Pensionados,NACIONAL CENTRALIZADA,13.311121,11.0,0.811089
2402512,999955266450872402,1761673,2014-06-27,29100000.0,2014-06-27,2021-07-05,29100000.0,35303070001146,28937310.0,84,533773.0,Cancelada,12.0,Nueva,Organica,0.0,ENTES TERRITORIALES,Fondos Educativos,NACIONAL CENTRALIZADA,6.146738,0.0,0.994409
2402511,999955266450872402,1405539,2010-11-10,12000000.0,2010-11-12,2016-12-05,12000000.0,36303010059187,5050873.0,72,272880.0,Cancelada,17.879999,Novacion,Organica,6184277.0,ENTES TERRITORIALES,Fondos Educativos,NACIONAL CENTRALIZADA,9.771395,2.0,0.420906
2402513,999955266450872402,912371,2008-01-29,10000000.0,2008-01-30,2013-03-05,10000000.0,36303010037166,9389358.0,60,289773.0,Cancelada,24.360001,Nueva,Organica,0.0,ENTES TERRITORIALES,Fondos Educativos,NACIONAL CENTRALIZADA,12.557696,1.0,0.938936
2713296,999955266207457702,2059230,2014-02-17,31500000.0,2014-02-28,2021-11-05,31500000.0,8803400000086,4701647.0,92,595403.0,Castigado,12.0,Novacion,Organica,26588540.0,PENSIONADOS,Otros Pensionados,NACIONAL CENTRALIZADA,6.472765,11.0,0.149259
2713297,999955266207457702,2200877,2014-01-20,26800000.0,2014-01-31,2021-10-05,26800000.0,8803350000886,21616128.0,92,499250.0,Cancelada,15.48,Nueva,Organica,0.0,PENSIONADOS,Otros Pensionados,NACIONAL CENTRALIZADA,6.549477,11.0,0.806572


In [23]:
print(lib_df.shape)
lib_df.head()

(5482702, 18)


Unnamed: 0,id_cliente,num_lib_solicitadas,prom_monto_solicitado,prom_monto_aprobado,prom_monto_desembolsado,prom_monto_novado,min_anos_ult_lib,prom_prop_desembolso,prom_n_cuotas,prom_valor_cuota,prom_tasa,prom_dias_procesamiento,max_dias_procesamiento,n_novaciones,sector_ultlibranza,subsector_ultlibranza,tipo_pagaduria_ultlibranza,periodo
0,101052294884460201,2,7050000.0,6385000.0,5982935.0,0.0,11.508381,0.93404,72.0,164258.5,23.610001,3.5,6.0,0,PENSIONADOS,Fopep,NACIONAL CENTRALIZADA,2020-01
1,101052314032556001,1,70000000.0,70000000.0,69990040.0,0.0,1.242628,0.999858,72.0,1418040.0,10.8,1.0,1.0,0,PENSIONADOS,Fiduprevisora,NACIONAL CENTRALIZADA,2020-01
2,101055260548785601,4,16175000.0,16175000.0,5261897.0,10197711.0,5.387833,0.402476,63.0,365051.75,13.59,3.75,7.0,3,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2020-01
3,101055260650910902,4,12125000.0,12125000.0,7560881.0,4087683.5,4.807011,0.602487,60.0,279657.25,15.0,3.25,7.0,3,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2020-01
4,101055260683554401,2,18000000.0,18000000.0,14080672.0,3618687.5,2.686464,0.831607,60.0,436291.0,15.48,6.5,9.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2020-01


In [24]:
lib_df['periodo'].value_counts().sort_index()

2020-01    775372
2020-02    778205
2020-03    782149
2020-04    785455
2020-05    786076
2020-06    787022
2020-07    788423
Name: periodo, dtype: int64

In [25]:
lib_df['fecha_lib'] = pd.to_datetime(lib_df['periodo'],format='%Y-%m').dt.strftime('%Y%m').astype('int')

In [26]:
lib_df.to_parquet("s3://adl-refined-dev-popular/parquet/TC_adquisicion/base_libranzas_202001-202007",engine='pyarrow', index=False)

## Modelo

In [27]:
lib_df = pd.read_parquet("s3://adl-refined-dev-popular/parquet/TC_adquisicion/base_libranzas_201801_201912",engine='pyarrow')

In [28]:
lib_df[lib_df['id_cliente']==139955260696468901]

Unnamed: 0,id_cliente,num_lib_solicitadas,prom_monto_solicitado,prom_monto_aprobado,prom_monto_desembolsado,prom_monto_novado,min_anos_ult_lib,prom_prop_desembolso,prom_n_cuotas,prom_valor_cuota,prom_tasa,prom_dias_procesamiento,max_dias_procesamiento,n_novaciones,sector_ultlibranza,subsector_ultlibranza,tipo_pagaduria_ultlibranza,periodo,fecha_lib
30777,139955260696468901,2,15750000.0,15750000.0,9233777.0,5455780.0,7.124832,0.611499,72.0,326726.0,14.34,1.0,1.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-01,201801
729659,139955260696468901,2,15750000.0,15750000.0,9233777.0,5455780.0,7.124832,0.611499,72.0,326726.0,14.34,1.0,1.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-02,201802
1431496,139955260696468901,2,15750000.0,15750000.0,9233777.0,5455780.0,7.124832,0.611499,72.0,326726.0,14.34,1.0,1.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-03,201803
2137079,139955260696468901,2,15750000.0,15750000.0,9233777.0,5455780.0,7.124832,0.611499,72.0,326726.0,14.34,1.0,1.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-04,201804
2846053,139955260696468901,2,15750000.0,15750000.0,9233777.0,5455780.0,7.124832,0.611499,72.0,326726.0,14.34,1.0,1.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-05,201805
3558959,139955260696468901,2,15750000.0,15750000.0,9233777.0,5455780.0,7.124832,0.611499,72.0,326726.0,14.34,1.0,1.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-06,201806
4275834,139955260696468901,2,15750000.0,15750000.0,9233777.0,5455780.0,7.124832,0.611499,72.0,326726.0,14.34,1.0,1.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-07,201807
4995912,139955260696468901,2,15750000.0,15750000.0,9233777.0,5455780.0,7.124832,0.611499,72.0,326726.0,14.34,1.0,1.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-08,201808
5719182,139955260696468901,2,15750000.0,15750000.0,9233777.0,5455780.0,7.124832,0.611499,72.0,326726.0,14.34,1.0,1.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-09,201809
6446240,139955260696468901,2,15750000.0,15750000.0,9233777.0,5455780.0,7.124832,0.611499,72.0,326726.0,14.34,1.0,1.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-10,201810


In [29]:
lib[lib['id_cliente']==139955260696468901]

Unnamed: 0,id_cliente,sk_rc_libranza,fe_solicitud,vl_monto_solicitado,fe_desembolso,fe_finalizacion,vl_monto_aprobado,no_obligacion,vl_monto_desembolsado,no_cuotas,vl_total_cuota,ds_estado_actual,vl_tasa,ds_tipo_credito,ds_tipo_libranza,vl_monto_novado,ds_sector,ds_subsector,ds_tipo_pagaduria,años_ult_lib,dias_procesamiento,prop_desemb_vs_aprobado
87412,139955260696468901,1917182,2011-02-17,14500000.0,2011-02-18,2017-04-05,14500000.0,5703010033679,13475792.0,72,310395.0,Cancelada,15.48,Nueva,Organica,0.0,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,9.502902,1.0,0.929365
87413,139955260696468901,1818942,2013-05-09,17000000.0,2013-05-10,2019-06-05,17000000.0,1303480000097,4991762.0,72,343057.0,Cancelada,13.2,Novacion,Organica,10911560.0,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,7.278244,1.0,0.293633


In [30]:
path1 = 's3://adl-refined-dev-popular/data_orig/productos/libranzas/productos_libranzas_dwh_M202004.csv'
path2 = 's3://adl-refined-dev-popular/data_orig/productos/libranzas-t70-obligacion/productos_libranzas-t70-obligacion_cnl_M202004.txt'


lib1 = pd.read_csv(path1, sep=';', encoding = 'latin-1')
lib2 = pd.read_csv(path2, sep='|', encoding = 'latin-1')


print(lib1.shape)
print(lib2.shape)

(3168, 66)
(2697, 118)


In [31]:
lib1.head()

Unnamed: 0,SK_CLIENTE,ID_CLIENTE,SK_RC_LIBRANZA,ID_RC_LIBRANZA,SK_FE_RADICACION,SK_FE_APROBACION,SK_FE_DESEMBOLSO,SK_FE_FINALIZACION,SK_FE_ESTADO_ACTUAL,SK_CONVENIO_LIBRANZA,SK_PERSONA,DK_PERSONA,SK_OFICINA,SK_PRODUCTO_SERVICIO,SK_CLASIFICACION_TRADICIONAL,CD_OFICINA,CD_ORIGINADOR,DS_ORIGINADOR,CD_SEGMENTO_LIB,DS_SEGMENTO_LIB,CD_SUBSEGMENTO_LIB,DS_SUBSEGMENTO_LIB,NO_SOLICITUD,FE_SOLICITUD,VL_MONTO_SOLICITADO,MP_REESTRUCTURADO,NO_OBLIGACION,FE_DESEMBOLSO,VL_MONTO_APROBADO,VL_RECAPITALIZACION,VL_MONTO_DESEMBOLSADO,NO_CUOTAS,FE_FINALIZACION,NO_MESES_PER_GRACIA,VL_TOTAL_CUOTA,FE_EXIGIBILIDAD_CUOTA_1,CD_ESTADO_ACTUAL,DS_ESTADO_ACTUAL,FE_ESTADO_ACTUAL,VL_TASA,VL_TASA_EFECTIVA_ANUAL,CD_MODALIDAD_PAG_INT,DS_MODALIDAD_PAG_INT,CD_PERIODICIDAD_PAG_INT,DS_PERIODICIDAD_PAG_INT,CD_BASE_LIQUIDACION,CD_TIPO_LIBRANZA,DS_TIPO_LIBRANZA,CD_TIPO_CREDITO,DS_TIPO_CREDITO,CD_TIPO_COLOCACION,DS_TIPO_COLOCACION,NO_OBLIGACION_NOVADA,VL_MONTO_NOVADO,MP_COMPRA_IND_CARTERA,MP_AFECTA_DESP_NOMINA,CD_LINEA_CREDITO,FE_CARGA,FE_ACTUA,NO_PARTICION,CD_SECTOR,CD_SUBSECTOR,CD_TIPO_PAGADURIA,DS_SECTOR,DS_SUBSECTOR,DS_TIPO_PAGADURIA
0,-1,-1,10658363,1000003*4437701*LB*01403070007748,-1,-1,20200331,20300705,20200430,17239,-1,1000003*4437701,67,-1,1,14,1,Banco Popular,1,Fuerzas Militares,4,Libranzas Ejercito Nacional,1403070011653,2020-03-18,128000000.0,0,1403070007748,2020-03-31,128000000.0,3870341.0,127988106.0,120,2030-07-05,2,2056637.0,2020-07-05,1,En Cartera,2020-04-30,13.08,13.8933,V,Vencido,M,Mensual,360,1,Organica,1.0,Nueva,7,7-ATR HST 50 AÃO 364 DIA,0,0.0,1,1,66,2020-04-01 04:49:46,2020-05-05 20:44:11,1,1,4,1,ORDEN NACIONAL,Mindefensa Ejercito Nacional Activos,NACIONAL CENTRALIZADA
1,-1,-1,10671192,1000003*74376005*LB*26103070001806,-1,-1,20200423,20300705,20200430,26661,-1,1000003*74376005,153,-1,11,261,1,Banco Popular,3,Pensionados,5,Libranzas Caja Retiro FFMM,26103070002716,2020-04-17,47000000.0,0,26103070001806,2020-04-23,47000000.0,0.0,46686530.0,120,2030-07-05,0,659097.0,2020-05-05,1,En Cartera,2020-04-30,10.68,11.2186,V,Vencido,M,Mensual,360,1,Organica,1.0,Nueva,7,7-ATR HST 50 AÃO 364 DIA,0,0.0,1,0,66,2020-04-24 05:14:48,2020-05-05 20:44:11,1,2,5,1,PENSIONADOS,Caja De retiro de Las FFMM,NACIONAL CENTRALIZADA
2,-1,-1,10660051,1000003*1103111540*LB*20503470001754,-1,-1,20200402,20220605,20200430,12187,-1,1000003*1103111540,216,-1,6,205,1,Banco Popular,2,Policia Nacional,1,Libranzas Policia Nacional,20503470002325,2020-03-31,10000000.0,0,20503470001754,2020-04-02,10000000.0,0.0,9730585.0,24,2022-06-05,0,493654.0,2020-05-05,1,En Cartera,2020-04-30,16.08,17.3197,V,Vencido,M,Mensual,360,1,Organica,1.0,Nueva,47,47-ORD PATRU 50 AÃO 364 DIA,0,0.0,0,0,66,2020-04-03 05:23:08,2020-05-05 20:44:11,1,1,6,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA
3,-1,-1,10658250,1000003*32639300*LB*04503240006082,-1,-1,20200331,20300605,20200430,19318,-1,1000003*32639300,5,-1,7,45,1,Banco Popular,3,Pensionados,1,Libranzas Colpensiones,4503240007189,2020-03-16,30200000.0,0,4503240006082,2020-03-31,30200000.0,1103288.0,30197189.0,120,2030-06-05,2,505063.0,2020-06-05,1,En Cartera,2020-04-30,13.8,14.7072,V,Vencido,M,Mensual,360,1,Organica,1.0,Nueva,24,24-DE 51 < 69 AÃO 364 DIA,0,0.0,1,1,66,2020-04-01 04:49:46,2020-05-05 20:44:11,1,2,1,1,PENSIONADOS,I.S.S. Pensionados,NACIONAL CENTRALIZADA
4,-1,-1,10671744,1000003*46376921*LB*27003070007827,-1,-1,20200424,20300605,20200430,15939,-1,1000003*46376921,92,-1,13,270,1,Banco Popular,4,Educativo,1,Libranzas Educadores,27003070010742,2020-03-19,104000000.0,0,27003070007827,2020-04-24,104000000.0,1510740.0,103998282.0,120,2030-06-05,1,1427024.0,2020-06-05,1,En Cartera,2020-04-30,9.6,10.0339,V,Vencido,M,Mensual,360,1,Organica,1.0,Nueva,7,7-ATR HST 50 AÃO 364 DIA,0,0.0,1,1,66,2020-04-25 04:45:45,2020-05-05 20:44:11,1,3,7,3,ENTES TERRITORIALES,Secretaría de Educacion,REGIONAL


In [32]:
lib2.head()

Unnamed: 0,C70IDOBLIGACION,C70LINEA,C70MODALIDAD,C70REGIMEN,C70TIPODOCTERCERO,C70IDTERCERO,C70PAGADURIA,C70CODSUCURSAL,C70OFIRECEP,C70OFIRECEPREAL,C70OFIVENTA,C70FECVENTA,C70FECRECOMPRA,C70FRECCUOTA,C70CUOTASPACTADAS,C70CUOTASPAGADAS,C70CUOTASCOBRADAS,C70MONTOAPROB,C70NROCTADESEM,C70FORMADESEM,C70TASANOMAPROB,C70TASAEFECAPROB,C70IDSEGURO,C70VALCUOTA,C70COMISION,C70IVA,C70DIASAJUSTE,C70INTANT,C70INTANTCAUSADO,C70INTANTXCAUSAR,C70INTACAUSAR,C70INTCAUSADO,C70SEGURO,C70NETODESEMBOLSADO,C70COMPROBANTEDESEM,C70FECCAUSACION,C70FECHADESEM,C70FECINI,C70FECFIN,C70FECANTCUOTA,C70FECPROXCUOTA,C70FECINIMORA,C70FECINIMORAANT,C70FECULTPAGO,C70ULTCOMPROBANTE,C70REVERSARCAUSACION,C70FECININOVEDAD,C70FECFINNOVEDAD,C70SALDOCAPITAL,C70SALDOCAPITALV,C70INTPAGADO,C70SALDOINTCTE,C70SALDOINTCTEV,C70SALDOINTMORA,C70SALDOINTCTGTES,C70SALDOINTMORACTGTES,C70SALDOAFAVOR,C70PROVISIONCAPITAL,C70PROVISIONCAPITALANT,C70PROVISIONINTCTE,C70PROVISIONINTCTEANT,C70PROVISIONINTMORA,C70PROVISIONINTMORAANT,C70FALTANTECREDITO,C70APROVECHAMIENTOS,C70SOBRANTES,C70TIPOTASACTEACT,C70TIPOTASACTEANT,C70TIPOTASAMORAACT,C70TIPOTASAMORAANT,C70VALORTASACTEACT,C70VALORTASACTEANT,C70VALORTASAMORAACT,C70VALORTASAMORAANT,C70NOVACION,C70VALORNOVADO,C70ORIGEN,C70CALIFICACION,C70IDSOLICITUD,C70TRXDESEM,C70FECCIERRE,C70ESTADO,C70CALIFICACIONXRIESGO,C70CALIFICACIONANT,C70MARCASOBRANTE,C70FECINICBRJUR,C70COBROJURIDICO,C70FECCAMBIOESTADO,C70GASTOSPAGADOS,C70GASTOSCOBRADOS,C70GRUPO,C70PLAZOMAXTASA,C70MONTOMAXTASA,C70EXONERADO,C70VALOREXONERACION,C70RESTRUCTURADA,C70TIPOSEGURO,C70VALORTOTALSEGURO,C70NROCUOTASADIFERIR,C70VALORSEGUROFINANCIAR,C70VALORSEGUROANTICIPADO,C70CUOTASPAGADASSEG,C70SALDOSEGURO,C70ESTADOCXC,C70FACTORSEGURO,C70SALDOCXC,C70TIPO_CUENTA,C70CTADESEM_NRO,C70COD_BANCO,C70COMISION_ACH,C70IVA_ACH,C70DISMINUYE_CUOTA,C70DESPRENDIBLENOMINA,C70MESESDEGRACIA,C70_REINCIDENCIA,C70MARCA_REE_MOD,ID_CLIENTE,c70tipodoctercero_HOM
0,18503070010979,3,7,1,1,97471534,8001413975,432,185,185,,,,30,120,0,1,56500000,230182766865,2,10.68,11.2186,1,812128,99000,18810,71,1172476,613238,559238,515276,0,108555,56497383,185-03073070603,31/05/2020,24/04/2020,05/07/2020,05/07/2030,05/07/2020,05/08/2020,05/08/2020,,05/07/2020,0,0,01/06/2020,01/05/2030,57896224.0,0,0,515276,0,0,0,0,0,1626884,0,0,0,0,0,0,0,0,FIJA,FIJA,MORA,MORA,11.2186,11.2186,27.13,27.235,0,0,FSII,A,18503070014993,10041,31/05/2020,70,A,,0,01/05/2001,0,30/04/2020,0,0,8U1,120,309000000,S,57896224,0,4,2722487,,2613932,108555,0,2613932,0,0.0075,38831,0,-,0,0,0,0,1,1,0,,,1000003
1,26503680000391,3,68,2,1,10175755,8200014059,1,265,265,,,,30,108,0,1,58500000,230265100560,2,13.08,13.8933,1,949988,99000,18810,11,232078,232078,0,106755,530893,36563,6999506,265-03680001930,31/05/2020,24/04/2020,05/05/2020,05/06/2029,05/06/2020,05/06/2020,05/06/2020,,05/05/2020,0,0,01/05/2020,01/04/2029,58500000.0,0,0,637648,0,0,0,0,0,1643850,0,13538,0,0,0,0,0,0,FIJA,FIJA,MORA,MORA,13.8933,13.8933,27.13,27.235,1,51114043,FSII,A,26503680000499,10041,31/05/2020,70,,,0,01/05/2001,0,30/04/2020,0,0,88D,108,221315100,S,58500000,0,4,2452671,,2416108,36563,0,2416108,0,0.0075,176027,0,-,0,0,0,0,0,0,0,,,1000003
2,30003680000931,3,68,2,1,8778069,8001413975,261,300,300,,,,30,96,0,1,95000000,230300802766,2,13.08,13.8933,1,1641556,99000,18810,41,1397146,1230087,167059,1035497,0,118750,16461050,300-03680005524,31/05/2020,24/04/2020,05/06/2020,05/07/2028,05/07/2020,05/07/2020,05/07/2020,,05/06/2020,0,0,01/06/2020,01/05/2028,95000000.0,0,0,1035497,0,0,0,0,0,2669500,0,0,0,0,0,0,0,0,FIJA,FIJA,MORA,MORA,13.8933,13.8933,27.13,27.235,1,76905244,FSII,A,30003680001075,10041,31/05/2020,70,,,0,01/05/2001,0,30/04/2020,0,0,43C,120,263340900,S,95000000,0,4,3547932,,3429182,118750,0,3429182,0,0.0075,82771,0,-,0,0,0,0,0,0,0,,,1000003
3,30003070014715,3,7,2,1,36623878,8923999991,4,300,300,,,,30,120,0,1,96000000,230300228582,2,10.68,11.2186,1,1381077,99000,18810,41,1155503,1031593,123910,866800,0,121742,95998258,300-03073340971,31/05/2020,24/04/2020,05/06/2020,05/06/2030,05/06/2020,05/07/2020,05/07/2020,,05/06/2020,0,0,01/04/2020,01/03/2030,97393313.0,0,0,866800,0,0,0,0,0,2736752,0,0,0,0,0,0,0,0,FIJA,FIJA,MORA,MORA,11.2186,11.2186,27.13,27.235,0,0,FSII,A,30003070021609,10041,31/05/2020,70,A,,0,01/05/2001,0,30/04/2020,0,0,5B7,120,263340900,S,97393313,0,4,4419371,,4297629,121742,0,4297629,0,0.0075,4102,0,-,0,0,0,0,1,1,0,,,1000003
4,5903010009873,3,1,2,1,1023975444,8300423210,360,59,59,,,,30,36,0,1,12000000,230018273177,2,16.08,17.3197,1,426878,99000,18810,37,195395,169318,26077,160800,0,15000,11671795,059-03010096240,31/05/2020,28/04/2020,05/06/2020,05/07/2023,05/07/2020,05/07/2020,05/07/2020,,05/06/2020,0,0,01/05/2020,01/04/2023,12000000.0,0,0,160800,0,0,0,0,0,337200,0,0,0,0,0,0,0,0,FIJA,FIJA,MORA,MORA,17.3197,17.3197,27.13,27.235,0,0,FSII,A,5903010012627,10049,31/05/2020,70,A,,0,01/05/2001,0,30/04/2020,0,0,8O1,60,12000000,S,12000000,0,4,159494,,151994,15000,0,151994,0,0.0075,66241,0,-,0,0,0,0,0,0,0,,,1000003


In [33]:
lib2.count()

C70IDOBLIGACION             2697
C70LINEA                    2697
C70MODALIDAD                2697
C70REGIMEN                  2697
C70TIPODOCTERCERO           2697
C70IDTERCERO                2697
C70PAGADURIA                2697
C70CODSUCURSAL              2697
C70OFIRECEP                 2697
C70OFIRECEPREAL             2697
C70OFIVENTA                    0
C70FECVENTA                    0
C70FECRECOMPRA                 0
C70FRECCUOTA                2697
C70CUOTASPACTADAS           2697
C70CUOTASPAGADAS            2697
C70CUOTASCOBRADAS           2697
C70MONTOAPROB               2697
C70NROCTADESEM              2697
C70FORMADESEM               2697
C70TASANOMAPROB             2697
C70TASAEFECAPROB            2697
C70IDSEGURO                 2697
C70VALCUOTA                 2697
C70COMISION                 2697
C70IVA                      2697
C70DIASAJUSTE               2697
C70INTANT                   2697
C70INTANTCAUSADO            2697
C70INTANTXCAUSAR            2697
C70INTACAU

In [34]:
lib1.describe()

Unnamed: 0,SK_CLIENTE,ID_CLIENTE,SK_RC_LIBRANZA,SK_FE_RADICACION,SK_FE_APROBACION,SK_FE_DESEMBOLSO,SK_FE_FINALIZACION,SK_FE_ESTADO_ACTUAL,SK_CONVENIO_LIBRANZA,SK_PERSONA,SK_OFICINA,SK_PRODUCTO_SERVICIO,SK_CLASIFICACION_TRADICIONAL,CD_OFICINA,CD_ORIGINADOR,CD_SEGMENTO_LIB,CD_SUBSEGMENTO_LIB,NO_SOLICITUD,VL_MONTO_SOLICITADO,MP_REESTRUCTURADO,NO_OBLIGACION,VL_MONTO_APROBADO,VL_RECAPITALIZACION,VL_MONTO_DESEMBOLSADO,NO_CUOTAS,NO_MESES_PER_GRACIA,VL_TOTAL_CUOTA,CD_ESTADO_ACTUAL,VL_TASA,VL_TASA_EFECTIVA_ANUAL,CD_BASE_LIQUIDACION,CD_TIPO_LIBRANZA,CD_TIPO_CREDITO,CD_TIPO_COLOCACION,NO_OBLIGACION_NOVADA,VL_MONTO_NOVADO,MP_COMPRA_IND_CARTERA,MP_AFECTA_DESP_NOMINA,CD_LINEA_CREDITO,NO_PARTICION,CD_SECTOR,CD_SUBSECTOR,CD_TIPO_PAGADURIA
count,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0,3168.0
mean,2142760.0,5.531412e+17,10666270.0,-1.0,-1.0,20200400.0,20283660.0,20200380.0,15825.842803,1838014.0,134.857955,-1.0,8.620896,332.527778,1.0,2.858586,2.184028,33256230000000.0,47424280.0,0.00221,33256230000000.0,47424280.0,388125.9,27302300.0,99.695707,0.327967,827504.1,1.001894,13.753485,14.682308,360.0,1.0,1.563763,45.448548,0.0,19771140.0,0.259785,0.200758,66.0,0.998106,1.960859,5.424558,1.308396
std,1086706.0,2.610116e+17,8016.784,0.0,0.0,35.75502,25344.26,3374.787,8725.054655,993613.9,78.204989,0.0,3.21003,196.66283,0.0,0.750675,1.760503,19666290000000.0,45670410.0,0.046962,19666290000000.0,45670410.0,1059094.0,39703710.0,30.391554,0.689183,688640.5,0.043485,2.326748,2.648336,0.0,0.0,0.495996,29.354037,0.0,30864940.0,0.438586,0.40063,0.0,0.043485,0.55147,6.266877,0.724791
min,-1.0,-1.0,10657700.0,-1.0,-1.0,20200330.0,20210500.0,20010500.0,9.0,-1.0,-1.0,-1.0,1.0,9.0,1.0,0.0,1.0,903240000000.0,1150000.0,0.0,903240000000.0,1150000.0,0.0,57210.0,12.0,0.0,21533.0,1.0,7.2,7.4424,360.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,66.0,0.0,1.0,1.0,1.0
25%,1237185.0,3.360523e+17,10659930.0,-1.0,-1.0,20200400.0,20270600.0,20200430.0,8556.0,999751.0,63.0,-1.0,7.0,182.0,1.0,3.0,1.0,18203550000000.0,19000000.0,0.0,18203550000000.0,19000000.0,0.0,6044478.0,84.0,0.0,381799.0,1.0,11.88,12.5487,360.0,1.0,1.0,23.0,0.0,0.0,0.0,0.0,66.0,1.0,2.0,1.0,1.0
50%,2140742.0,5.443523e+17,10664460.0,-1.0,-1.0,20200410.0,20300600.0,20200430.0,15136.0,1742442.0,135.0,-1.0,7.0,350.5,1.0,3.0,1.0,35053440000000.0,35400000.0,0.0,35053440000000.0,35400000.0,0.0,13163220.0,120.0,0.0,666088.5,1.0,13.8,14.7072,360.0,1.0,2.0,44.0,0.0,6428844.0,0.0,0.0,66.0,1.0,2.0,4.0,1.0
75%,3066888.0,7.840523e+17,10670970.0,-1.0,-1.0,20200420.0,20300700.0,20200430.0,23285.0,2632986.0,211.0,-1.0,11.0,484.0,1.0,3.0,3.0,48403240000000.0,62134750.0,0.0,48403240000000.0,62134750.0,0.0,31810890.0,120.0,0.0,1074600.0,1.0,15.48,16.6269,360.0,1.0,2.0,70.0,0.0,28708890.0,1.0,0.0,66.0,1.0,2.0,6.0,1.0
max,3838157.0,9.990582e+17,10690420.0,-1.0,-1.0,20200430.0,20300800.0,20200520.0,30368.0,3519419.0,257.0,-1.0,19.0,720.0,1.0,5.0,6.0,72003860000000.0,590000000.0,1.0,72003860000000.0,590000000.0,15621690.0,589958100.0,120.0,2.0,8556250.0,2.0,20.28,22.2754,360.0,1.0,2.0,98.0,0.0,375125700.0,1.0,1.0,66.0,1.0,5.0,30.0,4.0


In [35]:
lib2.describe()

Unnamed: 0,C70IDOBLIGACION,C70LINEA,C70MODALIDAD,C70REGIMEN,C70TIPODOCTERCERO,C70IDTERCERO,C70PAGADURIA,C70CODSUCURSAL,C70OFIRECEP,C70OFIRECEPREAL,C70OFIVENTA,C70FECVENTA,C70FECRECOMPRA,C70FRECCUOTA,C70CUOTASPACTADAS,C70CUOTASPAGADAS,C70CUOTASCOBRADAS,C70MONTOAPROB,C70FORMADESEM,C70TASANOMAPROB,C70TASAEFECAPROB,C70IDSEGURO,C70VALCUOTA,C70COMISION,C70IVA,C70DIASAJUSTE,C70INTANT,C70INTANTCAUSADO,C70INTANTXCAUSAR,C70INTACAUSAR,C70INTCAUSADO,C70SEGURO,C70NETODESEMBOLSADO,C70FECINIMORAANT,C70REVERSARCAUSACION,C70SALDOCAPITAL,C70SALDOCAPITALV,C70INTPAGADO,C70SALDOINTCTE,C70SALDOINTCTEV,C70SALDOINTMORA,C70SALDOINTCTGTES,C70SALDOINTMORACTGTES,C70SALDOAFAVOR,C70PROVISIONCAPITAL,C70PROVISIONCAPITALANT,C70PROVISIONINTCTE,C70PROVISIONINTCTEANT,C70PROVISIONINTMORA,C70PROVISIONINTMORAANT,C70FALTANTECREDITO,C70APROVECHAMIENTOS,C70SOBRANTES,C70VALORTASACTEACT,C70VALORTASACTEANT,C70VALORTASAMORAACT,C70VALORTASAMORAANT,C70NOVACION,C70VALORNOVADO,C70IDSOLICITUD,C70TRXDESEM,C70ESTADO,C70CALIFICACIONANT,C70MARCASOBRANTE,C70COBROJURIDICO,C70GASTOSPAGADOS,C70GASTOSCOBRADOS,C70PLAZOMAXTASA,C70MONTOMAXTASA,C70VALOREXONERACION,C70RESTRUCTURADA,C70TIPOSEGURO,C70VALORTOTALSEGURO,C70NROCUOTASADIFERIR,C70VALORSEGUROFINANCIAR,C70VALORSEGUROANTICIPADO,C70CUOTASPAGADASSEG,C70SALDOSEGURO,C70ESTADOCXC,C70FACTORSEGURO,C70SALDOCXC,C70TIPO_CUENTA,C70COD_BANCO,C70COMISION_ACH,C70IVA_ACH,C70DISMINUYE_CUOTA,C70DESPRENDIBLENOMINA,C70MESESDEGRACIA,C70_REINCIDENCIA,C70MARCA_REE_MOD,ID_CLIENTE,c70tipodoctercero_HOM
count,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,0.0,0.0,0.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,0.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,0.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,0.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,2697.0,0.0,0.0,2697.0
mean,33978690000000.0,3.0,45.969225,1.304783,1.0,143367600.0,8731577000.0,221.816463,339.752317,339.752317,,,,30.0,99.842047,0.091583,1.091212,47874130.0,2.011494,13.805161,14.740513,1.0,833370.5,99000.0,18621.690768,26.760104,488834.1,398644.0,90292.44,235283.4,266761.6,104943.5,26403710.0,,0.0,47883300.0,0.0,40030.91,502045.0,0.0,0.0,0.0,0.0,428754.3,1515304.0,0.0,6789.144605,0.0,0.0,0.0,12.238042,1.754913,382.311828,14.740513,14.740513,27.130667,27.237781,0.589544,21094150.0,33978690000000.0,10018.507972,70.107527,,0.0,0.022618,0.0,0.0,111.090842,275345300.0,48212290.0,0.00482,3.998888,5451696.0,,5343139.0,104943.5,0.0,5333645.0,0.0,0.022099,120984.6,0.022989,0.286615,0.0,0.0,0.0,0.184279,0.296255,0.002595,,,1000003.0
std,19297410000000.0,0.0,30.155879,0.460401,0.0,325121400.0,403894600.0,228.862121,192.974109,192.974109,,,,0.0,30.418524,0.289773,0.287964,46764090.0,0.106613,2.310461,2.62982,0.0,704199.7,0.0,1872.948738,22.34898,762580.4,522154.6,313902.8,368763.7,311290.1,406090.2,40215780.0,,0.0,47137680.0,0.0,160033.3,418088.3,0.0,0.0,0.0,0.0,594325.5,4810977.0,0.0,7932.814988,0.0,0.0,0.0,455.95854,91.137273,17849.600543,2.62982,2.62982,0.017536,0.045593,0.492008,31616670.0,19297410000000.0,26.192353,1.762915,,0.0,0.148709,0.0,0.0,21.438328,671567200.0,47335750.0,0.069273,0.057767,10497780.0,,10297380.0,406090.2,0.0,10277990.0,0.0,0.027488,221779.1,0.213226,3.325194,0.0,0.0,0.0,0.387783,0.657724,0.050889,,,0.0
min,903240000000.0,3.0,1.0,1.0,1.0,119991.0,8000085000.0,1.0,9.0,9.0,,,,30.0,12.0,0.0,1.0,1150000.0,2.0,7.2,7.4424,1.0,21533.0,99000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,57210.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.4424,7.4424,27.13,27.235,0.0,0.0,903240000000.0,9974.0,70.0,,0.0,0.0,0.0,0.0,24.0,12000000.0,1150000.0,0.0,1.0,9251.0,,0.0,0.0,0.0,0.0,0.0,0.0075,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,1000003.0
25%,19103340000000.0,3.0,20.0,1.0,1.0,14000080.0,8300423000.0,31.0,191.0,191.0,,,,30.0,84.0,0.0,1.0,18526000.0,2.0,11.88,12.5487,1.0,379767.0,99000.0,18810.0,12.0,91454.0,89790.0,0.0,43153.0,0.0,16750.0,5778188.0,,0.0,18500000.0,0.0,0.0,232538.0,0.0,0.0,0.0,0.0,0.0,524065.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.5487,12.5487,27.13,27.235,0.0,0.0,19103340000000.0,9994.0,70.0,,0.0,0.0,0.0,0.0,120.0,263340900.0,18680280.0,0.0,4.0,1153581.0,,1122385.0,16750.0,0.0,1120340.0,0.0,0.0075,31908.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,1000003.0
50%,36003230000000.0,3.0,47.0,1.0,1.0,27219440.0,8999991000.0,131.0,360.0,360.0,,,,30.0,120.0,0.0,1.0,35622000.0,2.0,13.8,14.7072,1.0,666363.0,99000.0,18810.0,20.0,216408.0,212432.0,0.0,87100.0,198662.0,42570.0,12114370.0,,0.0,35500000.0,0.0,0.0,399770.0,0.0,0.0,0.0,0.0,273337.0,1005980.0,0.0,5066.0,0.0,0.0,0.0,0.0,0.0,0.0,14.7072,14.7072,27.13,27.235,1.0,8091328.0,36003230000000.0,10027.0,70.0,,0.0,0.0,0.0,0.0,120.0,263340900.0,35809000.0,0.0,4.0,2513079.0,,2467094.0,42570.0,0.0,2467094.0,0.0,0.01188,58546.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,1000003.0
75%,48403660000000.0,3.0,71.0,2.0,1.0,55220990.0,9003360000.0,426.0,484.0,484.0,,,,30.0,120.0,0.0,1.0,64100000.0,2.0,15.48,16.6269,1.0,1098564.0,99000.0,18810.0,35.0,618628.0,549610.0,34020.0,242718.0,397324.0,102960.0,29884750.0,,0.0,64300000.0,0.0,0.0,671160.0,0.0,0.0,0.0,0.0,644997.0,1818100.0,0.0,10119.0,0.0,0.0,0.0,0.0,0.0,0.0,16.6269,16.6269,27.13,27.235,1.0,31210930.0,48403660000000.0,10035.0,70.0,,0.0,0.0,0.0,0.0,120.0,263340900.0,64315540.0,0.0,4.0,5794265.0,,5645416.0,102960.0,0.0,5645416.0,0.0,0.025,115120.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,1000003.0
max,72003860000000.0,3.0,98.0,2.0,1.0,1233503000.0,9013361000.0,998.0,720.0,720.0,,,,30.0,120.0,2.0,2.0,590000000.0,3.0,20.28,22.2754,1.0,8556250.0,99000.0,18810.0,111.0,12250940.0,7036872.0,5214064.0,4832982.0,2960294.0,16874170.0,589958100.0,,0.0,604121000.0,0.0,2152502.0,4832982.0,0.0,0.0,0.0,0.0,9562268.0,182006400.0,0.0,75487.0,0.0,0.0,0.0,19349.0,4733.0,920383.0,22.2754,22.2754,27.985,27.985,1.0,375125700.0,72003860000000.0,10059.0,99.0,,0.0,1.0,0.0,0.0,120.0,24843480000.0,604121000.0,1.0,4.0,195070200.0,,192724600.0,16874170.0,0.0,192724600.0,0.0,0.11451,2439836.0,2.0,52.0,0.0,0.0,0.0,1.0,2.0,1.0,,,1000003.0


In [36]:
''

''

In [37]:
lib_df = pd.read_parquet("s3://adl-refined-dev-popular/parquet/TC_adquisicion/base_libranzas_201801_201912",engine='pyarrow')
lib_df.count()

id_cliente                    17696740
num_lib_solicitadas           17696740
prom_monto_solicitado         17696740
prom_monto_aprobado           17696740
prom_monto_desembolsado       17696740
prom_monto_novado             17696740
min_anos_ult_lib              17696740
prom_prop_desembolso          17696740
prom_n_cuotas                 17696740
prom_valor_cuota              17696740
prom_tasa                     17696740
prom_dias_procesamiento       17696740
max_dias_procesamiento        17696740
n_novaciones                  17696740
sector_ultlibranza            17696740
subsector_ultlibranza         17696740
tipo_pagaduria_ultlibranza    17696740
periodo                       17696740
fecha_lib                     17696740
dtype: int64

In [38]:
lib_df['sector_ultlibranza'].value_counts()

PENSIONADOS              9913414
ORDEN NACIONAL           4041447
ENTES TERRITORIALES      3141400
PRIVADO                   320500
ENTES DESENTRALIZADOS     279979
Name: sector_ultlibranza, dtype: int64

In [39]:
# Creando Data Libranza
lib_df['fecha_lib'] = pd.to_datetime(lib_df['periodo'], format='%Y-%m').dt.strftime('%Y%m').astype('int')
lib_df.head(20)

Unnamed: 0,id_cliente,num_lib_solicitadas,prom_monto_solicitado,prom_monto_aprobado,prom_monto_desembolsado,prom_monto_novado,min_anos_ult_lib,prom_prop_desembolso,prom_n_cuotas,prom_valor_cuota,prom_tasa,prom_dias_procesamiento,max_dias_procesamiento,n_novaciones,sector_ultlibranza,subsector_ultlibranza,tipo_pagaduria_ultlibranza,periodo,fecha_lib
0,101052294884460201,2,7050000.0,6385000.0,5982935.0,0.0,11.354969,0.93404,72.0,164258.5,23.61,3.5,6.0,0,PENSIONADOS,Fopep,NACIONAL CENTRALIZADA,2018-01,201801
1,101055260548785601,4,16175000.0,16175000.0,5261897.0,10197710.0,5.234421,0.402476,63.0,365051.75,13.59,3.75,7.0,3,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-01,201801
2,101055260650910902,4,12125000.0,12125000.0,7560881.0,4087684.0,4.653599,0.602487,60.0,279657.25,15.0,3.25,7.0,3,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-01,201801
3,101055260683554401,2,18000000.0,18000000.0,14080670.0,3618688.0,2.533051,0.831607,60.0,436291.0,15.48,6.5,9.0,1,ORDEN NACIONAL,Policia Nacional,NACIONAL CENTRALIZADA,2018-01,201801
4,101055260720859701,2,8250000.0,8250000.0,5983176.0,1797331.0,10.423462,0.745965,66.0,202629.5,19.2148,3.0,5.0,1,ENTES TERRITORIALES,Secretaría de Educacion,REGIONAL,2018-01,201801
5,101055266255810701,1,10000000.0,10000000.0,9530928.0,0.0,13.157708,0.953093,60.0,269410.0,20.8,13.0,13.0,0,PENSIONADOS,I.S.S. Pensionados,NACIONAL CENTRALIZADA,2018-01,201801
6,101055266367157902,3,27133330.0,27133330.0,15335720.0,11251010.0,2.965928,0.618465,98.666667,563069.666667,17.12,9.0,14.0,2,PENSIONADOS,I.S.S. Pensionados,NACIONAL CENTRALIZADA,2018-01,201801
7,101055266603150201,2,3250000.0,3250000.0,2337335.0,836307.5,3.064558,0.785358,36.0,118828.0,18.48,10.5,14.0,1,PENSIONADOS,I.S.S. Pensionados,NACIONAL CENTRALIZADA,2018-01,201801
8,101055266686923002,5,17860000.0,17860000.0,10065410.0,6742780.0,3.426202,0.671437,62.4,458791.8,20.576,2.0,5.0,3,PENSIONADOS,Fiduprevisora,NACIONAL CENTRALIZADA,2018-01,201801
9,101055629594214801,2,8538500.0,8538500.0,6333382.0,2091991.0,2.579626,0.783307,54.0,232828.5,17.58,7.5,12.0,1,PENSIONADOS,I.S.S. Pensionados,NACIONAL CENTRALIZADA,2018-01,201801


In [40]:
data = pd.read_parquet("s3://adl-refined-dev-popular/parquet/TC_adquisicion/base_activos_201801_202003_v2", engine='pyarrow')
data.head()

Unnamed: 0,tipo_ident,sk_cliente,id_cliente,tipo_producto,lineacredi,nrooblig,cod_ofic,plazo,cuota_paga,fecdes,cupo_aprob,fecven,vlr_desemb,sal_capita,vlr_cuota,tasint_cte,tasint_mor,dias_morak,estado,calif_cart,fecha,fecha_activo,ano_po,SMLV,tipo_prod,fecha_desembolso,fecha_obs,apertura_1meses,apertura_3meses,apertura_6meses,apertura_12meses,edad_credito_meses,cuenta_vigente,altura_mora,utilizacion,amortizacion
0,1.0,541607.0,101052294884460201,TC,62,660004000000240385,694,730,0.0,13/06/2015,1557000.0,01/05/2022,1120445.23,1085925.92,219304.71,28.69,28.73,0,ACTIVO,A,201901,201901,2019,828116.0,TC_X,2015-06-13,2019-01-01,0,0,0,0,55,1,aldia,0.969191,0.030809
1,1.0,541607.0,101052294884460201,TC,62,660004000000240385,694,730,0.0,13/06/2015,1557000.0,01/05/2022,903920.19,845803.58,270595.74,29.01,29.05,0,ACTIVO,A,201902,201902,2019,828116.0,TC_X,2015-06-13,2019-02-01,0,0,0,0,56,1,aldia,0.935706,0.064294
2,1.0,541607.0,101052294884460201,TC,62,660004000000240385,694,730,0.0,13/06/2015,1557000.0,01/05/2022,829474.61,786827.7,160811.21,29.01,29.05,0,ACTIVO,A,201903,201903,2019,828116.0,TC_X,2015-06-13,2019-03-01,0,0,0,0,57,1,aldia,0.948586,0.051414
3,1.0,541607.0,101052294884460201,TC,62,660004000000240385,694,730,0.0,13/06/2015,1557000.0,01/05/2022,1313152.94,1272469.46,193884.92,28.93,28.96,0,ACTIVO,A,201904,201904,2019,828116.0,TC_X,2015-06-13,2019-04-01,0,0,0,0,58,1,aldia,0.969018,0.030982
4,1.0,541607.0,101052294884460201,TC,62,660004000000240385,694,730,0.0,13/06/2015,1557000.0,01/05/2022,1253627.06,1208982.4,221613.61,28.9,28.94,0,ACTIVO,A,201905,201905,2019,828116.0,TC_X,2015-06-13,2019-05-01,0,0,0,0,59,1,aldia,0.964388,0.035612


In [41]:
print(data.shape)
print(data.drop_duplicates().shape)

(9610068, 36)
(9610068, 36)


In [42]:
prueba[prueba['fecha_activo']==202005]

NameError: name 'prueba' is not defined

In [None]:
(data['id_cliente'].isnull()).sum()

In [None]:
prueba = data[data['id_cliente']==761552294082694601]

prueba['fecha_activo'].value_counts()


In [None]:
data['id_cliente'].value_counts().head(15)

In [None]:
data