# cld

Standard libraries

In [1]:
import contextlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas.io.formats.format as pf

pd.plotting.register_matplotlib_converters(explicit=True)

User-defined display format

In [2]:
@contextlib.contextmanager
def custom_formatting():
    """Temporarily render floats as ``1,234.56`` and integers as ``1,234``.

    While the ``with`` block is active, ``pd.options.display.float_format`` is
    set to a two-decimal, thousands-separated formatter and
    ``pandas.io.formats.format.IntArrayFormatter`` is replaced by a subclass
    that adds thousands separators to integers.

    Both settings are restored to the values they had on entry when the block
    exits, including when the body of the ``with`` raises.
    """
    orig_float_format=pd.options.display.float_format
    orig_int_format=pf.IntArrayFormatter

    class IntArrayFormatter(pf.GenericArrayFormatter):
        def _format_strings(self):
            # Honour an explicitly supplied formatter, otherwise group digits with commas.
            formatter=self.formatter or '{:,d}'.format
            fmt_values=[formatter(x) for x in self.values]
            return fmt_values

    try:
        pd.options.display.float_format='{:0,.2f}'.format
        pf.IntArrayFormatter=IntArrayFormatter
        yield
    finally:
        # Always undo the global patches, even if the caller's block failed;
        # otherwise every later display in the session keeps the custom format.
        pd.options.display.float_format=orig_float_format
        pf.IntArrayFormatter=orig_int_format

User-defined functions

In [3]:
# my custom full summary by groups for ONE variable
def summary_(df_, by_, var, percentiles_):
    """Describe one variable per group and append the group total.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Frame containing the grouping key(s) and the variable.
    by_ : label or list of labels
        Passed to ``DataFrame.groupby(by=...)``.
    var : label
        Column to summarise.
    percentiles_ : array-like of floats in [0, 1]
        Passed to ``describe(percentiles=...)``.

    Returns
    -------
    pandas.DataFrame
        One row per group with the ``describe()`` columns followed by a
        ``sum`` column.
    """
    grouped=df_.groupby(by=by_)[var]
    stats=grouped.describe(percentiles=percentiles_)
    # Name the total 'sum' before joining so that a variable whose name matches
    # a describe() column (e.g. 'count') does not cause a column-overlap error.
    total=grouped.sum().rename('sum')
    return stats.join(total)

Import cld data

In [4]:
# Load the account-by-month table. Column types are pinned explicitly; every
# i_* indicator column is read as int8 and 'mth' is parsed as a date.
df=pd.read_csv(
    'cld.csv',
    header=0,
    parse_dates=['mth'],
    dtype={
        'ambs_acct': np.int64,
        'ambs_crlim': np.float64,
        'atsm_bs_crlim': np.float64,
        'reserva': np.float64,
        'eliminadas': object,
        **{flag: np.int8 for flag in ('i_na',
                                      'i_inact_201710',
                                      'i_inact_201804',
                                      'i_inact_201808',
                                      'i_inact_201901',
                                      'i_rsgs_201712',
                                      'i_rsgs_201802',
                                      'i_rsgs_201804',
                                      'i_rsgs_201806',
                                      'i_rsgs_201808',
                                      'i_rsgs_201810',
                                      'i_rsgs_201812',
                                      'i_rsgs_201902',
                                      'i_rsgs_201907',
                                      'i_rsgs_201908')},
    },
)
with custom_formatting():
    display(df.head())
df.info()

Unnamed: 0,ambs_acct,mth,ambs_crlim,atsm_bs_crlim,reserva,eliminadas,i_na,i_inact_201710,i_inact_201804,i_inact_201808,...,i_rsgs_201712,i_rsgs_201802,i_rsgs_201804,i_rsgs_201806,i_rsgs_201808,i_rsgs_201810,i_rsgs_201812,i_rsgs_201902,i_rsgs_201907,i_rsgs_201908
0,4023185000022446,2017-09-01,250000.0,250000.0,7001.5,NO,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,4023185000022446,2017-10-01,10.0,10.0,0.02,NO,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4023185000022446,2017-11-01,275000.0,275000.0,6249.58,NO,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4023185000022446,2017-12-01,275000.0,275000.0,481.27,NO,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4023185000164651,2017-09-01,376000.0,376000.0,2448.3,NO,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6515953 entries, 0 to 6515952
Data columns (total 21 columns):
ambs_acct         int64
mth               datetime64[ns]
ambs_crlim        float64
atsm_bs_crlim     float64
reserva           float64
eliminadas        object
i_na              int8
i_inact_201710    int8
i_inact_201804    int8
i_inact_201808    int8
i_inact_201901    int8
i_rsgs_201712     int8
i_rsgs_201802     int8
i_rsgs_201804     int8
i_rsgs_201806     int8
i_rsgs_201808     int8
i_rsgs_201810     int8
i_rsgs_201812     int8
i_rsgs_201902     int8
i_rsgs_201907     int8
i_rsgs_201908     int8
dtypes: datetime64[ns](1), float64(3), int64(1), int8(15), object(1)
memory usage: 391.5+ MB


Split data by campaign

In [5]:
# Columns carried into every per-campaign subset; the i_inact_*/i_rsgs_* flags
# are only used to select the rows.
campaign_cols=['ambs_acct', 'mth', 'ambs_crlim', 'atsm_bs_crlim', 'reserva', 'eliminadas', 'i_na']

# One frame per campaign: the rows whose campaign flag equals 1.
# .copy() makes each subset an independent frame, so it can be modified later
# without pandas raising SettingWithCopyWarning.
inact_201710=df.loc[df.i_inact_201710==1, campaign_cols].copy()
inact_201804=df.loc[df.i_inact_201804==1, campaign_cols].copy()
inact_201808=df.loc[df.i_inact_201808==1, campaign_cols].copy()
inact_201901=df.loc[df.i_inact_201901==1, campaign_cols].copy()
rsgs_201712=df.loc[df.i_rsgs_201712==1, campaign_cols].copy()
rsgs_201802=df.loc[df.i_rsgs_201802==1, campaign_cols].copy()
rsgs_201804=df.loc[df.i_rsgs_201804==1, campaign_cols].copy()
rsgs_201806=df.loc[df.i_rsgs_201806==1, campaign_cols].copy()
rsgs_201808=df.loc[df.i_rsgs_201808==1, campaign_cols].copy()
rsgs_201810=df.loc[df.i_rsgs_201810==1, campaign_cols].copy()
rsgs_201812=df.loc[df.i_rsgs_201812==1, campaign_cols].copy()
rsgs_201902=df.loc[df.i_rsgs_201902==1, campaign_cols].copy()
rsgs_201907=df.loc[df.i_rsgs_201907==1, campaign_cols].copy()
rsgs_201908=df.loc[df.i_rsgs_201908==1, campaign_cols].copy()

#### inact_201710

Find the month in which the credit limit was decreased

In [6]:
# Per-month distribution of each credit-limit field and of the reserve for the
# inact_201710 campaign, one table per variable.
with custom_formatting():
    for summary_var in ('ambs_crlim', 'atsm_bs_crlim', 'reserva'):
        display(summary_(df_=inact_201710,
                         by_='mth',
                         var=summary_var,
                         percentiles_=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2017-09-01,279060.0,2920.27,20413.65,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,35000.0,90000.0,571000.0,814931138.1
2017-10-01,279060.0,1316.64,13821.35,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,5000.0,32000.0,550000.0,367420497.0
2017-11-01,279060.0,1963.57,17238.58,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,10000.0,60000.0,561000.0,547952657.0
2017-12-01,279060.0,2018.32,17523.88,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,10000.0,63000.0,571000.0,563233252.0
2018-01-01,279060.0,2076.25,17716.96,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,11000.0,65000.0,571000.0,579398159.0
2018-02-01,279060.0,2080.9,17752.57,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,11000.0,65000.0,571000.0,580695082.0
2018-03-01,279060.0,2081.31,17753.61,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,11000.0,65000.0,571000.0,580809263.0
2018-04-01,279060.0,2070.99,17710.01,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,11000.0,65000.0,571000.0,577930991.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2017-09-01,279008.0,13304.89,23159.38,8.0,9.0,9.0,9.0,9.0,2000.0,7500.0,19000.0,28000.0,40000.0,60000.0,102000.0,566000.0,3712169593.6
2017-10-01,279056.0,2874.95,20005.4,8.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,34000.0,88000.0,566000.0,802271125.1
2017-11-01,278837.0,1560.98,15419.7,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,5000.0,44000.0,561000.0,435260010.0
2017-12-01,278834.0,1991.94,17388.6,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,10000.0,61000.0,571000.0,555420463.0
2018-01-01,278834.0,2038.53,17594.68,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,10000.0,63000.0,571000.0,568411217.0
2018-02-01,278835.0,2081.55,17747.91,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,11000.0,65000.0,571000.0,580409831.0
2018-03-01,278835.0,2081.9,17758.11,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,11000.0,65000.0,571000.0,580506282.0
2018-04-01,278835.0,2077.98,17747.66,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,11000.0,65000.0,571000.0,579412237.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2017-09-01,23611.0,445.47,1015.42,0.0,0.25,0.25,0.25,0.25,0.25,0.25,504.11,1199.84,2044.44,3143.39,5286.0,12966.78,10517941.6
2017-10-01,24640.0,213.29,998.19,0.0,0.0,0.02,0.1,0.21,0.25,0.25,61.32,560.12,1096.86,1868.01,3316.33,77896.49,5255511.94
2017-11-01,24699.0,318.5,1316.08,0.0,0.0,0.09,0.14,0.25,0.25,0.25,156.08,812.17,1540.33,2523.53,4453.12,71386.64,7866678.09
2017-12-01,24385.0,370.12,1457.58,0.0,0.02,0.13,0.25,0.25,0.25,0.25,212.58,952.2,1815.66,3007.03,5210.57,72364.58,9025477.73
2018-01-01,77171.0,117.85,871.2,0.0,0.21,0.25,0.25,0.25,0.25,0.25,0.25,106.37,528.71,1135.76,2468.22,68531.07,9094922.43
2018-02-01,22360.0,466.99,2155.6,0.0,0.1,0.21,0.25,0.25,0.25,0.28,280.4,1043.36,1973.3,3480.24,6802.34,132474.45,10441910.14
2018-03-01,20887.0,552.27,2408.06,0.0,0.1,0.25,0.25,0.25,0.25,0.28,324.52,1154.16,2243.14,4088.88,8769.73,103968.05,11535367.72
2018-04-01,20513.0,577.84,2569.19,0.0,0.09,0.21,0.25,0.25,0.25,0.28,315.58,1172.06,2370.87,4278.32,9686.91,106740.53,11853234.43


Fill missing credit limit

In [7]:
# Where atsm_bs_crlim is missing, fall back to ambs_crlim from the same row.
# A vectorized fillna gives the same values as a row-wise apply(axis=1) without
# calling a Python function per row, and assign() returns a new frame instead
# of writing a column into a subset of df.
inact_201710=inact_201710.assign(
    atsm_bs_crlim=lambda rows: rows['atsm_bs_crlim'].fillna(rows['ambs_crlim'])
)
with custom_formatting():
    display(inact_201710.head())
inact_201710.info()

Unnamed: 0,ambs_acct,mth,ambs_crlim,atsm_bs_crlim,reserva,eliminadas,i_na
0,4023185000022446,2017-09-01,250000.0,250000.0,7001.5,NO,0
1,4023185000022446,2017-10-01,10.0,10.0,0.02,NO,0
2,4023185000022446,2017-11-01,275000.0,275000.0,6249.58,NO,0
3,4023185000022446,2017-12-01,275000.0,275000.0,481.27,NO,0
4,4023185000164651,2017-09-01,376000.0,376000.0,2448.3,NO,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 2232480 entries, 0 to 2922351
Data columns (total 7 columns):
ambs_acct        int64
mth              datetime64[ns]
ambs_crlim       float64
atsm_bs_crlim    float64
reserva          float64
eliminadas       object
i_na             int8
dtypes: datetime64[ns](1), float64(3), int64(1), int8(1), object(1)
memory usage: 121.4+ MB


Collect data for each subject

In [8]:
# October 2017 rows with a non-missing account number.
inact_201710_1=inact_201710.loc[
    inact_201710['mth'].eq(pd.Timestamp('2017-10-01')) & inact_201710['ambs_acct'].notna()
]
# rn numbers the rows of each account in ascending reserva order (missing
# values sort last); keeping rn==1 leaves a single row per account, which
# verify_integrity enforces when ambs_acct becomes the index.
inact_201710_1=(
    inact_201710_1
    .assign(rn=lambda rows: rows.sort_values(by='reserva', ascending=True)
                                .groupby(by='ambs_acct')
                                .cumcount()+1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'atsm_bs_crlim', 'reserva', 'i_na'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_a_1',
                     'atsm_bs_crlim': 'cl_b_1',
                     'reserva': 'rva_1'})
)
with custom_formatting():
    display(inact_201710_1.head())
inact_201710_1.info()

Unnamed: 0_level_0,cl_a_1,cl_b_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4023185000022446,10.0,10.0,0.02,0
4023185000164651,10.0,10.0,0.28,0
4023185000281851,10.0,10.0,0.02,0
4023185000283956,131000.0,131000.0,1482.3,0
4023185000294250,10.0,10.0,0.28,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 279060 entries, 4023185000022446 to 5547648000165584
Data columns (total 4 columns):
cl_a_1    279060 non-null float64
cl_b_1    279060 non-null float64
rva_1     24640 non-null float64
i_na      279060 non-null int8
dtypes: float64(3), int8(1)
memory usage: 8.8 MB


In [9]:
# September 2017 rows with a non-missing account number.
inact_201710_0=inact_201710.loc[
    inact_201710['mth'].eq(pd.Timestamp('2017-09-01')) & inact_201710['ambs_acct'].notna()
]
# One row per account: the row with the largest reserva (descending sort,
# missing values last). Only accounts present in inact_201710_1 are kept.
inact_201710_0=(
    inact_201710_0
    .assign(rn=lambda rows: rows.sort_values(by='reserva', ascending=False)
                                .groupby(by='ambs_acct')
                                .cumcount()+1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=inact_201710_1.index, axis=0)
    .filter(items=['ambs_crlim', 'atsm_bs_crlim', 'reserva'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_a_0',
                     'atsm_bs_crlim': 'cl_b_0',
                     'reserva': 'rva_0'})
)
with custom_formatting():
    display(inact_201710_0.head())
inact_201710_0.info()

Unnamed: 0_level_0,cl_a_0,cl_b_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023185000022446,250000.0,250000.0,7001.5
4023185000164651,376000.0,376000.0,2448.3
4023185000281851,131000.0,131000.0,309.51
4023185000283956,131000.0,131000.0,627.93
4023185000294250,150000.0,150000.0,4200.9


<class 'pandas.core.frame.DataFrame'>
Int64Index: 279060 entries, 4023185000022446 to 5547648000165584
Data columns (total 3 columns):
cl_a_0    279060 non-null float64
cl_b_0    279060 non-null float64
rva_0     23611 non-null float64
dtypes: float64(3)
memory usage: 8.5 MB


In [10]:
# April 2018 rows with a non-missing account number.
inact_201710_c=inact_201710.loc[
    inact_201710['mth'].eq(pd.Timestamp('2018-04-01')) & inact_201710['ambs_acct'].notna()
]
# One row per account (smallest reserva first), restricted to the accounts in
# inact_201710_1. i_c is 1 where eliminadas equals 'CANCELADA' and 0 otherwise.
inact_201710_c=(
    inact_201710_c
    .assign(rn=lambda rows: rows.sort_values(by='reserva', ascending=True)
                                .groupby(by='ambs_acct')
                                .cumcount()+1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=inact_201710_1.index, axis=0)
    ['eliminadas']
    .eq('CANCELADA')
    .astype(np.int8)
    .to_frame(name='i_c')
)
with custom_formatting():
    display(inact_201710_c.head())
inact_201710_c.info()

Unnamed: 0_level_0,i_c
ambs_acct,Unnamed: 1_level_1
4023185000022446,0
4023185000164651,0
4023185000281851,0
4023185000283956,0
4023185000294250,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 279060 entries, 4023185000022446 to 5547648000165584
Data columns (total 1 columns):
i_c    279060 non-null int8
dtypes: int8(1)
memory usage: 2.4 MB


In [11]:
# Put the three per-account frames side by side on their shared ambs_acct
# index: the *_0 columns, then the *_1 columns and i_na, then i_c.
# NOTE(review): this rebinds inact_201710 from the monthly table to a
# one-row-per-account table, so the earlier cells that filter on
# inact_201710.mth cannot be re-run without re-running the split cell first.
inact_201710=inact_201710_0.join([inact_201710_1, inact_201710_c])
with custom_formatting():
    display(inact_201710.head())
inact_201710.info()

Unnamed: 0_level_0,cl_a_0,cl_b_0,rva_0,cl_a_1,cl_b_1,rva_1,i_na,i_c
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
4023185000022446,250000.0,250000.0,7001.5,10.0,10.0,0.02,0,0
4023185000164651,376000.0,376000.0,2448.3,10.0,10.0,0.28,0,0
4023185000281851,131000.0,131000.0,309.51,10.0,10.0,0.02,0,0
4023185000283956,131000.0,131000.0,627.93,131000.0,131000.0,1482.3,0,0
4023185000294250,150000.0,150000.0,4200.9,10.0,10.0,0.28,0,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 279060 entries, 4023185000022446 to 5547648000165584
Data columns (total 8 columns):
cl_a_0    279060 non-null float64
cl_b_0    279060 non-null float64
rva_0     23611 non-null float64
cl_a_1    279060 non-null float64
cl_b_1    279060 non-null float64
rva_1     24640 non-null float64
i_na      279060 non-null int8
i_c       279060 non-null int8
dtypes: float64(6), int8(2)
memory usage: 25.4 MB


Choose credit lines for the reserve savings estimation

In [12]:
with custom_formatting():
    display(inact_201710[['cl_a_0']].describe(percentiles=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100).T.assign(sum=inact_201710.cl_a_0.sum()))
    display(inact_201710[['cl_b_0']].describe(percentiles=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100).T.assign(sum=inact_201710.cl_b_0.sum()))
    display(inact_201710[['cl_a_1']].describe(percentiles=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100).T.assign(sum=inact_201710.cl_a_1.sum()))
    display(inact_201710[['cl_b_1']].describe(percentiles=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100).T.assign(sum=inact_201710.cl_b_1.sum()))

Unnamed: 0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
cl_a_0,279060.0,2920.27,20413.65,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,35000.0,90000.0,571000.0,814931138.1


Unnamed: 0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
cl_b_0,279060.0,13324.41,23232.33,8.0,9.0,9.0,9.0,9.0,2000.0,7500.0,19000.0,28000.0,40000.0,60000.0,103000.0,566000.0,3718309593.6


Unnamed: 0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
cl_a_1,279060.0,1316.64,13821.35,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,5000.0,32000.0,550000.0,367420497.0


Unnamed: 0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
cl_b_1,279060.0,2875.34,20005.91,0.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,34000.0,88000.0,566000.0,802392125.1


In [13]:
# Keep cl_b_0 as the "before" limit and cl_a_1 as the "after" limit, then build
# the estimated reserves: the observed reserve where present, otherwise the
# credit limit times 0.0418*0.67.
# NOTE(review): 0.0418 and 0.67 are undocumented factors — confirm their meaning.
inact_201710=(
    inact_201710
    .reset_index(drop=True)
    .drop(columns=['cl_a_0', 'cl_b_1'])
    .rename(columns={'cl_b_0': 'cl_0', 'cl_a_1': 'cl_1'})
    .assign(rva_e_0=lambda acct: acct['rva_0'].fillna(acct['cl_0']*0.0418*0.67),
            rva_e_1=lambda acct: acct['rva_1'].fillna(acct['cl_1']*0.0418*0.67),
            by='inact',
            mth=pd.Timestamp('2017-10-01'))
    [['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1', 'i_c']]
)
with custom_formatting():
    display(inact_201710.head())
inact_201710.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1,i_c
0,inact,2017-10-01,0,250000.0,10.0,7001.5,0.02,0
1,inact,2017-10-01,0,376000.0,10.0,2448.3,0.28,0
2,inact,2017-10-01,0,131000.0,10.0,309.51,0.02,0
3,inact,2017-10-01,0,131000.0,131000.0,627.93,1482.3,0
4,inact,2017-10-01,0,150000.0,10.0,4200.9,0.28,0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 279060 entries, 0 to 279059
Data columns (total 8 columns):
by         279060 non-null object
mth        279060 non-null datetime64[ns]
i_na       279060 non-null int8
cl_0       279060 non-null float64
cl_1       279060 non-null float64
rva_e_0    279060 non-null float64
rva_e_1    279060 non-null float64
i_c        279060 non-null int8
dtypes: datetime64[ns](1), float64(4), int8(2), object(1)
memory usage: 13.3+ MB


#### inact_201804

Find the month in which the credit limit was decreased

In [14]:
with custom_formatting():
    display(summary_(df_=inact_201804,
                     by_='mth',
                     var='ambs_crlim',
                     percentiles_=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100))
    display(summary_(df_=inact_201804,
                     by_='mth',
                     var='atsm_bs_crlim',
                     percentiles_=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100))
    display(summary_(df_=inact_201804,
                     by_='mth',
                     var='reserva',
                     percentiles_=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-03-01,175941.0,23013.93,29858.06,1003.24,2000.0,3000.0,3000.0,3500.0,7000.0,14000.0,26500.0,50000.0,77500.0,100000.0,156000.0,467000.0,4049093033.18
2018-04-01,175941.0,23042.93,29898.67,1003.24,2000.0,3000.0,3000.0,3500.0,7000.0,14000.0,26500.0,50000.0,77500.0,100000.0,156000.0,467000.0,4054195533.18
2018-05-01,175941.0,296.68,5516.8,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,466000.0,52197849.0
2018-06-01,175941.0,416.68,6544.04,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,466000.0,73311269.0
2018-07-01,175941.0,535.72,7486.25,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11000.0,466000.0,94254658.0
2018-08-01,175941.0,673.84,8448.85,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,17000.0,467000.0,118556520.0
2018-09-01,175941.0,585.69,7860.56,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,13000.0,467000.0,103047726.0
2018-10-01,175941.0,607.54,8165.61,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,13500.0,547000.0,106891426.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-03-01,175941.0,23009.38,29853.77,1003.24,2000.0,3000.0,3000.0,3500.0,7000.0,14000.0,26500.0,50000.0,77500.0,100000.0,156000.0,467000.0,4048293533.18
2018-04-01,175941.0,23032.43,29880.79,1003.24,2000.0,3000.0,3000.0,3500.0,7000.0,14000.0,26500.0,50000.0,77500.0,100000.0,156000.0,467000.0,4052349533.18
2018-05-01,175941.0,2915.36,10607.43,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,9000.0,19000.0,28000.0,46000.0,333000.0,512931744.18
2018-06-01,175941.0,362.24,6047.37,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,466000.0,63732627.0
2018-07-01,175941.0,470.91,6926.81,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,7000.0,466000.0,82852485.0
2018-08-01,175941.0,629.43,8219.1,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,15000.0,467000.0,110741883.0
2018-09-01,175940.0,585.46,7840.49,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,13000.0,467000.0,103005328.0
2018-10-01,175941.0,594.45,7959.14,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0,13000.0,467000.0,104588019.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-03-01,3207.0,1150.37,1586.27,28.1,56.01,56.01,84.02,112.02,252.05,574.12,1344.29,2800.6,4444.55,6161.32,8328.98,13078.8,3689222.04
2018-04-01,3884.0,1110.3,1550.6,0.0,56.01,56.01,84.02,126.03,252.05,560.12,1288.28,2716.58,4200.9,5877.06,8065.73,15729.51,4312406.84
2018-05-01,4481.0,250.26,944.14,0.01,0.11,0.23,0.31,0.31,0.31,0.31,0.31,644.14,1400.3,2268.49,4301.72,19645.98,1121429.04
2018-06-01,4860.0,1409.05,6564.45,0.0,0.11,0.23,0.31,0.31,0.31,0.31,656.54,2240.86,6639.77,12056.02,25799.2,183694.85,6847960.95
2018-07-01,5233.0,1353.96,6086.25,0.0,0.11,0.23,0.31,0.31,0.31,24.76,829.17,2373.43,5623.43,10406.9,23629.18,210791.67,7085272.83
2018-08-01,4816.0,1538.56,5192.54,0.0,0.11,0.23,0.31,0.31,0.31,308.07,1118.65,3222.12,6382.73,10868.14,22991.31,104367.34,7409689.76
2018-09-01,4799.0,1515.74,5024.08,0.0,0.11,0.23,0.31,0.31,0.31,319.11,1131.52,3219.49,6396.6,11213.32,18822.99,128313.62,7274038.16
2018-10-01,4701.0,1639.11,5286.85,0.0,0.13,0.23,0.31,0.31,0.31,355.78,1222.45,3462.2,6693.43,12148.92,23664.56,141167.33,7705450.07


Collect data for each subject

In [15]:
# May 2018 rows with a non-missing account number.
inact_201804_1=inact_201804.loc[
    inact_201804['mth'].eq(pd.Timestamp('2018-05-01')) & inact_201804['ambs_acct'].notna()
]
# rn numbers the rows of each account in ascending reserva order (missing
# values sort last); keeping rn==1 leaves a single row per account, which
# verify_integrity enforces when ambs_acct becomes the index.
inact_201804_1=(
    inact_201804_1
    .assign(rn=lambda rows: rows.sort_values(by='reserva', ascending=True)
                                .groupby(by='ambs_acct')
                                .cumcount()+1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_1',
                     'reserva': 'rva_1'})
)
with custom_formatting():
    display(inact_201804_1.head())
inact_201804_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023187960044394,11.0,,1
4023187960047603,11.0,,1
4023187960047744,11.0,,1
4023187960047769,11.0,,1
4023187960047827,11.0,,1


<class 'pandas.core.frame.DataFrame'>
Int64Index: 175941 entries, 4023187960044394 to 5547640001707134
Data columns (total 3 columns):
cl_1     175941 non-null float64
rva_1    4481 non-null float64
i_na     175941 non-null int8
dtypes: float64(2), int8(1)
memory usage: 4.2 MB


In [16]:
# April 2018 rows with a non-missing account number.
inact_201804_0=inact_201804.loc[
    inact_201804['mth'].eq(pd.Timestamp('2018-04-01')) & inact_201804['ambs_acct'].notna()
]
# One row per account: the row with the largest reserva (descending sort,
# missing values last). Only accounts present in inact_201804_1 are kept.
inact_201804_0=(
    inact_201804_0
    .assign(rn=lambda rows: rows.sort_values(by='reserva', ascending=False)
                                .groupby(by='ambs_acct')
                                .cumcount()+1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=inact_201804_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_0',
                     'reserva': 'rva_0'})
)
with custom_formatting():
    display(inact_201804_0.head())
inact_201804_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4023187960044394,184000.0,
4023187960047603,80000.0,
4023187960047744,200000.0,
4023187960047769,80000.0,
4023187960047827,130000.0,


<class 'pandas.core.frame.DataFrame'>
Int64Index: 175941 entries, 4023187960044394 to 5547640001707134
Data columns (total 2 columns):
cl_0     175941 non-null float64
rva_0    3884 non-null float64
dtypes: float64(2)
memory usage: 4.0 MB


In [17]:
# October 2018 rows with a non-missing account number.
inact_201804_c=inact_201804.loc[
    inact_201804['mth'].eq(pd.Timestamp('2018-10-01')) & inact_201804['ambs_acct'].notna()
]
# One row per account (smallest reserva first), restricted to the accounts in
# inact_201804_1. i_c is 1 where eliminadas equals 'CANCELADA' and 0 otherwise.
inact_201804_c=(
    inact_201804_c
    .assign(rn=lambda rows: rows.sort_values(by='reserva', ascending=True)
                                .groupby(by='ambs_acct')
                                .cumcount()+1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=inact_201804_1.index, axis=0)
    ['eliminadas']
    .eq('CANCELADA')
    .astype(np.int8)
    .to_frame(name='i_c')
)
with custom_formatting():
    display(inact_201804_c.head())
inact_201804_c.info()

Unnamed: 0_level_0,i_c
ambs_acct,Unnamed: 1_level_1
4023187960044394,0
4023187960047603,0
4023187960047744,0
4023187960047769,0
4023187960047827,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 175941 entries, 4023187960044394 to 5547640001707134
Data columns (total 1 columns):
i_c    175941 non-null int8
dtypes: int8(1)
memory usage: 1.5 MB


In [18]:
# Put the three per-account frames side by side on their shared ambs_acct
# index: cl_0/rva_0, then cl_1/rva_1/i_na, then i_c.
# NOTE(review): this rebinds inact_201804 from the monthly table to a
# one-row-per-account table, so the earlier cells that filter on
# inact_201804.mth cannot be re-run without re-running the split cell first.
inact_201804=inact_201804_0.join([inact_201804_1, inact_201804_c])
with custom_formatting():
    display(inact_201804.head())
inact_201804.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na,i_c
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4023187960044394,184000.0,,11.0,,1,0
4023187960047603,80000.0,,11.0,,1,0
4023187960047744,200000.0,,11.0,,1,0
4023187960047769,80000.0,,11.0,,1,0
4023187960047827,130000.0,,11.0,,1,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 175941 entries, 4023187960044394 to 5547640001707134
Data columns (total 6 columns):
cl_0     175941 non-null float64
rva_0    3884 non-null float64
cl_1     175941 non-null float64
rva_1    4481 non-null float64
i_na     175941 non-null int8
i_c      175941 non-null int8
dtypes: float64(4), int8(2)
memory usage: 12.0 MB


In [19]:
# Build the estimated reserves: the observed reserve where present, otherwise
# the credit limit times 0.0418*0.67; then tag the campaign and keep the
# common output columns.
# NOTE(review): 0.0418 and 0.67 are undocumented factors — confirm their meaning.
inact_201804=(
    inact_201804
    .reset_index(drop=True)
    .assign(rva_e_0=lambda acct: acct['rva_0'].fillna(acct['cl_0']*0.0418*0.67),
            rva_e_1=lambda acct: acct['rva_1'].fillna(acct['cl_1']*0.0418*0.67),
            by='inact',
            mth=pd.Timestamp('2018-04-01'))
    [['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1', 'i_c']]
)
with custom_formatting():
    display(inact_201804.head())
inact_201804.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1,i_c
0,inact,2018-04-01,1,184000.0,11.0,5153.1,0.31,0
1,inact,2018-04-01,1,80000.0,11.0,2240.48,0.31,0
2,inact,2018-04-01,1,200000.0,11.0,5601.2,0.31,0
3,inact,2018-04-01,1,80000.0,11.0,2240.48,0.31,0
4,inact,2018-04-01,1,130000.0,11.0,3640.78,0.31,0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175941 entries, 0 to 175940
Data columns (total 8 columns):
by         175941 non-null object
mth        175941 non-null datetime64[ns]
i_na       175941 non-null int8
cl_0       175941 non-null float64
cl_1       175941 non-null float64
rva_e_0    175941 non-null float64
rva_e_1    175941 non-null float64
i_c        175941 non-null int8
dtypes: datetime64[ns](1), float64(4), int8(2), object(1)
memory usage: 8.4+ MB


#### inact_201808

Find the month in which the credit limit was decreased

In [20]:
with custom_formatting():
    display(summary_(df_=inact_201808,
                     by_='mth',
                     var='ambs_crlim',
                     percentiles_=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100))
    display(summary_(df_=inact_201808,
                     by_='mth',
                     var='atsm_bs_crlim',
                     percentiles_=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100))
    display(summary_(df_=inact_201808,
                     by_='mth',
                     var='reserva',
                     percentiles_=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-07-01,94626.0,23720.05,31428.03,2000.0,2000.0,3000.0,3000.0,3000.0,6500.0,14000.0,27000.0,51000.0,81000.0,100000.0,164000.0,466000.0,2244533840.5
2018-08-01,94626.0,23812.67,31536.18,2000.0,2000.0,3000.0,3000.0,3000.0,6500.0,14000.0,27000.0,51500.0,81000.0,100000.0,164500.0,466000.0,2253297841.5
2018-09-01,94626.0,209.84,4024.77,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,391000.0,19856546.0
2018-10-01,94626.0,395.73,5985.19,0.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,3500.0,391000.0,37446018.0
2018-11-01,94626.0,488.96,6782.14,0.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,10000.0,466000.0,46268222.0
2018-12-01,94626.0,511.02,7063.82,0.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,11000.0,466000.0,48355314.0
2019-01-01,94626.0,518.95,7108.38,0.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,11500.0,466000.0,49106486.0
2019-02-01,94626.0,469.9,6882.93,0.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,8000.0,466000.0,44464660.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-07-01,94608.0,23593.4,31315.89,1.0,2000.0,3000.0,3000.0,3000.0,6500.0,14000.0,27000.0,51000.0,80500.0,100000.0,163000.0,466000.0,2232124849.5
2018-08-01,94626.0,23743.57,31449.69,2000.0,2000.0,3000.0,3000.0,3000.0,6500.0,14000.0,27000.0,51000.0,81000.0,100000.0,164000.0,466000.0,2246758840.5
2018-09-01,94626.0,359.48,5704.54,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,3000.0,466000.0,34016213.0
2018-10-01,94625.0,325.8,5349.03,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,391000.0,30828356.0
2018-11-01,94625.0,438.69,6218.22,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,7000.0,391000.0,41511386.0
2018-12-01,94625.0,501.55,6910.53,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,11000.0,466000.0,47458934.0
2019-01-01,94625.0,518.01,7106.68,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,11500.0,466000.0,49016534.0
2019-02-01,94625.0,466.37,6836.54,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,12.0,8000.0,466000.0,44130658.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-07-01,2792.0,989.56,1417.68,42.0,56.01,84.02,84.02,140.03,252.05,532.11,1120.24,2321.7,3364.5,4711.31,7001.5,30820.65,2762852.85
2018-08-01,2951.0,974.52,1400.28,42.0,56.01,84.02,84.02,140.03,252.05,532.11,1120.24,2240.48,3297.71,4712.01,7001.5,30820.65,2875796.64
2018-09-01,3309.0,129.57,524.9,0.08,0.14,0.34,0.34,0.34,0.34,0.34,0.34,336.07,812.17,1436.71,2602.32,10950.35,428737.45
2018-10-01,3532.0,427.73,1553.68,0.0,0.17,0.34,0.34,0.34,0.34,0.34,348.99,963.29,1624.35,2800.6,7023.17,27679.41,1510746.42
2018-11-01,3721.0,566.57,2286.32,0.0,0.17,0.34,0.34,0.34,0.34,0.34,637.52,1241.1,1934.97,3344.65,7826.62,63932.28,2108214.36
2018-12-01,3753.0,627.79,2575.99,0.0,0.17,0.25,0.34,0.34,0.34,0.38,698.65,1282.9,2064.21,3669.9,7743.8,79980.3,2356092.76
2019-01-01,3673.0,757.98,3243.17,0.0,0.17,0.25,0.34,0.34,0.34,2.32,765.59,1416.54,2138.25,4918.76,10935.69,99602.6,2784044.2
2019-02-01,2793.0,1165.2,4654.03,0.0,0.12,0.25,0.34,0.34,0.34,238.29,1050.26,1700.32,3988.98,7388.16,15986.09,115643.81,3254400.56


Collect data for each subject

In [21]:
# "After" snapshot for the inact_201808 cohort: rows dated 2018-09-01, the first month in
# which the summary above shows ambs_crlim collapsed (median 12.0), reduced to one row
# per account.
inact_201808_1 = (
    inact_201808
    .loc[lambda d: (d['mth'] == pd.Timestamp(year=2018, month=9, day=1)) & d['ambs_acct'].notna()]
    # Number each account's rows in ascending order of reserva (sort_values places NaN
    # last by default); rn == 1 is therefore the row with the smallest reserve.
    .assign(rn=lambda d: (d.sort_values(by='reserva', ascending=True)
                           .groupby(by='ambs_acct')
                           .cumcount() + 1))
    .query('rn==1')
    # verify_integrity=True makes set_index raise if an account is still duplicated.
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_1', 'reserva': 'rva_1'})
)

with custom_formatting():
    display(inact_201808_1.head())
inact_201808_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023187960055747,12.0,,0
4023187960062370,12.0,,1
4023187960062529,12.0,,1
4023187960062602,12.0,,1
4023187960062669,12.0,,1


<class 'pandas.core.frame.DataFrame'>
Int64Index: 94626 entries, 4023187960055747 to 5547648000156682
Data columns (total 3 columns):
cl_1     94626 non-null float64
rva_1    3309 non-null float64
i_na     94626 non-null int8
dtypes: float64(2), int8(1)
memory usage: 2.3 MB


In [22]:
# "Before" snapshot for the inact_201808 cohort: rows dated 2018-08-01 (the month preceding
# the one used for inact_201808_1), one row per account, limited to the accounts that are
# present in inact_201808_1.
inact_201808_0 = (
    inact_201808
    .loc[lambda d: (d['mth'] == pd.Timestamp(year=2018, month=8, day=1)) & d['ambs_acct'].notna()]
    # Descending sort this time (NaN still last), so rn == 1 is the row with the
    # largest reserve for the account.
    .assign(rn=lambda d: (d.sort_values(by='reserva', ascending=False)
                           .groupby(by='ambs_acct')
                           .cumcount() + 1))
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    # Keep only accounts that also exist in the "after" snapshot.
    .filter(items=inact_201808_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_0', 'reserva': 'rva_0'})
)

with custom_formatting():
    display(inact_201808_0.head())
inact_201808_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4023187960055747,300000.0,8401.8
4023187960062370,180000.0,
4023187960062529,300000.0,
4023187960062602,260000.0,
4023187960062669,130000.0,


<class 'pandas.core.frame.DataFrame'>
Int64Index: 94626 entries, 4023187960055747 to 5547648000156682
Data columns (total 2 columns):
cl_0     94626 non-null float64
rva_0    2951 non-null float64
dtypes: float64(2)
memory usage: 2.2 MB


In [23]:
# Cancellation flag for the inact_201808 cohort, read from the 2019-02-01 rows (the last
# month listed in the summary above): i_c is 1 when eliminadas equals 'CANCELADA', else 0.
inact_201808_c = (
    inact_201808
    .loc[lambda d: (d['mth'] == pd.Timestamp(year=2019, month=2, day=1)) & d['ambs_acct'].notna()]
    # One row per account: the row with the smallest reserva (NaN sorted last).
    .assign(rn=lambda d: (d.sort_values(by='reserva', ascending=True)
                           .groupby(by='ambs_acct')
                           .cumcount() + 1))
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    # Restrict to the accounts present in the "after" snapshot.
    .filter(items=inact_201808_1.index, axis=0)
    .loc[:, 'eliminadas']
    # Missing values compare unequal to the label, so they are flagged 0.
    .eq('CANCELADA')
    .astype(np.int8)
    .to_frame(name='i_c')
)

with custom_formatting():
    display(inact_201808_c.head())
inact_201808_c.info()

Unnamed: 0_level_0,i_c
ambs_acct,Unnamed: 1_level_1
4023187960055747,1
4023187960062370,0
4023187960062529,0
4023187960062602,0
4023187960062669,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 94626 entries, 4023187960055747 to 5547648000156682
Data columns (total 1 columns):
i_c    94626 non-null int8
dtypes: int8(1)
memory usage: 831.7 KB


In [24]:
# Combine the "before" snapshot, the "after" snapshot and the cancellation flag into one
# wide table indexed by ambs_acct (left join on the index of inact_201808_0).
# Note: this rebinds `inact_201808` from the monthly panel to the wide table, which has no
# `mth` column, so the cells that build inact_201808_0/_1/_c cannot be re-run afterwards
# without first recreating the panel.
inact_201808 = inact_201808_0.join(other=[inact_201808_1, inact_201808_c], how='left')

with custom_formatting():
    display(inact_201808.head())
inact_201808.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na,i_c
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4023187960055747,300000.0,8401.8,12.0,,0,1
4023187960062370,180000.0,,12.0,,1,0
4023187960062529,300000.0,,12.0,,1,0
4023187960062602,260000.0,,12.0,,1,0
4023187960062669,130000.0,,12.0,,1,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 94626 entries, 4023187960055747 to 5547648000156682
Data columns (total 6 columns):
cl_0     94626 non-null float64
rva_0    2951 non-null float64
cl_1     94626 non-null float64
rva_1    3309 non-null float64
i_na     94626 non-null int8
i_c      94626 non-null int8
dtypes: float64(4), int8(2)
memory usage: 6.3 MB


In [25]:
# Final per-account table for the inact_201808 cohort: drop the account index, fill in
# estimated reserves, and tag every row with constant cohort labels (by, mth).
inact_201808 = (
    inact_201808
    .reset_index(drop=True)
    .assign(
        # Estimated reserve: the observed reserve where present, otherwise
        # credit limit * 0.0418 * 0.67.
        # NOTE(review): the two multipliers are not explained anywhere in view — confirm
        # their meaning and source.
        rva_e_0=lambda d: d['rva_0'].fillna(d['cl_0']*0.0418*0.67),
        rva_e_1=lambda d: d['rva_1'].fillna(d['cl_1']*0.0418*0.67),
        by='inact',
        mth=pd.Timestamp(year=2018, month=8, day=1),
    )
    .loc[:, ['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1', 'i_c']]
)

with custom_formatting():
    display(inact_201808.head())
inact_201808.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1,i_c
0,inact,2018-08-01,0,300000.0,12.0,8401.8,0.34,1
1,inact,2018-08-01,1,180000.0,12.0,5041.08,0.34,0
2,inact,2018-08-01,1,300000.0,12.0,8401.8,0.34,0
3,inact,2018-08-01,1,260000.0,12.0,7281.56,0.34,0
4,inact,2018-08-01,1,130000.0,12.0,3640.78,0.34,0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94626 entries, 0 to 94625
Data columns (total 8 columns):
by         94626 non-null object
mth        94626 non-null datetime64[ns]
i_na       94626 non-null int8
cl_0       94626 non-null float64
cl_1       94626 non-null float64
rva_e_0    94626 non-null float64
rva_e_1    94626 non-null float64
i_c        94626 non-null int8
dtypes: datetime64[ns](1), float64(4), int8(2), object(1)
memory usage: 4.5+ MB


#### inact_201901

Find the month in which the credit limit was decreased

In [26]:
# Per-month distribution (describe() statistics plus sum) of both credit-limit fields and
# the reserve for the inact_201901 cohort, used to locate the month of the limit decrease.
summary_pcts = np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100

with custom_formatting():
    for summary_var in ('ambs_crlim', 'atsm_bs_crlim', 'reserva'):
        display(summary_(df_=inact_201901,
                         by_='mth',
                         var=summary_var,
                         percentiles_=summary_pcts))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-12-01,85415.0,23826.84,30579.28,1105.0,3000.0,3000.0,3000.0,3000.0,7500.0,14500.0,27000.0,51000.0,77000.0,100000.0,160000.0,467000.0,2035169628.05
2019-01-01,85415.0,23931.98,30702.51,1105.0,3000.0,3000.0,3000.0,3000.0,7500.0,14500.0,27000.0,51000.0,78000.0,100000.0,160860.0,467000.0,2044149729.05
2019-02-01,85415.0,499.67,6481.75,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,11000.0,306000.0,42679562.0
2019-03-01,85415.0,1173.38,9728.54,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,12000.0,35000.0,513000.0,100224279.0
2019-04-01,85415.0,1356.49,10722.81,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,15000.0,39500.0,513000.0,115864912.0
2019-05-01,85415.0,1366.97,10783.49,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,15000.0,40000.0,513000.0,116759704.0
2019-06-01,85415.0,1362.83,10772.61,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,15000.0,40000.0,513000.0,116405904.0
2019-07-01,85415.0,1359.24,10758.5,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,15000.0,39000.0,513000.0,116099904.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-12-01,85415.0,23809.47,30568.84,1105.0,3000.0,3000.0,3000.0,3000.0,7500.0,14500.0,27000.0,51000.0,77000.0,100000.0,160000.0,467000.0,2033685628.05
2019-01-01,85415.0,23900.8,30679.13,1105.0,3000.0,3000.0,3000.0,3000.0,7500.0,14500.0,27000.0,51000.0,78000.0,100000.0,160000.0,467000.0,2041486729.05
2019-02-01,85415.0,445.61,6094.75,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,6000.0,301000.0,38061882.0
2019-03-01,85415.0,904.77,8768.58,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,27500.0,306000.0,77280724.0
2019-04-01,85415.0,1292.61,10446.54,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,14000.0,37930.0,513000.0,110408695.0
2019-05-01,85415.0,1366.91,10783.48,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,15000.0,40000.0,513000.0,116754717.0
2019-06-01,85415.0,1363.53,10772.09,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,15000.0,40000.0,513000.0,116465704.0
2019-07-01,85415.0,1359.29,10758.59,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,13.0,15000.0,39000.0,513000.0,116103904.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-12-01,1122.0,1113.53,1500.5,30.95,56.01,67.21,84.02,112.02,266.06,574.12,1368.79,2800.6,4087.48,5404.46,7001.5,13078.8,1249376.06
2019-01-01,1291.0,1085.32,1431.58,30.95,56.01,84.02,84.02,126.03,280.06,588.13,1316.28,2632.56,3892.83,5167.11,7001.5,13078.8,1401152.15
2019-02-01,1572.0,234.44,762.27,0.08,0.13,0.36,0.36,0.36,0.36,0.36,0.36,700.15,1356.88,2344.8,4010.18,8569.84,368545.33
2019-03-01,1861.0,1041.36,6319.64,0.0,0.13,0.36,0.36,0.36,0.36,1.34,685.22,1848.4,3892.83,7001.5,14428.03,194154.7,1937970.21
2019-04-01,2190.0,857.92,2529.9,0.0,0.13,0.27,0.36,0.36,0.36,140.03,782.53,1853.21,3286.8,6438.16,11644.36,44669.19,1878837.19
2019-05-01,1790.0,1168.17,3829.68,0.0,0.13,0.27,0.27,0.36,0.36,360.34,1057.96,2475.91,4426.16,7341.54,12789.79,91433.13,2091018.8
2019-06-01,1794.0,1305.67,4382.77,0.0,0.13,0.27,0.36,0.36,0.41,360.86,1172.78,3013.52,4898.16,8185.69,14750.8,131776.94,2342364.5
2019-07-01,1805.0,1627.34,5418.17,0.0,0.13,0.27,0.36,0.36,0.64,398.92,1288.28,4097.33,6739.74,11504.55,17745.92,166700.51,2937357.65


Collect data for each subject

In [27]:
# "After" snapshot for the inact_201901 cohort: rows dated 2019-02-01, the first month in
# which the summary above shows ambs_crlim collapsed (median 13.0), reduced to one row
# per account.
inact_201901_1 = (
    inact_201901
    .loc[lambda d: (d['mth'] == pd.Timestamp(year=2019, month=2, day=1)) & d['ambs_acct'].notna()]
    # Number each account's rows in ascending order of reserva (sort_values places NaN
    # last by default); rn == 1 is therefore the row with the smallest reserve.
    .assign(rn=lambda d: (d.sort_values(by='reserva', ascending=True)
                           .groupby(by='ambs_acct')
                           .cumcount() + 1))
    .query('rn==1')
    # verify_integrity=True makes set_index raise if an account is still duplicated.
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_1', 'reserva': 'rva_1'})
)

with custom_formatting():
    display(inact_201901_1.head())
inact_201901_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023187960055853,13.0,,1
4023187960057602,13.0,,1
4023187960062628,13.0,0.36,0
4023187960068856,13.0,,1
4023187960069029,13.0,,1


<class 'pandas.core.frame.DataFrame'>
Int64Index: 85415 entries, 4023187960055853 to 5547640002329250
Data columns (total 3 columns):
cl_1     85415 non-null float64
rva_1    1572 non-null float64
i_na     85415 non-null int8
dtypes: float64(2), int8(1)
memory usage: 2.0 MB


In [28]:
# "Before" snapshot for the inact_201901 cohort: rows dated 2019-01-01 (the month preceding
# the one used for inact_201901_1), one row per account, limited to the accounts that are
# present in inact_201901_1.
inact_201901_0 = (
    inact_201901
    .loc[lambda d: (d['mth'] == pd.Timestamp(year=2019, month=1, day=1)) & d['ambs_acct'].notna()]
    # Descending sort this time (NaN still last), so rn == 1 is the row with the
    # largest reserve for the account.
    .assign(rn=lambda d: (d.sort_values(by='reserva', ascending=False)
                           .groupby(by='ambs_acct')
                           .cumcount() + 1))
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    # Keep only accounts that also exist in the "after" snapshot.
    .filter(items=inact_201901_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_0', 'reserva': 'rva_0'})
)

with custom_formatting():
    display(inact_201901_0.head())
inact_201901_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4023187960055853,300000.0,
4023187960057602,153000.0,
4023187960062628,143000.0,4004.86
4023187960068856,195000.0,
4023187960069029,98500.0,


<class 'pandas.core.frame.DataFrame'>
Int64Index: 85415 entries, 4023187960055853 to 5547640002329250
Data columns (total 2 columns):
cl_0     85415 non-null float64
rva_0    1291 non-null float64
dtypes: float64(2)
memory usage: 2.0 MB


In [29]:
# Cancellation flag for the inact_201901 cohort, read from the 2019-07-01 rows (the last
# month listed in the summary above): i_c is 1 when eliminadas equals 'CANCELADA', else 0.
inact_201901_c = (
    inact_201901
    .loc[lambda d: (d['mth'] == pd.Timestamp(year=2019, month=7, day=1)) & d['ambs_acct'].notna()]
    # One row per account: the row with the smallest reserva (NaN sorted last).
    .assign(rn=lambda d: (d.sort_values(by='reserva', ascending=True)
                           .groupby(by='ambs_acct')
                           .cumcount() + 1))
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    # Restrict to the accounts present in the "after" snapshot.
    .filter(items=inact_201901_1.index, axis=0)
    .loc[:, 'eliminadas']
    # Missing values compare unequal to the label, so they are flagged 0.
    .eq('CANCELADA')
    .astype(np.int8)
    .to_frame(name='i_c')
)

with custom_formatting():
    display(inact_201901_c.head())
inact_201901_c.info()

Unnamed: 0_level_0,i_c
ambs_acct,Unnamed: 1_level_1
4023187960055853,1
4023187960057602,1
4023187960062628,1
4023187960068856,1
4023187960069029,1


<class 'pandas.core.frame.DataFrame'>
Int64Index: 85415 entries, 4023187960055853 to 5547640002329250
Data columns (total 1 columns):
i_c    85415 non-null int8
dtypes: int8(1)
memory usage: 750.7 KB


In [30]:
# Combine the "before" snapshot, the "after" snapshot and the cancellation flag into one
# wide table indexed by ambs_acct (left join on the index of inact_201901_0).
# Note: this rebinds `inact_201901` from the monthly panel to the wide table, which has no
# `mth` column, so the cells that build inact_201901_0/_1/_c cannot be re-run afterwards
# without first recreating the panel.
inact_201901 = inact_201901_0.join(other=[inact_201901_1, inact_201901_c], how='left')

with custom_formatting():
    display(inact_201901.head())
inact_201901.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na,i_c
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4023187960055853,300000.0,,13.0,,1,1
4023187960057602,153000.0,,13.0,,1,1
4023187960062628,143000.0,4004.86,13.0,0.36,0,1
4023187960068856,195000.0,,13.0,,1,1
4023187960069029,98500.0,,13.0,,1,1


<class 'pandas.core.frame.DataFrame'>
Int64Index: 85415 entries, 4023187960055853 to 5547640002329250
Data columns (total 6 columns):
cl_0     85415 non-null float64
rva_0    1291 non-null float64
cl_1     85415 non-null float64
rva_1    1572 non-null float64
i_na     85415 non-null int8
i_c      85415 non-null int8
dtypes: float64(4), int8(2)
memory usage: 5.9 MB


In [31]:
# Final per-account table for the inact_201901 cohort: drop the account index, fill in
# estimated reserves, and tag every row with constant cohort labels (by, mth).
inact_201901 = (
    inact_201901
    .reset_index(drop=True)
    .assign(
        # Estimated reserve: the observed reserve where present, otherwise
        # credit limit * 0.0418 * 0.67.
        # NOTE(review): the two multipliers are not explained anywhere in view — confirm
        # their meaning and source.
        rva_e_0=lambda d: d['rva_0'].fillna(d['cl_0']*0.0418*0.67),
        rva_e_1=lambda d: d['rva_1'].fillna(d['cl_1']*0.0418*0.67),
        by='inact',
        mth=pd.Timestamp(year=2019, month=1, day=1),
    )
    .loc[:, ['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1', 'i_c']]
)

with custom_formatting():
    display(inact_201901.head())
inact_201901.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1,i_c
0,inact,2019-01-01,1,300000.0,13.0,8401.8,0.36,1
1,inact,2019-01-01,1,153000.0,13.0,4284.92,0.36,1
2,inact,2019-01-01,0,143000.0,13.0,4004.86,0.36,1
3,inact,2019-01-01,1,195000.0,13.0,5461.17,0.36,1
4,inact,2019-01-01,1,98500.0,13.0,2758.59,0.36,1


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85415 entries, 0 to 85414
Data columns (total 8 columns):
by         85415 non-null object
mth        85415 non-null datetime64[ns]
i_na       85415 non-null int8
cl_0       85415 non-null float64
cl_1       85415 non-null float64
rva_e_0    85415 non-null float64
rva_e_1    85415 non-null float64
i_c        85415 non-null int8
dtypes: datetime64[ns](1), float64(4), int8(2), object(1)
memory usage: 4.1+ MB


#### rsgs_201712

Find the month in which the credit limit was decreased

In [32]:
# Per-month distribution (describe() statistics plus sum) of both credit-limit fields and
# the reserve for the rsgs_201712 cohort, used to locate the month of the limit decrease.
summary_pcts = np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100

with custom_formatting():
    for summary_var in ('ambs_crlim', 'atsm_bs_crlim', 'reserva'):
        display(summary_(df_=rsgs_201712,
                         by_='mth',
                         var=summary_var,
                         percentiles_=summary_pcts))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2017-11-01,34179.0,39444.77,44412.4,3000.0,4000.0,5000.0,7000.0,9100.0,14500.0,25000.0,46000.0,84000.0,121640.0,170000.0,240000.0,565000.0,1348182726.5
2017-12-01,34179.0,24436.56,34489.02,1000.0,1000.0,1000.0,1000.0,2000.0,5000.0,13500.0,29000.0,56000.0,86250.0,123887.5,177000.0,497000.0,835217263.55
2018-01-01,34179.0,24499.57,34518.14,0.0,1000.0,1000.0,1000.0,2000.0,5000.0,13500.0,29000.0,56250.0,86910.0,124000.0,177165.0,497000.0,837370713.55
2018-02-01,34179.0,24509.63,34516.16,0.0,1000.0,1000.0,1000.0,2000.0,5000.0,13650.0,29000.0,56250.0,86910.0,123887.5,177165.0,497000.0,837714513.55
2018-03-01,34179.0,24510.29,34514.8,0.0,1000.0,1000.0,1000.0,2000.0,5000.0,13750.0,29000.0,56250.0,86910.0,123887.5,177165.0,497000.0,837737163.55
2018-04-01,34179.0,24505.91,34492.19,0.0,1000.0,1000.0,1000.0,2000.0,5000.0,13750.0,29000.0,56250.0,86652.5,123750.0,177000.0,497000.0,837587398.55
2018-05-01,34179.0,24477.79,34489.8,0.0,1000.0,1000.0,1000.0,2000.0,5000.0,13500.0,29000.0,56250.0,86250.0,123750.0,177000.0,497000.0,836626314.55
2018-06-01,34179.0,24482.97,34494.88,0.0,1000.0,1000.0,1000.0,2000.0,5000.0,13500.0,29000.0,56250.0,86287.5,123750.0,177000.0,497000.0,836803303.55


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2017-11-01,34178.0,39444.02,44412.83,3000.0,4000.0,5000.0,7000.0,9100.0,14500.0,25000.0,46000.0,84000.0,121660.0,170000.0,240000.0,565000.0,1348117726.5
2017-12-01,34177.0,36677.78,42573.62,1000.0,2000.0,3000.0,5000.0,7500.0,13000.0,23000.0,43000.0,80000.0,115000.0,160000.0,229000.0,565000.0,1253536334.0
2018-01-01,34176.0,24449.88,34491.82,1000.0,1000.0,1000.0,1000.0,2000.0,5000.0,13500.0,29000.0,56250.0,86250.0,123906.25,177000.0,497000.0,835599238.55
2018-02-01,34175.0,24514.1,34524.77,1000.0,1000.0,1000.0,1000.0,2000.0,5000.0,13650.0,29000.0,56250.0,87000.0,124000.0,177195.0,497000.0,837769463.55
2018-03-01,34174.0,24511.59,34517.02,1000.0,1000.0,1000.0,1000.0,2000.0,5000.0,13700.0,29000.0,56250.0,86935.0,123918.75,177202.5,497000.0,837658913.55
2018-04-01,34173.0,24428.07,34381.59,0.0,1000.0,1000.0,1000.0,2000.0,5000.0,13500.0,29000.0,56220.0,86250.0,123750.0,177000.0,497000.0,834780388.55
2018-05-01,34168.0,23978.44,34192.6,0.0,0.0,1000.0,1000.0,2000.0,5000.0,13000.0,28050.0,56000.0,85412.5,122000.0,175165.0,497000.0,819295449.55
2018-06-01,34146.0,23714.86,33978.15,0.0,0.0,0.0,1000.0,2000.0,5000.0,13000.0,28000.0,55000.0,84993.75,120843.75,174000.0,497000.0,809767739.55


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2017-11-01,34154.0,3859.41,8572.63,0.0,0.0,0.0,7.71,83.28,440.97,1455.84,3957.16,9025.98,14713.42,22839.99,37886.49,290128.35,131814203.02
2017-12-01,34113.0,3277.63,8198.54,0.0,0.0,3.26,16.17,45.48,251.09,966.18,3117.95,7696.63,13208.3,20750.63,34512.34,298222.39,111809770.2
2018-01-01,33559.0,3361.71,8534.49,0.0,0.0,9.6,21.0,47.57,253.86,976.68,3094.98,7797.75,13692.29,21400.65,36579.75,309838.09,112815631.18
2018-02-01,32642.0,3936.57,9967.71,0.0,7.31,21.0,31.55,74.57,330.19,1163.91,3602.17,9066.59,16157.29,24983.85,43174.64,317584.05,128497552.23
2018-03-01,32096.0,4227.36,10765.25,0.0,7.82,21.0,32.29,79.84,330.72,1160.61,3710.97,9943.43,17419.03,28087.69,47669.47,272972.41,135681424.85
2018-04-01,30974.0,4288.49,11287.87,0.0,2.27,20.75,31.24,77.02,324.39,1107.71,3618.38,10057.03,17978.02,29129.97,49359.54,280251.69,132831575.04
2018-05-01,30242.0,4326.49,11552.06,0.0,4.42,20.36,29.04,70.0,296.79,1067.09,3515.08,10237.04,18560.05,29684.1,51051.74,268184.43,130841752.58
2018-06-01,29440.0,4276.33,11567.47,0.0,4.66,20.16,31.55,74.07,291.87,1034.48,3382.29,10140.51,18535.02,29566.54,51067.93,275336.01,125895146.39


Collect data for each subject

In [33]:
# "After" snapshot for the rsgs_201712 cohort: rows dated 2017-12-01, the first month in
# which the summary above shows a lower ambs_crlim distribution, reduced to one row per
# account.
rsgs_201712_1 = (
    rsgs_201712
    .loc[lambda d: (d['mth'] == pd.Timestamp(year=2017, month=12, day=1)) & d['ambs_acct'].notna()]
    # Number each account's rows in ascending order of reserva (sort_values places NaN
    # last by default); rn == 1 is therefore the row with the smallest reserve.
    .assign(rn=lambda d: (d.sort_values(by='reserva', ascending=True)
                           .groupby(by='ambs_acct')
                           .cumcount() + 1))
    .query('rn==1')
    # verify_integrity=True makes set_index raise if an account is still duplicated.
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_1', 'reserva': 'rva_1'})
)

with custom_formatting():
    display(rsgs_201712_1.head())
rsgs_201712_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023187960000164,256000.0,12166.48,0
4023187960000891,114000.0,5517.27,0
4023187960005379,81000.0,16481.48,0
4023187960005858,267000.0,15436.42,0
4023187960010130,394000.0,18293.9,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 34179 entries, 4023187960000164 to 5547640001762246
Data columns (total 3 columns):
cl_1     34179 non-null float64
rva_1    34113 non-null float64
i_na     34179 non-null int8
dtypes: float64(2), int8(1)
memory usage: 834.4 KB


In [34]:
# "Before" snapshot for the rsgs_201712 cohort: rows dated 2017-11-01 (the month preceding
# the one used for rsgs_201712_1), one row per account, limited to the accounts that are
# present in rsgs_201712_1.
rsgs_201712_0 = (
    rsgs_201712
    .loc[lambda d: (d['mth'] == pd.Timestamp(year=2017, month=11, day=1)) & d['ambs_acct'].notna()]
    # Descending sort this time (NaN still last), so rn == 1 is the row with the
    # largest reserve for the account.
    .assign(rn=lambda d: (d.sort_values(by='reserva', ascending=False)
                           .groupby(by='ambs_acct')
                           .cumcount() + 1))
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    # Keep only accounts that also exist in the "after" snapshot.
    .filter(items=rsgs_201712_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_0', 'reserva': 'rva_0'})
)

with custom_formatting():
    display(rsgs_201712_0.head())
rsgs_201712_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4023187960000164,300000.0,13969.57
4023187960000891,120000.0,5771.66
4023187960005379,90000.0,30755.25
4023187960005858,415000.0,22659.89
4023187960010130,412000.0,20943.93


<class 'pandas.core.frame.DataFrame'>
Int64Index: 34179 entries, 4023187960000164 to 5547640001762246
Data columns (total 2 columns):
cl_0     34179 non-null float64
rva_0    34154 non-null float64
dtypes: float64(2)
memory usage: 801.1 KB


In [35]:
# Combine the "before" and "after" snapshots into one wide table indexed by ambs_acct
# (left join on the index of rsgs_201712_0).
# Note: this rebinds `rsgs_201712` from the monthly panel to the wide table, which has no
# `mth` column, so the cells that build rsgs_201712_0/_1 cannot be re-run afterwards
# without first recreating the panel.
rsgs_201712 = rsgs_201712_0.join(other=rsgs_201712_1, how='left')

with custom_formatting():
    display(rsgs_201712.head())
rsgs_201712.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4023187960000164,300000.0,13969.57,256000.0,12166.48,0
4023187960000891,120000.0,5771.66,114000.0,5517.27,0
4023187960005379,90000.0,30755.25,81000.0,16481.48,0
4023187960005858,415000.0,22659.89,267000.0,15436.42,0
4023187960010130,412000.0,20943.93,394000.0,18293.9,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 34179 entries, 4023187960000164 to 5547640001762246
Data columns (total 5 columns):
cl_0     34179 non-null float64
rva_0    34154 non-null float64
cl_1     34179 non-null float64
rva_1    34113 non-null float64
i_na     34179 non-null int8
dtypes: float64(4), int8(1)
memory usage: 2.6 MB


In [36]:
# Final per-account table for the rsgs_201712 cohort: drop the account index, fill in
# estimated reserves, and tag every row with constant cohort labels (by, mth).
rsgs_201712 = (
    rsgs_201712
    .reset_index(drop=True)
    .assign(
        # Estimated reserve: the observed reserve where present, otherwise
        # credit limit * 0.0418 * 0.67.
        # NOTE(review): the two multipliers are not explained anywhere in view — confirm
        # their meaning and source.
        rva_e_0=lambda d: d['rva_0'].fillna(d['cl_0']*0.0418*0.67),
        rva_e_1=lambda d: d['rva_1'].fillna(d['cl_1']*0.0418*0.67),
        by='rsgs',
        mth=pd.Timestamp(year=2017, month=12, day=1),
    )
    .loc[:, ['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1']]
)

with custom_formatting():
    display(rsgs_201712.head())
rsgs_201712.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1
0,rsgs,2017-12-01,0,300000.0,256000.0,13969.57,12166.48
1,rsgs,2017-12-01,0,120000.0,114000.0,5771.66,5517.27
2,rsgs,2017-12-01,0,90000.0,81000.0,30755.25,16481.48
3,rsgs,2017-12-01,0,415000.0,267000.0,22659.89,15436.42
4,rsgs,2017-12-01,0,412000.0,394000.0,20943.93,18293.9


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34179 entries, 0 to 34178
Data columns (total 7 columns):
by         34179 non-null object
mth        34179 non-null datetime64[ns]
i_na       34179 non-null int8
cl_0       34179 non-null float64
cl_1       34179 non-null float64
rva_e_0    34179 non-null float64
rva_e_1    34179 non-null float64
dtypes: datetime64[ns](1), float64(4), int8(1), object(1)
memory usage: 1.6+ MB


#### rsgs_201802

Find the month in which the credit limit was decreased

In [37]:
# Per-month distribution (describe() statistics plus sum) of both credit-limit fields and
# the reserve for the rsgs_201802 cohort, used to locate the month of the limit decrease.
summary_pcts = np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100

with custom_formatting():
    for summary_var in ('ambs_crlim', 'atsm_bs_crlim', 'reserva'):
        display(summary_(df_=rsgs_201802,
                         by_='mth',
                         var=summary_var,
                         percentiles_=summary_pcts))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-01-01,20467.0,40615.89,46166.79,3000.0,4000.0,5000.0,7000.0,9500.0,15000.0,26000.0,47500.0,86000.0,127000.0,181000.0,244000.0,542000.0,831285429.5
2018-02-01,20467.0,29033.7,38651.58,0.0,1000.0,1000.0,2000.0,3000.0,7600.0,16875.0,35000.0,66000.0,99000.0,139000.0,197085.0,501000.0,594232725.5
2018-03-01,20467.0,29179.92,38772.96,0.0,1000.0,1000.0,2000.0,3000.0,7887.5,17000.0,35000.0,67000.0,100000.0,139000.0,198000.0,501000.0,597225480.5
2018-04-01,20467.0,29175.25,38765.2,0.0,1000.0,1000.0,2000.0,3000.0,7887.5,17000.0,35000.0,67000.0,100000.0,139000.0,198000.0,501000.0,597129880.5
2018-05-01,20467.0,29176.42,38764.58,0.0,1000.0,1000.0,2000.0,3000.0,7900.0,17000.0,35000.0,67000.0,100000.0,139000.0,198000.0,501000.0,597153791.5
2018-06-01,20467.0,29172.38,38759.75,0.0,1000.0,1000.0,2000.0,3000.0,7900.0,17000.0,35000.0,66730.0,100000.0,139000.0,198000.0,501000.0,597071191.5
2018-07-01,20467.0,29175.08,38758.77,0.0,1000.0,1000.0,2000.0,3000.0,7975.0,17000.0,35000.0,66730.0,100000.0,139000.0,198000.0,501000.0,597126441.5
2018-08-01,20467.0,29174.16,38757.14,0.0,1000.0,1000.0,2000.0,3000.0,7975.0,17000.0,35000.0,66730.0,100000.0,139000.0,198000.0,501000.0,597107541.5


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-01-01,20467.0,40615.89,46166.79,3000.0,4000.0,5000.0,7000.0,9500.0,15000.0,26000.0,47500.0,86000.0,127000.0,181000.0,244000.0,542000.0,831285429.5
2018-02-01,20467.0,39584.52,44306.25,1000.0,3500.0,5000.0,7000.0,9000.0,15000.0,25500.0,46000.0,83000.0,122000.0,171000.0,235000.0,523000.0,810176353.5
2018-03-01,20465.0,29093.32,38704.04,1000.0,1000.0,1000.0,2000.0,3000.0,7700.0,17000.0,35000.0,66000.0,99000.0,139000.0,197520.0,501000.0,595394700.5
2018-04-01,20465.0,29179.5,38771.91,1000.0,1000.0,1000.0,2000.0,3000.0,7900.0,17000.0,35000.0,67000.0,100000.0,139000.0,198000.0,501000.0,597158480.5
2018-05-01,20465.0,29178.98,38765.58,11.0,1000.0,1000.0,2000.0,3000.0,7900.0,17000.0,35000.0,67000.0,100000.0,139000.0,198000.0,501000.0,597147891.5
2018-06-01,20465.0,29108.64,38724.76,0.0,1000.0,1000.0,2000.0,3000.0,7700.0,17000.0,35000.0,66000.0,99000.0,139000.0,198000.0,501000.0,595708391.5
2018-07-01,20461.0,28651.1,38525.57,0.0,0.0,1000.0,1000.0,3000.0,7125.0,16200.0,34000.0,66000.0,98000.0,138000.0,197550.0,501000.0,586230141.5
2018-08-01,20442.0,28285.24,38294.44,0.0,0.0,0.0,1000.0,3000.0,7000.0,16000.0,34000.0,65000.0,97000.0,137487.5,196000.0,501000.0,578206891.5


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-01-01,20466.0,4702.11,9195.85,0.0,0.0,17.33,76.22,212.75,732.84,2062.11,5060.94,11110.78,17647.57,25540.62,41109.35,308445.77,96233403.51
2018-02-01,20439.0,3959.83,8593.56,0.0,19.62,43.22,84.46,177.68,503.45,1489.53,3967.25,9178.31,15374.7,23777.9,37978.45,313668.43,80935044.55
2018-03-01,20210.0,4120.21,9729.35,0.0,12.39,34.09,72.67,156.3,464.68,1381.7,3834.31,9404.72,16338.84,26017.04,46540.28,330813.26,83269503.62
2018-04-01,19965.0,4699.51,11107.63,0.0,8.2,37.24,82.58,174.9,541.4,1561.1,4372.51,10501.22,18525.33,29931.22,53577.63,339464.07,93825805.48
2018-05-01,19695.0,5044.84,12278.84,0.0,15.63,42.0,83.99,176.77,522.51,1548.38,4453.84,11619.48,20599.15,33125.29,57258.83,356023.3,99358163.66
2018-06-01,19078.0,5147.64,12964.74,-413.21,13.9,38.59,77.91,166.58,489.96,1437.36,4317.76,12084.95,21746.03,34395.5,60573.93,372582.51,98206707.69
2018-07-01,18627.0,5245.35,13555.19,0.0,10.66,36.02,71.09,154.77,470.6,1398.79,4209.87,12294.82,23052.04,35273.59,58751.94,302083.01,97705126.69
2018-08-01,18103.0,5385.58,14047.35,0.0,8.98,31.55,65.34,145.68,443.44,1367.32,4254.5,12605.79,23874.86,37256.18,61950.34,344553.29,97495187.7


Collect data for each subject

In [38]:
# February 2018 rows of the rsgs_201802 cohort that carry an account number.
rsgs_201802_1 = rsgs_201802[
    (rsgs_201802['mth'] == pd.Timestamp('2018-02-01'))
    & rsgs_201802['ambs_acct'].notna()
]
# One row per account: rank rows inside each account by ascending reserva and
# keep rank 1. Missing reserva values sort last (pandas default), so they are
# only kept when the account has no other row.
rsgs_201802_1 = (
    rsgs_201802_1
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=True)
                          .groupby(by='ambs_acct')
                          .cumcount() + 1)
    .loc[lambda d: d['rn'] == 1]
    # verify_integrity raises if an account still appears more than once.
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    # Suffix _1 marks the later of the two months compared for this cohort.
    .rename(columns={'ambs_crlim': 'cl_1', 'reserva': 'rva_1'})
)
with custom_formatting():
    display(rsgs_201802_1.head())
rsgs_201802_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023187960004521,183000.0,12210.49,0
4023187960005122,226100.0,25494.6,0
4023187960006393,230000.0,56956.85,0
4023187960006740,92000.0,5645.39,0
4023187960009744,237000.0,9292.93,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 20467 entries, 4023187960004521 to 5547640001883513
Data columns (total 3 columns):
cl_1     20467 non-null float64
rva_1    20439 non-null float64
i_na     20467 non-null int8
dtypes: float64(2), int8(1)
memory usage: 499.7 KB


In [39]:
# January 2018 rows of the rsgs_201802 cohort that carry an account number.
rsgs_201802_0 = rsgs_201802[
    (rsgs_201802['mth'] == pd.Timestamp('2018-01-01'))
    & rsgs_201802['ambs_acct'].notna()
]
# One row per account: rank rows inside each account by descending reserva and
# keep rank 1. Missing reserva values still sort last (pandas default).
rsgs_201802_0 = (
    rsgs_201802_0
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=False)
                          .groupby(by='ambs_acct')
                          .cumcount() + 1)
    .loc[lambda d: d['rn'] == 1]
    # verify_integrity raises if an account still appears more than once.
    .set_index('ambs_acct', verify_integrity=True)
    # Keep only the accounts that also appear in the February snapshot.
    .filter(items=rsgs_201802_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    # Suffix _0 marks the earlier of the two months compared for this cohort.
    .rename(columns={'ambs_crlim': 'cl_0', 'reserva': 'rva_0'})
)
with custom_formatting():
    display(rsgs_201802_0.head())
rsgs_201802_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4023187960004521,244000.0,14501.4
4023187960005122,239000.0,24981.85
4023187960006393,234000.0,117987.81
4023187960006740,100000.0,5783.71
4023187960009744,327000.0,25426.62


<class 'pandas.core.frame.DataFrame'>
Int64Index: 20467 entries, 4023187960004521 to 5547640001883513
Data columns (total 2 columns):
cl_0     20467 non-null float64
rva_0    20466 non-null float64
dtypes: float64(2)
memory usage: 479.7 KB


In [40]:
# Put the January (_0) and February (_1) snapshots side by side, matched on the
# ambs_acct index (left join, the DataFrame.join default).
# NOTE: this rebinds rsgs_201802 from the monthly panel to a per-account frame,
# so the cells that filter on `mth` cannot be re-run after this one without
# rebuilding rsgs_201802 first.
rsgs_201802 = rsgs_201802_0.join(rsgs_201802_1, how='left')
with custom_formatting():
    display(rsgs_201802.head())
rsgs_201802.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4023187960004521,244000.0,14501.4,183000.0,12210.49,0
4023187960005122,239000.0,24981.85,226100.0,25494.6,0
4023187960006393,234000.0,117987.81,230000.0,56956.85,0
4023187960006740,100000.0,5783.71,92000.0,5645.39,0
4023187960009744,327000.0,25426.62,237000.0,9292.93,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 20467 entries, 4023187960004521 to 5547640001883513
Data columns (total 5 columns):
cl_0     20467 non-null float64
rva_0    20466 non-null float64
cl_1     20467 non-null float64
rva_1    20439 non-null float64
i_na     20467 non-null int8
dtypes: float64(4), int8(1)
memory usage: 1.4 MB


In [41]:
# Build the final per-account table for this cohort.
# rva_e_* is the observed reserva where present, otherwise cl * 0.0418 * 0.67.
# NOTE(review): 0.0418 and 0.67 are not explained anywhere in this cell;
# confirm what they represent and consider naming them as constants.
# NOTE: this rebinds rsgs_201802 again and drops rva_0 / rva_1, so the cell is
# not re-runnable on its own output.
rsgs_201802 = (
    rsgs_201802
    .reset_index(drop=True)  # the account number is discarded here
    .assign(
        rva_e_0=lambda d: d['rva_0'].fillna(d['cl_0'] * 0.0418 * 0.67),
        rva_e_1=lambda d: d['rva_1'].fillna(d['cl_1'] * 0.0418 * 0.67),
        by='rsgs',
        mth=pd.Timestamp('2018-02-01'),
    )
    [['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1']]
)
with custom_formatting():
    display(rsgs_201802.head())
rsgs_201802.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1
0,rsgs,2018-02-01,0,244000.0,183000.0,14501.4,12210.49
1,rsgs,2018-02-01,0,239000.0,226100.0,24981.85,25494.6
2,rsgs,2018-02-01,0,234000.0,230000.0,117987.81,56956.85
3,rsgs,2018-02-01,0,100000.0,92000.0,5783.71,5645.39
4,rsgs,2018-02-01,0,327000.0,237000.0,25426.62,9292.93


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20467 entries, 0 to 20466
Data columns (total 7 columns):
by         20467 non-null object
mth        20467 non-null datetime64[ns]
i_na       20467 non-null int8
cl_0       20467 non-null float64
cl_1       20467 non-null float64
rva_e_0    20467 non-null float64
rva_e_1    20467 non-null float64
dtypes: datetime64[ns](1), float64(4), int8(1), object(1)
memory usage: 979.5+ KB


#### rsgs_201804

Search for the date of the credit limit decrease

In [42]:
# Month-by-month distribution of each variable for the rsgs_201804 cohort;
# the tables are used to spot the month in which the limits change.
with custom_formatting():
    for _col in ('ambs_crlim', 'atsm_bs_crlim', 'reserva'):
        display(summary_(
            df_=rsgs_201804,
            by_='mth',
            var=_col,
            percentiles_=np.array([1, 2.5, 5, 10, 25, 50, 75, 90, 95, 97.5, 99]) / 100,
        ))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-03-01,26656.0,38462.46,42756.14,3000.0,3000.0,5000.0,6000.0,8500.0,14000.0,25000.0,46000.0,83000.0,117000.0,163000.0,221000.0,571000.0,1025255282.45
2018-04-01,26656.0,26882.9,35095.73,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,33000.0,62000.0,90000.0,127000.0,176000.0,501000.0,716590700.0
2018-05-01,26656.0,26922.54,35194.83,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,33000.0,62300.0,90675.0,127000.0,177000.0,501000.0,717647200.0
2018-06-01,26656.0,26884.07,35146.72,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,33000.0,62000.0,90525.0,127000.0,176000.0,501000.0,716621700.0
2018-07-01,26656.0,26879.62,35145.57,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,33000.0,62000.0,90525.0,127000.0,176000.0,501000.0,716503100.0
2018-08-01,26656.0,26864.63,35129.25,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,33000.0,62000.0,90000.0,127000.0,175725.0,501000.0,716103700.0
2018-09-01,26656.0,26866.49,35130.05,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,33000.0,62000.0,90000.0,127000.0,175725.0,501000.0,716153272.0
2018-10-01,26656.0,26848.99,35104.98,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,33000.0,62000.0,90000.0,126812.5,175500.0,501000.0,715686560.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-03-01,26656.0,38462.46,42756.14,3000.0,3000.0,5000.0,6000.0,8500.0,14000.0,25000.0,46000.0,83000.0,117000.0,163000.0,221000.0,571000.0,1025255282.45
2018-04-01,26655.0,35762.69,40788.18,1000.0,2000.0,3000.0,5000.0,7000.0,12000.0,23000.0,43000.0,77400.0,111000.0,154000.0,209000.0,501000.0,953254533.45
2018-05-01,26653.0,26914.25,35178.64,1000.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,33000.0,62000.0,90200.0,127000.0,176480.0,501000.0,717345400.0
2018-06-01,26652.0,26913.47,35184.22,1000.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,33000.0,62180.0,90735.0,127000.0,176490.0,501000.0,717297700.0
2018-07-01,26651.0,26873.82,35100.9,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,33000.0,62000.0,90250.0,127000.0,175750.0,501000.0,716214100.0
2018-08-01,26647.0,26372.73,34753.25,0.0,0.0,1000.0,1000.0,2000.0,6000.0,15000.0,32000.0,61000.0,90000.0,125425.0,172770.0,501000.0,702754200.0
2018-09-01,26616.0,25746.17,34345.51,0.0,0.0,0.0,1000.0,2000.0,6000.0,15000.0,32000.0,60800.0,88000.0,123000.0,170850.0,501000.0,685259936.0
2018-10-01,26536.0,25427.83,34218.04,0.0,0.0,0.0,0.0,2000.0,6000.0,15000.0,31000.0,60000.0,87000.0,121500.0,169650.0,501000.0,674752972.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-03-01,26656.0,4331.58,8570.82,0.0,23.05,63.21,119.41,252.88,672.14,1832.61,4618.35,9884.11,16056.27,24235.19,38329.48,235106.44,115462642.24
2018-04-01,26309.0,3975.75,9204.11,0.0,6.3,26.08,59.79,126.02,434.09,1363.36,3854.04,9187.06,15485.25,24683.07,40377.52,272704.48,104598110.21
2018-05-01,25833.0,4472.08,10669.11,0.0,9.39,31.55,61.88,131.88,469.6,1427.95,4121.68,10459.18,17832.72,27949.7,48461.47,339761.61,115527204.52
2018-06-01,25522.0,4971.96,12008.11,0.0,10.48,34.05,67.93,134.82,494.34,1532.15,4469.6,11467.38,20110.32,32260.37,55443.49,348821.9,126894323.03
2018-07-01,24493.0,5164.93,12476.19,0.0,8.43,31.64,65.62,140.68,467.25,1485.96,4545.11,12010.87,21703.76,34274.42,60120.86,335783.77,126504532.74
2018-08-01,23495.0,5119.65,12454.84,0.0,8.15,29.28,63.0,142.54,448.31,1418.65,4408.7,12134.51,21567.46,34962.51,58284.76,313770.47,120286076.88
2018-09-01,22895.0,5062.7,12784.66,0.0,8.78,28.01,63.0,138.64,410.97,1308.44,4220.8,12021.24,21315.91,34966.76,58033.14,322137.68,115910402.83
2018-10-01,22132.0,5018.34,12388.93,0.0,5.93,27.84,58.39,135.08,403.61,1288.86,4257.23,12164.92,21206.93,35010.87,58990.18,288801.22,111065838.26


Collect data for each subject

In [43]:
# April 2018 rows of the rsgs_201804 cohort that carry an account number.
rsgs_201804_1 = rsgs_201804[
    (rsgs_201804['mth'] == pd.Timestamp('2018-04-01'))
    & rsgs_201804['ambs_acct'].notna()
]
# One row per account: rank rows inside each account by ascending reserva and
# keep rank 1. Missing reserva values sort last (pandas default), so they are
# only kept when the account has no other row.
rsgs_201804_1 = (
    rsgs_201804_1
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=True)
                          .groupby(by='ambs_acct')
                          .cumcount() + 1)
    .loc[lambda d: d['rn'] == 1]
    # verify_integrity raises if an account still appears more than once.
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    # Suffix _1 marks the later of the two months compared for this cohort.
    .rename(columns={'ambs_crlim': 'cl_1', 'reserva': 'rva_1'})
)
with custom_formatting():
    display(rsgs_201804_1.head())
rsgs_201804_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023187960000875,190500.0,8641.38,0
4023187960008415,273000.0,10949.21,0
4023187960010536,120000.0,5926.53,0
4023187960013266,275500.0,18411.05,0
4023187960014371,244000.0,1182.5,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 26656 entries, 4023187960000875 to 5547640002145953
Data columns (total 3 columns):
cl_1     26656 non-null float64
rva_1    26309 non-null float64
i_na     26656 non-null int8
dtypes: float64(2), int8(1)
memory usage: 650.8 KB


In [44]:
# March 2018 rows of the rsgs_201804 cohort that carry an account number.
rsgs_201804_0 = rsgs_201804[
    (rsgs_201804['mth'] == pd.Timestamp('2018-03-01'))
    & rsgs_201804['ambs_acct'].notna()
]
# One row per account: rank rows inside each account by descending reserva and
# keep rank 1. Missing reserva values still sort last (pandas default).
rsgs_201804_0 = (
    rsgs_201804_0
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=False)
                          .groupby(by='ambs_acct')
                          .cumcount() + 1)
    .loc[lambda d: d['rn'] == 1]
    # verify_integrity raises if an account still appears more than once.
    .set_index('ambs_acct', verify_integrity=True)
    # Keep only the accounts that also appear in the April snapshot.
    .filter(items=rsgs_201804_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    # Suffix _0 marks the earlier of the two months compared for this cohort.
    .rename(columns={'ambs_crlim': 'cl_0', 'reserva': 'rva_0'})
)
with custom_formatting():
    display(rsgs_201804_0.head())
rsgs_201804_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4023187960000875,254000.0,8162.16
4023187960008415,325000.0,35577.94
4023187960010536,120000.0,5455.21
4023187960013266,501000.0,25152.54
4023187960014371,325000.0,3687.86


<class 'pandas.core.frame.DataFrame'>
Int64Index: 26656 entries, 4023187960000875 to 5547640002145953
Data columns (total 2 columns):
cl_0     26656 non-null float64
rva_0    26656 non-null float64
dtypes: float64(2)
memory usage: 624.8 KB


In [45]:
# Put the March (_0) and April (_1) snapshots side by side, matched on the
# ambs_acct index (left join, the DataFrame.join default).
# NOTE: this rebinds rsgs_201804 from the monthly panel to a per-account frame,
# so the cells that filter on `mth` cannot be re-run after this one without
# rebuilding rsgs_201804 first.
rsgs_201804 = rsgs_201804_0.join(rsgs_201804_1, how='left')
with custom_formatting():
    display(rsgs_201804.head())
rsgs_201804.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4023187960000875,254000.0,8162.16,190500.0,8641.38,0
4023187960008415,325000.0,35577.94,273000.0,10949.21,0
4023187960010536,120000.0,5455.21,120000.0,5926.53,0
4023187960013266,501000.0,25152.54,275500.0,18411.05,0
4023187960014371,325000.0,3687.86,244000.0,1182.5,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 26656 entries, 4023187960000875 to 5547640002145953
Data columns (total 5 columns):
cl_0     26656 non-null float64
rva_0    26656 non-null float64
cl_1     26656 non-null float64
rva_1    26309 non-null float64
i_na     26656 non-null int8
dtypes: float64(4), int8(1)
memory usage: 2.3 MB


In [46]:
# Build the final per-account table for this cohort.
# rva_e_* is the observed reserva where present, otherwise cl * 0.0418 * 0.67.
# NOTE(review): 0.0418 and 0.67 are not explained anywhere in this cell;
# confirm what they represent and consider naming them as constants.
# NOTE: this rebinds rsgs_201804 again and drops rva_0 / rva_1, so the cell is
# not re-runnable on its own output.
rsgs_201804 = (
    rsgs_201804
    .reset_index(drop=True)  # the account number is discarded here
    .assign(
        rva_e_0=lambda d: d['rva_0'].fillna(d['cl_0'] * 0.0418 * 0.67),
        rva_e_1=lambda d: d['rva_1'].fillna(d['cl_1'] * 0.0418 * 0.67),
        by='rsgs',
        mth=pd.Timestamp('2018-04-01'),
    )
    [['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1']]
)
with custom_formatting():
    display(rsgs_201804.head())
rsgs_201804.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1
0,rsgs,2018-04-01,0,254000.0,190500.0,8162.16,8641.38
1,rsgs,2018-04-01,0,325000.0,273000.0,35577.94,10949.21
2,rsgs,2018-04-01,0,120000.0,120000.0,5455.21,5926.53
3,rsgs,2018-04-01,0,501000.0,275500.0,25152.54,18411.05
4,rsgs,2018-04-01,0,325000.0,244000.0,3687.86,1182.5


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26656 entries, 0 to 26655
Data columns (total 7 columns):
by         26656 non-null object
mth        26656 non-null datetime64[ns]
i_na       26656 non-null int8
cl_0       26656 non-null float64
cl_1       26656 non-null float64
rva_e_0    26656 non-null float64
rva_e_1    26656 non-null float64
dtypes: datetime64[ns](1), float64(4), int8(1), object(1)
memory usage: 1.2+ MB


#### rsgs_201806

Search for the date of the credit limit decrease

In [47]:
# Month-by-month distribution of each variable for the rsgs_201806 cohort;
# the tables are used to spot the month in which the limits change.
with custom_formatting():
    for _col in ('ambs_crlim', 'atsm_bs_crlim', 'reserva'):
        display(summary_(
            df_=rsgs_201806,
            by_='mth',
            var=_col,
            percentiles_=np.array([1, 2.5, 5, 10, 25, 50, 75, 90, 95, 97.5, 99]) / 100,
        ))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-05-01,21628.0,37686.04,43559.81,3000.0,3000.0,5000.0,6000.0,8500.0,13500.0,24000.0,44000.0,79000.0,115000.0,161162.5,229365.0,550000.0,815073748.25
2018-06-01,21628.0,26877.85,36086.42,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,32000.0,61000.0,90000.0,127000.0,184000.0,538000.0,581314100.0
2018-07-01,21628.0,26895.26,36101.24,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,32000.0,61000.0,90650.0,127162.5,184000.0,538000.0,581690600.0
2018-08-01,21628.0,26889.14,36080.51,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,32000.0,61000.0,90000.0,127000.0,183730.0,538000.0,581558300.0
2018-09-01,21628.0,26885.97,36077.43,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,32000.0,61000.0,90000.0,127000.0,183730.0,538000.0,581489812.0
2018-10-01,21628.0,26883.81,36071.4,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,32000.0,61000.0,90000.0,127000.0,183730.0,538000.0,581443012.0
2018-11-01,21628.0,26881.22,36071.99,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,32000.0,61000.0,90000.0,127000.0,183730.0,538000.0,581387012.0
2018-12-01,21628.0,26875.3,36068.33,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,32000.0,61000.0,90000.0,127000.0,183730.0,538000.0,581258912.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-05-01,21628.0,37686.04,43559.81,3000.0,3000.0,5000.0,6000.0,8500.0,13500.0,24000.0,44000.0,79000.0,115000.0,161162.5,229365.0,550000.0,815073748.25
2018-06-01,21628.0,33374.97,40937.61,1000.0,1000.0,2000.0,3000.0,5000.0,10000.0,21000.0,40000.0,73000.0,106000.0,149000.0,209000.0,550000.0,721833924.25
2018-07-01,21627.0,26886.03,36097.19,1000.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,32000.0,61000.0,90000.0,127175.0,184000.0,538000.0,581464100.0
2018-08-01,21627.0,26900.18,36097.28,1000.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,32000.0,61000.0,90700.0,127175.0,184000.0,538000.0,581770200.0
2018-09-01,21626.0,26889.57,36078.99,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,32000.0,61000.0,90000.0,127000.0,183750.0,538000.0,581513800.0
2018-10-01,21625.0,26732.45,35860.59,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,16000.0,32000.0,61000.0,90000.0,126400.0,183000.0,538000.0,578089312.0
2018-11-01,21617.0,26252.48,35527.57,0.0,0.0,1000.0,1000.0,2000.0,7000.0,15000.0,31000.0,60000.0,89000.0,124800.0,181840.0,538000.0,567499812.0
2018-12-01,21592.0,25911.0,35302.39,0.0,0.0,0.0,1000.0,2000.0,6000.0,15000.0,31000.0,59000.0,88000.0,122000.0,180000.0,538000.0,559470312.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-05-01,21627.0,4624.79,9194.53,0.0,40.74,98.85,177.32,319.17,786.52,1954.03,4766.63,10624.45,17361.56,26190.31,41788.11,238380.11,100020245.05
2018-06-01,21536.0,3713.47,9135.38,0.0,18.37,42.35,77.34,155.23,442.19,1274.71,3455.81,8244.86,14100.22,23210.57,40047.1,243981.52,79973376.74
2018-07-01,21284.0,4041.52,10414.79,0.0,9.0,29.46,62.37,144.55,451.31,1308.18,3603.39,8825.72,15250.65,26073.98,45836.66,299018.35,86019664.05
2018-08-01,21017.0,4420.81,11391.46,0.0,9.84,33.42,70.96,154.58,485.63,1405.71,3921.01,9721.31,17112.84,28742.0,50288.67,306992.18,92912085.06
2018-09-01,20673.0,4644.92,11963.64,0.0,9.39,35.06,72.09,151.97,474.05,1381.2,3913.49,10347.92,19032.01,30755.8,54727.8,316345.34,96024530.34
2018-10-01,20007.0,4687.6,12286.36,0.0,11.47,37.41,74.55,150.84,457.76,1326.13,3892.91,10713.47,19074.72,31620.23,56515.62,355672.75,93784729.31
2018-11-01,19549.0,4769.51,12452.63,0.0,10.35,31.83,66.29,146.41,442.77,1312.29,3891.99,11185.62,19722.36,32432.91,55380.98,407099.41,93239241.25
2018-12-01,18968.0,4676.01,12129.94,0.0,7.69,28.01,59.23,129.54,404.27,1253.97,3815.96,10887.31,20018.91,32531.61,54511.1,417955.4,88694512.58


Collect data for each subject

In [48]:
# June 2018 rows of the rsgs_201806 cohort that carry an account number.
rsgs_201806_1 = rsgs_201806[
    (rsgs_201806['mth'] == pd.Timestamp('2018-06-01'))
    & rsgs_201806['ambs_acct'].notna()
]
# One row per account: rank rows inside each account by ascending reserva and
# keep rank 1. Missing reserva values sort last (pandas default), so they are
# only kept when the account has no other row.
rsgs_201806_1 = (
    rsgs_201806_1
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=True)
                          .groupby(by='ambs_acct')
                          .cumcount() + 1)
    .loc[lambda d: d['rn'] == 1]
    # verify_integrity raises if an account still appears more than once.
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    # Suffix _1 marks the later of the two months compared for this cohort.
    .rename(columns={'ambs_crlim': 'cl_1', 'reserva': 'rva_1'})
)
with custom_formatting():
    display(rsgs_201806_1.head())
rsgs_201806_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023187960012813,140000.0,11232.21,0
4023187960013142,157000.0,19598.09,0
4023187960016467,281000.0,34962.32,0
4023187960019875,113000.0,72630.54,0
4023187960021541,23000.0,3565.78,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 21628 entries, 4023187960012813 to 5547640002268540
Data columns (total 3 columns):
cl_1     21628 non-null float64
rva_1    21536 non-null float64
i_na     21628 non-null int8
dtypes: float64(2), int8(1)
memory usage: 528.0 KB


In [49]:
# May 2018 rows of the rsgs_201806 cohort that carry an account number.
rsgs_201806_0 = rsgs_201806[
    (rsgs_201806['mth'] == pd.Timestamp('2018-05-01'))
    & rsgs_201806['ambs_acct'].notna()
]
# One row per account: rank rows inside each account by descending reserva and
# keep rank 1. Missing reserva values still sort last (pandas default).
rsgs_201806_0 = (
    rsgs_201806_0
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=False)
                          .groupby(by='ambs_acct')
                          .cumcount() + 1)
    .loc[lambda d: d['rn'] == 1]
    # verify_integrity raises if an account still appears more than once.
    .set_index('ambs_acct', verify_integrity=True)
    # Keep only the accounts that also appear in the June snapshot.
    .filter(items=rsgs_201806_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    # Suffix _0 marks the earlier of the two months compared for this cohort.
    .rename(columns={'ambs_crlim': 'cl_0', 'reserva': 'rva_0'})
)
with custom_formatting():
    display(rsgs_201806_0.head())
rsgs_201806_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4023187960012813,142000.0,11142.88
4023187960013142,193500.0,13051.19
4023187960016467,300000.0,35267.74
4023187960019875,121000.0,53687.94
4023187960021541,300000.0,2680.95


<class 'pandas.core.frame.DataFrame'>
Int64Index: 21628 entries, 4023187960012813 to 5547640002268540
Data columns (total 2 columns):
cl_0     21628 non-null float64
rva_0    21627 non-null float64
dtypes: float64(2)
memory usage: 506.9 KB


In [50]:
# Put the May (_0) and June (_1) snapshots side by side, matched on the
# ambs_acct index (left join, the DataFrame.join default).
# NOTE: this rebinds rsgs_201806 from the monthly panel to a per-account frame,
# so the cells that filter on `mth` cannot be re-run after this one without
# rebuilding rsgs_201806 first.
rsgs_201806 = rsgs_201806_0.join(rsgs_201806_1, how='left')
with custom_formatting():
    display(rsgs_201806.head())
rsgs_201806.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4023187960012813,142000.0,11142.88,140000.0,11232.21,0
4023187960013142,193500.0,13051.19,157000.0,19598.09,0
4023187960016467,300000.0,35267.74,281000.0,34962.32,0
4023187960019875,121000.0,53687.94,113000.0,72630.54,0
4023187960021541,300000.0,2680.95,23000.0,3565.78,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 21628 entries, 4023187960012813 to 5547640002268540
Data columns (total 5 columns):
cl_0     21628 non-null float64
rva_0    21627 non-null float64
cl_1     21628 non-null float64
rva_1    21536 non-null float64
i_na     21628 non-null int8
dtypes: float64(4), int8(1)
memory usage: 1.5 MB


In [51]:
# Build the final per-account table for this cohort.
# rva_e_* is the observed reserva where present, otherwise cl * 0.0418 * 0.67.
# NOTE(review): 0.0418 and 0.67 are not explained anywhere in this cell;
# confirm what they represent and consider naming them as constants.
# NOTE: this rebinds rsgs_201806 again and drops rva_0 / rva_1, so the cell is
# not re-runnable on its own output.
rsgs_201806 = (
    rsgs_201806
    .reset_index(drop=True)  # the account number is discarded here
    .assign(
        rva_e_0=lambda d: d['rva_0'].fillna(d['cl_0'] * 0.0418 * 0.67),
        rva_e_1=lambda d: d['rva_1'].fillna(d['cl_1'] * 0.0418 * 0.67),
        by='rsgs',
        mth=pd.Timestamp('2018-06-01'),
    )
    [['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1']]
)
with custom_formatting():
    display(rsgs_201806.head())
rsgs_201806.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1
0,rsgs,2018-06-01,0,142000.0,140000.0,11142.88,11232.21
1,rsgs,2018-06-01,0,193500.0,157000.0,13051.19,19598.09
2,rsgs,2018-06-01,0,300000.0,281000.0,35267.74,34962.32
3,rsgs,2018-06-01,0,121000.0,113000.0,53687.94,72630.54
4,rsgs,2018-06-01,0,300000.0,23000.0,2680.95,3565.78


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21628 entries, 0 to 21627
Data columns (total 7 columns):
by         21628 non-null object
mth        21628 non-null datetime64[ns]
i_na       21628 non-null int8
cl_0       21628 non-null float64
cl_1       21628 non-null float64
rva_e_0    21628 non-null float64
rva_e_1    21628 non-null float64
dtypes: datetime64[ns](1), float64(4), int8(1), object(1)
memory usage: 1.0+ MB


#### rsgs_201808

Search for the date of the credit limit decrease

In [52]:
# Month-by-month distribution of each variable for the rsgs_201808 cohort;
# the tables are used to spot the month in which the limits change.
with custom_formatting():
    for _col in ('ambs_crlim', 'atsm_bs_crlim', 'reserva'):
        display(summary_(
            df_=rsgs_201808,
            by_='mth',
            var=_col,
            percentiles_=np.array([1, 2.5, 5, 10, 25, 50, 75, 90, 95, 97.5, 99]) / 100,
        ))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-07-01,22468.0,43124.8,51919.03,3000.0,3950.5,5000.0,6000.0,9000.0,14500.0,26000.0,50000.0,94500.0,141000.0,200000.0,270665.0,575000.0,968928001.0
2018-08-01,22468.0,30761.56,42871.69,1000.0,1000.0,1000.0,2000.0,3000.0,7000.0,17000.0,36000.0,71000.0,109000.0,158000.0,221665.0,544000.0,691150825.0
2018-09-01,22468.0,30783.75,42902.13,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,17000.0,36000.0,71000.0,109000.0,158000.0,222000.0,544000.0,691649325.0
2018-10-01,22468.0,30755.56,42879.65,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,17000.0,36000.0,71000.0,109000.0,158000.0,222000.0,544000.0,691016025.0
2018-11-01,22468.0,30758.9,42880.19,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,17000.0,36000.0,71000.0,109000.0,158000.0,222000.0,544000.0,691091025.0
2018-12-01,22468.0,30731.48,42868.22,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,17000.0,36000.0,71000.0,109000.0,158000.0,222000.0,544000.0,690474825.0
2019-01-01,22468.0,30731.28,42868.43,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,17000.0,36000.0,71000.0,109000.0,158000.0,222000.0,544000.0,690470325.0
2019-02-01,22468.0,30709.34,42861.02,0.0,1000.0,1000.0,2000.0,3000.0,7000.0,17000.0,36000.0,71000.0,109000.0,157325.0,222000.0,544000.0,689977377.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-07-01,22468.0,43124.8,51919.03,3000.0,3950.5,5000.0,6000.0,9000.0,14500.0,26000.0,50000.0,94500.0,141000.0,200000.0,270665.0,575000.0,968928001.0
2018-08-01,22468.0,38710.63,49077.22,1000.0,1000.0,2000.0,3000.0,5000.0,11500.0,23000.0,45525.0,86000.0,130000.0,182000.0,252330.0,575000.0,869750478.0
2018-09-01,22467.0,30775.57,42901.35,1000.0,1000.0,1000.0,2000.0,3000.0,7000.0,17000.0,36000.0,71000.0,109000.0,158000.0,222000.0,544000.0,691434825.0
2018-10-01,22466.0,30778.03,42898.27,1000.0,1000.0,1000.0,2000.0,3000.0,7000.0,17000.0,36000.0,71000.0,109000.0,158000.0,222000.0,544000.0,691459325.0
2018-11-01,22466.0,30756.03,42880.78,1000.0,1000.0,1000.0,2000.0,3000.0,7000.0,17000.0,36000.0,71000.0,109000.0,158000.0,222000.0,544000.0,690965025.0
2018-12-01,22466.0,30121.91,42480.09,0.0,0.0,1000.0,1000.0,3000.0,7000.0,16300.0,36000.0,70000.0,107875.0,156000.0,218000.0,536000.0,676718825.0
2019-01-01,22444.0,29373.08,41894.7,0.0,0.0,0.0,1000.0,2000.0,6000.0,16000.0,35000.0,69000.0,104940.0,152462.5,215785.0,536000.0,659249425.0
2019-02-01,22376.0,28806.6,41328.48,0.0,0.0,0.0,0.0,2000.0,6000.0,15700.0,34000.0,68000.0,103000.0,150000.0,211125.0,536000.0,644576577.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-07-01,22468.0,5490.76,12248.03,0.0,23.76,64.01,114.27,255.8,701.62,2006.09,5280.72,12516.68,21480.01,33694.38,56473.88,389493.97,123366323.47
2018-08-01,22141.0,5143.38,13234.24,0.0,10.63,30.79,66.35,154.77,495.99,1513.97,4393.81,11529.74,20101.16,34792.35,62326.82,399314.09,113879665.84
2018-09-01,21789.0,5703.09,15112.73,0.0,8.62,31.55,64.2,151.16,521.73,1598.23,4699.38,12487.31,22760.07,40706.45,72017.8,408452.13,124264545.55
2018-10-01,21391.0,6434.49,16849.67,0.0,8.62,36.68,72.63,168.55,556.23,1737.46,5292.78,14349.61,25988.27,45434.07,79129.03,394867.3,137640190.5
2018-11-01,20548.0,6507.18,17272.57,0.0,7.87,34.74,68.1,161.18,544.1,1667.22,5315.42,14770.98,26975.26,44944.13,80594.97,358896.58,133709535.75
2018-12-01,19676.0,6356.15,17045.84,0.0,6.75,30.16,63.1,146.99,488.4,1525.42,5030.76,14526.31,26654.76,45158.42,81338.27,367869.0,125063541.87
2019-01-01,19050.0,6108.41,16563.57,0.0,4.21,25.0,56.01,126.35,436.93,1424.22,4805.66,13690.88,26285.73,44042.27,80130.05,312336.53,116365270.88
2019-02-01,18398.0,5887.3,16363.06,0.0,4.69,23.63,56.01,127.88,420.93,1358.61,4603.91,13427.04,24758.79,42218.91,75421.09,344769.77,108314538.61


Collect data for each subject

In [53]:
# "After" snapshot for the 2018-08 cohort: rows of month 2018-08 that carry an account number,
# reduced to one row per account.
# Within each account the rows are ranked by ascending reserva (sort_values puts NaN last by
# default) and only the first-ranked row is kept; set_index(verify_integrity=True) then raises
# if any account number is still duplicated.
rsgs_201808_1 = (
    rsgs_201808
    .loc[lambda d: d.mth.eq(pd.Timestamp(year=2018, month=8, day=1)) & d.ambs_acct.notna()]
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=True).groupby(by='ambs_acct').cumcount() + 1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_1', 'reserva': 'rva_1'})
)
with custom_formatting():
    display(rsgs_201808_1.head())
rsgs_201808_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023187960001352,95000.0,36905.2,0
4023187960005379,78000.0,38343.66,0
4023187960009652,97000.0,15433.82,0
4023187960011146,10000.0,80.79,0
4023187960011633,22000.0,37.44,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 22468 entries, 4023187960001352 to 5547640002351726
Data columns (total 3 columns):
cl_1     22468 non-null float64
rva_1    22141 non-null float64
i_na     22468 non-null int8
dtypes: float64(2), int8(1)
memory usage: 548.5 KB


In [54]:
# "Before" snapshot for the 2018-08 cohort: rows of month 2018-07 that carry an account number,
# reduced to one row per account.
# Here the rows are ranked by descending reserva (NaN still sorts last), so the row with the
# largest reserva is kept. Only accounts present in rsgs_201808_1 are retained.
rsgs_201808_0 = (
    rsgs_201808
    .loc[lambda d: d.mth.eq(pd.Timestamp(year=2018, month=7, day=1)) & d.ambs_acct.notna()]
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=False).groupby(by='ambs_acct').cumcount() + 1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=rsgs_201808_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_0', 'reserva': 'rva_0'})
)
with custom_formatting():
    display(rsgs_201808_0.head())
rsgs_201808_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4023187960001352,110000.0,16746.52
4023187960005379,81000.0,21773.61
4023187960009652,260000.0,40492.4
4023187960011146,90000.0,937.08
4023187960011633,302000.0,1351.18


<class 'pandas.core.frame.DataFrame'>
Int64Index: 22468 entries, 4023187960001352 to 5547640002351726
Data columns (total 2 columns):
cl_0     22468 non-null float64
rva_0    22468 non-null float64
dtypes: float64(2)
memory usage: 526.6 KB


In [55]:
# Put the "before" columns (cl_0, rva_0) next to the "after" columns (cl_1, rva_1, i_na),
# matched on the ambs_acct index; every "before" account is kept (left join).
# NOTE: this rebinds rsgs_201808 from the monthly panel to the per-account table, so the cells
# that build rsgs_201808_0 / rsgs_201808_1 cannot be re-run afterwards without first
# recreating the original rsgs_201808.
rsgs_201808 = rsgs_201808_0.join(rsgs_201808_1, how='left')
with custom_formatting():
    display(rsgs_201808.head())
rsgs_201808.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4023187960001352,110000.0,16746.52,95000.0,36905.2,0
4023187960005379,81000.0,21773.61,78000.0,38343.66,0
4023187960009652,260000.0,40492.4,97000.0,15433.82,0
4023187960011146,90000.0,937.08,10000.0,80.79,0
4023187960011633,302000.0,1351.18,22000.0,37.44,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 22468 entries, 4023187960001352 to 5547640002351726
Data columns (total 5 columns):
cl_0     22468 non-null float64
rva_0    22468 non-null float64
cl_1     22468 non-null float64
rva_1    22141 non-null float64
i_na     22468 non-null int8
dtypes: float64(4), int8(1)
memory usage: 1.5 MB


In [56]:
# Final per-account table for the 2018-08 cohort.
# The account-number index is discarded, and rva_e_0 / rva_e_1 are the reserva values with
# missing entries replaced by credit limit * 0.0418 * 0.67.
# NOTE(review): the meaning of the 0.0418 and 0.67 factors is not stated in this notebook section
# - confirm their source and consider naming them as constants.
rsgs_201808 = (
    rsgs_201808
    .reset_index(drop=True)
    .assign(
        rva_e_0=lambda d: d.rva_0.fillna(d.cl_0*0.0418*0.67),
        rva_e_1=lambda d: d.rva_1.fillna(d.cl_1*0.0418*0.67),
        by='rsgs',
        mth=pd.Timestamp(year=2018, month=8, day=1),
    )
    .filter(items=['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1'], axis=1)
)
with custom_formatting():
    display(rsgs_201808.head())
rsgs_201808.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1
0,rsgs,2018-08-01,0,110000.0,95000.0,16746.52,36905.2
1,rsgs,2018-08-01,0,81000.0,78000.0,21773.61,38343.66
2,rsgs,2018-08-01,0,260000.0,97000.0,40492.4,15433.82
3,rsgs,2018-08-01,0,90000.0,10000.0,937.08,80.79
4,rsgs,2018-08-01,0,302000.0,22000.0,1351.18,37.44


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22468 entries, 0 to 22467
Data columns (total 7 columns):
by         22468 non-null object
mth        22468 non-null datetime64[ns]
i_na       22468 non-null int8
cl_0       22468 non-null float64
cl_1       22468 non-null float64
rva_e_0    22468 non-null float64
rva_e_1    22468 non-null float64
dtypes: datetime64[ns](1), float64(4), int8(1), object(1)
memory usage: 1.1+ MB


#### rsgs_201810

Find the date (month) of the credit limit decrease

In [57]:
# Per-month summary (describe + sum) of the two credit-limit variables and the reserva for the
# 2018-10 cohort, all using the same percentile grid (expressed as fractions).
summary_pctl = np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100
with custom_formatting():
    for summary_var in ('ambs_crlim', 'atsm_bs_crlim', 'reserva'):
        display(summary_(df_=rsgs_201810,
                         by_='mth',
                         var=summary_var,
                         percentiles_=summary_pctl))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-09-01,16085.0,38389.68,43200.28,10.0,3500.0,5000.0,6000.0,9000.0,14000.0,25000.0,45000.0,81000.0,114000.0,164000.0,230160.0,518000.0,617498021.5
2018-10-01,16085.0,29993.75,37973.47,1000.0,1000.0,1000.0,2000.0,4000.0,8600.0,18000.0,36000.0,68000.0,98000.0,137000.0,199160.0,492000.0,482449454.0
2018-11-01,16085.0,29995.61,37976.37,1000.0,1000.0,1000.0,2000.0,4000.0,8600.0,18000.0,36000.0,68000.0,98000.0,137000.0,199160.0,492000.0,482479454.0
2018-12-01,16085.0,29965.17,37945.51,1000.0,1000.0,1000.0,2000.0,4000.0,8500.0,18000.0,36000.0,68000.0,97000.0,137000.0,199160.0,492000.0,481989774.0
2019-01-01,16085.0,29964.15,37941.87,1000.0,1000.0,1000.0,2000.0,4000.0,8500.0,18000.0,36000.0,68000.0,97000.0,137000.0,199160.0,492000.0,481973274.0
2019-02-01,16085.0,29953.66,37935.81,1000.0,1000.0,1000.0,2000.0,4000.0,8500.0,18000.0,36000.0,68000.0,97000.0,137000.0,199160.0,492000.0,481804674.0
2019-03-01,16085.0,29955.91,37937.74,1000.0,1000.0,1000.0,2000.0,4000.0,8500.0,18000.0,36000.0,68000.0,97000.0,137000.0,199160.0,492000.0,481840774.0
2019-04-01,16085.0,29937.84,37928.57,1000.0,1000.0,1000.0,2000.0,4000.0,8500.0,18000.0,36000.0,68000.0,97000.0,137000.0,199000.0,492000.0,481550224.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-09-01,16085.0,38389.68,43200.28,10.0,3500.0,5000.0,6000.0,9000.0,14000.0,25000.0,45000.0,81000.0,114000.0,164000.0,230160.0,518000.0,617498021.5
2018-10-01,16085.0,35764.33,41124.61,10.0,3000.0,3500.0,5000.0,7150.0,12000.0,23000.0,43000.0,77000.0,109880.0,154900.0,216000.0,492000.0,575269246.5
2018-11-01,16085.0,29993.75,37973.47,1000.0,1000.0,1000.0,2000.0,4000.0,8600.0,18000.0,36000.0,68000.0,98000.0,137000.0,199160.0,492000.0,482449454.0
2018-12-01,16085.0,29973.3,37945.6,1000.0,1000.0,1000.0,2000.0,4000.0,8600.0,18000.0,36000.0,68000.0,97000.0,137000.0,199000.0,492000.0,482120454.0
2019-01-01,16085.0,29965.26,37945.56,0.0,1000.0,1000.0,2000.0,4000.0,8500.0,18000.0,36000.0,68000.0,97000.0,137000.0,199160.0,492000.0,481991274.0
2019-02-01,16085.0,29795.37,37908.78,0.0,0.0,1000.0,2000.0,4000.0,8500.0,18000.0,36000.0,67500.0,97000.0,137000.0,199000.0,492000.0,479258474.0
2019-03-01,16071.0,29340.76,37634.26,0.0,0.0,1000.0,2000.0,3000.0,8000.0,17500.0,35000.0,67000.0,96000.0,136000.0,198000.0,492000.0,471535274.0
2019-04-01,16056.0,28890.12,37403.26,0.0,0.0,0.0,1000.0,3000.0,8000.0,17000.0,35000.0,66000.0,95000.0,134625.0,197000.0,492000.0,463859824.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-09-01,16085.0,5658.79,11100.67,0.0,97.26,174.23,285.56,453.2,984.75,2354.92,5734.81,12978.0,21060.77,32631.02,52351.92,262539.16,91021683.04
2018-10-01,16005.0,4607.65,10410.23,0.0,24.88,57.6,115.49,230.24,597.95,1602.97,4387.58,10705.8,17540.34,29100.63,46822.54,272135.22,73745490.49
2018-11-01,15846.0,4848.46,11555.4,0.0,12.78,40.61,88.84,196.95,549.8,1549.24,4425.73,11261.29,18980.01,31187.95,53135.69,287600.68,76828728.45
2018-12-01,15677.0,5307.45,12836.64,0.0,12.1,40.0,90.63,194.07,558.79,1599.04,4693.88,12518.26,21362.2,34775.44,60224.35,293940.6,83204906.04
2019-01-01,15304.0,5434.31,13667.46,0.0,11.73,37.9,83.33,183.31,518.69,1511.15,4653.41,12445.07,21732.44,36742.43,63990.17,308271.86,83166716.97
2019-02-01,15011.0,5411.08,13763.33,0.0,12.33,41.52,83.99,178.11,493.2,1430.02,4450.55,12885.68,22353.11,36677.31,67031.47,322610.09,81225759.62
2019-03-01,14626.0,5657.13,14471.44,0.0,14.12,41.98,84.02,178.14,497.33,1429.55,4583.5,13238.94,23948.03,39951.1,66918.2,261990.14,82741114.96
2019-04-01,14179.0,5776.1,15129.45,0.0,14.12,42.0,85.25,181.15,498.71,1409.72,4620.16,13165.02,24671.19,41360.9,72737.39,283135.71,81899356.07


Collect data for each subject

In [58]:
# "After" snapshot for the 2018-10 cohort: rows of month 2018-10 that carry an account number,
# reduced to one row per account.
# Within each account the rows are ranked by ascending reserva (sort_values puts NaN last by
# default) and only the first-ranked row is kept; set_index(verify_integrity=True) then raises
# if any account number is still duplicated.
rsgs_201810_1 = (
    rsgs_201810
    .loc[lambda d: d.mth.eq(pd.Timestamp(year=2018, month=10, day=1)) & d.ambs_acct.notna()]
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=True).groupby(by='ambs_acct').cumcount() + 1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_1', 'reserva': 'rva_1'})
)
with custom_formatting():
    display(rsgs_201810_1.head())
rsgs_201810_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023187960003192,98000.0,5776.46,0
4023187960004521,173000.0,61450.34,0
4023187960006393,219000.0,54705.58,0
4023187960010379,332000.0,67675.59,0
4023187960016855,68000.0,2660.71,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 16085 entries, 4023187960003192 to 5547640002461665
Data columns (total 3 columns):
cl_1     16085 non-null float64
rva_1    16005 non-null float64
i_na     16085 non-null int8
dtypes: float64(2), int8(1)
memory usage: 392.7 KB


In [59]:
# "Before" snapshot for the 2018-10 cohort: rows of month 2018-09 that carry an account number,
# reduced to one row per account.
# Here the rows are ranked by descending reserva (NaN still sorts last), so the row with the
# largest reserva is kept. Only accounts present in rsgs_201810_1 are retained.
rsgs_201810_0 = (
    rsgs_201810
    .loc[lambda d: d.mth.eq(pd.Timestamp(year=2018, month=9, day=1)) & d.ambs_acct.notna()]
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=False).groupby(by='ambs_acct').cumcount() + 1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=rsgs_201810_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_0', 'reserva': 'rva_0'})
)
with custom_formatting():
    display(rsgs_201810_0.head())
rsgs_201810_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4023187960003192,104000.0,5941.11
4023187960004521,183000.0,38216.98
4023187960006393,230000.0,68710.48
4023187960010379,380000.0,135659.7
4023187960016855,71000.0,2810.19


<class 'pandas.core.frame.DataFrame'>
Int64Index: 16085 entries, 4023187960003192 to 5547640002461665
Data columns (total 2 columns):
cl_0     16085 non-null float64
rva_0    16085 non-null float64
dtypes: float64(2)
memory usage: 377.0 KB


In [60]:
# Put the "before" columns (cl_0, rva_0) next to the "after" columns (cl_1, rva_1, i_na),
# matched on the ambs_acct index; every "before" account is kept (left join).
# NOTE: this rebinds rsgs_201810 from the monthly panel to the per-account table, so the cells
# that build rsgs_201810_0 / rsgs_201810_1 cannot be re-run afterwards without first
# recreating the original rsgs_201810.
rsgs_201810 = rsgs_201810_0.join(rsgs_201810_1, how='left')
with custom_formatting():
    display(rsgs_201810.head())
rsgs_201810.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4023187960003192,104000.0,5941.11,98000.0,5776.46,0
4023187960004521,183000.0,38216.98,173000.0,61450.34,0
4023187960006393,230000.0,68710.48,219000.0,54705.58,0
4023187960010379,380000.0,135659.7,332000.0,67675.59,0
4023187960016855,71000.0,2810.19,68000.0,2660.71,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 16085 entries, 4023187960003192 to 5547640002461665
Data columns (total 5 columns):
cl_0     16085 non-null float64
rva_0    16085 non-null float64
cl_1     16085 non-null float64
rva_1    16005 non-null float64
i_na     16085 non-null int8
dtypes: float64(4), int8(1)
memory usage: 1.3 MB


In [61]:
# Final per-account table for the 2018-10 cohort.
# The account-number index is discarded, and rva_e_0 / rva_e_1 are the reserva values with
# missing entries replaced by credit limit * 0.0418 * 0.67.
# NOTE(review): the meaning of the 0.0418 and 0.67 factors is not stated in this notebook section
# - confirm their source and consider naming them as constants.
rsgs_201810 = (
    rsgs_201810
    .reset_index(drop=True)
    .assign(
        rva_e_0=lambda d: d.rva_0.fillna(d.cl_0*0.0418*0.67),
        rva_e_1=lambda d: d.rva_1.fillna(d.cl_1*0.0418*0.67),
        by='rsgs',
        mth=pd.Timestamp(year=2018, month=10, day=1),
    )
    .filter(items=['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1'], axis=1)
)
with custom_formatting():
    display(rsgs_201810.head())
rsgs_201810.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1
0,rsgs,2018-10-01,0,104000.0,98000.0,5941.11,5776.46
1,rsgs,2018-10-01,0,183000.0,173000.0,38216.98,61450.34
2,rsgs,2018-10-01,0,230000.0,219000.0,68710.48,54705.58
3,rsgs,2018-10-01,0,380000.0,332000.0,135659.7,67675.59
4,rsgs,2018-10-01,0,71000.0,68000.0,2810.19,2660.71


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16085 entries, 0 to 16084
Data columns (total 7 columns):
by         16085 non-null object
mth        16085 non-null datetime64[ns]
i_na       16085 non-null int8
cl_0       16085 non-null float64
cl_1       16085 non-null float64
rva_e_0    16085 non-null float64
rva_e_1    16085 non-null float64
dtypes: datetime64[ns](1), float64(4), int8(1), object(1)
memory usage: 769.8+ KB


#### rsgs_201812

Find the date (month) of the credit limit decrease

In [62]:
# Per-month summary (describe + sum) of the two credit-limit variables and the reserva for the
# 2018-12 cohort, all using the same percentile grid (expressed as fractions).
summary_pctl = np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100
with custom_formatting():
    for summary_var in ('ambs_crlim', 'atsm_bs_crlim', 'reserva'):
        display(summary_(df_=rsgs_201812,
                         by_='mth',
                         var=summary_var,
                         percentiles_=summary_pctl))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-11-01,20730.0,38600.19,42386.96,10.0,3046.75,5000.0,6000.0,9000.0,14500.0,25500.0,46000.0,81000.0,112000.0,156000.0,223710.0,562000.0,800181970.5
2018-12-01,20730.0,29855.69,36121.35,1000.0,1000.0,1000.0,2000.0,4000.0,9000.0,18000.0,37000.0,67000.0,94000.0,132000.0,189000.0,474000.0,618908375.0
2019-01-01,20730.0,29856.84,36120.95,0.0,1000.0,1000.0,2000.0,4000.0,9000.0,18000.0,37000.0,67000.0,94000.0,132000.0,189000.0,474000.0,618932375.0
2019-02-01,20730.0,29844.82,36117.79,0.0,1000.0,1000.0,2000.0,4000.0,9000.0,18000.0,37000.0,67000.0,94000.0,132000.0,189000.0,474000.0,618683038.0
2019-03-01,20730.0,29845.29,36115.18,0.0,1000.0,1000.0,2000.0,4000.0,9000.0,18000.0,37000.0,67000.0,94000.0,132000.0,189000.0,474000.0,618692838.0
2019-04-01,20730.0,29831.56,36104.97,0.0,1000.0,1000.0,2000.0,4000.0,9000.0,18000.0,37000.0,67000.0,93775.0,132000.0,189000.0,474000.0,618408288.0
2019-05-01,20730.0,29811.35,36090.53,0.0,1000.0,1000.0,2000.0,4000.0,9000.0,18000.0,37000.0,67000.0,93275.0,132000.0,188710.0,474000.0,617989388.0
2019-06-01,20730.0,29296.48,35646.59,0.0,1000.0,1000.0,2000.0,4000.0,9000.0,18000.0,36000.0,66000.0,92275.0,130000.0,184710.0,474000.0,607315988.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-11-01,20730.0,38600.19,42386.96,10.0,3046.75,5000.0,6000.0,9000.0,14500.0,25500.0,46000.0,81000.0,112000.0,156000.0,223710.0,562000.0,800181970.5
2018-12-01,20730.0,36163.87,40103.78,10.0,3000.0,4000.0,5000.0,8000.0,13000.0,24000.0,44000.0,77000.0,106500.0,149000.0,204000.0,521000.0,749676969.5
2019-01-01,20729.0,29856.5,36122.04,1000.0,1000.0,1000.0,2000.0,4000.0,9000.0,18000.0,37000.0,67000.0,94000.0,132000.0,189000.0,474000.0,618895375.0
2019-02-01,20729.0,29853.92,36121.27,0.0,1000.0,1000.0,2000.0,4000.0,9000.0,18000.0,37000.0,67000.0,94000.0,132000.0,189000.0,474000.0,618841988.0
2019-03-01,20729.0,29838.17,36116.15,0.0,1000.0,1000.0,2000.0,4000.0,9000.0,18000.0,37000.0,67000.0,94000.0,132000.0,189000.0,474000.0,618515438.0
2019-04-01,20727.0,29664.16,35944.89,0.0,1000.0,1000.0,2000.0,4000.0,9000.0,18000.0,37000.0,67000.0,93000.0,131850.0,186740.0,474000.0,614849088.0
2019-05-01,20720.0,29348.99,35819.67,0.0,0.0,1000.0,2000.0,3900.0,8500.0,18000.0,36350.0,66000.0,92525.0,130000.0,186000.0,474000.0,608111088.0
2019-06-01,20706.0,28952.92,35634.61,0.0,0.0,1000.0,2000.0,3000.0,8000.0,18000.0,36000.0,66000.0,92000.0,129000.0,183950.0,474000.0,599499088.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-11-01,20730.0,5190.63,11037.49,0.0,99.5,183.19,287.36,452.4,954.63,2223.42,5209.96,11320.7,18471.14,28520.57,47707.26,331618.03,107601737.78
2018-12-01,20630.0,4095.43,10163.49,0.0,15.17,48.61,101.14,200.77,555.19,1466.23,3890.87,9142.86,15301.06,23836.42,42972.01,334609.3,84488668.72
2019-01-01,20389.0,4100.62,10464.84,0.0,8.39,32.23,74.54,165.43,500.84,1351.71,3676.28,9007.04,15779.15,25408.88,47087.37,283213.73,83607510.25
2019-02-01,20228.0,4491.63,11532.79,0.0,10.77,40.81,87.6,186.69,535.66,1407.89,3965.78,10004.95,17639.75,28391.57,52561.41,295408.28,90856746.0
2019-03-01,19906.0,4841.33,12483.68,0.0,13.92,42.75,90.05,181.02,510.42,1377.74,4119.78,11116.31,19505.6,32432.36,57561.22,325334.94,96371530.61
2019-04-01,19584.0,4852.86,13063.51,0.0,8.48,41.61,89.29,180.77,484.44,1302.27,3828.82,10927.53,20197.11,33792.9,58398.46,362493.38,95038399.89
2019-05-01,19177.0,4937.84,13510.91,0.0,8.76,39.58,84.48,176.49,470.72,1263.78,3735.5,11210.02,21098.8,35079.01,60082.92,382542.34,94692959.77
2019-06-01,18660.0,4898.48,13418.36,0.0,8.04,36.18,83.12,175.21,463.39,1243.69,3693.69,11049.13,21166.18,34674.99,61175.89,397165.98,91405661.16


Collect data for each subject

In [63]:
# "After" snapshot for the 2018-12 cohort: rows of month 2018-12 that carry an account number,
# reduced to one row per account.
# Within each account the rows are ranked by ascending reserva (sort_values puts NaN last by
# default) and only the first-ranked row is kept; set_index(verify_integrity=True) then raises
# if any account number is still duplicated.
rsgs_201812_1 = (
    rsgs_201812
    .loc[lambda d: d.mth.eq(pd.Timestamp(year=2018, month=12, day=1)) & d.ambs_acct.notna()]
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=True).groupby(by='ambs_acct').cumcount() + 1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_1', 'reserva': 'rva_1'})
)
with custom_formatting():
    display(rsgs_201812_1.head())
rsgs_201812_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023187960009744,205000.0,14307.89,0
4023187960013266,259000.0,32406.01,0
4023187960014363,160000.0,36628.17,0
4023187960018307,201000.0,14522.75,0
4023187960022911,148000.0,21314.2,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 20730 entries, 4023187960009744 to 5547640002597070
Data columns (total 3 columns):
cl_1     20730 non-null float64
rva_1    20630 non-null float64
i_na     20730 non-null int8
dtypes: float64(2), int8(1)
memory usage: 506.1 KB


In [64]:
# "Before" snapshot for the 2018-12 cohort: rows of month 2018-11 that carry an account number,
# reduced to one row per account.
# Here the rows are ranked by descending reserva (NaN still sorts last), so the row with the
# largest reserva is kept. Only accounts present in rsgs_201812_1 are retained.
rsgs_201812_0 = (
    rsgs_201812
    .loc[lambda d: d.mth.eq(pd.Timestamp(year=2018, month=11, day=1)) & d.ambs_acct.notna()]
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=False).groupby(by='ambs_acct').cumcount() + 1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=rsgs_201812_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_0', 'reserva': 'rva_0'})
)
with custom_formatting():
    display(rsgs_201812_0.head())
rsgs_201812_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4023187960009744,237000.0,16600.98
4023187960013266,275500.0,31352.75
4023187960014363,176000.0,17429.38
4023187960018307,260000.0,18936.16
4023187960022911,332000.0,34966.83


<class 'pandas.core.frame.DataFrame'>
Int64Index: 20730 entries, 4023187960009744 to 5547640002597070
Data columns (total 2 columns):
cl_0     20730 non-null float64
rva_0    20730 non-null float64
dtypes: float64(2)
memory usage: 485.9 KB


In [65]:
# Put the "before" columns (cl_0, rva_0) next to the "after" columns (cl_1, rva_1, i_na),
# matched on the ambs_acct index; every "before" account is kept (left join).
# NOTE: this rebinds rsgs_201812 from the monthly panel to the per-account table, so the cells
# that build rsgs_201812_0 / rsgs_201812_1 cannot be re-run afterwards without first
# recreating the original rsgs_201812.
rsgs_201812 = rsgs_201812_0.join(rsgs_201812_1, how='left')
with custom_formatting():
    display(rsgs_201812.head())
rsgs_201812.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4023187960009744,237000.0,16600.98,205000.0,14307.89,0
4023187960013266,275500.0,31352.75,259000.0,32406.01,0
4023187960014363,176000.0,17429.38,160000.0,36628.17,0
4023187960018307,260000.0,18936.16,201000.0,14522.75,0
4023187960022911,332000.0,34966.83,148000.0,21314.2,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 20730 entries, 4023187960009744 to 5547640002597070
Data columns (total 5 columns):
cl_0     20730 non-null float64
rva_0    20730 non-null float64
cl_1     20730 non-null float64
rva_1    20630 non-null float64
i_na     20730 non-null int8
dtypes: float64(4), int8(1)
memory usage: 1.4 MB


In [66]:
# Final per-account table for the 2018-12 cohort.
# The account-number index is discarded, and rva_e_0 / rva_e_1 are the reserva values with
# missing entries replaced by credit limit * 0.0418 * 0.67.
# NOTE(review): the meaning of the 0.0418 and 0.67 factors is not stated in this notebook section
# - confirm their source and consider naming them as constants.
rsgs_201812 = (
    rsgs_201812
    .reset_index(drop=True)
    .assign(
        rva_e_0=lambda d: d.rva_0.fillna(d.cl_0*0.0418*0.67),
        rva_e_1=lambda d: d.rva_1.fillna(d.cl_1*0.0418*0.67),
        by='rsgs',
        mth=pd.Timestamp(year=2018, month=12, day=1),
    )
    .filter(items=['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1'], axis=1)
)
with custom_formatting():
    display(rsgs_201812.head())
rsgs_201812.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1
0,rsgs,2018-12-01,0,237000.0,205000.0,16600.98,14307.89
1,rsgs,2018-12-01,0,275500.0,259000.0,31352.75,32406.01
2,rsgs,2018-12-01,0,176000.0,160000.0,17429.38,36628.17
3,rsgs,2018-12-01,0,260000.0,201000.0,18936.16,14522.75
4,rsgs,2018-12-01,0,332000.0,148000.0,34966.83,21314.2


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20730 entries, 0 to 20729
Data columns (total 7 columns):
by         20730 non-null object
mth        20730 non-null datetime64[ns]
i_na       20730 non-null int8
cl_0       20730 non-null float64
cl_1       20730 non-null float64
rva_e_0    20730 non-null float64
rva_e_1    20730 non-null float64
dtypes: datetime64[ns](1), float64(4), int8(1), object(1)
memory usage: 992.0+ KB


#### rsgs_201902

Find the date (month) of the credit limit decrease

In [67]:
# Per-month summary (describe + sum) of the two credit-limit variables and the reserva for the
# 2019-02 cohort, all using the same percentile grid (expressed as fractions).
summary_pctl = np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100
with custom_formatting():
    for summary_var in ('ambs_crlim', 'atsm_bs_crlim', 'reserva'):
        display(summary_(df_=rsgs_201902,
                         by_='mth',
                         var=summary_var,
                         percentiles_=summary_pctl))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2019-01-01,17823.0,39877.85,43433.61,3000.0,4000.0,5000.0,6610.0,9000.0,15000.0,26000.0,48000.0,83000.0,120000.0,165000.0,234780.0,519000.0,710742902.0
2019-02-01,17823.0,28435.55,37266.13,300.0,1100.0,1100.0,1400.0,2500.0,7200.0,16300.0,35200.0,65300.0,94780.0,132300.0,197190.0,509000.0,506806850.0
2019-03-01,17823.0,31332.53,38401.5,1000.0,1000.0,2000.0,2500.0,4000.0,9000.0,19000.0,39000.0,70000.0,100000.0,139000.0,202500.0,511000.0,558439730.0
2019-04-01,17823.0,31310.75,38387.96,1000.0,1000.0,2000.0,2500.0,4000.0,9000.0,19000.0,39000.0,70000.0,100000.0,139000.0,202500.0,511000.0,558051530.0
2019-05-01,17823.0,31311.65,38387.49,1000.0,1000.0,2000.0,2500.0,4000.0,9000.0,19000.0,39000.0,70000.0,100000.0,139000.0,202500.0,511000.0,558067530.0
2019-06-01,17823.0,30710.6,38009.65,1000.0,1000.0,2000.0,2000.0,4000.0,9000.0,18500.0,38000.0,69000.0,98000.0,137000.0,201000.0,511000.0,547355030.0
2019-07-01,17823.0,30561.69,37938.24,1000.0,1000.0,2000.0,2000.0,4000.0,9000.0,18000.0,37500.0,68000.0,98000.0,137000.0,201000.0,511000.0,544701030.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2019-01-01,17823.0,39877.85,43433.61,3000.0,4000.0,5000.0,6610.0,9000.0,15000.0,26000.0,48000.0,83000.0,120000.0,165000.0,234780.0,519000.0,710742902.0
2019-02-01,17823.0,38717.26,41707.15,600.0,3000.0,5000.0,6000.0,9000.0,15000.0,26000.0,47000.0,81000.0,115000.0,158000.0,224000.0,519000.0,690057722.0
2019-03-01,17822.0,29888.51,38024.74,300.0,1000.0,1100.0,2000.0,3000.0,8000.0,17700.0,37000.0,68000.0,98000.0,137000.0,201000.0,511000.0,532673000.0
2019-04-01,17823.0,31313.32,38382.01,1000.0,1000.0,2000.0,2500.0,4000.0,9000.0,19000.0,39000.0,70000.0,100000.0,139000.0,202500.0,511000.0,558097230.0
2019-05-01,17823.0,31310.36,38388.25,0.0,1000.0,2000.0,2500.0,4000.0,9000.0,19000.0,39000.0,70000.0,100000.0,139000.0,202500.0,511000.0,558044530.0
2019-06-01,17822.0,31071.63,38213.86,0.0,1000.0,1000.0,2000.0,4000.0,9000.0,19000.0,38500.0,69500.0,99500.0,138000.0,201000.0,511000.0,553758530.0
2019-07-01,17821.0,30173.81,37769.4,0.0,0.0,1000.0,2000.0,3500.0,9000.0,18000.0,37000.0,67500.0,98000.0,136000.0,200000.0,511000.0,537727530.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2019-01-01,17823.0,5088.5,10699.38,0.0,93.0,172.64,271.54,440.71,947.5,2230.05,5242.09,11185.45,18217.26,27284.14,44034.11,360733.6,90692365.16
2019-02-01,17768.0,4007.12,9643.2,0.0,10.62,31.16,69.67,161.71,525.48,1476.17,3904.58,9122.38,14832.49,23489.73,39643.96,367066.4,71198425.1
2019-03-01,17612.0,4409.58,10806.22,0.0,12.7,40.03,87.56,186.86,531.46,1475.34,4040.13,10025.69,17480.83,27695.61,46122.03,395622.34,77661453.39
2019-04-01,17474.0,5090.63,12076.53,0.0,16.65,50.04,105.15,226.01,627.57,1669.19,4535.34,11554.45,20878.64,32923.38,55456.63,264950.1,88953625.86
2019-05-01,17177.0,5342.86,13265.13,0.0,15.24,42.0,98.24,203.76,566.31,1532.77,4549.85,12329.34,22404.45,35839.75,61684.1,324349.58,91774236.4
2019-06-01,16878.0,5512.29,14280.48,0.0,14.78,45.05,98.44,197.58,547.91,1477.9,4512.64,12581.63,23483.63,37244.51,66120.29,375025.81,93036405.61
2019-07-01,16402.0,5649.54,14651.02,0.0,14.97,42.81,88.37,182.62,539.72,1454.08,4520.76,13145.35,24175.56,39762.88,67485.75,385026.5,92663728.52


Collect data for each subject

In [68]:
# "After" snapshot for the 2019-02 cohort: rows of month 2019-02 that carry an account number,
# reduced to one row per account.
# Within each account the rows are ranked by ascending reserva (sort_values puts NaN last by
# default) and only the first-ranked row is kept; set_index(verify_integrity=True) then raises
# if any account number is still duplicated.
rsgs_201902_1 = (
    rsgs_201902
    .loc[lambda d: d.mth.eq(pd.Timestamp(year=2019, month=2, day=1)) & d.ambs_acct.notna()]
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=True).groupby(by='ambs_acct').cumcount() + 1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_1', 'reserva': 'rva_1'})
)
with custom_formatting():
    display(rsgs_201902_1.head())
rsgs_201902_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4023187960005122,218900.0,28104.05,0
4023187960007201,297000.0,53565.13,0
4023187960007607,196000.0,59040.28,0
4023187960009637,74200.0,9401.73,0
4023187960015543,93000.0,1691.08,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 17823 entries, 4023187960005122 to 5547640002725945
Data columns (total 3 columns):
cl_1     17823 non-null float64
rva_1    17768 non-null float64
i_na     17823 non-null int8
dtypes: float64(2), int8(1)
memory usage: 435.1 KB


In [69]:
# "Before" snapshot for the 2019-02 cohort: rows of month 2019-01 that carry an account number,
# reduced to one row per account.
# Here the rows are ranked by descending reserva (NaN still sorts last), so the row with the
# largest reserva is kept. Only accounts present in rsgs_201902_1 are retained.
rsgs_201902_0 = (
    rsgs_201902
    .loc[lambda d: d.mth.eq(pd.Timestamp(year=2019, month=1, day=1)) & d.ambs_acct.notna()]
    .assign(rn=lambda d: d.sort_values(by='reserva', ascending=False).groupby(by='ambs_acct').cumcount() + 1)
    .query('rn==1')
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=rsgs_201902_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_0', 'reserva': 'rva_0'})
)
with custom_formatting():
    display(rsgs_201902_0.head())
rsgs_201902_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4023187960005122,226100.0,28138.13
4023187960007201,300000.0,71772.45
4023187960007607,208000.0,61636.27
4023187960009637,80000.0,9192.12
4023187960015543,93000.0,2231.27


<class 'pandas.core.frame.DataFrame'>
Int64Index: 17823 entries, 4023187960005122 to 5547640002725945
Data columns (total 2 columns):
cl_0     17823 non-null float64
rva_0    17823 non-null float64
dtypes: float64(2)
memory usage: 417.7 KB


In [70]:
# Put each account's January values (cl_0, rva_0) next to its February values
# (cl_1, rva_1, i_na). Both frames are indexed by ambs_acct, so this is an
# index-on-index left join.
# NOTE: this rebinds rsgs_201902, which until now held the monthly source rows;
# the cells that derive rsgs_201902_0 / rsgs_201902_1 from it cannot be re-run
# after this one without rebuilding the source frame.
rsgs_201902 = rsgs_201902_0.join(rsgs_201902_1, how='left')
with custom_formatting():
    display(rsgs_201902.head())
rsgs_201902.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4023187960005122,226100.0,28138.13,218900.0,28104.05,0
4023187960007201,300000.0,71772.45,297000.0,53565.13,0
4023187960007607,208000.0,61636.27,196000.0,59040.28,0
4023187960009637,80000.0,9192.12,74200.0,9401.73,0
4023187960015543,93000.0,2231.27,93000.0,1691.08,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 17823 entries, 4023187960005122 to 5547640002725945
Data columns (total 5 columns):
cl_0     17823 non-null float64
rva_0    17823 non-null float64
cl_1     17823 non-null float64
rva_1    17768 non-null float64
i_na     17823 non-null int8
dtypes: float64(4), int8(1)
memory usage: 1.3 MB


In [71]:
# Finalise the February 2019 'rsgs' campaign frame:
#   * drop the account index (rows become anonymous, 0..n-1),
#   * rva_e_0 / rva_e_1: the observed reserve where present, otherwise
#     credit limit * 0.0418 * 0.67
#     (NOTE(review): the meaning of these two factors is not documented here),
#   * tag every row with the campaign type and campaign month.
rsgs_201902 = (
    rsgs_201902
    .reset_index(drop=True)
    .assign(
        rva_e_0=lambda d: np.where(d.rva_0.notna(), d.rva_0, d.cl_0*0.0418*0.67),
        rva_e_1=lambda d: np.where(d.rva_1.notna(), d.rva_1, d.cl_1*0.0418*0.67),
        by='rsgs',
        mth=pd.Timestamp('2019-02-01'),
    )
    .loc[:, ['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1']]
)
with custom_formatting():
    display(rsgs_201902.head())
rsgs_201902.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1
0,rsgs,2019-02-01,0,226100.0,218900.0,28138.13,28104.05
1,rsgs,2019-02-01,0,300000.0,297000.0,71772.45,53565.13
2,rsgs,2019-02-01,0,208000.0,196000.0,61636.27,59040.28
3,rsgs,2019-02-01,0,80000.0,74200.0,9192.12,9401.73
4,rsgs,2019-02-01,0,93000.0,93000.0,2231.27,1691.08


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17823 entries, 0 to 17822
Data columns (total 7 columns):
by         17823 non-null object
mth        17823 non-null datetime64[ns]
i_na       17823 non-null int8
cl_0       17823 non-null float64
cl_1       17823 non-null float64
rva_e_0    17823 non-null float64
rva_e_1    17823 non-null float64
dtypes: datetime64[ns](1), float64(4), int8(1), object(1)
memory usage: 852.9+ KB


#### rsgs_201907

Find the month in which the credit limit decrease took effect

In [72]:
# Per-month summary (describe + sum) of both credit-limit fields and of the
# reserve, displayed in that order, to see between which months the values drop.
with custom_formatting():
    for summary_var in ('ambs_crlim', 'atsm_bs_crlim', 'reserva'):
        display(summary_(df_=rsgs_201907,
                         by_='mth',
                         var=summary_var,
                         percentiles_=np.array([1,2.5,5,10,25,50,75,90,95,97.5,99])/100))

Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2019-06-01,7909.0,33274.12,39743.81,4000.0,4000.0,5000.0,6000.0,8000.0,12000.0,21000.0,39000.0,70000.0,100800.0,140650.0,214520.0,700000.0,263165045.0
2019-07-01,7909.0,25036.14,32898.62,2000.0,2000.0,2000.0,2000.0,3000.0,7000.0,15000.0,30000.0,55000.0,83000.0,112000.0,160920.0,482000.0,198010850.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2019-06-01,7909.0,33250.92,39712.08,3000.0,4000.0,5000.0,5500.0,7995.0,12000.0,21000.0,39000.0,70000.0,100800.0,140650.0,208960.0,700000.0,262981545.0
2019-07-01,7909.0,28489.42,35017.8,2000.0,2000.0,2000.0,4000.0,5000.0,10000.0,18000.0,34000.0,60000.0,90000.0,119300.0,180840.0,482000.0,225322846.0


Unnamed: 0_level_0,count,mean,std,min,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,max,sum
mth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2019-06-01,7909.0,5711.48,11763.02,0.0,88.87,178.22,257.42,420.09,904.12,2127.93,5793.53,13595.14,21973.83,33830.0,52703.99,344056.16,45172101.02
2019-07-01,7808.0,4722.22,11810.65,0.0,20.02,56.01,100.95,203.76,522.32,1481.31,4281.62,11029.53,18125.89,29453.59,50816.5,364522.02,36871104.55


Collect data for each subject

In [73]:
# July 2019 snapshot, reduced to one row per account.
# Rows of each account are ranked by ascending `reserva` and only rank 1 is
# kept, i.e. the smallest reserve. NaN sorts last, so a missing reserve is kept
# only when the account has no non-missing value for the month.
rsgs_201907_1 = (
    rsgs_201907
    .loc[lambda d: (d.mth == pd.Timestamp('2019-07-01')) & d.ambs_acct.notna()]
    .assign(rn=lambda d: (d.sort_values(by='reserva', ascending=True)
                           .groupby(by='ambs_acct')
                           .cumcount() + 1))
    .query('rn==1')
    # verify_integrity raises if any account still appears more than once
    .set_index('ambs_acct', verify_integrity=True)
    .filter(items=['ambs_crlim', 'reserva', 'i_na'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_1', 'reserva': 'rva_1'})
)
with custom_formatting():
    display(rsgs_201907_1.head())
rsgs_201907_1.info()

Unnamed: 0_level_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4931730010144655,48000.0,16988.22,0
4931720041790667,9000.0,514.61,0
4913660000229343,56400.0,6454.17,0
4931730012126833,98000.0,55192.09,0
5445480011833379,13000.0,2387.23,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 7909 entries, 4931730010144655 to 4931720038433362
Data columns (total 3 columns):
cl_1     7909 non-null float64
rva_1    7808 non-null float64
i_na     7909 non-null int8
dtypes: float64(2), int8(1)
memory usage: 193.1 KB


In [74]:
# June 2019 snapshot, reduced to one row per account.
# Rows of each account are ranked by descending `reserva` and only rank 1 is
# kept, i.e. the largest reserve (NaN still sorts last).
rsgs_201907_0 = (
    rsgs_201907
    .loc[lambda d: (d.mth == pd.Timestamp('2019-06-01')) & d.ambs_acct.notna()]
    .assign(rn=lambda d: (d.sort_values(by='reserva', ascending=False)
                           .groupby(by='ambs_acct')
                           .cumcount() + 1))
    .query('rn==1')
    # verify_integrity raises if any account still appears more than once
    .set_index('ambs_acct', verify_integrity=True)
    # keep only accounts that are also present in the July snapshot
    .filter(items=rsgs_201907_1.index, axis=0)
    .filter(items=['ambs_crlim', 'reserva'], axis=1)
    .rename(columns={'ambs_crlim': 'cl_0', 'reserva': 'rva_0'})
)
with custom_formatting():
    display(rsgs_201907_0.head())
rsgs_201907_0.info()

Unnamed: 0_level_0,cl_0,rva_0
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1
4931730010144655,62000.0,40190.79
4931720041790667,14000.0,1222.48
4913660000229343,56400.0,6626.55
4931730012126833,105000.0,57623.38
5445480011833379,15000.0,6899.76


<class 'pandas.core.frame.DataFrame'>
Int64Index: 7909 entries, 4931730010144655 to 4931720038433362
Data columns (total 2 columns):
cl_0     7909 non-null float64
rva_0    7909 non-null float64
dtypes: float64(2)
memory usage: 185.4 KB


In [75]:
# Put each account's June values (cl_0, rva_0) next to its July values
# (cl_1, rva_1, i_na). Both frames are indexed by ambs_acct, so this is an
# index-on-index left join.
# NOTE: this rebinds rsgs_201907, which until now held the monthly source rows;
# the cells that derive rsgs_201907_0 / rsgs_201907_1 from it cannot be re-run
# after this one without rebuilding the source frame.
rsgs_201907 = rsgs_201907_0.join(rsgs_201907_1, how='left')
with custom_formatting():
    display(rsgs_201907.head())
rsgs_201907.info()

Unnamed: 0_level_0,cl_0,rva_0,cl_1,rva_1,i_na
ambs_acct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4931730010144655,62000.0,40190.79,48000.0,16988.22,0
4931720041790667,14000.0,1222.48,9000.0,514.61,0
4913660000229343,56400.0,6626.55,56400.0,6454.17,0
4931730012126833,105000.0,57623.38,98000.0,55192.09,0
5445480011833379,15000.0,6899.76,13000.0,2387.23,0


<class 'pandas.core.frame.DataFrame'>
Int64Index: 7909 entries, 4931730010144655 to 4931720038433362
Data columns (total 5 columns):
cl_0     7909 non-null float64
rva_0    7909 non-null float64
cl_1     7909 non-null float64
rva_1    7808 non-null float64
i_na     7909 non-null int8
dtypes: float64(4), int8(1)
memory usage: 636.7 KB


In [76]:
# Finalise the July 2019 'rsgs' campaign frame:
#   * drop the account index (rows become anonymous, 0..n-1),
#   * rva_e_0 / rva_e_1: the observed reserve where present, otherwise
#     credit limit * 0.0418 * 0.67
#     (NOTE(review): the meaning of these two factors is not documented here),
#   * tag every row with the campaign type and campaign month.
rsgs_201907 = (
    rsgs_201907
    .reset_index(drop=True)
    .assign(
        rva_e_0=lambda d: np.where(d.rva_0.notna(), d.rva_0, d.cl_0*0.0418*0.67),
        rva_e_1=lambda d: np.where(d.rva_1.notna(), d.rva_1, d.cl_1*0.0418*0.67),
        by='rsgs',
        mth=pd.Timestamp('2019-07-01'),
    )
    .loc[:, ['by', 'mth', 'i_na', 'cl_0', 'cl_1', 'rva_e_0', 'rva_e_1']]
)
with custom_formatting():
    display(rsgs_201907.head())
rsgs_201907.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1
0,rsgs,2019-07-01,0,62000.0,48000.0,40190.79,16988.22
1,rsgs,2019-07-01,0,14000.0,9000.0,1222.48,514.61
2,rsgs,2019-07-01,0,56400.0,56400.0,6626.55,6454.17
3,rsgs,2019-07-01,0,105000.0,98000.0,57623.38,55192.09
4,rsgs,2019-07-01,0,15000.0,13000.0,6899.76,2387.23


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7909 entries, 0 to 7908
Data columns (total 7 columns):
by         7909 non-null object
mth        7909 non-null datetime64[ns]
i_na       7909 non-null int8
cl_0       7909 non-null float64
cl_1       7909 non-null float64
rva_e_0    7909 non-null float64
rva_e_1    7909 non-null float64
dtypes: datetime64[ns](1), float64(4), int8(1), object(1)
memory usage: 378.5+ KB


Concatenate all campaigns for grouped summarization

In [77]:
# Stack every campaign frame (4 'inact' + 9 'rsgs') into one long table with a
# fresh 0..n-1 index. Columns are unioned in order of first appearance
# (sort=False); a frame that lacks a column gets NaN there, e.g. the rsgs
# frames built above carry no `i_c`.
# NOTE: this rebinds `df`, which earlier in the notebook held the raw cld.csv
# import.
campaign_frames = [
    inact_201710, inact_201804, inact_201808, inact_201901,
    rsgs_201712, rsgs_201802, rsgs_201804, rsgs_201806, rsgs_201808,
    rsgs_201810, rsgs_201812, rsgs_201902, rsgs_201907,
]
df = pd.concat(campaign_frames, ignore_index=True, sort=False)
with custom_formatting():
    display(df.sample(n=5))
df.info()

Unnamed: 0,by,mth,i_na,cl_0,cl_1,rva_e_0,rva_e_1,i_c
269753,inact,2017-10-01,1,9.0,9.0,0.25,0.25,0.0
775304,rsgs,2018-10-01,0,11000.0,4000.0,5982.31,527.69,
306308,inact,2018-04-01,1,8500.0,11.0,238.05,0.31,1.0
306986,inact,2018-04-01,1,5000.0,11.0,140.03,0.31,1.0
217732,inact,2017-10-01,1,2000.0,9.0,56.01,0.25,1.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 822987 entries, 0 to 822986
Data columns (total 8 columns):
by         822987 non-null object
mth        822987 non-null datetime64[ns]
i_na       822987 non-null int8
cl_0       822987 non-null float64
cl_1       822987 non-null float64
rva_e_0    822987 non-null float64
rva_e_1    822987 non-null float64
i_c        635042 non-null float64
dtypes: datetime64[ns](1), float64(5), int8(1), object(1)
memory usage: 44.7+ MB


In [78]:
def _campaign_counts(g):
    """Return a one-row frame of counts (*_n) and shares (*_p) for one group.

    prosp_n        number of rows in the group
    na_n / na_p    sum / mean of the i_na flag
    rd_*           rows where both the credit limit and the estimated reserve
                   went down (cl_1 < cl_0 and rva_e_1 < rva_e_0)
    rd_ea_*        the same, restricted to rows with i_na == 0
    c_*            the same, restricted to rows with i_c == 1
                   (rows whose i_c is NaN never match)
    """
    reduced = (g.cl_1 < g.cl_0) & (g.rva_e_1 < g.rva_e_0)
    reduced_na0 = (g.i_na == 0) & reduced
    reduced_c1 = (g.i_c == 1) & reduced
    return pd.DataFrame({'prosp_n': [g.shape[0]],
                         'na_n': [np.sum(g.i_na)],
                         'na_p': [np.mean(g.i_na)],
                         'rd_n': [np.sum(reduced)],
                         'rd_p': [np.mean(reduced)],
                         'rd_ea_n': [np.sum(reduced_na0)],
                         'rd_ea_p': [np.mean(reduced_na0)],
                         'c_n': [np.sum(reduced_c1)],
                         'c_p': [np.mean(reduced_c1)]})


cld_n = df.groupby(by=['by', 'mth']).apply(_campaign_counts)
# apply() appends the helper's own one-row index (always 0) as a third index
# level; drop it so the result is keyed by (by, mth) only.
cld_n = cld_n.reset_index(level=2, drop=True)
with custom_formatting():
    display(cld_n)

Unnamed: 0_level_0,Unnamed: 1_level_0,prosp_n,na_n,na_p,rd_n,rd_p,rd_ea_n,rd_ea_p,c_n,c_p
by,mth,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
inact,2017-10-01,279060,239832,0.86,205840,0.74,22362,0.08,196164,0.7
inact,2018-04-01,175941,170646,0.97,174655,0.99,4403,0.03,146925,0.84
inact,2018-08-01,94626,90837,0.96,94084,0.99,3332,0.04,89310,0.94
inact,2019-01-01,85415,83153,0.97,84277,0.99,1927,0.02,81249,0.95
rsgs,2017-12-01,34179,9,0.0,22645,0.66,22636,0.66,0,0.0
rsgs,2018-02-01,20467,0,0.0,14207,0.69,14207,0.69,0,0.0
rsgs,2018-04-01,26656,0,0.0,17133,0.64,17133,0.64,0,0.0
rsgs,2018-06-01,21628,0,0.0,16021,0.74,16021,0.74,0,0.0
rsgs,2018-08-01,22468,0,0.0,14426,0.64,14426,0.64,0,0.0
rsgs,2018-10-01,16085,0,0.0,11783,0.73,11783,0.73,0,0.0


In [79]:
# Split the (by, mth) table per campaign type. For 'rsgs' only prosp_n and the
# rd_ea_* columns are kept.
cld_inact_n = cld_n.loc['inact']
cld_rsgs_n = cld_n.loc['rsgs'].drop(columns=['na_n', 'na_p', 'rd_n', 'rd_p', 'c_n', 'c_p'])
# Add the campaign type as a top column level so the two tables can sit side
# by side without name clashes.
for campaign_label, campaign_table in (('inact', cld_inact_n), ('rsgs', cld_rsgs_n)):
    campaign_table.columns = pd.MultiIndex.from_product([[campaign_label], campaign_table.columns])
# Outer join on mth; a month without a campaign of one type shows 0 for it.
cld_n = cld_inact_n.join(cld_rsgs_n, how='outer').fillna(0)
with custom_formatting():
    display(cld_n)

Unnamed: 0_level_0,inact,inact,inact,inact,inact,inact,inact,inact,inact,rsgs,rsgs,rsgs
Unnamed: 0_level_1,prosp_n,na_n,na_p,rd_n,rd_p,rd_ea_n,rd_ea_p,c_n,c_p,prosp_n,rd_ea_n,rd_ea_p
mth,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2017-10-01,279060.0,239832.0,0.86,205840.0,0.74,22362.0,0.08,196164.0,0.7,0.0,0.0,0.0
2017-12-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,34179.0,22636.0,0.66
2018-02-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20467.0,14207.0,0.69
2018-04-01,175941.0,170646.0,0.97,174655.0,0.99,4403.0,0.03,146925.0,0.84,26656.0,17133.0,0.64
2018-06-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21628.0,16021.0,0.74
2018-08-01,94626.0,90837.0,0.96,94084.0,0.99,3332.0,0.04,89310.0,0.94,22468.0,14426.0,0.64
2018-10-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16085.0,11783.0,0.73
2018-12-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20730.0,16082.0,0.78
2019-01-01,85415.0,83153.0,0.97,84277.0,0.99,1927.0,0.02,81249.0,0.95,0.0,0.0,0.0
2019-02-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17823.0,13338.0,0.75


In [80]:
# Expand the table to a continuous month-start calendar running from the first
# to the last campaign month. Months without any campaign are added as rows and
# filled with 0.
# reindex() does this directly; it replaces the earlier construction of an
# empty MultiIndex-column frame that was left-joined with cld_n and then sorted
# (date_range is already ascending, so no sort is needed).
cld_n = (
    cld_n
    .reindex(pd.date_range(start=cld_n.index.min(),
                           end=cld_n.index.max(),
                           freq='MS',
                           name='mth'))
    .fillna(0)
)
# Persist the monthly count table next to the notebook.
cld_n.to_csv('cld_n.csv')
with custom_formatting():
    display(cld_n)

Unnamed: 0_level_0,inact,inact,inact,inact,inact,inact,inact,inact,inact,rsgs,rsgs,rsgs
Unnamed: 0_level_1,prosp_n,na_n,na_p,rd_n,rd_p,rd_ea_n,rd_ea_p,c_n,c_p,prosp_n,rd_ea_n,rd_ea_p
mth,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2017-10-01,279060.0,239832.0,0.86,205840.0,0.74,22362.0,0.08,196164.0,0.7,0.0,0.0,0.0
2017-11-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-12-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,34179.0,22636.0,0.66
2018-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2018-02-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20467.0,14207.0,0.69
2018-03-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2018-04-01,175941.0,170646.0,0.97,174655.0,0.99,4403.0,0.03,146925.0,0.84,26656.0,17133.0,0.64
2018-05-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2018-06-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21628.0,16021.0,0.74
2018-07-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [81]:
# (output prefix, source column, also report the sum?) in output column order.
# The *_pct ratios get mean and median only.
_AMOUNT_STATS = (
    ('cl_0', 'cl_0', True),
    ('cl_1', 'cl_1', True),
    ('cl_rd', 'cl_rd', True),
    ('cl_rd_pct', 'cl_rd_pct', False),
    ('rva_0', 'rva_e_0', True),
    ('rva_1', 'rva_e_1', True),
    ('rva_rd', 'rva_rd', True),
    ('rva_rd_pct', 'rva_rd_pct', False),
)


def _campaign_amounts(g):
    """Return a one-row frame with the row count and mean/median/sum amounts of one group."""
    stats = {'n': [g.shape[0]]}
    for prefix, column, with_sum in _AMOUNT_STATS:
        stats[prefix + '_mean'] = [np.mean(g[column])]
        stats[prefix + '_median'] = [np.median(g[column])]
        if with_sum:
            stats[prefix + '_sum'] = [np.sum(g[column])]
    return pd.DataFrame(stats)


cld_amt = (
    df
    # only rows with i_na == 0 whose credit limit and estimated reserve both went down
    .loc[lambda d: (d.i_na == 0) & (d.cl_1 < d.cl_0) & (d.rva_e_1 < d.rva_e_0)]
    .drop(columns=['i_na', 'i_c'])
    # absolute and relative reductions; the *_pct columns reuse the *_rd
    # columns created earlier in the same assign call
    .assign(cl_rd=lambda d: d.cl_0 - d.cl_1,
            cl_rd_pct=lambda d: d.cl_rd / d.cl_0,
            rva_rd=lambda d: d.rva_e_0 - d.rva_e_1,
            rva_rd_pct=lambda d: d.rva_rd / d.rva_e_0)
    .groupby(by=['by', 'mth'])
    .apply(_campaign_amounts)
    # drop the helper's own one-row index level so rows are keyed by (by, mth)
    .reset_index(level=2, drop=True)
)
with custom_formatting():
    display(cld_amt)

Unnamed: 0_level_0,Unnamed: 1_level_0,n,cl_0_mean,cl_0_median,cl_0_sum,cl_1_mean,cl_1_median,cl_1_sum,cl_rd_mean,cl_rd_median,cl_rd_sum,...,rva_0_median,rva_0_sum,rva_1_mean,rva_1_median,rva_1_sum,rva_rd_mean,rva_rd_median,rva_rd_sum,rva_rd_pct_mean,rva_rd_pct_median
by,mth,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
inact,2017-10-01,22362,25190.72,2000.0,563314798.1,1482.72,9.0,33156633.0,23707.99,1991.0,530158165.1,...,56.01,7776963.81,19.14,0.25,428071.66,328.63,55.76,7348892.15,0.96,1.0
inact,2018-04-01,4403,37429.14,20000.0,164800517.18,39.59,11.0,174300.0,37389.56,19989.0,164626217.18,...,560.12,4579187.16,11.35,0.31,49979.01,1028.66,559.81,4529208.15,0.99,1.0
inact,2018-08-01,3332,33407.11,18000.0,111312490.5,13.5,12.0,44972.0,33393.61,17988.0,111267518.5,...,504.11,3072621.59,0.38,0.34,1260.55,921.78,503.77,3071361.04,1.0,1.0
inact,2019-01-01,1927,34279.58,19500.0,66056751.05,49.05,13.0,94512.0,34230.53,19487.0,65962239.05,...,532.11,1840391.49,1.53,0.36,2951.44,953.52,531.75,1837440.05,1.0,1.0
rsgs,2017-12-01,22636,37375.95,24000.0,846042086.5,23345.62,13000.0,528451476.8,14030.33,7000.0,317590609.7,...,1744.98,93719444.31,2324.47,815.84,52616707.04,1815.81,587.64,41102737.27,0.47,0.45
rsgs,2018-02-01,14207,39299.95,25000.0,558334319.5,28624.19,17000.0,406663826.75,10675.76,5500.0,151670492.75,...,2413.6,70829600.56,3012.73,1341.52,42801797.26,1972.82,648.74,28027803.31,0.39,0.36
rsgs,2018-04-01,17133,37796.23,25000.0,647562726.45,25990.08,15300.0,445288100.0,11806.14,6000.0,202274626.45,...,1899.56,69978426.75,2288.8,955.63,39214040.3,1795.62,593.48,30764386.45,0.45,0.44
rsgs,2018-06-01,16021,36141.64,24000.0,579025286.75,25306.08,15000.0,405428700.0,10835.56,5500.0,173596586.75,...,2060.24,71918969.27,2446.21,1020.27,39190665.74,2042.84,661.9,32728303.53,0.44,0.42
rsgs,2018-08-01,14426,41179.21,26000.0,594051309.5,29113.77,16600.0,419995200.0,12065.44,5000.0,174056109.5,...,2125.08,73318956.4,2836.42,1101.28,40918214.18,2246.0,631.12,32400742.22,0.43,0.42
rsgs,2018-10-01,11783,37118.15,24000.0,437363152.5,28281.03,17000.0,333235400.0,8837.12,4500.0,104127752.5,...,2332.25,63332712.81,2913.49,1224.49,34329659.12,2461.43,707.15,29003053.7,0.42,0.4


In [82]:
# Split the (by, mth) amount table per campaign type.
cld_inact_amt = cld_amt.loc['inact']
cld_rsgs_amt = cld_amt.loc['rsgs']
# Add the campaign type as a top column level so the two tables can sit side
# by side without name clashes.
for campaign_label, campaign_table in (('inact', cld_inact_amt), ('rsgs', cld_rsgs_amt)):
    campaign_table.columns = pd.MultiIndex.from_product([[campaign_label], campaign_table.columns])
# Outer join on mth; a month without a campaign of one type shows 0 for it.
cld_amt = cld_inact_amt.join(cld_rsgs_amt, how='outer').fillna(0)
with custom_formatting():
    display(cld_amt)

Unnamed: 0_level_0,inact,inact,inact,inact,inact,inact,inact,inact,inact,inact,...,rsgs,rsgs,rsgs,rsgs,rsgs,rsgs,rsgs,rsgs,rsgs,rsgs
Unnamed: 0_level_1,n,cl_0_mean,cl_0_median,cl_0_sum,cl_1_mean,cl_1_median,cl_1_sum,cl_rd_mean,cl_rd_median,cl_rd_sum,...,rva_0_median,rva_0_sum,rva_1_mean,rva_1_median,rva_1_sum,rva_rd_mean,rva_rd_median,rva_rd_sum,rva_rd_pct_mean,rva_rd_pct_median
mth,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-10-01,22362.0,25190.72,2000.0,563314798.1,1482.72,9.0,33156633.0,23707.99,1991.0,530158165.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-12-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1744.98,93719444.31,2324.47,815.84,52616707.04,1815.81,587.64,41102737.27,0.47,0.45
2018-02-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2413.6,70829600.56,3012.73,1341.52,42801797.26,1972.82,648.74,28027803.31,0.39,0.36
2018-04-01,4403.0,37429.14,20000.0,164800517.18,39.59,11.0,174300.0,37389.56,19989.0,164626217.18,...,1899.56,69978426.75,2288.8,955.63,39214040.3,1795.62,593.48,30764386.45,0.45,0.44
2018-06-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2060.24,71918969.27,2446.21,1020.27,39190665.74,2042.84,661.9,32728303.53,0.44,0.42
2018-08-01,3332.0,33407.11,18000.0,111312490.5,13.5,12.0,44972.0,33393.61,17988.0,111267518.5,...,2125.08,73318956.4,2836.42,1101.28,40918214.18,2246.0,631.12,32400742.22,0.43,0.42
2018-10-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2332.25,63332712.81,2913.49,1224.49,34329659.12,2461.43,707.15,29003053.7,0.42,0.4
2018-12-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2175.13,78176210.27,2672.81,1154.91,42984193.0,2188.29,625.96,35192017.27,0.41,0.38
2019-01-01,1927.0,34279.58,19500.0,66056751.05,49.05,13.0,94512.0,34230.53,19487.0,65962239.05,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-02-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2160.65,63356517.34,2581.62,1143.92,34433650.96,2168.46,662.06,28922866.38,0.43,0.4


In [83]:
# Expand the table to a continuous month-start calendar running from the first
# to the last campaign month. Months without any campaign are added as rows and
# filled with 0.
# reindex() does this directly; it replaces the earlier construction of an
# empty MultiIndex-column frame that was left-joined with cld_amt and then
# sorted (date_range is already ascending, so no sort is needed).
cld_amt = (
    cld_amt
    .reindex(pd.date_range(start=cld_amt.index.min(),
                           end=cld_amt.index.max(),
                           freq='MS',
                           name='mth'))
    .fillna(0)
)
# Persist the monthly amount table next to the notebook.
cld_amt.to_csv('cld_amt.csv')
with custom_formatting():
    display(cld_amt)

Unnamed: 0_level_0,inact,inact,inact,inact,inact,inact,inact,inact,inact,inact,...,rsgs,rsgs,rsgs,rsgs,rsgs,rsgs,rsgs,rsgs,rsgs,rsgs
Unnamed: 0_level_1,n,cl_0_mean,cl_0_median,cl_0_sum,cl_1_mean,cl_1_median,cl_1_sum,cl_rd_mean,cl_rd_median,cl_rd_sum,...,rva_0_median,rva_0_sum,rva_1_mean,rva_1_median,rva_1_sum,rva_rd_mean,rva_rd_median,rva_rd_sum,rva_rd_pct_mean,rva_rd_pct_median
mth,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-10-01,22362.0,25190.72,2000.0,563314798.1,1482.72,9.0,33156633.0,23707.99,1991.0,530158165.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-11-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2017-12-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1744.98,93719444.31,2324.47,815.84,52616707.04,1815.81,587.64,41102737.27,0.47,0.45
2018-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2018-02-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2413.6,70829600.56,3012.73,1341.52,42801797.26,1972.82,648.74,28027803.31,0.39,0.36
2018-03-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2018-04-01,4403.0,37429.14,20000.0,164800517.18,39.59,11.0,174300.0,37389.56,19989.0,164626217.18,...,1899.56,69978426.75,2288.8,955.63,39214040.3,1795.62,593.48,30764386.45,0.45,0.44
2018-05-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2018-06-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2060.24,71918969.27,2446.21,1020.27,39190665.74,2042.84,661.9,32728303.53,0.44,0.42
2018-07-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
