# Reforma

## Preâmbulo

In [1]:
import numpy as np
import pandas as pd
import psycopg2
import pandas.io.sql as sqlio
import time

# Track execution time                                                          
start_time = time.time()

### Parâmetros

In [2]:
# Simulation Parameters
#TB_REFORMA = 'FATO_REFORMA_SAMPLE'
TB_REFORMA = 'FATO_REFORMA'
ANO_INICIO = 1995
ANO_FIM = 2016
DADOS_FAZENDA = '../dataset/dados_fazenda.xlsx'

# Conection parameters
HOST='tama'
PORT='5432'
DBNAME='prevdb'
USER='prevdb_user'
PASS='pr3v'

### Biblioteca

In [3]:
def ds_query(sql_query):
    """
        Query Dataset

    Parâmetros
    ----------
        sql : string
            SQL query to be performed against the dataset
        
    Retorno
    -------
        Pandas Dataframe
    """
    # Connect to an existing database
    try:
        conn = psycopg2.connect("host='{}' port={} dbname='{}'user={} password={}"
                .format(HOST, PORT, DBNAME, USER, PASS))
        df = sqlio.read_sql_query(sql, conn)
        # Close communication with the database                                     
        conn.close()
        return df
    except:
        print("Unable to connect to the database")
        return

## Simulação

### FATO_REFORMA

In [4]:
sql = """
SELECT *
FROM {table_name} 
LIMIT 10
""".format(table_name=TB_REFORMA,
           ano=ANO_FIM)
fato_pessoa = ds_query(sql)
fato_pessoa

Unnamed: 0,index,ano_nasc,dt_nasc,dt_obito,sexo,clientela,ano_inicio_contrib,ano_dib,idade_dib,tempo_contrib,especie,pec6_ano_dib,pec6_idade_dib,pec6_gap,pec6_prob,pec6_percent
0,832,1958,19580513,20160617,3,1,2006,2013,55,7,32,2013.0,55.0,0,1.0,0.6
1,833,1966,19660919,0,1,1,2011,2015,48,4,32,2015.0,48.0,0,1.0,0.6
2,834,1964,19640618,0,1,1,1986,2015,50,29,46,2024.0,60.0,10,0.910483,0.98
3,835,1956,19561117,0,3,2,2012,2012,55,0,41,2032.0,76.0,21,0.755011,0.62
4,836,1975,19750206,0,3,1,2007,2015,40,8,32,2015.0,40.0,0,1.0,0.6
5,837,1952,19520902,0,1,1,1979,2015,62,36,42,2017.0,65.0,3,0.949612,0.98
6,838,1952,19520610,0,1,2,2012,2012,60,0,41,2032.0,80.0,20,0.535958,0.6
7,839,1949,19490329,0,3,1,2005,2013,64,8,32,2013.0,64.0,0,1.0,0.6
8,840,1956,19561212,0,1,1,2008,2013,56,5,32,2013.0,56.0,0,1.0,0.6
9,841,1954,19540112,0,1,1,1980,2015,61,35,42,2019.0,65.0,4,0.935076,0.98


### Aposentados RGPS em ANO_FIM

In [5]:
sql = """
SELECT 
	ESPECIE
    ,CLIENTELA
	,SEXO
    ,IDADE_DIB
	,PEC6_IDADE_DIB
	--,PEC6_TEMPO_CONTRIB
	,PEC6_GAP
	,PEC6_ANO_DIB
	,PEC6_PROB
    ,PEC6_PERCENT
FROM {table_name} 
WHERE ANO_DIB = {ano}
""".format(table_name=TB_REFORMA,
           ano=ANO_FIM)
fato_pessoa = ds_query(sql)
print(fato_pessoa.columns)
print(fato_pessoa.shape)

Index(['especie', 'clientela', 'sexo', 'idade_dib', 'pec6_idade_dib',
       'pec6_gap', 'pec6_ano_dib', 'pec6_prob', 'pec6_percent'],
      dtype='object')
(889823, 9)


In [6]:
print('ANO FIM = {}'.format(ANO_FIM))
df = fato_pessoa[['especie','clientela', 'sexo', 'idade_dib', 'pec6_idade_dib','pec6_prob','pec6_gap','pec6_percent']]
print(df['especie'].unique())

ANO FIM = 2016
[41 42 57 32 46 92]


In [7]:
df_rgps = df.pivot_table(index='idade_dib', columns=['especie','clientela','sexo'], 
                    values='pec6_prob', aggfunc='count').round()
df_rgps.fillna(value=0, inplace=True, downcast='infer')
df_rgps

especie,32,32,32,32,41,41,41,41,42,42,42,42,46,46,46,57,57,92,92,92,92
clientela,1,1,2,2,1,1,2,2,1,1,...,2,1,1,2,1,1,1,1,2,2
sexo,1,3,1,3,1,3,1,3,1,3,...,3,1,3,1,1,3,1,3,1,3
idade_dib,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
15,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
17,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
18,8,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19,21,2,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,2,0,0,0
20,38,6,7,2,0,0,0,0,0,0,...,0,0,0,0,0,0,4,0,0,0
21,54,7,11,2,0,0,0,0,0,0,...,0,0,0,0,0,0,5,1,0,0
22,85,20,16,5,0,0,0,0,0,0,...,0,0,0,0,0,0,6,2,0,0
23,129,31,12,4,0,0,0,0,0,0,...,0,0,0,0,0,0,15,1,0,0
24,143,54,15,6,0,0,0,0,0,0,...,0,0,0,0,0,0,13,1,1,0
25,166,72,19,16,0,0,0,0,0,0,...,0,0,0,0,0,0,17,2,2,0


### Aposentados PEC 6/2019 em ANO_FIM

In [8]:
sql = """
SELECT 
	ESPECIE
    ,CLIENTELA
	,SEXO
    ,IDADE_DIB
	,PEC6_IDADE_DIB
	--,PEC6_TEMPO_CONTRIB
	,PEC6_GAP
	,PEC6_ANO_DIB
	,PEC6_PROB
    ,PEC6_PERCENT
FROM {table_name} 
WHERE PEC6_ANO_DIB = {ano}
""".format(table_name=TB_REFORMA,
           ano=ANO_FIM)
fato_pessoa = ds_query(sql)
print(fato_pessoa.columns)
print(fato_pessoa.shape)

Index(['especie', 'clientela', 'sexo', 'idade_dib', 'pec6_idade_dib',
       'pec6_gap', 'pec6_ano_dib', 'pec6_prob', 'pec6_percent'],
      dtype='object')
(921862, 9)


In [9]:
print('ANO FIM = {}'.format(ANO_FIM))
df = fato_pessoa[['especie','clientela', 'sexo', 'idade_dib', 'pec6_idade_dib','pec6_prob','pec6_gap','pec6_percent']]
print(df['especie'].unique())

ANO FIM = 2016
[41 42 57 32 46 92]


In [10]:
df_qtd = df.pivot_table(index='pec6_idade_dib', columns=['especie','clientela','sexo'], 
                    values='pec6_prob', aggfunc='sum').round()
df_qtd.fillna(value=0, inplace=True, downcast='infer')
#df = df.groupby(['pec6_idade_dib','sexo']).sum()
#df['qtd'] = df['pec6_prob'].apply(lambda x: round(x))
#df_qtd.to_csv('../sandbox/2016_pec_qtd_sample.csv')
df_qtd

especie,32,32,32,32,41,41,41,41,42,42,42,42,46,46,46,57,57,92,92,92,92
clientela,1,1,2,2,1,1,2,2,1,1,...,2,1,1,2,1,1,1,1,2,2
sexo,1,3,1,3,1,3,1,3,1,3,...,3,1,3,1,1,3,1,3,1,3
pec6_idade_dib,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
15.0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
17.0,1,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
18.0,8,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19.0,21,2,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,2,0,0,0
20.0,38,6,7,2,0,0,0,0,0,0,...,0,0,0,0,0,0,4,0,0,0
21.0,54,7,11,2,0,0,0,0,0,0,...,0,0,0,0,0,0,5,1,0,0
22.0,85,20,16,5,0,0,0,0,0,0,...,0,0,0,0,0,0,6,2,0,0
23.0,129,31,12,4,0,0,0,0,0,0,...,0,0,0,0,0,0,15,1,0,0
24.0,143,54,15,6,0,0,0,0,0,0,...,0,0,0,0,0,0,13,1,1,0
25.0,166,72,19,16,0,0,0,0,0,0,...,0,0,0,0,0,0,17,2,2,0


In [11]:
df_qtd.columns

MultiIndex(levels=[[32, 41, 42, 46, 57, 92], [1, 2], [1, 3]],
           codes=[[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5], [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1]],
           names=['especie', 'clientela', 'sexo'])

### GAP Médio PEC 6/2019 em ANO_FIM

In [12]:
df_gap = df.pivot_table(index='idade_dib', columns=['especie','clientela','sexo'], 
                    values='pec6_gap', aggfunc='mean')#.round()
df_gap.fillna(value=0, inplace=True, downcast='infer')
#df_qtd.to_csv('../sandbox/2016_pec_avggap_sample.csv')
df_gap

especie,32,32,32,32,41,41,41,41,42,42,42,42,46,46,46,57,57,92,92,92,92
clientela,1,1,2,2,1,1,2,2,1,1,...,2,1,1,2,1,1,1,1,2,2
sexo,1,3,1,3,1,3,1,3,1,3,...,3,1,3,1,1,3,1,3,1,3
idade_dib,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
15,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0,0.000000,0.000000,0,0.0,0.000000,0,0,0,0
17,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0,0.000000,0.000000,0,0.0,0.000000,0,0,0,0
18,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0,0.000000,0.000000,0,0.0,0.000000,0,0,0,0
19,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0,0.000000,0.000000,0,0.0,0.000000,0,0,0,0
20,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0,0.000000,0.000000,0,0.0,0.000000,0,0,0,0
21,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0,0.000000,0.000000,0,0.0,0.000000,0,0,0,0
22,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0,0.000000,0.000000,0,0.0,0.000000,0,0,0,0
23,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0,0.000000,0.000000,0,0.0,0.000000,0,0,0,0
24,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0,0.000000,0.000000,0,0.0,0.000000,0,0,0,0
25,0,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0,0.000000,0.000000,0,0.0,0.000000,0,0,0,0


### Probabilidade de Sobrevivência Média PEC 6/2019 em ANO_FIM

In [13]:
df_prob = df.pivot_table(index='idade_dib', columns=['especie','clientela','sexo'], 
                    values='pec6_prob', aggfunc='mean')#.round()
df_prob.fillna(value=0, inplace=True, downcast='infer')
#df_prob.to_csv('../sandbox/2016_pec_avgprob_sample.csv')
df_prob

especie,32,32,32,32,41,41,41,41,42,42,42,42,46,46,46,57,57,92,92,92,92
clientela,1,1,2,2,1,1,2,2,1,1,...,2,1,1,2,1,1,1,1,2,2
sexo,1,3,1,3,1,3,1,3,1,3,...,3,1,3,1,1,3,1,3,1,3
idade_dib,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
15,1,0,0,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0,0,0,0
17,1,0,0,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0,0,0,0
18,1,1,1,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0,0,0,0
19,1,1,1,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1,0,0,0
20,1,1,1,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1,0,0,0
21,1,1,1,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1,1,0,0
22,1,1,1,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1,1,0,0
23,1,1,1,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1,1,0,0
24,1,1,1,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1,1,1,0
25,1,1,1,1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1,1,1,0


### Percentual Médio PEC 6/2019

In [14]:
df_ppc = df.pivot_table(index='pec6_idade_dib', columns=['especie','clientela','sexo'], 
                        values='pec6_percent', aggfunc='mean')#.round()
df_ppc.fillna(value=0, inplace=True, downcast='infer')
#df_prob.to_csv('../sandbox/2016_pec_avgprob_sample.csv')
df_ppc

especie,32,32,32,32,41,41,41,41,42,42,42,42,46,46,46,57,57,92,92,92,92
clientela,1,1,2,2,1,1,2,2,1,1,...,2,1,1,2,1,1,1,1,2,2
sexo,1,3,1,3,1,3,1,3,1,3,...,3,1,3,1,1,3,1,3,1,3
pec6_idade_dib,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
15.0,0.600000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.00,...,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.0,0.0
17.0,0.600000,0.000000,0.000000,0.600000,0.000000,0.000000,0.000000,0.000000,0.000,0.00,...,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.0,0.0
18.0,0.600000,0.600000,0.600000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.00,...,0.0,0.0,0.0,0.0,0.0,0.00,0.000000,0.000000,0.0,0.0
19.0,0.600000,0.600000,0.600000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000,0.00,...,0.0,0.0,0.0,0.0,0.0,0.00,0.600000,0.000000,0.0,0.0
20.0,0.600000,0.600000,0.600000,0.600000,0.000000,0.000000,0.000000,0.000000,0.000,0.00,...,0.0,0.0,0.0,0.0,0.0,0.00,0.600000,0.000000,0.0,0.0
21.0,0.600000,0.600000,0.600000,0.600000,0.000000,0.000000,0.000000,0.000000,0.000,0.00,...,0.0,0.0,0.0,0.0,0.0,0.00,0.600000,0.600000,0.0,0.0
22.0,0.600000,0.600000,0.600000,0.600000,0.000000,0.000000,0.000000,0.000000,0.000,0.00,...,0.0,0.0,0.0,0.0,0.0,0.00,0.600000,0.600000,0.0,0.0
23.0,0.600000,0.600000,0.600000,0.600000,0.000000,0.000000,0.000000,0.000000,0.000,0.00,...,0.0,0.0,0.0,0.0,0.0,0.00,0.600000,0.600000,0.0,0.0
24.0,0.600000,0.600000,0.602667,0.600000,0.000000,0.000000,0.000000,0.000000,0.000,0.00,...,0.0,0.0,0.0,0.0,0.0,0.00,0.600000,0.600000,0.6,0.0
25.0,0.600000,0.600000,0.600000,0.600000,0.000000,0.000000,0.000000,0.000000,0.000,0.00,...,0.0,0.0,0.0,0.0,0.0,0.00,0.600000,0.600000,0.6,0.0


## Considerações

* **Probabilidade de a pessoa não querer se aposentar na idade mínima**
 * Utilizar probabilidades da base?
 * Como mensurar a "vontade de trabalhar mais para aumentar o valor do benefício"?
* **Aposentadoria por idade de homens**
 * Não sofre alterações em termos de quantidade na PEC 6/2019?
* **Tempo de Contribuição**
 * É válido condisiderar que o ano de inicio de contribuição como:  `ano_inicio_contrib = ano_dib - tempo_contrib`
 * Essa abordagem desconsidera períodos de desemprego/i

## Cleanup

In [15]:
# Print out elapsed time                                                        
elapsed_time = (time.time() - start_time) / 60                                  
print("\nExecution time: {0:0.4f} minutes.".format(elapsed_time))


Execution time: 0.1579 minutes.
