# Reforma

## Preâmbulo

In [1]:
import numpy as np
import pandas as pd
import psycopg2
import pandas.io.sql as sqlio
import time

# Track execution time                                                          
start_time = time.time()

### Parâmetros

In [2]:
# Simulation Parameters
TB_REFORMA = 'FATO_REFORMA_SAMPLE'
#TB_REFORMA = 'FATO_REFORMA'
ANO_INICIO = 1995
ANO_FIM = 2016
DADOS_FAZENDA = '../dataset/dados_fazenda.xlsx'

# Conection parameters
HOST='tama'
PORT='5432'
DBNAME='prevdb'
USER='prevdb_user'
PASS='pr3v'

### Biblioteca

In [3]:
def ds_query(sql_query):
    """
        Query Dataset

    Parâmetros
    ----------
        sql : string
            SQL query to be performed against the dataset
        
    Retorno
    -------
        Pandas Dataframe
    """
    # Connect to an existing database
    try:
        conn = psycopg2.connect("host='{}' port={} dbname='{}'user={} password={}"
                .format(HOST, PORT, DBNAME, USER, PASS))
        df = sqlio.read_sql_query(sql, conn)
        # Close communication with the database                                     
        conn.close()
        return df
    except:
        print("Unable to connect to the database")
        return

## Simulação

### FATO_REFORMA

In [4]:
sql = """
SELECT *
FROM {table_name} 
LIMIT 10
""".format(table_name=TB_REFORMA,
           ano=ANO_FIM)
fato_pessoa = ds_query(sql)
fato_pessoa

Unnamed: 0,index,ano_nasc,dt_nasc,dt_obito,sexo,clientela,ano_inicio_contrib,ano_dib,idade_dib,tempo_contrib,especie,pec6_ano_dib,pec6_idade_dib,pec6_gap,pec6_prob
0,0,1935,19350106,19981105,3,1,1988,1995,60,7,41,2008.0,73.0,13,0.789282
1,1,1929,19291214,0,3,1,1989,1995,65,6,41,2009.0,80.0,15,0.631777
2,2,1939,19390913,0,3,2,1988,1995,55,7,41,2008.0,69.0,14,0.845214
3,3,1944,19441009,0,1,1,1970,1995,50,25,46,2004.0,60.0,10,0.880527
4,4,1934,19340813,20151212,3,1,1978,1995,60,17,41,1998.0,64.0,4,0.948549
5,5,1934,19341121,0,3,1,1976,1995,60,19,41,1996.0,62.0,2,0.973187
6,6,1924,19240424,0,3,2,1988,1995,70,7,41,2008.0,84.0,14,0.519235
7,7,1929,19291112,0,3,2,1970,1995,65,25,41,1995.0,66.0,1,0.97956
8,8,1940,19400114,0,3,2,1986,1995,55,9,41,2006.0,66.0,11,0.885876
9,9,1934,19340710,0,1,2,1983,1995,60,12,41,2003.0,69.0,9,0.795428


In [5]:
sql = """
SELECT 
	ESPECIE
    ,CLIENTELA
	,SEXO
    ,IDADE_DIB
	,PEC6_IDADE_DIB
	--,PEC6_TEMPO_CONTRIB
	,PEC6_GAP
	,PEC6_ANO_DIB
	,PEC6_PROB
FROM {table_name} 
WHERE PEC6_ANO_DIB = {ano}
""".format(table_name=TB_REFORMA,
           ano=ANO_FIM)
fato_pessoa = ds_query(sql)
print(fato_pessoa.columns)
print(fato_pessoa.shape)

Index(['especie', 'clientela', 'sexo', 'idade_dib', 'pec6_idade_dib',
       'pec6_gap', 'pec6_ano_dib', 'pec6_prob'],
      dtype='object')
(960, 8)


In [6]:
print('ANO FIM = {}'.format(ANO_FIM))
df = fato_pessoa[['especie','clientela', 'sexo', 'idade_dib', 'pec6_idade_dib','pec6_prob','pec6_gap']]
print(df['especie'].unique())

ANO FIM = 2016
[42 41 57 46 32 92]


### Aposentados RGPS em ANO_FIM

In [7]:
df_rgps = df.pivot_table(index='idade_dib', columns=['especie','clientela','sexo'], 
                    values='pec6_prob', aggfunc='count').round()
df_rgps.fillna(value=0, inplace=True, downcast='infer')
df_rgps

especie,32,32,32,32,41,41,41,41,42,42,42,46,57,92,92
clientela,1,1,2,2,1,1,2,2,1,1,2,1,1,1,1
sexo,1,3,1,3,1,3,1,3,1,3,1,1,3,1,3
idade_dib,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3
23,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
27,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
28,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
29,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
30,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
31,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
32,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
33,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
34,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
38,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0


### Aposentados PEC 6/2019 em ANO_FIM

In [8]:
df_qtd = df.pivot_table(index='pec6_idade_dib', columns=['especie','clientela','sexo'], 
                    values='pec6_prob', aggfunc='sum').round()
df_qtd.fillna(value=0, inplace=True, downcast='infer')
#df = df.groupby(['pec6_idade_dib','sexo']).sum()
#df['qtd'] = df['pec6_prob'].apply(lambda x: round(x))
#df_qtd.to_csv('../sandbox/2016_pec_qtd_sample.csv')
df_qtd

especie,32,32,32,32,41,41,41,41,42,42,42,46,57,92,92
clientela,1,1,2,2,1,1,2,2,1,1,2,1,1,1,1
sexo,1,3,1,3,1,3,1,3,1,3,1,1,3,1,3
pec6_idade_dib,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3
23.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
27.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
28.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
29.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
30.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
31.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
32.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
33.0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
34.0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
38.0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [9]:
df_qtd.columns

MultiIndex(levels=[[32, 41, 42, 46, 57, 92], [1, 2], [1, 3]],
           codes=[[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 4, 5, 5], [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0], [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1]],
           names=['especie', 'clientela', 'sexo'])

### GAP Médio PEC 6/2019 em ANO_FIM

In [10]:
df_gap = df.pivot_table(index='pec6_idade_dib', columns=['especie','clientela','sexo'], 
                    values='pec6_gap', aggfunc='mean')#.round()
df_gap.fillna(value=0, inplace=True, downcast='infer')
#df_qtd.to_csv('../sandbox/2016_pec_avggap_sample.csv')
df_gap

especie,32,32,32,32,41,41,41,41,42,42,42,46,57,92,92
clientela,1,1,2,2,1,1,2,2,1,1,2,1,1,1,1
sexo,1,3,1,3,1,3,1,3,1,3,1,1,3,1,3
pec6_idade_dib,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3
23.0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0
27.0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0
28.0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0
29.0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0
30.0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0
31.0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0
32.0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0
33.0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0
34.0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0
38.0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0


### Probabilidade de Sobrevivência Média PEC 6/2019 em ANO_FIM

In [11]:
df_prob = df.pivot_table(index='pec6_idade_dib', columns=['especie','clientela','sexo'], 
                    values='pec6_prob', aggfunc='mean')#.round()
df_prob.fillna(value=0, inplace=True, downcast='infer')
#df_prob.to_csv('../sandbox/2016_pec_avgprob_sample.csv')
df_prob

especie,32,32,32,32,41,41,41,41,42,42,42,46,57,92,92
clientela,1,1,2,2,1,1,2,2,1,1,2,1,1,1,1
sexo,1,3,1,3,1,3,1,3,1,3,1,1,3,1,3
pec6_idade_dib,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3
23.0,0,1,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
27.0,1,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
28.0,1,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
29.0,0,1,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
30.0,1,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
31.0,1,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
32.0,0,1,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
33.0,1,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
34.0,1,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
38.0,1,1,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


## Considerações

* **Probabilidade de a pessoa não querer se aposentar na idade mínima**
 * Utilizar probabilidades da base?
 * Como mensurar a "vontade de trabalhar mais para aumentar o valor do benefício"?
* **Aposentadoria por idade de homens**
 * Não sofre alterações em termos de quantidade na PEC 6/2019?
* **Tempo de Contribuição**
 * É válido condisiderar que o ano de inicio de contribuição como:  `ano_inicio_contrib = ano_dib - tempo_contrib`
 * Essa abordagem desconsidera períodos de desemprego/i

## Cleanup

In [12]:
# Print out elapsed time                                                        
elapsed_time = (time.time() - start_time) / 60                                  
print("\nExecution time: {0:0.4f} minutes.".format(elapsed_time))


Execution time: 0.0052 minutes.
