# Reforma

## Preâmbulo

In [1]:
import numpy as np
import pandas as pd
import psycopg2
import pandas.io.sql as sqlio
import time

# Track execution time                                                          
start_time = time.time()

### Parâmetros

In [2]:
# Simulation Parameters
#DBTABLE = 'FATO_AUXILIO_SAMPLE'
DBTABLE = 'FATO_AUXILIO'
#TB_REFORMA = 'FATO_REFORMA_SAMPLE'
TB_REFORMA = 'FATO_REFORMA'
ANO_INICIO = 1995
ANO_FIM = 2016
DADOS_FAZENDA = '../dataset/dados_fazenda.xlsx'

# Conection parameters
HOST='tama'
PORT='5432'
DBNAME='prevdb'
USER='prevdb_user'
PASS='pr3v'

### Biblioteca

In [3]:
def ds_query(sql_query):
    """
        Query Dataset

    Parâmetros
    ----------
        sql : string
            SQL query to be performed against the dataset
        
    Retorno
    -------
        Pandas Dataframe
    """
    # Connect to an existing database
    try:
        conn = psycopg2.connect("host='{}' port={} dbname='{}'user={} password={}"
                .format(HOST, PORT, DBNAME, USER, PASS))
        df = sqlio.read_sql_query(sql, conn)
        # Close communication with the database                                     
        conn.close()
        return df
    except:
        print("Unable to connect to the database")
        return

## Simulação

### Aposentados RGPS em ANO_FIM

In [4]:
sql = """
SELECT 
	ESPECIE
	,DIB
	,CLIENTELA
	,SEXO
	,SITUACAO
	,TEMPO_CONTRIB
	,IDADE_DIB
	,VL_RMI
FROM {table_name}
WHERE DIB > {ano}*10000 AND DIB < ({ano}+1)*10000
    AND ESPECIE IN (41 , 42)
    AND SEXO IN (3, 1)
""".format(table_name=DBTABLE,
           ano=ANO_FIM)
df = ds_query(sql)
df['ano_dib'] = df['dib'].apply(lambda x: int(x/10000))
print(df.columns)
print(df.shape)

Index(['especie', 'dib', 'clientela', 'sexo', 'situacao', 'tempo_contrib',
       'idade_dib', 'vl_rmi', 'ano_dib'],
      dtype='object')
(757591, 9)


In [5]:
print('ANO FIM = {}'.format(ANO_FIM))
df = df[df['ano_dib']==ANO_FIM][['ano_dib','idade_dib','especie','clientela', 'sexo','vl_rmi']]
df = df.pivot_table(index='idade_dib', columns=['especie','clientela','sexo'], 
                    values='vl_rmi', aggfunc='count').round()
df.fillna(value=0, inplace=True, downcast='infer')
df.to_csv('../sandbox/2016_concessoes_rgps.csv')
df

ANO FIM = 2016


especie,41,41,41,41,42,42,42,42
clientela,1,1,2,2,1,1,2,2
sexo,1,3,1,3,1,3,1,3
idade_dib,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
37,0,0,0,0,0,1,0,0
38,0,0,0,0,1,0,0,0
39,0,0,0,0,0,4,0,0
40,0,0,0,0,14,17,0,1
41,0,0,0,0,42,50,0,0
42,0,0,0,0,60,283,1,1
43,0,0,0,0,149,446,1,0
44,0,0,0,0,286,749,0,1
45,0,0,0,0,488,1276,0,3
46,0,0,0,0,739,1883,5,2


### FATO_REFORMA

In [6]:
sql = """
SELECT *
FROM {table_name} 
LIMIT 10
""".format(table_name=TB_REFORMA,
           ano=ANO_FIM)
fato_pessoa = ds_query(sql)
fato_pessoa

Unnamed: 0,index,ano_nasc,dt_nasc,dt_obito,sexo,clientela,ano_inicio_contrib,ano_dib,idade_dib,tempo_contrib,especie,pec6_ano_dib,pec6_idade_dib,pec6_gap,pec6_prob
0,0,1948,19480820,0.0,1,1,1963,1995,47,32,42,2013,65,18,0.794648
1,1,1935,19350306,0.0,3,2,1986,1995,60,9,41,2006,71,11,0.830009
2,2,1935,19350215,0.0,3,1,1987,1995,60,8,41,2007,72,12,0.809194
3,3,1934,19341208,0.0,3,1,1983,1995,60,12,41,2003,69,9,0.870309
4,4,1941,19410525,0.0,3,1,1968,1995,54,27,42,2003,62,8,0.928176
5,5,1935,19350106,0.0,1,2,1982,1995,60,13,41,2002,67,7,0.841236
6,6,1934,19340407,0.0,1,2,1987,1995,61,8,41,2007,73,12,0.694269
7,7,1940,19400406,0.0,3,2,1989,1995,55,6,41,2009,69,14,0.845214
8,8,1932,19320826,0.0,1,2,1985,1995,62,10,41,2005,73,11,0.704797
9,9,1930,19300205,0.0,1,1,1983,1995,65,12,41,2003,73,8,0.742682


### Aposentados PEC 6/2019 em ANO_FIM

In [7]:
sql = """
SELECT 
	ESPECIE
    ,CLIENTELA
	,SEXO
	,PEC6_IDADE_DIB
	--,PEC6_TEMPO_CONTRIB
	,PEC6_GAP
	,PEC6_ANO_DIB
	,PEC6_PROB
FROM {table_name} 
""".format(table_name=TB_REFORMA,
           ano=ANO_FIM)
fato_pessoa = ds_query(sql)
print(fato_pessoa.columns)
print(fato_pessoa.shape)

Index(['especie', 'clientela', 'sexo', 'pec6_idade_dib', 'pec6_gap',
       'pec6_ano_dib', 'pec6_prob'],
      dtype='object')
(16349803, 7)


In [8]:
print('ANO FIM = {}'.format(ANO_FIM))
df = fato_pessoa[fato_pessoa['pec6_ano_dib']==ANO_FIM][['especie','clientela', 'sexo', 'pec6_idade_dib','pec6_prob']]
df = df.pivot_table(index='pec6_idade_dib', columns=['especie','clientela','sexo'], 
                    values='pec6_prob', aggfunc='sum').round()
df.fillna(value=0, inplace=True, downcast='infer')
#df = df.groupby(['pec6_idade_dib','sexo']).sum()
#df['qtd'] = df['pec6_prob'].apply(lambda x: round(x))
df.to_csv('../sandbox/2016_concessoes_pec.csv')
df

ANO FIM = 2016


especie,41,41,41,41,42,42,42,42
clientela,1,1,2,2,1,1,2,2
sexo,1,3,1,3,1,3,1,3
pec6_idade_dib,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
60,0,0,38481,103690,0,0,1027,81
61,0,0,5427,3280,0,0,0,0
62,0,75151,6722,1746,0,81429,0,0
63,0,10546,7267,1324,0,0,0,0
64,0,11861,7468,1132,0,0,0,0
65,45755,15191,5937,853,179241,0,0,0
66,6456,13617,2491,733,0,0,0,0
67,6439,9131,1098,505,0,0,0,0
68,6903,6978,796,406,0,0,0,0
69,6791,5452,604,314,0,0,0,0


## Considerações

* **Probabilidade de a pessoa não querer se aposentar na idade mínima**
 * Utilizar probabilidades da base?
 * Como mensurar a "vontade de trabalhar mais para aumentar o valor do benefício"?
* **Aposentadoria por idade de homens**
 * Não sofre alterações em termos de quantidade na PEC 6/2019?
* **Tempo de Contribuição**
 * É válido condisiderar que o ano de inicio de contribuição como:  `ano_inicio_contrib = ano_dib - tempo_contrib`
 * Essa abordagem desconsidera períodos de desemprego/i

## Cleanup

In [9]:
# Print out elapsed time                                                        
elapsed_time = (time.time() - start_time) / 60                                  
print("\nExecution time: {0:0.4f} minutes.".format(elapsed_time))


Execution time: 0.9558 minutes.
