## Notebook Magic

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load IPython's autoreload extension.
# NOTE(review): no `%autoreload <mode>` line follows in this cell, so confirm
# whether automatic reloading is meant to be switched on (e.g. `%autoreload 2`).
%load_ext autoreload

## Imports

In [None]:
import os
import yaml
import pandas as pd
from sqlalchemy import *
import matplotlib.pyplot as plt
import seaborn as sns

## SQL Connection

In [None]:
# Load the PostgreSQL credentials mapping from the local (untracked) config.
# The context manager closes the file handle as soon as parsing is done.
# NOTE(review): FullLoader is kept as-is; it is only appropriate because this
# is a locally controlled file. Consider yaml.safe_load if that ever changes.
with open("../conf/local/credentials.yml") as cred_file:
    pg_cred = yaml.load(cred_file, Loader=yaml.FullLoader)

In [None]:
# Build the PostgreSQL URL from the loaded credentials and open the engine.
from urllib.parse import quote

# Percent-encode user and password: characters such as '@', ':', '/' or '%'
# in either value would otherwise be read as URL delimiters/escapes and
# corrupt the connection string. safe="" encodes '/' as well, and quote()
# (not quote_plus) is used so that spaces become %20 rather than '+'.
url = 'postgresql://{}:{}@{}:{}/{}'
url = url.format(
    quote(str(pg_cred["pg_user"]), safe=""),
    quote(str(pg_cred["pg_pass"]), safe=""),
    pg_cred["pg_host"],
    5432,
    "iefp",
)
con = create_engine(url, client_encoding='utf8')
# Reflect the existing database schema into `meta`.
# NOTE(review): the `bind=` / `reflect=` constructor arguments are believed to
# be deprecated or removed in newer SQLAlchemy releases — confirm against the
# pinned version before upgrading.
meta = MetaData(bind=con, reflect=True)

## Get Pedidos Table

In [None]:
# Read the "pedidos" entry of the column config; it is later joined with ', '
# to form the select list, so it is expected to be a list of column names.
# The context manager closes the file handle as soon as parsing is done.
with open("../conf/base/sigae_columns.yml") as columns_file:
    ped_cols = yaml.load(columns_file, Loader=yaml.FullLoader)["pedidos"]
table = 'pedidos'

## Distribution of reasons for signing up

In [None]:
%%time
sql = """
select {}
from {}
where "tipo_movimento" = 11
order by "ano_mes" desc
limit 1000000
""".format(', '.join(ped_cols), "pedidos")

df = pd.read_sql(sql, con)

In [None]:
# Read every column of the `motivos_inscricao` table into a DataFrame.
cols = '*'
table = 'motivos_inscricao'
# `cols` is already a complete select-list string, so it is formatted in
# directly; running a str through ', '.join() would split it into characters
# and only happens to work for a one-character value such as '*'.
sql = """
select {}
from {}
""".format(cols, table)
df_motivos_inscricao = pd.read_sql(sql, con)

In [None]:
# Attach the registration-reason lookup rows to each request (inner join on
# the reason code), then plot each reason description's share of the merged rows.
df_register = df.merge(
    df_motivos_inscricao,
    left_on="motivo_inscricao",
    right_on="cmotivo_inscricao",
)
# Divide by the merged row count so the bars are relative frequencies.
hist = df_register["dmotivo_inscricao"].value_counts() / len(df_register)
# The trailing semicolon suppresses the Axes repr in the cell output, matching
# the other plotting cells in this notebook.
hist.plot.barh(figsize=(12, 8), title="Distribution of Registering Reasons");

## Distribution of reasons for cancelling registration

In [None]:
%%time
sql = """
select {}
from {}
where "tipo_movimento" = 31
order by "ano_mes" desc
limit 1000000
""".format(', '.join(ped_cols), "pedidos")

df = pd.read_sql(sql, con)

In [None]:
# Read every column of the `motivos_anulacao` table into a DataFrame.
cols = '*'
table = 'motivos_anulacao'
# `cols` is already a complete select-list string, so it is formatted in
# directly; running a str through ', '.join() would split it into characters
# and only happens to work for a one-character value such as '*'.
sql = """
select {}
from {}
""".format(cols, table)
df_motivos_anulacao = pd.read_sql(sql, con)

In [None]:
# Work on a copy of the fetched rows, join the cancellation-reason lookup
# onto them, and plot each reason description's share of the merged rows.
df_exit = df.copy()
df_cancel = pd.merge(
    df_exit,
    df_motivos_anulacao,
    left_on="motivo_anulacao",
    right_on="cmotivo_anulacao",
)
# Counts per description divided by the merged row count.
hist = df_cancel["dmotivo_anulacao"].value_counts().div(len(df_cancel))
hist.plot(kind="barh", figsize=(12, 8), title="Distribution of Exit Reasons");

## Monthly Registers, Exits and Placements

In [None]:
%%time
sql = """
select ano_mes, tipo_movimento, count(*)
from pedidos
where tipo_movimento = 31 or tipo_movimento = 21 or tipo_movimento = 11
group by ano_mes, tipo_movimento
"""
df = pd.read_sql(sql, con)

In [None]:
# Reshape the monthly counts to one column per movement type, index them by
# month start date, and smooth with a 3-row rolling mean before plotting.
df_flow = (
    df.pivot(index='ano_mes', columns='tipo_movimento', values='count')
    .reset_index()
    # ano_mes is cast to int and then str so it can be parsed with "%Y%m".
    .assign(
        ano_mes=lambda frame: pd.to_datetime(
            frame["ano_mes"].astype("int").astype("str"), format="%Y%m"
        )
    )
    .set_index("ano_mes")
    .rolling(3)
    .mean()
)
df_flow.plot(figsize=(12, 8), title="Monthly Registers (11), Exits (31) and IEFP Placements (21)");