In [None]:
import os
from urllib.parse import quote

import pandas as pd
import yaml
from pyathena import connect
from pyathena.util import as_pandas
from sqlalchemy import *

%matplotlib inline
%load_ext autoreload

In [None]:
# Load the database credentials from the local config file. The context
# manager closes the file handle as soon as parsing is done (the bare open()
# call left it open until garbage collection).
with open("../conf/local/credentials.yml") as cred_file:
    pg_cred = yaml.load(cred_file, Loader=yaml.FullLoader)

In [None]:
# Build the PostgreSQL connection URL from the loaded credentials.
# User name and password are percent-encoded so that characters such as
# "@", ":" or "/" cannot be mistaken for URL delimiters. str() keeps values
# that YAML parsed as numbers working, as the plain format() call did.
# NOTE(review): assumes credentials.yml stores the raw, unescaped values.
url = 'postgresql://{}:{}@{}:{}/{}'
url = url.format(
    quote(str(pg_cred["pg_user"]), safe=""),
    quote(str(pg_cred["pg_pass"]), safe=""),
    pg_cred["pg_host"],
    5432,
    "iefp",
)

# The return value of create_engine() is our connection object
con = create_engine(url, client_encoding='utf8')

# Reflect every table of the database into the MetaData collection.
# MetaData(bind=..., reflect=True) is no longer accepted by current SQLAlchemy
# releases; calling reflect() explicitly works on old and new versions alike.
# The metadata is therefore not bound to the engine: pass `con` explicitly
# wherever a connection is needed.
meta = MetaData()
meta.reflect(bind=con)

In [None]:
# Interventions over time, joined to tipos_intervencoes on the intervention
# code. Per the original note, tipo_movimento > 30 "means only results";
# codes '0101' and '0102' are excluded. Only the first 10 rows (ordered by
# ute_id) are fetched.
sql = """select ute_id, ano_mes, codigo_intervencao,

resultado_intervencao, dcodigo_interv 

from intervencoes

join tipos_intervencoes

on intervencoes.codigo_intervencao = tipos_intervencoes.codigo_interv

where ((intervencoes.tipo_movimento > 30) 

and intervencoes.codigo_intervencao != '0101' 
and intervencoes.codigo_intervencao != '0102')

order by ute_id

limit 10;
;"""

# Run the query through the SQLAlchemy engine and collect it as a DataFrame.
interv_time = pd.read_sql(sql=sql, con=con)

In [None]:
# Show up to the first 20 rows of the fetched frame.
interv_time.head(n=20)

In [None]:

# Unfiltered sample of interventions (including tipo_movimento), joined to
# tipos_intervencoes on the intervention code; first 10 rows ordered by
# ute_id and then ano_mes.
sql = """select ute_id, ano_mes, codigo_intervencao, intervencoes.tipo_movimento, resultado_intervencao, dcodigo_interv 

from intervencoes

join tipos_intervencoes

on intervencoes.codigo_intervencao = tipos_intervencoes.codigo_interv

order by ute_id, ano_mes

limit 10;
;"""

# Run the query through the SQLAlchemy engine and collect it as a DataFrame.
interv = pd.read_sql(sql=sql, con=con)

In [None]:
# Show up to the first 10 rows of the sampled interventions.
interv.head(n=10)

In [None]:
# Monthly intervention counts restricted to tipo_movimento = 35, leaving out
# codes '0101' and '0102' (the "not PPE" filter of the original note).
# One row per ano_mes, in ascending ano_mes order.
sql = """select ano_mes, count(codigo_intervencao)

from intervencoes

where ((intervencoes.tipo_movimento = 35) 

and intervencoes.codigo_intervencao != '0101' 
and intervencoes.codigo_intervencao != '0102')

group by ano_mes

order by ano_mes

;"""

# Run the query through the SQLAlchemy engine and collect it as a DataFrame.
interv_time = pd.read_sql(sql=sql, con=con)

In [None]:
# Show up to the first 20 rows of the monthly counts.
interv_time.head(n=20)

In [None]:
# Turn the monthly counts into a date-indexed frame for plotting.
# The conversion is guarded on the raw "ano_mes" column so the cell can be
# re-run safely: after the first run that column has become the "Date" index
# and repeating the conversion would raise a KeyError.
if "ano_mes" in interv_time.columns:
    monthly_counts = interv_time.copy()
    # ano_mes is cast to int and then to str before being parsed as
    # year + month ("%Y%m").
    monthly_counts["ano_mes"] = pd.to_datetime(
        monthly_counts["ano_mes"].astype("int").astype("str"), format="%Y%m"
    )
    # Positional rename: first column is the month, second the count.
    monthly_counts.columns = ["Date", "Interventions"]
    interv_time = monthly_counts.set_index("Date")

In [None]:
# Line chart of the monthly intervention counts.
interv_time.plot(
    title="Monthly Intervention Counts",
    figsize=(12, 8),
)

In [None]:
# Count result interventions per month: tipo_movimento > 30, excluding
# codes '0101' and '0102' (the "not PPE" filter of the original note).
#
# The previous query selected ano_mes next to count(...) without a GROUP BY
# and ordered by ute_id, which PostgreSQL rejects; the leftover "limit 10"
# would also have cut the monthly series short. It is now grouped and ordered
# by month, like the tipo_movimento = 35 query earlier in the notebook.
# The join to tipos_intervencoes is kept, so only interventions whose code
# has a matching row in that table are counted.
sql = """select intervencoes.ano_mes, count(intervencoes.codigo_intervencao)

from intervencoes

join tipos_intervencoes

on intervencoes.codigo_intervencao = tipos_intervencoes.codigo_interv

where ((intervencoes.tipo_movimento > 30)

and intervencoes.codigo_intervencao != '0101'
and intervencoes.codigo_intervencao != '0102')

group by intervencoes.ano_mes

order by intervencoes.ano_mes

;"""

interv_time = pd.read_sql(sql, con)