# Generate popularity table

In [23]:
import sys
import os

# Notebook dependencies. If any of them cannot be imported, report the
# underlying ImportError (which names the missing module) and stop with a
# non-zero status, instead of a generic message and a "successful" exit.
try:
    import warnings
    # Silences every warning for the rest of the session; installed before
    # pandas/numpy are imported so their import-time warnings are hidden too.
    warnings.filterwarnings('ignore')
    import pandas as pd
    import numpy as np
    import sqlite3
except ImportError as exc:
    print(f"Error: missing one of the libraries.. ({exc})")
    sys.exit(1)

# Show where the kernel is running; the relative paths used later in the
# notebook are resolved against this working directory.
print(os.getcwd())
print(sys.argv[0])
# '__file__' is a quoted string here, not the module attribute: realpath
# resolves it against the working directory, so dirname yields that directory.
print(os.path.dirname(os.path.realpath('__file__')))

/Users/marcelino/dmarcelinobr/pollingpoint/dashboards/notebooks
/Users/marcelino/miniconda3/envs/pymc3/lib/python3.8/site-packages/ipykernel_launcher.py
/Users/marcelino/dmarcelinobr/pollingpoint/dashboards/notebooks


## Load and clean the data

In [24]:
# Read the whole `aprovacao` table from the SQLite database into a DataFrame.
# try/finally guarantees the connection is closed even when the query raises
# (e.g. the table is missing). Note that using a sqlite3 connection in a
# `with` block only manages the transaction and does not close it.
con = sqlite3.connect('../../pollingpoint.db')
try:
    file_long = pd.read_sql(""" SELECT * FROM aprovacao;""", con=con)
finally:
    con.close()

In [25]:
# Build the cleaned poll table `data` from `file_long`.
#
# `file_long` itself is left exactly as loaded: the conversions below are
# applied to a copy, so re-running this cell does not divide the percentages
# by 100 a second time, and no column is assigned on a filtered slice.
APPROVAL_COLS = ["positiva", "regular", "negativa"]
OUTPUT_COLS = ['data_fim', 'empresa', 'tipo', 'amostra', 'positiva', 'regular', 'negativa', 'modo', 'erro']
DEFAULT_SAMPLE_SIZE = 1000  # used when a poll does not report its sample size

# Convert the end date to datetime (assign returns a new frame).
polls = file_long.assign(data_fim=pd.to_datetime(file_long["data_fim"]))

# Convert percentages to proportions.
polls[APPROVAL_COLS] = polls[APPROVAL_COLS] / 100

# Subset for the current president: polls ending on/after 2019-01-01 that
# evaluate the federal government.
is_current_term = polls["data_fim"] >= pd.to_datetime("2019-01-01")
is_federal_eval = polls["tipo"] == "Avaliação do governo federal"

data = (
    polls.loc[is_current_term & is_federal_eval, OUTPUT_COLS]
    # Replace missing sample sizes with the default, then cast to int.
    # NOTE (original author): the int cast may need special treatment.
    .assign(amostra=lambda df: df["amostra"].fillna(DEFAULT_SAMPLE_SIZE).astype('int'))
    # Round only the three approval proportions to two decimals.
    .round({col: 2 for col in APPROVAL_COLS})
    .reset_index(drop=True)
)

## Sanity checks

We need to check that we haven't introduced any typos in the pollsters or methods, and do a quick visual check on the most recent approval values.

In [26]:
# Count polls per pollster (rows) and interview mode (columns), including
# the "All" margin totals, so misspelled labels stand out.
pollster_by_mode = pd.crosstab(
    index=data["empresa"],
    columns=data["modo"],
    margins=True,
)
pollster_by_mode

modo,CATI,IVR,Online,Presencial,All
empresa,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Atlas,0,0,16,0,16
Datafolha,7,0,0,6,13
IDEIA Big Data,10,0,53,0,63
INCT/IDDC,1,0,0,2,3
Ibope,0,0,0,9,9
Ibpad,7,0,0,0,7
Instituto FSB,8,0,0,0,8
Instituto Methodus,1,0,0,0,1
Ipec,0,0,0,2,2
Ipespe,36,0,0,0,36


In [27]:
# Save the cleaned table as CSV for the chart (overwrites any existing file;
# comma-separated, UTF-8, without the index column).
export_path = '../exports/polls_popularity.csv'
data.to_csv(
    export_path,
    index=False,
    encoding='utf-8',
    sep=',',
    mode='w',
)