In [1]:
from google.cloud import bigquery
import plotly.express as px
import plotly

dict_uf = {"11":"Rondônia",
           "12":"Acre",
           "13":"Amazonas",
           "14":"Roraima",
           "15":"Pará",
           "16":"Amapá",
           "17":"Tocantins",
           "21":"Maranhão",
           "22":"Piauí",
           "23":"Ceará",
           "24":"Rio Grande do Norte",
           "25":"Paraíba",
           "26":"Pernambuco",
           "27":"Alagoas",
           "28":"Sergipe",
           "29":"Bahia",
           "31":"Minas Gerais",
           "32":"Espírito Santo",
           "33":"Rio de Janeiro",
           "35":"São Paulo",
           "41":"Paraná",
           "42":"Santa Catarina",
           "43":"Rio Grande do Sul",
           "50":"Mato Grosso do Sul",
           "51":"Mato Grosso",
           "52":"Goiás",
           "53":"Distrito Federal"}

dict_sexo = {'1': 'Homem', '2':'Mulher'}

# Conexão com Big Query

In [2]:
project_id = 'brave-tea-400210'
dataset_id = 'fase_3_tech_challenge'
table_id = 'pnad-covid-19'
location = 'US'

client = bigquery.Client(project=project_id, location='US')

# Visualizações

## Distribuição da variável sexo ('A003')

In [3]:
query_sql = f'''WITH cont_sexo AS (
SELECT
    uf,
    sexo,
    COUNT(sexo) AS n_sexo,
FROM
    `{dataset_id}.{table_id}`
GROUP BY
    uf, sexo
ORDER BY
    uf
)

SELECT
    uf,
    sexo,
    n_sexo,
    ROUND(SUM(n_sexo)/SUM(n_sexo) OVER (PARTITION BY uf),4) AS proportion
FROM
    cont_sexo
GROUP BY
    uf, sexo, n_sexo'''

query_job = client.query(query_sql)
df_results = query_job.to_dataframe()

In [4]:
df_results.dtypes

uf              Int64
sexo            Int64
n_sexo          Int64
proportion    float64
dtype: object

In [5]:
df_results['uf'] = df_results['uf'].astype(str).replace(dict_uf)
df_results['sexo'] = df_results['sexo'].astype(str).replace(dict_sexo)
df_results.head()

Unnamed: 0,uf,sexo,n_sexo,proportion
0,Paraná,Mulher,30967,0.5163
1,Paraná,Homem,29017,0.4837
2,Minas Gerais,Mulher,53081,0.5149
3,Minas Gerais,Homem,50015,0.4851
4,Mato Grosso do Sul,Homem,12754,0.4825


In [14]:
px.colors.qualitative.G10

['#3366CC',
 '#DC3912',
 '#FF9900',
 '#109618',
 '#990099',
 '#0099C6',
 '#DD4477',
 '#66AA00',
 '#B82E2E',
 '#316395']

In [18]:
fig_3 = px.bar(df_results, x="proportion", y="uf", color="sexo", title="Ditribuição da população da pesquisa por sexo",
            color_discrete_sequence=[px.colors.qualitative.G10[1], px.colors.qualitative.G10[0]],height=800, width=600, labels=dict(uf="Estado", sexo="Sexo", proportion="Proporção (%)"))

fig_3.update_layout(title_x=0.5)
fig_3.add_vline(x=0.5)

#plotly.offline.plot(fig_3, filename = 'br_distribuicao_populacao.html', auto_play=False, auto_open=False)

fig_3.show()