# Configuration / Setup


In [1]:
import pandas as pd
import numpy as np
import requests
import zipfile
import json
import io
import time
import os
import urllib.parse
import locale
from tqdm.notebook import tqdm
from pandas.api.types import union_categoricals


pd.options.display.max_columns = None

In [2]:
'''
**$df = read_csv_url("https://cdn.tse.jus.br/estatistica/sead/odsele/eleitorado_locais_votacao/eleitorado_local_votacao_2022.zip",
                      dtype={
                            "DS_ELEICAO": pd.CategoricalDtype(),
                            "NR_TURNO": pd.CategoricalDtype(),
                            "SG_UF": pd.CategoricalDtype(),
                            "CD_MUNICIPIO": pd.CategoricalDtype(),
                            "NM_MUNICIPIO": pd.CategoricalDtype(),
                            "NR_ZONA": pd.CategoricalDtype(),
                            "NR_SECAO": pd.CategoricalDtype(),

                            "CD_TIPO_SECAO_AGREGADA": pd.CategoricalDtype(),
                            "DS_TIPO_SECAO_AGREGADA": pd.CategoricalDtype(),
                            "NR_SECAO_PRINCIPAL": pd.CategoricalDtype(),

                            "NR_LOCAL_VOTACAO": pd.CategoricalDtype(),
                            "NM_LOCAL_VOTACAO": pd.CategoricalDtype(),

                            "CD_TIPO_LOCAL": pd.CategoricalDtype(),
                            "DS_TIPO_LOCAL": pd.CategoricalDtype(),
                            "DS_ENDERECO": pd.CategoricalDtype(),
                            "NM_BAIRRO": pd.CategoricalDtype(),
                            "NR_CEP": pd.CategoricalDtype(),
                            "NR_TELEFONE_LOCAL": pd.CategoricalDtype(),
                            "NR_LATITUDE": np.float64,
                            "NR_LONGITUDE": np.float64,
                            "CD_SITU_LOCAL_VOTACAO": pd.CategoricalDtype(),
                            "DS_SITU_LOCAL_VOTACAO": pd.CategoricalDtype(),
                            "CD_SITU_ZONA": pd.CategoricalDtype(),
                            "DS_SITU_ZONA": pd.CategoricalDtype(),
                            "CD_SITU_SECAO": pd.CategoricalDtype(),
                            "DS_SITU_SECAO": pd.CategoricalDtype(),
                            "CD_SITU_LOCALIDADE": pd.CategoricalDtype(),
                            "DS_SITU_LOCALIDADE": pd.CategoricalDtype(),
                            "CD_SITU_SECAO_ACESSIBILIDADE": pd.CategoricalDtype(),
                            "DS_SITU_SECAO_ACESSIBILIDADE": pd.CategoricalDtype(),
                            "QT_ELEITOR_SECAO": pd.Int64Dtype(),
                            "QT_ELEITOR_ELEICAO_FEDERAL": pd.Int64Dtype(),
                            "QT_ELEITOR_ELEICAO_ESTADUAL": pd.Int64Dtype(),
                            "QT_ELEITOR_ELEICAO_MUNICIPAL": pd.Int64Dtype()
                        }
                  )
'''


'\n**$df = read_csv_url("https://cdn.tse.jus.br/estatistica/sead/odsele/eleitorado_locais_votacao/eleitorado_local_votacao_2022.zip",\n                      dtype={\n                            "DS_ELEICAO": pd.CategoricalDtype(),\n                            "NR_TURNO": pd.CategoricalDtype(),\n                            "SG_UF": pd.CategoricalDtype(),\n                            "CD_MUNICIPIO": pd.CategoricalDtype(),\n                            "NM_MUNICIPIO": pd.CategoricalDtype(),\n                            "NR_ZONA": pd.CategoricalDtype(),\n                            "NR_SECAO": pd.CategoricalDtype(),\n\n                            "CD_TIPO_SECAO_AGREGADA": pd.CategoricalDtype(),\n                            "DS_TIPO_SECAO_AGREGADA": pd.CategoricalDtype(),\n                            "NR_SECAO_PRINCIPAL": pd.CategoricalDtype(),\n\n                            "NR_LOCAL_VOTACAO": pd.CategoricalDtype(),\n                            "NM_LOCAL_VOTACAO": pd.CategoricalDtype(),\n\n

In [3]:
BR_STATES = [ 
    "AC", "AL", "AM", "AP", "BA", "CE", "DF", "ES", "GO", "MA", "MG", 
    "MS", "MT", "PA", "PB", "PE", "PI", "PR", "RJ", "RN", "RO", "RR", 
    "RS", "SC", "SE", "SP", "TO"]

BR_VOTING_STATES = BR_STATES + ["ZZ"]


DOWNLOAD_DIR = "data/download"
TEMP_DIR = "data/temp"
OUTPUT_DIR = "data/output"

os.makedirs(DOWNLOAD_DIR, exist_ok=True)
os.makedirs(TEMP_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Mount Google Drive, put a mapbox token file
# from google.colab import drive
# drive.mount('/drive')
# px.set_mapbox_access_token(open("drive/MyDrive/Keys/.mapbox_token").read())

# Utils

## TSE

Utils to directly open the TSE datasets

In [4]:
def get_zip_content(file, filename = None):
    with zipfile.ZipFile(file) as z:
        if filename == None:
            filename = next(f for f in z.namelist() if os.path.splitext(f)[1] == ".csv")
        with z.open(filename) as f:
            return io.BytesIO(f.read())

# The zips have many files inside and CDN requires a custom agent
def wrap_zip_request(url: str, filename = None):
    zipname = os.path.basename(urllib.parse.urlparse(url).path.strip("/"))
    download_path = os.path.join(DOWNLOAD_DIR, zipname)
    try:
        return get_zip_content(download_path, filename)
    except (FileNotFoundError, zipfile.BadZipFile):
        retries = 0
        while retries < 5:
            try:
                with requests.get(url, headers={"User-Agent": "Mozilla"}) as r:
                    r.raise_for_status()
                    result = get_zip_content(io.BytesIO(r.content), filename)
                    with open(download_path, "wb") as f:
                        f.write(r.content)
                    return result
            except requests.exceptions.ConnectionError as ex:
                print(f"Request for {url} failed: {ex}, retrying...")
                time.sleep(2)
                retries += 1

def read_csv_url(url, *args, **kwargs):
    return pd.read_csv(wrap_zip_request(url), encoding="latin_1", sep=";", 
                       dayfirst=True, infer_datetime_format=True, na_values=["#NULO#", "#NE#", -1, -3], *args, **kwargs)
    
def read_json_url(url):
    with requests.get(url, headers={"User-Agent": "Mozilla"}) as r:
        return r.json()


Voting machine web bulletin helpers

In [5]:
def get_bweb_url(round, state):
    round_gen_dt = "051020221321" if round == 1 else "311020221535"
    return f"https://cdn.tse.jus.br/estatistica/sead/eleicoes/eleicoes2022/buweb/bweb_{round}t_{state}_{round_gen_dt}.zip"

def read_bweb(round, state):
    url = get_bweb_url(round, state)
    df = read_csv_url(url,
                      usecols=list(range(7,45)),
                      parse_dates=["DT_BU_RECEBIDO", "DT_CARGA_URNA_EFETIVADA", 
                                   "DT_ABERTURA", "DT_ENCERRAMENTO", "DT_EMISSAO_BU"],
                      dtype={
                            "NR_TURNO": pd.CategoricalDtype(),
                            "CD_ELEICAO": pd.CategoricalDtype(),
                            "DS_ELEICAO": pd.CategoricalDtype(),                             
                            "SG_UF": pd.CategoricalDtype(),
                            "CD_MUNICIPIO": pd.CategoricalDtype(),
                            "NM_MUNICIPIO": pd.CategoricalDtype(),
                            "NR_ZONA": pd.CategoricalDtype(),
                            "NR_SECAO": pd.CategoricalDtype(),
                            "NR_LOCAL_VOTACAO": pd.CategoricalDtype(),
                            "CD_CARGO_PERGUNTA": pd.CategoricalDtype(),
                            "DS_CARGO_PERGUNTA": pd.CategoricalDtype(),
                            "NR_PARTIDO": pd.CategoricalDtype(),
                            "SG_PARTIDO": pd.CategoricalDtype(),
                            "NM_PARTIDO": pd.CategoricalDtype(),
                            "QT_APTOS": pd.Int64Dtype(),
                            "QT_COMPARECIMENTO": pd.Int64Dtype(),
                            "QT_ABSTENCOES": pd.Int64Dtype(),
                            "CD_TIPO_URNA": pd.CategoricalDtype(),
                            "DS_TIPO_URNA": pd.CategoricalDtype(),
                            "CD_TIPO_VOTAVEL": pd.CategoricalDtype(),
                            "DS_TIPO_VOTAVEL": pd.CategoricalDtype(),
                            "NR_VOTAVEL": pd.CategoricalDtype(),
                            "NM_VOTAVEL": pd.CategoricalDtype(),
                            "QT_VOTOS": pd.Int64Dtype(),
                            "NR_URNA_EFETIVADA": pd.CategoricalDtype(),
                            "CD_CARGA_1_URNA_EFETIVADA": pd.CategoricalDtype(),
                            "CD_CARGA_2_URNA_EFETIVADA": pd.CategoricalDtype(),
                            "CD_FLASHCARD_URNA_EFETIVADA": pd.CategoricalDtype(),
                            "DS_CARGO_PERGUNTA_SECAO": pd.CategoricalDtype(),
                            "DS_AGREGADAS": pd.CategoricalDtype(),
                            "QT_ELEITORES_BIOMETRIA_NH": pd.Int64Dtype(),
                            "NR_JUNTA_APURADORA": pd.CategoricalDtype(),
                            "NR_TURMA_APURADORA": pd.CategoricalDtype(),
                    }
                )
    return df  

Voting places helpers

In [6]:
def read_voting_places():
    df = read_csv_url("https://cdn.tse.jus.br/estatistica/sead/odsele/eleitorado_locais_votacao/eleitorado_local_votacao_2022.zip",
                      dtype={
                            "DS_ELEICAO": pd.CategoricalDtype(),
                            "NR_TURNO": pd.CategoricalDtype(),
                            "SG_UF": pd.CategoricalDtype(),
                            "CD_MUNICIPIO": pd.CategoricalDtype(),
                            "NM_MUNICIPIO": pd.CategoricalDtype(),
                            "NR_ZONA": pd.CategoricalDtype(),
                            "NR_SECAO": pd.CategoricalDtype(),
                             
                            "CD_TIPO_SECAO_AGREGADA": pd.CategoricalDtype(),
                            "DS_TIPO_SECAO_AGREGADA": pd.CategoricalDtype(),
                            "NR_SECAO_PRINCIPAL": pd.CategoricalDtype(),
                             
                            "NR_LOCAL_VOTACAO": pd.CategoricalDtype(),
                            "NM_LOCAL_VOTACAO": pd.CategoricalDtype(),
                                                         
                            "CD_TIPO_LOCAL": pd.CategoricalDtype(),
                            "DS_TIPO_LOCAL": pd.CategoricalDtype(),
                            "DS_ENDERECO": pd.CategoricalDtype(),
                            "NM_BAIRRO": pd.CategoricalDtype(),
                            "NR_CEP": pd.CategoricalDtype(),
                            "NR_TELEFONE_LOCAL": pd.CategoricalDtype(),
                            "NR_LATITUDE": np.float64,
                            "NR_LONGITUDE": np.float64,
                            "CD_SITU_LOCAL_VOTACAO": pd.CategoricalDtype(),
                            "DS_SITU_LOCAL_VOTACAO": pd.CategoricalDtype(),
                            "CD_SITU_ZONA": pd.CategoricalDtype(),
                            "DS_SITU_ZONA": pd.CategoricalDtype(),
                            "CD_SITU_SECAO": pd.CategoricalDtype(),
                            "DS_SITU_SECAO": pd.CategoricalDtype(),
                            "CD_SITU_LOCALIDADE": pd.CategoricalDtype(),
                            "DS_SITU_LOCALIDADE": pd.CategoricalDtype(),
                            "CD_SITU_SECAO_ACESSIBILIDADE": pd.CategoricalDtype(),
                            "DS_SITU_SECAO_ACESSIBILIDADE": pd.CategoricalDtype(),
                            "QT_ELEITOR_SECAO": pd.Int64Dtype(),
                            "QT_ELEITOR_ELEICAO_FEDERAL": pd.Int64Dtype(),
                            "QT_ELEITOR_ELEICAO_ESTADUAL": pd.Int64Dtype(),
                            "QT_ELEITOR_ELEICAO_MUNICIPAL": pd.Int64Dtype()
                        }
                    )
    return df  

In [7]:
# Sources: 
# TSE original dataset
# + Base dos Dados (https://basedosdados.org/dataset/br-tse-eleicoes?bdm_table=local_secao)
# + Geocode Google Maps Api
def read_voting_places_geocode():
    # CSV at https://storage.googleapis.com/capyvara_public/tse/geocoded_voting_places_001.csv
    return pd.read_parquet("https://storage.googleapis.com/capyvara_public/tse/geocoded_voting_places_001.parquet")

In [8]:
def read_tse_cities():
    def expand_cities(data):
        for abr in data["abr"]:
            state = abr["cd"]
            for mu in abr["mu"]:
                city = mu["cd"].lstrip("0")
                city_ibge = mu["cdi"].lstrip("0") if len(mu["cdi"]) > 0 else None
                name = mu["nm"]
                is_capital = mu["c"] == "S"
                zones = mu["z"]
                yield (state, city, city_ibge, name, is_capital, [z.lstrip("0") for z in zones])

    url = "https://resultados.tse.jus.br/oficial/ele2022/545/config/mun-e000545-cm.json"
    data = list(expand_cities(read_json_url(url)))

    df = pd.DataFrame(data,
                        columns=["SG_UF", "CD_MUNICIPIO",
                            "CD_MUNICIPIO_IBGE", "NM_MUNICIPIO", "MUNICIPIO_CAPITAL", "MUNICIPIO_ZONAS"])

    df["SG_UF"] = df["SG_UF"].astype(pd.CategoricalDtype())
    df["CD_MUNICIPIO"] = df["CD_MUNICIPIO"].astype(pd.CategoricalDtype())
    df["CD_MUNICIPIO_IBGE"] = df["CD_MUNICIPIO_IBGE"].astype(pd.CategoricalDtype())
    df["NM_MUNICIPIO"] = df["NM_MUNICIPIO"].astype(pd.CategoricalDtype())
    df["MUNICIPIO_CAPITAL"] = df["MUNICIPIO_CAPITAL"].astype(bool)
    return df.set_index(["SG_UF", "CD_MUNICIPIO"]).sort_index()

## Misc

In [9]:
def concatenate(dfs):
    dfs = list(dfs)
    
    """Concatenate while preserving categorical columns.
    NB: We change the categories in-place for the input dataframes"""
    for col in set.intersection(
        *[
            set(df.select_dtypes(include='category').columns)
            for df in dfs
        ]
    ):
        uc = union_categoricals([df[col] for df in dfs])
        for df in dfs:
            df[col] = pd.Categorical(df[col].values, categories=uc.categories)
    return pd.concat(dfs)

def multiindex_pivot(df, columns=None, values=None):
    #https://github.com/pandas-dev/pandas/issues/23955
    names = list(df.index.names)
    df = df.reset_index()
    list_index = df[names].values
    tuples_index = [tuple(i) for i in list_index] # hashable
    df = df.assign(tuples_index=tuples_index)
    df = df.pivot(index="tuples_index", columns=columns, values=values)
    tuples_index = df.index  # reduced
    index = pd.MultiIndex.from_tuples(tuples_index, names=names)
    df.index = index
    return df

# Prepare voting machine bulletins dataframes

Cities helpers

In [10]:
df_cities = read_tse_cities()

Load all states files and concat 

In [11]:
def make_bweb_all():
    df = concatenate(tqdm((read_bweb(2, state) for state in BR_VOTING_STATES), total=len(BR_VOTING_STATES)))
    df = df.set_index(["NR_TURNO", "SG_UF", "NR_ZONA", "NR_SECAO"]).sort_index()
    return df

df_bweb_all = make_bweb_all()

  0%|          | 0/28 [00:00<?, ?it/s]

Split pivoted dataframe containing just voting data

In [12]:
def make_president_votes():
    df = df_bweb_all[df_bweb_all["DS_TIPO_URNA"] == "APURADA"].loc["2"][["CD_CARGO_PERGUNTA", 
                                "DS_CARGO_PERGUNTA", "CD_TIPO_VOTAVEL", "DS_TIPO_VOTAVEL", 
                                "NR_VOTAVEL", "NM_VOTAVEL", "QT_VOTOS"]]

    df = df[(df["CD_CARGO_PERGUNTA"] == "1")]
    df = df.pipe(multiindex_pivot, columns=["NM_VOTAVEL"], values="QT_VOTOS").fillna(0)
    df.rename(columns={
            "JAIR BOLSONARO": "QT_VOTOS_JAIR_BOLSONARO",
            "LULA": "QT_VOTOS_LULA",
            "Branco": "QT_VOTOS_BRANCO",
            "Nulo": "QT_VOTOS_NULO",
            }, inplace=True)
    df = df[["QT_VOTOS_LULA", "QT_VOTOS_JAIR_BOLSONARO", "QT_VOTOS_BRANCO", "QT_VOTOS_NULO"]]
    df.columns.name = ""
    return df

df_president_votes = make_president_votes()

Split dataframe containing just section-wise data, join with pivoted presidential votes column data

Now we have one section per line with the voting info 


In [13]:
def make_sections():
    df = df_bweb_all[df_bweb_all["DS_TIPO_URNA"] == "APURADA"].loc["2"][["CD_MUNICIPIO", "NM_MUNICIPIO", "NR_LOCAL_VOTACAO", 
                    "DT_BU_RECEBIDO", "QT_APTOS", "QT_COMPARECIMENTO", "QT_ABSTENCOES", "CD_TIPO_URNA", "DS_TIPO_URNA", 
                    "NR_URNA_EFETIVADA", "CD_FLASHCARD_URNA_EFETIVADA", "DS_AGREGADAS", "DT_ABERTURA", 
                    "DT_ENCERRAMENTO", "QT_ELEITORES_BIOMETRIA_NH", 
                    "DT_EMISSAO_BU"]]
                    
    df = df[~df.index.duplicated(keep='first')]
    df = df.join(df_president_votes)
    return df

df_sections = make_sections()

In [14]:
# Ensure it matches TSE
assert(len(df_sections.groupby(level=["SG_UF", "NR_ZONA"], observed=True).size()) == 2637)
assert(df_sections["QT_VOTOS_LULA"].sum() == 60345999)
assert(df_sections["QT_VOTOS_JAIR_BOLSONARO"].sum() == 58206354)

# Prepare voting places info

In [15]:
def make_voting_places_all():
    df = read_voting_places()x
    print(df.head)
    df = df.set_index(["NR_TURNO", "SG_UF", "NR_ZONA", "NR_SECAO"]).sort_index()
    return df

df_voting_places_all = make_voting_places_all()

SyntaxError: invalid syntax (2890601965.py, line 2)

Group voting places so we have one voting place per line, merge aggregated sections counts

In [None]:
def make_voting_places():
    df = df_voting_places_all.loc["2"].reset_index().set_index(["CD_MUNICIPIO", "NR_ZONA", "NR_LOCAL_VOTACAO"])

    df_group = df[["QT_ELEITOR_SECAO", 
                "QT_ELEITOR_ELEICAO_FEDERAL", 
                "QT_ELEITOR_ELEICAO_ESTADUAL", 
                "QT_ELEITOR_ELEICAO_MUNICIPAL"]].groupby(df.index.names, observed=True).agg(["sum", "size"])

    num_sections = df_group[("QT_ELEITOR_SECAO", "size")]
    num_voters = df_group.xs('sum', axis=1, level=1)

    df["QT_SECOES"] = num_sections

    # Fix issue: https://github.com/pandas-dev/pandas/issues/4094
    df.update(num_voters)
    df["QT_ELEITOR_SECAO"] = df["QT_ELEITOR_SECAO"].astype(pd.Int64Dtype())
    df["QT_ELEITOR_ELEICAO_FEDERAL"] = df["QT_ELEITOR_ELEICAO_FEDERAL"].astype(pd.Int64Dtype())
    df["QT_ELEITOR_ELEICAO_ESTADUAL"] = df["QT_ELEITOR_ELEICAO_ESTADUAL"].astype(pd.Int64Dtype())
    df["QT_ELEITOR_ELEICAO_MUNICIPAL"] = df["QT_ELEITOR_ELEICAO_MUNICIPAL"].astype(pd.Int64Dtype())

    df_geo = read_voting_places_geocode()
    df.update(df_geo)

    
    df = df[df["CD_TIPO_SECAO_AGREGADA"] == "1"]
    df = df[~df.index.duplicated(keep='first')]
    df = df.sort_index()

    df = df.rename(columns={
            "QT_ELEITOR_SECAO": "QT_ELEITOR",
            "CD_SITU_SECAO_ACESSIBILIDADE": "CD_SITU_ACESSIBILIDADE",
            "DS_SITU_SECAO_ACESSIBILIDADE": "DS_SITU_ACESSIBILIDADE",
        })

    df = df[["SG_UF", "NM_MUNICIPIO", "NM_LOCAL_VOTACAO", "CD_TIPO_LOCAL", 
                "DS_TIPO_LOCAL", "DS_ENDERECO", "NM_BAIRRO", "NR_CEP", "NR_TELEFONE_LOCAL", 
                "NR_LATITUDE", "NR_LONGITUDE", "CD_SITU_LOCAL_VOTACAO", "DS_SITU_LOCAL_VOTACAO", 
                "CD_SITU_LOCALIDADE", "DS_SITU_LOCALIDADE", "CD_SITU_ACESSIBILIDADE", 
                "DS_SITU_ACESSIBILIDADE", "QT_SECOES",
                "QT_ELEITOR", "QT_ELEITOR_ELEICAO_FEDERAL", 
                "QT_ELEITOR_ELEICAO_ESTADUAL", "QT_ELEITOR_ELEICAO_MUNICIPAL"]]

    return df

df_voting_places = make_voting_places()

Some mismatches in voting place id between datasets, update sections with newer from location dataset


In [None]:
def patch_sections_voting_place():
    df = df_voting_places_all[df_voting_places_all["CD_TIPO_SECAO_AGREGADA"] == "1"].loc["2"].loc[:,"NR_LOCAL_VOTACAO"]
    df_sections.update(df)

patch_sections_voting_place()

# Plots

In [None]:
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.figure_factory as ff

pd.options.plotting.backend = "plotly"
px.defaults.width = 1280
px.defaults.height = 720

FLIP_PLOTLY_COLORWAY = px.colors.qualitative.Plotly.copy()
FLIP_PLOTLY_COLORWAY[0], FLIP_PLOTLY_COLORWAY[1] = FLIP_PLOTLY_COLORWAY[1], FLIP_PLOTLY_COLORWAY[0]

def export_fig_html(fig, filename, *args, **kwargs):
    # TODO: # <script src="https://cdn.plot.ly/plotly-locale-pt-br-latest.js"></script>
    # https://github.com/plotly/plotly.js/tree/master/dist#to-include-localization

    fig_copy = go.Figure(fig)
    fig_copy.layout.width = None
    fig_copy.layout.height = None
    #fig_copy.write_html(os.path.join(OUTPUT_DIR, filename), include_plotlyjs="cdn", config={"locale": "pt-BR", "scrollZoom": "True"}, *args, **kwargs)
    fig_copy.write_html(os.path.join(OUTPUT_DIR, filename), include_plotlyjs="cdn", *args, **kwargs)

## All voting places, size per quantify of merged sections

In [None]:
def voting_places_plot():
    fig = px.scatter_mapbox(df_voting_places, 
                            title="Locais de votação 2022",
                            zoom=3, center={"lat": -11.44, "lon": -52.01},
                            lat="NR_LATITUDE", lon="NR_LONGITUDE", 
                            color="QT_SECOES",
                            range_color=[1,60],
                            color_continuous_scale=px.colors.sequential.Turbo,
                            size=df_voting_places["QT_ELEITOR_ELEICAO_FEDERAL"].astype("int"),
                            size_max=20,
                            hover_name="NM_LOCAL_VOTACAO", 
                            hover_data=["SG_UF", "NM_MUNICIPIO", "DS_ENDERECO", "NM_BAIRRO"],
                            labels={
                                "size": "Eleitores", 
                                "QT_SECOES": "Seções",
                                "NR_LATITUDE": "Latitude",
                                "NR_LONGITUDE": "Longitude",
                                "SG_UF": "UF",
                                "NM_MUNICIPIO": "Município",
                                "NM_BAIRRO": "Bairro",
                                "DS_ENDERECO": "Endereço",
                                })
    #fig.update_layout(mapbox_style="open-street-map")
    fig.update_layout(mapbox_style="carto-positron")
    fig.update_traces(marker=dict(sizemin=2))
    return fig

fig = voting_places_plot()
#export_fig_html(fig, "locais_votacao_2022.html")
fig.show()

## All voting places results

In [None]:
def voting_places_results_plot():
    df = df_sections.groupby(df_voting_places.index.names, observed=True).sum()
    df = df.join(df_voting_places)

    df["QT_VOTOS_CONCORRENTES"] = (df["QT_VOTOS_LULA"] + df["QT_VOTOS_JAIR_BOLSONARO"]).astype(int)
    df["LULA_PE_VOTOS"] = (df["QT_VOTOS_LULA"] / df["QT_VOTOS_CONCORRENTES"]) * 100
    df["JAIR_BOLSONARO_PE_VOTOS"] = (df["QT_VOTOS_JAIR_BOLSONARO"] / df["QT_VOTOS_CONCORRENTES"]) * 100
    df["DIF_PE_VOTOS"] = df["LULA_PE_VOTOS"] - df["JAIR_BOLSONARO_PE_VOTOS"]
    df["ABSTENCOES_PE"] = (df["QT_ABSTENCOES"] / df["QT_APTOS"]) * 100
    df["QT_VOTOS_INVALIDOS"] = df["QT_VOTOS_BRANCO"] + df["QT_VOTOS_NULO"]

    fig = px.scatter_mapbox(df, 
                            title="Resultado 2o turno por local de votação 2022 (Lula/Jair Bolsonaro)",
                            zoom=3, center={"lat": -11.44, "lon": -52.01},
                            lat="NR_LATITUDE", lon="NR_LONGITUDE", 
                            color="LULA_PE_VOTOS",
                            color_continuous_scale=px.colors.diverging.balance,
                            color_continuous_midpoint=50,
                            size="QT_VOTOS_CONCORRENTES",
                            size_max=20,
                            opacity=0.8,
                            hover_name="NM_LOCAL_VOTACAO", 
                            hover_data={
                                "LULA_PE_VOTOS": ":.1f",
                                "QT_VOTOS_LULA": True,
                                "QT_VOTOS_JAIR_BOLSONARO": True,
                                "QT_VOTOS_CONCORRENTES": True,
                                "QT_VOTOS_INVALIDOS": True,
                                "QT_APTOS": True,
                                "QT_ABSTENCOES": True,
                                "ABSTENCOES_PE": ":.1f",
                                "QT_SECOES": True,
                                "NR_LATITUDE": False,
                                "NR_LONGITUDE": False,
                                "SG_UF": True,
                                "NM_MUNICIPIO": True,
                                "NM_BAIRRO": True,
                                "DS_ENDERECO": True,
                            },
                            labels={
                                "DIF_PE_VOTOS": "Dif. %",
                                "LULA_PE_VOTOS": "Lula %",
                                "QT_VOTOS_LULA": "Votos Lula",
                                "QT_VOTOS_JAIR_BOLSONARO": "Votos Jair Bolsonaro",
                                "QT_VOTOS_CONCORRENTES": "Votos válidos", 
                                "QT_VOTOS_INVALIDOS": "Votos nulo/branco",
                                "QT_APTOS": "Eleitores aptos",
                                "QT_ABSTENCOES": "Eleitores ausentes",
                                "ABSTENCOES_PE": "Abstenção %",
                                "QT_SECOES": "Seções",
                                "SG_UF": "UF",
                                "NM_MUNICIPIO": "Município",
                                "NM_BAIRRO": "Bairro",
                                "DS_ENDERECO": "Endereço",
                                })
    #fig.update_layout(mapbox_style="open-street-map")
    fig.update_layout(mapbox_style="carto-positron")
    fig.update_traces(marker=dict(sizemin=2))
    return fig

fig = voting_places_results_plot()
#export_fig_html(fig, "locais_votacao_resultados_2022.html")
fig.show()