In [217]:
from pathlib import Path

import pandas as pd

In [218]:
# Show every float with 4 decimals and "_" as the thousands separator in DataFrame output.
pd.set_option("display.float_format", "{:_.4f}".format)

In [219]:
# Both folders live under ./data, resolved against the current working directory.
# Path(".").parent is still ".", so resolving "." directly yields the same location.
_DATA_ROOT: Path = Path(".").resolve() / "data"
DADOS: Path = _DATA_ROOT / "raw"  # CSVs read to build `populacao`
AMOSTRAS: Path = _DATA_ROOT / "sample"  # CSVs written by / read back from the sampling step

# Carga dos dados

In [220]:
# Maps each spelling found in the vote files to one canonical display name.
# Keys are title-cased because the lookup runs on names after `.str.title()`.
names_map = {
    # Blank and null votes
    "Brancos": "Branco",
    "Voto Branco": "Branco",
    "Nulos": "Nulo",
    "Voto Nulo": "Nulo",
    # Candidates that appear under more than one spelling
    "Jose Maria Eymael": "Eymael",
    "Constituinte Eymael": "Eymael",
    "Luiz Felipe Chaves D Avila": "Felipe D'Avila",
    "Felipe D´Avila": "Felipe D'Avila",
    "Soraia Thronicke": "Soraya Thronicke",
    "Soraya Vieira Thronicke": "Soraya Thronicke",
    # Long name -> short name
    "Luiz Inácio Lula Da Silva": "Lula",
    "Jair Messias Bolsonaro": "Jair Bolsonaro",
    "Ciro Ferreira Gomes": "Ciro Gomes",
    "Simone Nassar Tebet": "Simone Tebet",
    "Sofia Padua Manzano": "Sofia Manzano",
    "Kelmon Luis Da Silva Souza": "Padre Kelmon",
    "Leonardo Péricles Vieira Roque": "Léo Péricles",
    "Vera Lucia Pereira Da Silva Salgado": "Vera Lucia",
}

In [230]:
# Build `populacao`: one row per vote, stacked from every CSV in DADOS.
# Each file name must start with "<cargo>_<turno>_"; both parts become columns.
dfs = []

# Sorted so the row order of `populacao` does not depend on the order in which
# the filesystem happens to list the files.
arquivos_populacao = sorted(DADOS.glob("*.csv"))
if not arquivos_populacao:
    # pd.concat([]) would fail with a much less helpful message.
    raise FileNotFoundError(f"No CSV files found in {DADOS}")

for file in arquivos_populacao:
    cargo, turno, *_ = file.stem.split("_")
    df = (
        pd.read_csv(file, dtype="category")
        .iloc[:, -2:]  # only the last two columns are used
        .rename(columns=str.casefold)
        # The column that is not "estado" holds the voted name.
        .rename(columns=lambda name: "nome" if name != "estado" else name)
        .assign(
            # Title-case first, then collapse known spelling variants;
            # names without an entry in names_map are kept as they are.
            nome=lambda df_: (
                df_["nome"].str.title().map(lambda s: names_map.get(s, s))
            ),
            cargo=cargo,
            turno=int(turno),
        )
    )
    dfs.append(df)

# Re-cast after concat: the low-cardinality columns are stored as categories.
populacao = pd.concat(dfs).astype({
    "estado": "category", "cargo": "category"
})

# Geração das amostras

In [None]:
# One row per (estado, turno, cargo) stratum.
# `N` feeds the stratum weights and `n` is the number of rows drawn for the stratum.
_CHAVES_ESTRATO = ["estado", "turno", "cargo"]

_TAMANHOS = [
    # Governador / Senador
    ("AC", 1, "Governador", 455_438, 2_389),
    ("AC", 1, "Senador", 455_438, 2_389),
    ("RO", 1, "Governador", 925_763, 2_395),
    ("RO", 1, "Senador", 925_763, 2_395),
    # NOTE(review): this N differs from RO round 1 (925_763) by exactly 200 —
    # confirm it is not a typo.
    ("RO", 2, "Governador", 925_563, 2_395),
    # Presidente, round 1
    ("AC", 1, "Presidente", 455_903, 110),
    ("RO", 1, "Presidente", 926_827, 224),
    ("AM", 1, "Presidente", 2_113_771, 512),
    ("RR", 1, "Presidente", 305_404, 74),
    ("AP", 1, "Presidente", 442_842, 107),
    ("PA", 1, "Presidente", 4_789_311, 1_159),
    ("TO", 1, "Presidente", 891_449, 216),
    # Presidente, round 2
    ("AC", 2, "Presidente", 420_760, 104),
    ("RO", 2, "Presidente", 926_517, 230),
    ("AM", 2, "Presidente", 2_067_875, 513),
    ("RR", 2, "Presidente", 286_269, 71),
    ("AP", 2, "Presidente", 400_683, 99),
    ("PA", 2, "Presidente", 4_701_740, 1_167),
    ("TO", 2, "Presidente", 871_238, 216),
]

sample_sizes = pd.DataFrame(
    _TAMANHOS, columns=[*_CHAVES_ESTRATO, "N", "n"]
).set_index(_CHAVES_ESTRATO)
sample_sizes

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,N,n
estado,turno,cargo,Unnamed: 3_level_1,Unnamed: 4_level_1
AC,1,Governador,455438,2389
AC,1,Senador,455438,2389
RO,1,Governador,925763,2395
RO,1,Senador,925763,2395
RO,2,Governador,925563,2395
AC,1,Presidente,455903,110
RO,1,Presidente,926827,224
AM,1,Presidente,2113771,512
RR,1,Presidente,305404,74
AP,1,Presidente,442842,107


In [None]:
# Draw one simple random sample (without replacement) per stratum listed in
# `sample_sizes` and write it to AMOSTRAS as "<cargo>_<turno>_turno_<uf>.csv".
random_state = 789

# to_csv does not create missing folders; on a fresh checkout the output
# directory may not exist yet.
AMOSTRAS.mkdir(parents=True, exist_ok=True)

for (uf, turno, cargo), tamanho in sample_sizes.iterrows():
    # Explicit key lookup + int(): iterrows yields a Series, and attribute access
    # (`tamanho.n`) silently breaks if the label ever collides with a Series attribute.
    n_amostra = int(tamanho["n"])
    sample = (
        populacao.query(
            f"estado == {uf!r} and turno == {turno} and cargo == {cargo!r}"
        )
        # Fixed row order before drawing, so the same seed picks the same positions.
        .sort_values(by="nome")
        .sample(n_amostra, replace=False, random_state=random_state)
    )
    # The index is written as the first CSV column (to_csv default).
    sample[["nome", "estado"]].to_csv(
        AMOSTRAS / f"{cargo}_{turno}_turno_{uf}.csv",
    )
    print(uf, turno, cargo, sample.shape[0])

AC 1 Governador 2389
AC 1 Senador 2389
RO 1 Governador 2395
RO 1 Senador 2395
RO 2 Governador 2395
AC 1 Presidente 110
RO 1 Presidente 224
AM 1 Presidente 512
RR 1 Presidente 74
AP 1 Presidente 107
PA 1 Presidente 1159
TO 1 Presidente 216
AC 2 Presidente 104
RO 2 Presidente 230
AM 2 Presidente 513
RR 2 Presidente 71
AP 2 Presidente 99
PA 2 Presidente 1167
TO 2 Presidente 216


In [None]:
# Build `amostra`: one row per sampled vote, stacked from every CSV in AMOSTRAS.
# Each file name must start with "<cargo>_<turno>_"; both parts become columns.
# NOTE: every CSV in the folder is loaded, including files left by earlier runs.
dfs = []

# Sorted so the row order of `amostra` does not depend on the order in which
# the filesystem happens to list the files.
arquivos_amostra = sorted(AMOSTRAS.glob("*.csv"))
if not arquivos_amostra:
    # pd.concat([]) would fail with a much less helpful message.
    raise FileNotFoundError(f"No CSV files found in {AMOSTRAS}")

for file in arquivos_amostra:
    cargo, turno, *_ = file.stem.split("_")
    df = (
        pd.read_csv(file, dtype="category")
        .iloc[:, -2:]  # keeps only the last two columns, dropping any leading ones
        .rename(columns=str.casefold)
        # The column that is not "estado" holds the voted name.
        .rename(columns=lambda name: "nome" if name != "estado" else name)
        .assign(
            # Title-case first, then collapse known spelling variants;
            # names without an entry in names_map are kept as they are.
            nome=lambda df_: (
                df_["nome"].str.title().map(lambda s: names_map.get(s, s))
            ),
            cargo=cargo,
            turno=int(turno),
        )
    )
    dfs.append(df)

# Re-cast after concat: the low-cardinality columns are stored as categories.
amostra = (
    pd.concat(dfs)
    .astype({
        "estado": "category", "cargo": "category"
    })
)

# Proporções

## Amostrais

In [176]:
# Sample share of each name within every (cargo, estado, turno) group,
# for all offices except Presidente.
(
    amostra
    .query("cargo != 'Presidente'")
    # "cargo" and "estado" are categorical: state `observed` explicitly instead of
    # relying on the deprecated default, and group only combinations present in the data.
    .groupby(["cargo", "estado", "turno"], observed=True)
    ["nome"].apply(lambda s: s.value_counts(normalize=True))
    .to_frame("proporcao")
)

  .groupby(["cargo", "estado", "turno"])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,proporcao
cargo,estado,turno,Unnamed: 3_level_1,Unnamed: 4_level_1
Governador,AC,1,Gladson Cameli,0.5303
Governador,AC,1,Jorge Viana,0.2265
Governador,AC,1,Mara Rocha,0.1059
Governador,AC,1,Petecão,0.0611
Governador,AC,1,Nulo,0.0469
Governador,AC,1,Branco,0.0205
Governador,AC,1,Marcio Bittar,0.0067
Governador,AC,1,Professor Nilson,0.0013
Governador,AC,1,David Hall,0.0008
Governador,RO,1,Coronel Marcos Rocha,0.3587


In [202]:
# Sample share of each name within every Presidente stratum (estado x turno).
# Result: index (cargo, estado, turno), columns "proporcao" and "nome".
proporcao_estrato = (
    amostra
    .query("cargo == 'Presidente'")
    # "cargo" and "estado" are categorical: state `observed` explicitly instead of
    # relying on the deprecated default, and group only combinations present in the data.
    .groupby(["cargo", "estado", "turno"], observed=True)
    ["nome"]
    .apply(lambda s: s.value_counts(normalize=True))
    .to_frame("proporcao")
    # value_counts adds the name as the innermost index level; move it to a
    # column so the index is left as (cargo, estado, turno).
    .assign(
        nome=lambda df_: df_.index.get_level_values(-1)
    )
    .droplevel(-1)
)
proporcao_estrato

  .groupby(["cargo", "estado", "turno"])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,proporcao,nome
cargo,estado,turno,Unnamed: 3_level_1,Unnamed: 4_level_1
Presidente,AC,1,0.6818,Jair Bolsonaro
Presidente,AC,1,0.2545,Lula
Presidente,AC,1,0.0273,Ciro Gomes
Presidente,AC,1,0.0182,Branco
Presidente,AC,1,0.0091,Simone Tebet
Presidente,...,...,...,...
Presidente,TO,1,0.0139,Branco
Presidente,TO,2,0.5231,Lula
Presidente,TO,2,0.4306,Jair Bolsonaro
Presidente,TO,2,0.0370,Nulo


In [211]:
# Stratum weights for Presidente: Wh = N_h / sum of N over the strata of the same turno.
# Result: one "Wh" column indexed by (cargo, estado, turno).
_N_presidente = sample_sizes.query("cargo == 'Presidente'")["N"]

peso_estrato = (
    _N_presidente
    # transform keeps the original index, so there is no helper group level to
    # drop afterwards (the shape of groupby.apply output varies across pandas versions).
    .div(_N_presidente.groupby(level="turno").transform("sum"))
    .to_frame("Wh")
    .reorder_levels(["cargo", "estado", "turno"])
)

# Invariant: the weights of each turno add up to 1.
assert (
    peso_estrato.groupby(level="turno")["Wh"].sum().sub(1).abs().lt(1e-9).all()
), "stratum weights must sum to 1 within each turno"

In [216]:
# Stratified estimate per name and turno: sum over strata of proporcao * Wh.
# Names absent from a turno show up as NaN after unstacking.
(
    proporcao_estrato
    .join(peso_estrato)  # aligned on (cargo, estado, turno)
    # Vectorised product followed by a grouped sum; this avoids
    # DataFrameGroupBy.apply, which warns when it operates on grouping columns.
    .assign(parcela=lambda df_: df_["proporcao"] * df_["Wh"])
    .groupby(["nome", "turno"])["parcela"]
    .sum()
    .unstack("turno")
)

  .apply(lambda df_: (df_["proporcao"] * df_["Wh"]).sum())


turno,1,2
nome,Unnamed: 1_level_1,Unnamed: 2_level_1
Branco,0.01,0.0112
Ciro Gomes,0.0241,
Felipe D'Avila,0.0025,
Jair Bolsonaro,0.4497,0.4805
Lula,0.4512,0.4862
Nulo,0.0166,0.0221
Padre Kelmon,0.0012,
Simone Tebet,0.0412,
Soraya Thronicke,0.0033,


## Populacionais

In [231]:
# Population share of each name within every (cargo, estado, turno) group,
# for all offices except Presidente.
(
    populacao
    .query("cargo != 'Presidente'")
    # "cargo" and "estado" are categorical: state `observed` explicitly instead of
    # relying on the deprecated default, and group only combinations present in the data.
    .groupby(["cargo", "estado", "turno"], observed=True)
    ["nome"].apply(lambda s: s.value_counts(normalize=True))
    .to_frame("proporcao")
)

  .groupby(["cargo", "estado", "turno"])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,proporcao
cargo,estado,turno,Unnamed: 3_level_1,Unnamed: 4_level_1
Governador,AC,1,Gladson Cameli,0.5316
Governador,AC,1,Jorge Viana,0.2267
Governador,AC,1,Mara Rocha,0.1036
Governador,AC,1,Petecão,0.0601
Governador,AC,1,Nulo,0.0463
...,...,...,...,...
Senador,TO,1,Pastor Claudemir Lopes,0.0235
Senador,TO,1,Andrea Schimdt,0.0066
Senador,TO,1,Lúcia Viana,0.0019
Senador,TO,1,Marcelo Claudio,0.0008


In [232]:
# Population share of each name within every Presidente stratum (estado x turno).
# Result: index (cargo, estado, turno), columns "proporcao" and "nome".
proporcao_estrato_pop = (
    populacao
    .query("cargo == 'Presidente'")
    # "cargo" and "estado" are categorical: state `observed` explicitly instead of
    # relying on the deprecated default, and group only combinations present in the data.
    .groupby(["cargo", "estado", "turno"], observed=True)
    ["nome"]
    .apply(lambda s: s.value_counts(normalize=True))
    .to_frame("proporcao")
    # value_counts adds the name as the innermost index level; move it to a
    # column so the index is left as (cargo, estado, turno).
    .assign(
        nome=lambda df_: df_.index.get_level_values(-1)
    )
    .droplevel(-1)
)
proporcao_estrato_pop

  .groupby(["cargo", "estado", "turno"])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,proporcao,nome
cargo,estado,turno,Unnamed: 3_level_1,Unnamed: 4_level_1
Presidente,AC,1,0.6045,Jair Bolsonaro
Presidente,AC,1,0.2830,Lula
Presidente,AC,1,0.0441,Simone Tebet
Presidente,AC,1,0.0270,Ciro Gomes
Presidente,AC,1,0.0226,Branco
Presidente,...,...,...,...
Presidente,TO,1,0.0001,Léo Péricles
Presidente,TO,2,0.4997,Lula
Presidente,TO,2,0.4724,Jair Bolsonaro
Presidente,TO,2,0.0200,Nulo


In [233]:
# Stratum weights for Presidente: Wh = N_h / sum of N over the strata of the same turno.
# Result: one "Wh" column indexed by (cargo, estado, turno).
peso_estrato_pop = (
    sample_sizes
    .query("cargo == 'Presidente'")
    ["N"]
    # transform keeps the original index, so there is no helper group level to
    # drop afterwards (the shape of groupby.apply output varies across pandas versions).
    .pipe(lambda N: N.div(N.groupby(level="turno").transform("sum")))
    .to_frame("Wh")
    .reorder_levels(["cargo", "estado", "turno"])
)

# Invariant: the weights of each turno add up to 1.
assert (
    peso_estrato_pop.groupby(level="turno")["Wh"].sum().sub(1).abs().lt(1e-9).all()
), "stratum weights must sum to 1 within each turno"

In [234]:
# Weighted population proportion per name and turno: sum over strata of proporcao * Wh.
# Names absent from a turno show up as NaN after unstacking.
(
    proporcao_estrato_pop
    .join(peso_estrato_pop)  # aligned on (cargo, estado, turno)
    # Vectorised product followed by a grouped sum; this avoids
    # DataFrameGroupBy.apply, which warns when it operates on grouping columns.
    .assign(parcela=lambda df_: df_["proporcao"] * df_["Wh"])
    .groupby(["nome", "turno"])["parcela"]
    .sum()
    .unstack("turno")
)

  .apply(lambda df_: (df_["proporcao"] * df_["Wh"]).sum())


turno,1,2
nome,Unnamed: 1_level_1,Unnamed: 2_level_1
Branco,0.0097,0.0094
Ciro Gomes,0.0234,
Eymael,0.0001,
Felipe D'Avila,0.0018,
Jair Bolsonaro,0.4429,0.4944
Lula,0.4591,0.4747
Léo Péricles,0.0002,
Nulo,0.0154,0.0215
Padre Kelmon,0.0006,
Simone Tebet,0.0411,
