In [148]:
import pandas as pd
import numpy as np
from pathlib import Path

In [149]:
path = Path.cwd().parents[0] / "output/vote_result.xlsx"
df = pd.read_excel(path)

In [147]:
def get_dapil_data(df: pd.DataFrame, dapil_no: int) -> (dict, pd.DataFrame):
    # get full data of dapil
    data = df.loc[df["dapil_no"] == dapil_no]

    # get partai vote per dapil
    partai_vote = (
        data.loc[:, ["partai", "partai_vote"]]
        .drop_duplicates()
        .set_index("partai")
        .to_dict()
        .get("partai_vote")
    )

    # get calon vote per dapil
    calon_vote = (
        data.loc[:, ["partai", "nama", "vote"]]
        .sort_values(["partai", "vote"], ascending=False)
        .assign(rank=lambda df_: df_.groupby("partai").cumcount(ascending=True) + 1)
    )

    return partai_vote, calon_vote


def return_odd_number(start_index=0):
    number = start_index * 2 + 1
    return number


def get_element_count(input_list: list):
    output_list = []
    element_count = {}

    for item in input_list:
        if item not in element_count:
            element_count[item] = 1
        else:
            element_count[item] += 1
        output_list.append((item, element_count[item]))

    return output_list


def get_selected_partai(partai_vote: dict, num_selected: int, with_rank=False):
    partai_vote_copy = partai_vote.copy()
    selected_partai = []

    for round in list(range(1, num_selected + 1)):
        highest_voted = max(
            partai_vote_copy, key=lambda k: partai_vote_copy[k]
        )  # current highest voted partai
        for partai, vote in partai_vote_copy.items():
            times_selected = selected_partai.count(
                partai
            )  # how many times this partai been selected

            if partai == highest_voted:
                partai_vote_copy[partai] /= return_odd_number(
                    start_index=times_selected
                )  # divide by odd number
                selected_partai.append(partai)
    if with_rank:
        return get_element_count(selected_partai)
    return selected_partai


def get_selected_calon(
    calon_vote: pd.DataFrame, selected_partai: list, with_partai=False
):
    selected_calon = []
    for partai, rank in selected_partai:
        calon = calon_vote.loc[
            (calon_vote["partai"] == partai) & (calon_vote["rank"] == rank), "nama"
        ].values[0]
        selected_calon.append(calon)

    if with_partai:
        partai = [p for p, r in selected_partai]
        selected_calon = list(zip(partai, selected_calon))

    return selected_calon


partai_vote, calon_vote = get_dapil_data(df, 1) # specify dapil here
selected_partai = get_selected_partai(partai_vote, 12, with_rank=True) # specify num selected
selected_calon = get_selected_calon(calon_vote, selected_partai, with_partai=True)
selected_calon

[('PDIP', 'H. Prasetyo Edi Marsudi, S.H.'),
 ('PDIP', 'Pandapotan Sinaga, S.E., M.M.'),
 ('PKS', 'H. Dany Anwar'),
 ('PKS', 'H. Ismail, S.Pd.'),
 ('Gerindra', 'Iman Satria'),
 ('Gerindra', 'Dr. Dian Pratama, Sp.OG.'),
 ('PSI', 'Idris Ahmad, S.K.M.'),
 ('PSI', 'Johannes Martuah, S.T.'),
 ('PDIP', 'Wa Ode Herlina'),
 ('Demokrat', 'Desie Christhyana Sari'),
 ('Demokrat', 'Panji Purboyo'),
 ('PKS', 'Israyani, S.P.')]