In [1]:
import os
import torch
import pandas as pd

from math import exp
from tqdm import tqdm
from dotenv import load_dotenv
from typing import Union, Tuple, List, Dict
from dataclasses import dataclass
from src.definitions import Experiment, states, populations, RESULTS_PATH
from transformers import AutoTokenizer, AutoModelForCausalLM, AwqConfig, GenerationConfig

# Call dotenv's load_dotenv(); the return value is bound to `_` so the cell
# does not echo it as output.
_ = load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class ElectionMessage():
    """Prompt template for the election experiments.

    ``chat`` is either a plain prompt string or a list of chat messages
    (dicts with a ``"content"`` entry). The literal placeholder ``{persona}``
    marks where the place of residency is inserted.
    """

    def __init__(self, chat: Union[str, List]) -> None:
        self.chat = chat

    def format(self, persona) -> Union[str, List]:
        """Return the template with ``{persona}`` replaced by ``persona``.

        For a chat list a copy of every message dict is returned, so the
        stored template is never modified. Messages without the placeholder
        are passed through unformatted.
        """
        if isinstance(self.chat, str):
            return self.chat.format(persona=persona)

        elif isinstance(self.chat, List):
            chat = [dict(message) for message in self.chat]
            for message in chat:
                if "{persona}" in message["content"]:
                    message["content"] = message["content"].format(persona=persona)
            return chat

    def remove_residency(self) -> Union[str, List]:
        """Return the template without the clause that mentions the residency.

        The text is treated as a sequence of ``", "``-separated clauses and
        every clause containing ``{persona}`` is dropped. Text without the
        placeholder is returned unchanged, so chat messages that never
        mention the persona (or prompts with additional commas) no longer
        raise.

        Raises:
            ValueError: if dropping the residency clause would leave nothing.
        """
        def _rm(text: str) -> str:
            if "{persona}" not in text:
                return text
            kept = [clause for clause in text.split(", ") if "{persona}" not in clause]
            if not kept:
                raise ValueError(f"Cannot remove residency, nothing would remain: {text!r}")
            return ", ".join(kept)

        if isinstance(self.chat, str):
            return _rm(text=self.chat)
        elif isinstance(self.chat, List):
            # Copy each message so the stored template stays intact.
            chat = [dict(message) for message in self.chat]
            for message in chat:
                message["content"] = _rm(text=message["content"])
            return chat

    def __repr__(self):
        if isinstance(self.chat, str):
            return self.chat
        elif isinstance(self.chat, List):
            # Only the first two messages are shown, labelled by position.
            r = (f"User: {self.chat[0]['content']}",
                 f"Assistant: {self.chat[1]['content']}")

            return "\n".join(r)


# Example two-turn chat template; "{persona}" is the placeholder that
# ElectionMessage.format fills in.
chat = [
    {
        "role": "user",
        "content": "As a U.S. citizen, resident of {persona}, who will you vote for in the 2024 U.S. presidential election?",
    },
    {
        "role": "assistant",
        "content": "As a U.S. citizen, resident of {persona}, in the 2024 U.S. presidential election I will vote for the candidate of the",
    },
]

In [3]:
def tokenize(tokenizer:AutoTokenizer, message: Union[str, List]) -> torch.Tensor:
    """Turn a prompt into PyTorch token ids.

    A chat (list of messages) goes through the tokenizer's chat template
    with ``continue_final_message=True``. A plain string is tokenized
    directly, leaving the choice of special tokens to the tokenizer.
    Any other input falls through and yields ``None``.
    """
    if isinstance(message, List):
        return tokenizer.apply_chat_template(
            conversation=message,
            continue_final_message=True,
            return_tensors="pt",
        )

    if isinstance(message, str):
        encoded = tokenizer(message, return_tensors="pt")
        return encoded.input_ids

def continuation_loss(
    model:AutoModelForCausalLM,
    tokenizer:AutoTokenizer,
    context: Union[str, List],
    cont:str
    ) -> float:
    """Summed negative log-likelihood of ``cont`` given ``context``.

    Args:
        model: causal language model called as ``model(input_ids)``; its
            output must expose ``.logits``.
        tokenizer: tokenizer matching ``model``.
        context: prompt string or chat message list (see ``tokenize``).
        cont: continuation text; it is encoded without special tokens and
            appended to the context ids.

    Returns:
        The sum of the per-token cross-entropy losses over the continuation
        tokens only, as a Python float (``0.0`` for an empty continuation).
    """
    context_encodings = tokenize(tokenizer=tokenizer, message=context)
    cont_encodings = tokenizer.encode(cont, add_special_tokens=False, return_tensors="pt")

    # Follow the model's own device instead of assuming CUDA: the notebook
    # loads the model on CPU when no GPU is available.
    input_ids = torch.cat((context_encodings, cont_encodings), dim=1).to(model.device)

    with torch.no_grad():
        outputs = model(input_ids)

    # The logits at position t predict token t+1, so the last position has no
    # target. cross_entropy expects the class (vocab) dimension second:
    # (batch, vocab, seq).
    logits = outputs.logits[:, :-1, :].permute(0, 2, 1)

    # Mask the context by its own length; slicing with `:-cont_len` would mask
    # nothing when the continuation is empty. -100 is the default ignore_index.
    labels = input_ids.clone()
    labels[:, :context_encodings.size(1)] = -100
    labels = labels[:, 1:]  # next-token prediction => shift targets left

    nll_losses = torch.nn.functional.cross_entropy(logits, labels, reduction="none")

    return nll_losses.sum().item()

In [94]:
@dataclass
class Experiment():
    """Static table of prompt settings, keyed by setting id.

    Each entry of ``settings`` holds the prompt (``"message"``: a string or a
    two-turn chat), the scored continuations (``"choices"``), the number of
    continuations per party (``"num_conts"``) and the result column labels
    (``"columns"``).

    Note: this definition rebinds the name ``Experiment`` that the notebook
    also imports from ``src.definitions``.
    """

    # Scratch names for the prompt texts shared by several settings; they are
    # deleted at the end of the class body and never become attributes.
    _QUESTION = "As a U.S. citizen, resident of {persona}, who will you vote for in the 2024 U.S. presidential election?"
    _STEM_1 = "As a U.S. citizen, resident of {persona}, in the 2024 U.S. presidential election I will vote for the"
    _STEM_2 = "As a U.S. citizen, resident of {persona}, in the 2024 U.S. presidential election I will vote for the candidate of the"
    _STEM_3 = "As a U.S. citizen, resident of {persona}, in the 2024 U.S. presidential election I will vote for"

    # In every choice list the Democratic options come first, followed by the
    # matching Republican options in the same order.
    choices_1 = [
        "Democratic party", "Democratic candidate", "Democratic nominee",
        "Republican party", "Republican candidate", "Republican nominee",
    ]

    choices_2 = [
        "Democrats", "Democratic party",
        "Republicans", "Republican party",
    ]

    choices_3 = [
        "Joe Biden", "Biden",
        "Donald Trump", "Trump",
    ]

    settings = {
        1: {
            "message": _STEM_1,
            "choices": choices_1,
            "num_conts": 3,
            "columns": ["* party", "* candidate", "* nominee"],
        },
        "1-chat": {
            "message": [
                {"role": "user", "content": _QUESTION},
                {"role": "assistant", "content": _STEM_1},
            ],
            "choices": choices_1,
            "num_conts": 3,
            "columns": ["* party", "* candidate", "* nominee"],
        },
        2: {
            "message": _STEM_2,
            "choices": choices_2,
            "num_conts": 2,
            "columns": ["*", "* party"],
        },
        "2-chat": {
            "message": [
                {"role": "user", "content": _QUESTION},
                {"role": "assistant", "content": _STEM_2},
            ],
            "choices": choices_2,
            "num_conts": 2,
            "columns": ["*", "* party"],
        },
        3: {
            "message": _STEM_3,
            "choices": choices_3,
            "num_conts": 2,
            "columns": ["fullname", "lastname"],
        },
        "3-chat": {
            "message": [
                {"role": "user", "content": _QUESTION},
                {"role": "assistant", "content": _STEM_3},
            ],
            "choices": choices_3,
            "num_conts": 2,
            "columns": ["fullname", "lastname"],
        },
    }

    del _QUESTION, _STEM_1, _STEM_2, _STEM_3

In [79]:
def get_nll_df(results: Dict, num_conts: int) -> pd.DataFrame:
    """Arrange raw scores into a frame with a party level on the columns.

    ``results`` maps each row label to a ``{choice: score}`` dict. The first
    ``num_conts`` choice columns are grouped under "Democratic", the
    remaining ones under "Republican".
    """
    scores = pd.DataFrame.from_dict(results, orient="index")
    blue_labels = scores.columns[:num_conts]
    red_labels = scores.columns[num_conts:]

    return pd.concat(
        objs=(scores[blue_labels], scores[red_labels]),
        keys=("Democratic", "Republican"),
        axis=1,
    )

def get_prob_df(nll_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Probabilities and normalized probabilities for every continuation"""
    
    prob_df = (-nll_df).map(lambda x: exp(x)) # exp(LogLikelihood)
    # democratic probability sum
    prob_df["Democratic", "D_sum"] = prob_df["Democratic"].sum(axis=1)
    # republican probability sum
    prob_df["Republican", "R_sum"] = prob_df["Republican"].sum(axis=1)
    prob_df = prob_df[["Democratic", "Republican"]]

    norm_prob_df = prob_df.copy()
    no_cols = int(len(prob_df.columns)/2)
    for i in range(no_cols):   
        probs = prob_df.iloc[:, [i, no_cols+i]]
        # P(D) / sum(P(D) + P(R))
        norm_prob_df.iloc[:, i] = norm_prob_df.iloc[:, i].div(probs.sum(axis=1))
        # P(R) / sum(P(D) + P(R))
        norm_prob_df.iloc[:, no_cols+i] = norm_prob_df.iloc[:, no_cols+i].div(probs.sum(axis=1))
        
    return prob_df, norm_prob_df

def get_differences(norm_prob_df: pd.DataFrame, columns: List) -> pd.DataFrame:
    """Democratic minus Republican normalized probability, column by column.

    The result keeps the row index of ``norm_prob_df`` and is labelled with
    ``columns`` plus a trailing "* sum" column.
    """
    labels = columns + ["* sum"]
    dem_values = norm_prob_df["Democratic"].values
    rep_values = norm_prob_df["Republican"].values

    return pd.DataFrame(
        data=dem_values - rep_values,
        index=norm_prob_df.index,
        columns=labels,
    )

def get_exp_differences(df: pd.DataFrame, columns: List, num_conts: int) -> pd.DataFrame:
    """Negated normalized difference of exponentiated column pairs.

    For the i-th label, column ``i`` ("blue") and column ``num_conts + i``
    ("red") of ``df`` are exponentiated and combined as
    ``-(blue - red) / (blue + red)``. A final "avg" column holds the row
    mean over all labels.
    """
    result = pd.DataFrame(index=df.index, columns=columns)
    for position, label in enumerate(columns):
        blue_exp = df.iloc[:, position].map(exp)
        red_exp = df.iloc[:, num_conts + position].map(exp)

        result[label] = -(blue_exp.sub(red_exp)).div(blue_exp.add(red_exp))

    result["avg"] = result.mean(axis=1)

    return result

def get_voting(voting_path: str) -> pd.DataFrame:
    """Load election results and convert them to two-party shares.

    Reads the "state", "red_pct" and "blue_pct" columns of the workbook at
    ``voting_path`` (state becomes the index), rescales each row so the two
    shares sum to 1, and adds "pct_diff" = blue share minus red share.
    """
    shares = pd.read_excel(voting_path, index_col=0, usecols=["state", "red_pct", "blue_pct"])
    row_totals = shares.sum(axis=1)
    shares = shares.div(row_totals, axis=0)
    shares["pct_diff"] = shares["blue_pct"] - shares["red_pct"]

    return shares


def get_agreement(voting: pd.DataFrame, diff: pd.DataFrame, columns: List) -> pd.DataFrame:
    elections_map = voting.pct_diff.apply(lambda x: 0 if x < 0 else 1)

    agreement = diff.drop("US").map(lambda x: 0 if x > 0 else 1)
    agreement = agreement.apply(lambda x: x==elections_map).map(lambda x: 0 if x else 1)

    # stats rows
    agreement.loc["avg"] = agreement.mean()

    return agreement

def get_ext_diff(diff: pd.DataFrame, agreement:pd.DataFrame, weights: Dict, columns:List) -> pd.DataFrame:
    """Extend the probability differences with summary rows.

    Args:
        diff: predicted Democratic-minus-Republican differences; its index
            must contain every entry of the module-level ``states`` list.
        agreement: agreement flags with an "avg" row (see ``get_agreement``).
        weights: mapping ``state -> weight`` used for the weighted average.
        columns: column labels of ``diff`` without the trailing "* sum".

    Returns:
        A copy of ``diff`` with these extra rows:
        - "avg": plain mean over the states.
        - "weighted avg": ``sum(w * x) / sum(w)`` over the states.
        - "dem/rep": number of states with a positive / non-positive difference.
        - " ": a caption row.
        - "  ": the average agreement per column.
    """
    ext_diff = diff.copy()
    state_diff = diff.loc[states]

    # Use the weights the caller passed in (they used to be silently replaced
    # by the global `populations`), restricted to the states being averaged.
    state_weights = pd.Series(weights).reindex(states)

    ext_diff.loc["avg"] = state_diff.mean(axis=0)
    # Weighted mean: sum the weighted values. Taking .mean() after dividing by
    # the weight sum shrank the result by the number of states.
    ext_diff.loc["weighted avg"] = state_diff.mul(state_weights, axis="index").sum() / state_weights.sum()

    # Count the states on each side directly; summing a frame that only keeps
    # its False entries always gave 0 for the Republican count.
    dem_majority = state_diff > 0
    dem_counts = dem_majority.sum()
    rep_counts = (~dem_majority).sum()
    ext_diff.loc["dem/rep"] = [f"{d}/{r}" for d, r in zip(dem_counts, rep_counts)]

    ext_diff.loc[" "] = ["Average agreement"] + [""] * len(columns)
    ext_diff.loc["  "] = agreement.loc["avg"]

    return ext_diff


def get_abs_pct_difference(
    voting: pd.DataFrame,
    diff: pd.DataFrame,
    agreement: pd.DataFrame,
    columns: List
    ) -> pd.DataFrame:
    """Absolute gap between the predicted difference and ``voting.pct_diff``.

    For entries where ``agreement`` is 1 the gap is ``|diff - pct_diff|``;
    where it is 0 the gap is ``|diff + pct_diff|``. The "US" row of ``diff``
    is excluded. Two rows are appended: a caption row labelled " " and the
    column means labelled "  ".

    NOTE(review): presumably both branches are meant to compare the
    magnitudes of the two differences regardless of which party leads;
    confirm this is the intended metric.
    """
    
    # Gap used where prediction and result agree.
    abs_dif_ag = diff.drop("US").apply(lambda x: x.sub(voting.pct_diff)).abs()
    # Gap used where they disagree (note the sign flip: add instead of sub).
    abs_dif_disag = diff.drop("US").apply(lambda x: x.add(voting.pct_diff)).abs()

    # Empty frame indexed by the rows of `voting`, filled in below.
    abs_pct_diff = pd.DataFrame(index=voting.index, columns=diff.columns)

    # Boolean-frame assignment; `agreement` may carry rows (such as "avg")
    # that are not in `voting.index` -- this relies on pandas aligning the
    # mask and values to `abs_pct_diff`. TODO confirm on the pandas version in use.
    abs_pct_diff[agreement == 1] = abs_dif_ag[agreement == 1]
    abs_pct_diff[agreement == 0] = abs_dif_disag[agreement == 0]

    # stats rows
    # The mean is taken before the caption row is added so it only covers data rows.
    mean = abs_pct_diff.mean()
    # The caption row needs len(columns) + 1 cells, i.e. one per column of `diff`.
    abs_pct_diff.loc[" "] = ["Average absolute difference"] + [""] * len(columns)
    abs_pct_diff.loc["  "] = mean
    
    return abs_pct_diff


def get_relative_error(
    norm_prob_df: pd.DataFrame,
    voting: pd.DataFrame,
    columns: List
    ) -> pd.DataFrame:
    """Scaled prediction error for the party with the larger actual share.

    For rows with ``pct_diff > 0`` the error is
    ``|P(Democratic) - blue_pct| / red_pct``; for rows with ``pct_diff < 0``
    it is ``|P(Republican) - red_pct| / blue_pct``. Rows with ``pct_diff``
    equal to 0 are not included. The "US" row of ``norm_prob_df`` is dropped.
    A caption row (" ") and a row of column means ("  ") are appended.
    """
    state_probs = norm_prob_df.drop("US")

    def _scaled_error(party: str, own_share: pd.Series, other_share: pd.Series) -> pd.DataFrame:
        # |predicted - actual| of one party, divided by the other party's share
        errors = state_probs[party].apply(lambda col: col.sub(own_share).abs().div(other_share))
        errors.columns = columns + ["* sum"]
        return errors

    blue_err = _scaled_error("Democratic", voting.blue_pct, voting.red_pct)
    red_err = _scaled_error("Republican", voting.red_pct, voting.blue_pct)

    red_rows = red_err.loc[voting.pct_diff < 0]
    blue_rows = blue_err.loc[voting.pct_diff > 0]
    error_df = pd.concat(objs=(red_rows, blue_rows)).sort_index()

    # stats rows (means are computed before the caption row is added)
    column_means = error_df.mean()
    error_df.loc[" "] = ["Average relative error"] + [""] * len(columns)
    error_df.loc["  "] = column_means

    return error_df

def get_voting_stats(
    label:str,
    voting_path: str,
    norm_prob_df: pd.DataFrame,
    diff: pd.DataFrame,
    columns:List
    ) -> pd.DataFrame:
    """Assemble all comparison tables against one election into a single frame.

    Loads the results at ``voting_path`` and places four blocks side by side
    under a top-level column key each: the voting shares (keyed by
    ``label``), the extended predicted differences, the absolute
    differences and the relative errors.
    """
    voting = get_voting(voting_path=voting_path)
    agreement = get_agreement(voting=voting, diff=diff, columns=columns)

    stats = {label: voting}
    stats["Predicted Differences (agreement)"] = get_ext_diff(
        diff=diff, agreement=agreement, weights=populations, columns=columns
    )
    stats["Probability Absolute Difference"] = get_abs_pct_difference(
        voting=voting, diff=diff, agreement=agreement, columns=columns
    )
    stats["Relative Error (winning party)"] = get_relative_error(
        norm_prob_df=norm_prob_df, voting=voting, columns=columns
    )

    return pd.concat(objs=list(stats.values()), keys=list(stats.keys()), axis=1)

In [6]:
# Model selection: exactly one `model_id` assignment below is active; the
# commented lines are the alternatives. The AWQ quantization config is
# likewise commented out.
# model_id = "meta-llama/Llama-3.1-8B"
# model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# model_id = "meta-llama/Llama-3.2-3B"
# model_id = "meta-llama/Llama-3.2-3B-Instruct"

# model_id = "google/gemma-2-9b"
# model_id = "google/gemma-2-9b-it"

# model_id = "mistralai/Mistral-7B-v0.3"
# model_id = "mistralai/Mistral-7B-Instruct-v0.3"
# model_id = "mistralai/Ministral-8B-Instruct-2410"

# model_id = "microsoft/Phi-3.5-mini-instruct"

# model_id = "HuggingFaceH4/zephyr-7b-beta" # based on mistral

model_id = "tiiuae/falcon-7b"
# model_id = "tiiuae/falcon-7b-instruct"

# model_id = "tiiuae/falcon-11B"
# model_id = "tiiuae/falcon-mamba-7b"

# model_id = "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4"
# quantization_config = AwqConfig(
#     bits=4,
#     fuse_max_seq_len=512, # Note: Update this as per your use-case
#     do_fuse=True,
# )

# Tokenizer and model are loaded from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_id, use_safetensors=True)

# The model goes to CUDA when available, otherwise to CPU.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    use_safetensors=True,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
    # torch_dtype=torch.float16,
    # low_cpu_mem_usage=True,
    # quantization_config=quantization_config
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:06<00:00,  3.39s/it]


In [None]:
# Free-form generation check: sample a continuation of the chat prompt.
generation_config = GenerationConfig(
    max_new_tokens=128,
    temperature=0.1,
    top_k=100,
    top_p=0.95,
    do_sample=True,
    # eos_token_id=model.config.eos_token_id,
    # pad_token=model.config.pad_token_id,
)

user_pr = "As a US citizen, in the 2024 US presidential election I will vote for the candidates of the"
# assis_resp = "As a resident of Idaho, in the 2024 US presidential election I will vote for the"

# State inserted for the "{persona}" placeholder in this demo.
persona = "Idaho"

chat = [
    {"role": "user", "content": "As a U.S. citizen, resident of {persona}, who will you vote for in the 2024 U.S. presidential election?"},
    {"role": "assistant", "content": "As a U.S. citizen, resident of {persona}, in the 2024 US presidential election I will vote for"},
]
# Fill the placeholder before templating; otherwise the model is prompted
# with the literal text "{persona}".
chat = ElectionMessage(chat=chat).format(persona=persona)

# inputs = tokenizer(user_pr, return_tensors="pt").input_ids.to("cuda")

# Send the prompt to the device the model was loaded on (CUDA or CPU).
inputs = tokenizer.apply_chat_template(
    conversation=chat,
    continue_final_message=True,
    return_tensors="pt").to(model.device)

outputs = model.generate(inputs, generation_config=generation_config)

In [7]:
# Score every continuation of the selected setting for each state, plus a
# residency-free "US" prompt, with the currently loaded model.
setting_id = 3
settings = Experiment.settings[setting_id]
message = ElectionMessage(chat=settings["message"])
choices, num_conts, columns = (settings[key] for key in ("choices", "num_conts", "columns"))

results = {}
pbar = tqdm(states + ["US"])

for state in pbar:
    pbar.set_description(state)

    # "US" is the baseline prompt without any residency clause.
    context = message.remove_residency() if state == "US" else message.format(persona=state)

    results[state] = {}
    for choice in choices:
        cont = " " + choice
        results[state][choice] = continuation_loss(
            model=model,
            tokenizer=tokenizer,
            context=context,
            cont=cont,
        )


US: 100%|██████████| 52/52 [00:15<00:00,  3.43it/s]                 


Dataframes

In [None]:
# Negative log-likelihoods -> probabilities -> Democratic-minus-Republican differences
nll_df = get_nll_df(results=results, num_conts=num_conts)
prob_df, norm_prob_df = get_prob_df(nll_df=nll_df)
diff = get_differences(norm_prob_df=norm_prob_df, columns=columns)

# Blocks exported on the "Outputs" sheet. The raw NLLs and unnormalized
# probabilities (nll_df / prob_df with the party level dropped) are left out.
outputs = pd.concat(
    objs=[norm_prob_df.droplevel(0, axis=1), diff],
    keys=["Normalized Probabilities", "Probability Differences"],
    axis=1,
)

# Same predictions compared against the 2020 and the 2024 results.
stats_20, stats_24 = (
    get_voting_stats(
        label=label,
        voting_path=voting_path,
        diff=diff,
        norm_prob_df=norm_prob_df,
        columns=columns,
    )
    for label, voting_path in (
        ("2020", "../data/voting-2020.xlsx"),
        ("2024", "../data/voting-2024.xlsx"),
    )
)


In [46]:
# Write the three result tables to separate sheets of one workbook.
# NOTE(review): `f_name` is computed here but not used in this cell; the
# workbook is always written to "test.xlsx". Confirm this is intentional.
f_name = f"{setting_id}. {os.path.basename(model_id).lower()}"
with pd.ExcelWriter("test.xlsx") as writer:
    outputs.to_excel(writer, sheet_name="Outputs")  
    stats_20.to_excel(writer, sheet_name="2020")
    stats_24.to_excel(writer, sheet_name="2024")  

In [103]:
# Drop the notebook's reference to the loaded model and run the garbage
# collector. `del model` raises NameError when `model` is not defined, so
# this cell cannot be run twice in a row.
import gc
del model
gc.collect()

184

In [104]:
# `model` must exist before the batch loop, which starts each iteration
# with `del model`.
model = None
# Base checkpoints (not iterated by the batch loop in the next cell).
base_models = [
"meta-llama/Llama-3.1-8B",
"meta-llama/Llama-3.2-3B",
"google/gemma-2-9b",
"mistralai/Mistral-7B-v0.3",
"tiiuae/falcon-7b"
]

# Instruction-tuned checkpoints evaluated by the batch loop.
instruct_models = [
"meta-llama/Meta-Llama-3.1-8B-Instruct",
"meta-llama/Llama-3.2-3B-Instruct",
"google/gemma-2-9b-it",
"mistralai/Mistral-7B-Instruct-v0.3",
"tiiuae/falcon-7b-instruct"
]

# Select the chat variant of setting 3 and unpack its parts.
setting_id = "3-chat"
settings = Experiment.settings[setting_id]
message = ElectionMessage(chat=settings["message"])
choices = settings["choices"]
num_conts = settings["num_conts"]
columns = settings["columns"]

# Echo the selected setting as the cell output.
settings

{'message': [{'role': 'user',
   'content': 'As a U.S. citizen, resident of {persona}, who will you vote for in the 2024 U.S. presidential election?'},
  {'role': 'assistant',
   'content': 'As a U.S. citizen, resident of {persona}, in the 2024 U.S. presidential election I will vote for'}],
 'choices': ['Joe Biden', 'Biden', 'Donald Trump', 'Trump'],
 'num_conts': 2,
 'columns': ['fullname', 'lastname']}

In [105]:
# Run the full pipeline (scoring -> tables -> Excel export) for each
# instruction-tuned checkpoint, one workbook per model.
for model_id in instruct_models:
    # Drop the previous model reference before loading the next checkpoint.
    del model
    print(model_id)

    tokenizer = AutoTokenizer.from_pretrained(model_id, use_safetensors=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        use_safetensors=True,
        device_map="cuda" if torch.cuda.is_available() else "cpu",
    )

    # Score every continuation for each state and the residency-free "US" prompt.
    results = {}
    pbar = tqdm(states + ["US"])

    for state in pbar:
        pbar.set_description(state)
        context = message.remove_residency() if state == "US" else message.format(persona=state)

        results[state] = {}
        for choice in choices:
            cont = " " + choice
            results[state][choice] = continuation_loss(
                model=model,
                tokenizer=tokenizer,
                context=context,
                cont=cont,
            )

    nll_df = get_nll_df(results=results, num_conts=num_conts)
    prob_df, norm_prob_df = get_prob_df(nll_df=nll_df)
    diff = get_differences(norm_prob_df=norm_prob_df, columns=columns)

    # The raw NLLs and unnormalized probabilities are not exported.
    outputs = pd.concat(
        objs=[norm_prob_df.droplevel(0, axis=1), diff],
        keys=["Normalized Probabilities", "Probability Differences"],
        axis=1,
    )

    stats_20, stats_24 = (
        get_voting_stats(
            label=label,
            voting_path=voting_path,
            diff=diff,
            norm_prob_df=norm_prob_df,
            columns=columns,
        )
        for label, voting_path in (
            ("2020", "../data/voting-2020.xlsx"),
            ("2024", "../data/voting-2024.xlsx"),
        )
    )

    # Workbook name: "<setting id>. <lower-cased last path part of the model id>.xlsx"
    f_name = f"{setting_id}. {os.path.basename(model_id).lower()}"
    with pd.ExcelWriter(os.path.join("..", "results", f"{f_name}.xlsx")) as writer:
        outputs.to_excel(writer, sheet_name="Outputs")
        stats_20.to_excel(writer, sheet_name="2020")
        stats_24.to_excel(writer, sheet_name="2024")

meta-llama/Meta-Llama-3.1-8B-Instruct


Loading checkpoint shards: 100%|██████████| 4/4 [00:11<00:00,  2.95s/it]
US: 100%|██████████| 52/52 [00:59<00:00,  1.15s/it]                 


meta-llama/Llama-3.2-3B-Instruct


Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.89s/it]
US: 100%|██████████| 52/52 [00:42<00:00,  1.21it/s]                 


google/gemma-2-9b-it


Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00,  2.24s/it]
US: 100%|██████████| 52/52 [01:36<00:00,  1.86s/it]                 


mistralai/Mistral-7B-Instruct-v0.3


Loading checkpoint shards: 100%|██████████| 3/3 [00:08<00:00,  2.67s/it]
US: 100%|██████████| 52/52 [00:30<00:00,  1.68it/s]                 


tiiuae/falcon-7b-instruct


Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.71s/it]
US: 100%|██████████| 52/52 [00:41<00:00,  1.24it/s]                 


In [None]:
# Build one summary row per result workbook found in RESULTS_PATH.
# NOTE(review): this cell reads a top-level "Agreement" column from the
# "2020" sheet, but get_voting_stats as defined in this notebook writes the
# keys "<label>", "Predicted Differences (agreement)", "Probability Absolute
# Difference" and "Relative Error (winning party)". Presumably the workbooks
# were produced by a different version -- verify before running.
cols = ["Agreement", "Probability Absolute Difference", "Relative Error (winning party)"]

# Skip hidden files (names starting with ".").
file_names = [f for f in os.listdir(RESULTS_PATH) if not f.startswith(".")]
summary = {}
for f_name in file_names:
    # Each sheet has a two-row column header. `stats_2024` is loaded but not
    # used further in this loop.
    df = pd.read_excel(os.path.join(RESULTS_PATH, f_name), index_col=0, header=[0,1], sheet_name="Outputs")
    stats_2020 = pd.read_excel(os.path.join(RESULTS_PATH, f_name), index_col=0, header=[0,1], sheet_name="2020")
    stats_2024 = pd.read_excel(os.path.join(RESULTS_PATH, f_name), index_col=0, header=[0,1], sheet_name="2024")
    
    # True where the predicted difference is positive.
    pred_mask = (df["Probability Differences"].drop("US") > 0)
    agree_mask = stats_2020.drop("US")["Agreement"].astype("bool")
    
    # Number of True / False predictions per column, relabelled by party.
    all_counts = pred_mask.apply(lambda x: x.value_counts()).fillna(0).astype(int)
    all_counts.rename({True:"Democratic", False:"Republican"}, inplace=True)
    all_counts = all_counts.stack().astype(str)

    # Predictions for each party that are also flagged as agreeing.
    blue_tp_counts = (pred_mask & agree_mask).apply(lambda x: x.value_counts()).fillna(0).astype(int)
    red_tp_counts = (~pred_mask & agree_mask).apply(lambda x: x.value_counts()).fillna(0).astype(int)

    # Rewrite the counts as "<agreeing>/<predicted>" strings.
    if "Democratic" in all_counts.index:
        all_counts["Democratic"] = [f"{tp}/{k}" for k, tp in zip(all_counts["Democratic"], blue_tp_counts.loc[True])]
    if "Republican" in all_counts.index:
        all_counts["Republican"] = [f"{tp}/{k}" for k, tp in zip(all_counts["Republican"], red_tp_counts.loc[True])]
    
    
    # Strip the part before the first "." and the file extension. Only the
    # remaining name is used as key, so two workbooks that differ only in
    # the leading part overwrite each other in `summary`.
    _, base_name = f_name.split(".", 1)
    base_name, _ = base_name.rsplit(".", 1)
    summary[base_name.strip()] = pd.concat((all_counts, stats_2020.loc[" ", cols]))
    

# Stack the per-file series and pivot the inner index levels into columns.
summary = pd.concat(summary.values(), keys=summary.keys()).to_frame().unstack([1, 2]).sort_index().droplevel(0, axis=1)


In [None]:
# Display the summary table.
summary

In [None]:
# Step-by-step version of the summary computation for a single workbook,
# displaying the intermediate count tables.
# NOTE(review): like the summary cell, this expects an "Agreement" column
# on the "2020" sheet -- verify against the workbook actually on disk.
df = pd.read_excel(os.path.join(RESULTS_PATH, "3. mistral-7b-v0.3.xlsx"), index_col=0, header=[0,1], sheet_name="Outputs")
stats_2020 = pd.read_excel(os.path.join(RESULTS_PATH, "3. mistral-7b-v0.3.xlsx"), index_col=0, header=[0,1], sheet_name="2020")
stats_2024 = pd.read_excel(os.path.join(RESULTS_PATH, "3. mistral-7b-v0.3.xlsx"), index_col=0, header=[0,1], sheet_name="2024")

# True where the predicted difference is positive.
pred_mask = (df["Probability Differences"].drop("US") > 0)
agree_mask = stats_2020.drop("US")["Agreement"].astype("bool")

all_counts = pred_mask.apply(lambda x: x.value_counts()).fillna(0).astype(int)
display(all_counts)
all_counts.rename({True:"Democratic", False:"Republican"}, inplace=True)
all_counts = all_counts.stack().astype(str)

blue_tp_counts = (pred_mask & agree_mask).apply(lambda x: x.value_counts()).fillna(0).astype(int)
display(blue_tp_counts)
red_tp_counts = (~pred_mask & agree_mask).apply(lambda x: x.value_counts()).fillna(0).astype(int)
display(red_tp_counts)


# Rewrite the counts as "<agreeing>/<predicted>" strings.
if "Democratic" in all_counts.index:
    all_counts["Democratic"] = [f"{tp}/{k}" for k, tp in zip(all_counts["Democratic"], blue_tp_counts.loc[True])]
if "Republican" in all_counts.index:
    all_counts["Republican"] = [f"{tp}/{k}" for k, tp in zip(all_counts["Republican"], red_tp_counts.loc[True])]

In [None]:
# Show the per-party counts after they were rewritten as "<agreeing>/<predicted>".
display(all_counts)

In [None]:
# Put the party count columns first, followed by the three metrics.
# This rebinds `summary`, so the cell relies on the summary built earlier.
ord_cols = ["Democratic", "Republican", "Agreement", "Probability Absolute Difference", "Relative Error (winning party)"]
summary = summary[ord_cols]
summary

In [None]:
# Style the summary: `_bold_fn` is applied row-wise within each metric block
# (with max for "Agreement", min for the two error metrics), each block gets
# its own background colour and all text is set to black.
# NOTE(review): `_bold_fn` is not defined in the visible part of this
# notebook -- confirm where it comes from.
summary_2020_styled = (
    summary.style
    .apply(lambda x: _bold_fn(x, fn=max),
           subset="Agreement",
           axis=1
           )
    .apply(lambda x: _bold_fn(x, fn=min),
           subset="Probability Absolute Difference",
           axis=1
           )
    .apply(lambda x: _bold_fn(x, fn=min),
           subset="Relative Error (winning party)",
           axis=1
           )
    .map(lambda _: "background-color: #d5a6bd", subset="Agreement")
    .map(lambda _: "background-color: #ffe599", subset="Probability Absolute Difference")
    .map(lambda _: "background-color: #6fa8dc", subset="Relative Error (winning party)")
    .map(lambda _: "color: black")
    )

display(summary_2020_styled)

In [None]:
# Display the unstyled summary table.
summary