In [10]:
import pandas as pd
import numpy as np
import os
from IPython.display import HTML, Markdown


DATA_PATH = "data"
# check if rendering within sections folder
if os.getcwd().split("/")[-1] == "notebooks":
    DATA_PATH = "../" + DATA_PATH

In [11]:
sample = pd.read_parquet(f"{DATA_PATH}/sample.parquet")

In [12]:
new_inc = {
    "less than $25,000": "< $25k",
    "$25,000 to $49,999": "$25k-$49k",
    "$50,000 to $74,999": "$50k-$74k",
    "$75,000 to $99,999": "$75k-$99k",
    "$100,000 to $149,999": "$100k-$149k",
    "$150,000 or more": "≥ $150k",
}
new_age = {
    "18 to 24 years old": "18-24",
    "25 to 34 years old": "25-34",
    "35 to 44 years old": "35-44",
    "45 to 54 years old": "45-54",
    "55 to 64 years old": "55-64",
    "65 years old or older": "65+",
}
sample["incgrp"] = sample["incgrp"].cat.rename_categories(new_inc)
sample["agegrp"] = sample["agegrp"].cat.rename_categories(new_age)

In [13]:
demo_dfs = []
for var in ["sex", "agegrp", "incgrp"]:
    demo_df = sample.copy()
    demo_df["pct"] = demo_df.groupby(["iter", var], observed=True)["sampleid"].transform("count") / 1_000 * 100
    demo_df = demo_df.groupby([var], observed=True)["pct"].agg(
        min="min",
        mean="mean",
        max="max"
    ).round(1)
    demo_dfs.append(demo_df)

In [14]:
demo_tbl = pd.concat(demo_dfs).reset_index()
demo_tbl["index"] = demo_tbl["index"].str.capitalize()
desc_col = (
    ["Sex"]
    + [""] * 1
    + ["Age"]
    + [""] * 5
    + ["Household income"]
    + ["(in 2023 USD)"]
    + [""] * 4
)
demo_tbl.index = desc_col
demo_tbl.columns = ["", "Min", "Mean", "Max"]
for col in demo_tbl.columns[1:]:
    demo_tbl[col] = demo_tbl[col].map("{:.1f}%".format)
demo_tbl

Unnamed: 0,Unnamed: 1,Min,Mean,Max
Sex,Female,46.1%,51.3%,57.8%
,Male,42.2%,48.8%,53.9%
Age,18-24,7.6%,10.8%,14.4%
,25-34,14.1%,17.9%,21.7%
,35-44,12.9%,17.1%,21.6%
,45-54,12.2%,16.1%,20.0%
,55-64,12.4%,16.8%,21.4%
,65+,18.3%,21.8%,25.6%
Household income,< $25k,7.9%,11.2%,14.2%
(in 2023 USD),$25k-$49k,12.6%,16.5%,20.3%


In [15]:
with open(f"../tables/_tbl-demo.qmd", "w") as f:
    f.write(
        demo_tbl.to_markdown(
            index=True,
            colalign=("left", "left", "right", "right", "right"),
            tablefmt="pipe"
        )
    )