In [1]:
import polars as pl
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Read the ELSI state-level export: the first 6 lines of the file are skipped
# and at most 51 data rows are read.
# NOTE(review): 51 is presumably the 50 states plus DC, and the skipped lines
# presumably an export preamble — confirm against the file.
spending_wide = pl.read_csv(
    "./data/nces/geo/ELSI/state/total_spend_ELSI_csv_export_6385901129652770938927.csv",
    skip_rows=6,
    n_rows=51,
)

# The school-year label only carries the last two digits of the ending year
# ("1999-00"). Rebuild the full ending year from the starting year's century and
# roll over to the next century when the result would precede the starting year;
# otherwise "1999-00" becomes 1900 instead of 2000 and later joins on
# SCHOOL_YEAR_END silently lose that school year.
school_year_start = pl.col("SCHOOL_YEAR_START")
end_in_start_century = (
    (school_year_start - school_year_start % 100)
    + pl.col("SCHOOL_YEAR").str.extract(r"\-(\d{2})$").str.to_integer()
)
school_year_end = (
    pl.when(end_in_start_century < school_year_start)
    .then(end_in_start_century + 100)
    .otherwise(end_in_start_century)
)

df = (
    # Wide -> long: every column after the first becomes a (VARIABLE, VALUE) row.
    spending_wide.unpivot(
        on=spending_wide.columns[1:],
        index="State Name",
        variable_name="VARIABLE",
        value_name="VALUE",
    )
    # Split each header into the trailing school-year label and the measure name
    # (header with the " [State] YYYY-YY" / " [State Finance] YYYY-YY" suffix removed).
    .with_columns(
        SCHOOL_YEAR=pl.col("VARIABLE").str.extract(r"(\d{4}\-\d{2})$"),
        VARIABLE=pl.col("VARIABLE").str.replace(r" \[(State( Finance)?)\] (\d{4}\-\d{2})$", ""),
    )
    # Long -> tidy: one row per (state, school year), one column per measure.
    .pivot(
        on="VARIABLE",
        index=["State Name", "SCHOOL_YEAR"],
        values="VALUE",
        aggregate_function="first",
    )
    .rename({"State Name": "State"})
    .with_columns(
        pl.col("SCHOOL_YEAR").str.extract(r"^(\d{4})").str.to_integer().alias("SCHOOL_YEAR_START"),
    )
    .with_columns(school_year_end.alias("SCHOOL_YEAR_END"))
    .sort("State", "SCHOOL_YEAR")
)
df

State,SCHOOL_YEAR,Total Current Expenditures for Public El-Sec (TE5),SCHOOL_YEAR_START,SCHOOL_YEAR_END
str,str,i64,i64,i64
"""ALABAMA""","""1986-87""",1775996940,1986,1987
"""ALABAMA""","""1987-88""",1873390358,1987,1988
"""ALABAMA""","""1988-89""",2188020276,1988,1989
"""ALABAMA""","""1989-90""",2275232609,1989,1990
"""ALABAMA""","""1990-91""",2475216097,1990,1991
…,…,…,…,…
"""WYOMING""","""2016-17""",1555015898,2016,2017
"""WYOMING""","""2017-18""",1520759257,2017,2018
"""WYOMING""","""2018-19""",1530465306,2018,2019
"""WYOMING""","""2019-20""",1576787463,2019,2020


In [3]:
# Read the BLS series report; the column headers are taken from row 11 of the sheet.
inflation_raw = pl.read_excel(
    "./data/bls/inflation/SeriesReport-20240811222640_b99bd2.xlsx",
    read_options={"header_row": 11},
)

# "Annual" of the following row.
# NOTE(review): this is only "next year's rate" if the sheet's rows are in
# ascending Year order — confirm against the file.
next_year_rate = pl.col("Annual").shift(-1)

# "Annual" is treated as a percentage, hence the division by 100.
next_year_growth = 1 + pl.col("Annual (Next Year)") / 100

inflation = (
    inflation_raw
    .with_columns(next_year_rate.alias("Annual (Next Year)"))
    .filter(pl.col("Year") < 2023)
    # Accumulate from the most recent year backwards so each row holds the
    # product of the growth factors of all later years.
    .sort("Year", descending=True)
    .with_columns(
        next_year_growth.cum_prod().alias("Current to 2023 Dollar Conversion Rate")
    )
    .sort("Year")
)
inflation

Year,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Annual,HALF1,HALF2,Annual (Next Year),Current to 2023 Dollar Conversion Rate
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1986,4.4,4.2,4.1,4.2,4.0,4.0,4.1,4.0,4.1,4.0,3.8,3.8,4.0,4.1,4.0,4.1,2.715313
1987,3.8,3.8,4.0,4.2,4.2,4.1,4.0,4.2,4.3,4.3,4.4,4.2,4.1,4.0,4.2,4.4,2.60837
1988,4.3,4.3,4.4,4.3,4.3,4.5,4.5,4.4,4.4,4.5,4.4,4.7,4.4,4.4,4.5,4.5,2.498438
1989,4.6,4.8,4.7,4.6,4.6,4.5,4.6,4.4,4.3,4.3,4.4,4.4,4.5,4.6,4.3,5.0,2.39085
1990,4.4,4.6,4.9,4.8,4.8,4.9,5.0,5.5,5.5,5.3,5.3,5.2,5.0,4.8,5.3,4.9,2.277
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2018,1.8,1.8,2.1,2.1,2.2,2.3,2.4,2.2,2.2,2.1,2.2,2.2,2.1,2.1,2.2,2.2,1.198443
2019,2.2,2.1,2.0,2.1,2.0,2.1,2.2,2.4,2.4,2.3,2.3,2.3,2.2,2.1,2.3,1.7,1.172645
2020,2.3,2.4,2.1,1.4,1.2,1.2,1.6,1.7,1.7,1.6,1.6,1.6,1.7,1.8,1.6,3.6,1.153043
2021,1.4,1.3,1.6,3.0,3.8,4.5,4.3,4.0,4.0,4.6,4.9,5.5,3.6,2.6,4.5,6.2,1.112976


In [4]:
# Nominal spending multiplied by the conversion rate, expressed in billions.
spending_in_2023_billions = (
    pl.col("Total Current Expenditures for Public El-Sec (TE5)")
    * pl.col("Current to 2023 Dollar Conversion Rate")
    / 1_000_000_000
)

# Only the join key and the conversion rate are needed from the inflation table.
conversion_rates = inflation.select("Year", "Current to 2023 Dollar Conversion Rate")

# Each school year is matched to the conversion rate of its ending calendar year.
# The join uses polars' default strategy, so spending rows whose SCHOOL_YEAR_END
# has no matching Year in the inflation table do not appear in the result.
adjusted_spending = df.join(
    conversion_rates,
    left_on="SCHOOL_YEAR_END",
    right_on="Year",
).with_columns(
    spending_in_2023_billions.alias(
        "Total Current Expenditures for Public El-Sec (TE5) (Billions, 2023 Dollars)"
    )
)
adjusted_spending

State,SCHOOL_YEAR,Total Current Expenditures for Public El-Sec (TE5),SCHOOL_YEAR_START,SCHOOL_YEAR_END,Current to 2023 Dollar Conversion Rate,"Total Current Expenditures for Public El-Sec (TE5) (Billions, 2023 Dollars)"
str,str,i64,i64,i64,f64,f64
"""ALABAMA""","""1986-87""",1775996940,1986,1987,2.60837,4.632457
"""ALABAMA""","""1987-88""",1873390358,1987,1988,2.498438,4.680551
"""ALABAMA""","""1988-89""",2188020276,1988,1989,2.39085,5.231229
"""ALABAMA""","""1989-90""",2275232609,1989,1990,2.277,5.180705
"""ALABAMA""","""1990-91""",2475216097,1990,1991,2.170639,5.3728
…,…,…,…,…,…,…
"""WYOMING""","""2016-17""",1555015898,2016,2017,1.22361,1.902734
"""WYOMING""","""2017-18""",1520759257,2017,2018,1.198443,1.822543
"""WYOMING""","""2018-19""",1530465306,2018,2019,1.172645,1.794692
"""WYOMING""","""2019-20""",1576787463,2019,2020,1.153043,1.818104


In [5]:

def add_top_n_state_annotations(fig: go.Figure, df: pl.DataFrame, n: int = 4):
    """Label the ``n`` highest-spending states at the right edge of a stacked chart.

    The states are ranked by their inflation-adjusted spending in the latest
    ``SCHOOL_YEAR`` found in ``df``. Each label is placed at the vertical midpoint
    of the band the state would occupy if the states were stacked from the
    bottom up in descending order of that latest-year value.

    NOTE(review): the labels only line up with the bands if ``fig`` actually
    stacks its traces in that same order — verify against how ``fig`` is built.

    Args:
        fig: Figure to annotate; it is modified in place.
        df: Frame with ``State``, ``SCHOOL_YEAR`` and the
            "... (Billions, 2023 Dollars)" spending column.
        n: Number of states to label. Values of zero or less add no labels.

    Returns:
        The same ``fig`` object, for chaining.
    """
    # Guard explicitly: a slice such as ``states[-0:]`` is the whole list, so
    # without this check ``n=0`` would still produce a label.
    if n <= 0:
        return fig

    spending_col = "Total Current Expenditures for Public El-Sec (TE5) (Billions, 2023 Dollars)"

    # Ascending sort + tail(n) keeps the n largest latest-year values, smallest first.
    top_states = (
        df.filter(pl.col("SCHOOL_YEAR") == pl.col("SCHOOL_YEAR").max())
        .sort(spending_col)
        .tail(n)
        .select("State", pl.col(spending_col).alias("Total"))
        .to_dicts()
    )

    # Walk from the largest state upwards, tracking the running band edges
    # instead of re-summing the list on every iteration.
    band_bottom = 0.0
    for i, state in enumerate(reversed(top_states)):
        band_top = band_bottom + state["Total"]
        fig.add_annotation(
            x=0.9993,
            y=(band_bottom + band_top) / 2,
            xref="paper",
            text=state["State"],
            showarrow=False,
            font=dict(
                # Labels shrink with rank; clamp so a large n cannot request a
                # font size below 1.
                size=max(16 - i, 1),
                color="white",
            ),
            opacity=0.5,
        )
        band_bottom = band_top
    return fig


# Inflation-adjusted spending column plotted on the y axis.
SPENDING_COL = "Total Current Expenditures for Public El-Sec (TE5) (Billions, 2023 Dollars)"

# add_top_n_state_annotations positions its labels by cumulating the top states'
# spending in the latest school year, largest first. Order the stacked bands by
# that same latest-year value (rather than by each state's sum over all years)
# so the labelled bands and the label positions cannot disagree.
latest_year_spending = adjusted_spending.filter(
    pl.col("SCHOOL_YEAR") == pl.col("SCHOOL_YEAR").max()
).select(
    "State",
    pl.col(SPENDING_COL).alias("Latest Year Spending"),
)

# Within each year the rows go from the largest to the smallest state; states
# without a latest-year value sort last.
chart_data = adjusted_spending.join(
    latest_year_spending,
    on="State",
    how="left",
).sort(
    "SCHOOL_YEAR_END",
    "Latest Year Spending",
    descending=[False, True],
    nulls_last=True,
)

fig = px.area(
    chart_data,
    x="SCHOOL_YEAR_END",
    y=SPENDING_COL,
    color="State",
    title="US Spending on Public Elementary & Secondary Education<br><sup>Real spending on public education has doubled in the last 40 years.</sup>",
    labels={
        "SCHOOL_YEAR_END": "School Year (End)",
    },
    template="plotly_dark",
    color_discrete_sequence=px.colors.qualitative.Antique_r,
    width=1000,
    height=600,
)

fig.update_layout(
    showlegend=False,
    xaxis_title=None,
    yaxis_title="US Education Spending<br><sup>(Billions USD, 2023 Dollars)</sup>",
    yaxis_tickformat="$,.0f",
    plot_bgcolor="#171717",
    paper_bgcolor="#171717",
    xaxis_showgrid=False,
)

# Credit and data source, bottom-right below the plot area.
fig.add_annotation(
    x=1.07,
    y=-0.15,
    xref="paper",
    yref="paper",
    text="<br>".join(
        (
            "Chart by Dominic Tarro | 𝕏 @dominictarro",
            "Source: CCD National Public Education Financial Survey (1986-2021)"
        )
    ),
    align="right",
    showarrow=False,
    font=dict(
        size=10,
        color="grey"
    ),
    opacity=0.7
)

# Methodology note, bottom-left below the plot area.
fig.add_annotation(
    x=-0.08,
    y=-0.15,
    xref="paper",
    yref="paper",
    text="<br>".join(
        (
            "Note: 2023 dollar adjustment for school year expenditures was done with the ending year's inflation rate.",
        )
    ),
    align="left",
    showarrow=False,
    font=dict(
        size=10,
        color="grey"
    ),
    opacity=0.7
)

# Label the largest states' bands at the right edge of the chart.
fig = add_top_n_state_annotations(fig, adjusted_spending)

fig.show()
fig.write_image("charts/spending-public-ed.png")