https://nces.ed.gov/ccd/elsi/tableGenerator.aspx

https://www.usinflationcalculator.com/inflation/current-inflation-rates/
https://data.bls.gov/pdq/SurveyOutputServlet

In [1]:
from collections import namedtuple

import polars as pl
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.regression

In [2]:
df = pl.read_csv(
    "./data/nces/geo/ELSI/state/ELSI_csv_export_6385899184786235507585.csv",
    skip_rows=6,
    n_rows=51
).drop("State Name [State] 2018-19").with_columns(
    pl.col("State Name").str.to_titlecase()
)
df = df.unpivot(
    on=df.columns[1:],
    index="State Name",
    variable_name="VARIABLE",
    value_name="VALUE"
).with_columns(
    SCHOOL_YEAR=pl.col("VARIABLE").str.extract(r"(\d{4}\-\d{2})$"),
    VARIABLE=pl.col("VARIABLE").str.replace(r" \[(State( Finance)?)\] (\d{4}\-\d{2})$", ""),
    VALUE=pl.col("VALUE").replace(chr(8211), None).str.to_integer(),
).pivot(
    on="VARIABLE",
    index=["State Name", "SCHOOL_YEAR"],
    values="VALUE",
    aggregate_function="first"
).with_columns(
    pl.sum_horizontal("Kindergarten Students", "Prekindergarten Students", "Grades 1-8 Students", "Grades 9-12 Students").alias("Total Students"),
    pl.sum_horizontal("Grades 1-8 Students", "Grades 9-12 Students").alias("Grades 1-12 Students")
).sort("State Name", "SCHOOL_YEAR").with_columns(
    pl.col("SCHOOL_YEAR").str.extract(r"^(\d{4})").str.to_integer().alias("SCHOOL_YEAR_START"),
    pl.col("SCHOOL_YEAR").str.extract(r"\-(\d{2})$").str.to_integer().add(2000).alias("SCHOOL_YEAR_END")
)
df

State Name,SCHOOL_YEAR,Grades 1-8 Students,Grade 8 Students,Grade 7 Students,Grade 6 Students,Grade 5 Students,Grade 4 Students,Grade 3 Students,Grade 2 Students,Grade 1 Students,Total Expenditures (TE11+E4D+E7A1),Total Revenues (TR) per Pupil (MEMBR),Local Revenues (STR1+R2) per Pupil (MEMBR),State Revenues (R3) per Pupil (MEMBR),Federal Revenues (STR4) per Pupil (MEMBR),Total Expenditures (TE11+E4D+E7A1) per Pupil (MEMBR),Grades 9-12 Students,Prekindergarten Students,Kindergarten Students,Total Students,Grades 1-12 Students,SCHOOL_YEAR_START,SCHOOL_YEAR_END
str,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
"""Alabama""","""2011-12""",461122,57919,59050,59082,58415,56491,55991,56315,57859,7229298957,9534,3125,5284,1126,9709,217615,8282,57602,744621,678737,2011,2012
"""Alabama""","""2012-13""",458974,58151,59685,58240,56523,55859,55868,56361,58287,7438871954,9653,3225,5286,1142,9990,217203,9032,59428,744637,676177,2012,2013
"""Alabama""","""2013-14""",458327,58799,58455,56428,55699,55719,56003,56909,60315,7591336635,9913,3341,5448,1124,10173,218705,9415,59757,746204,677032,2013,2014
"""Alabama""","""2014-15""",454081,57471,56251,54859,55265,55479,56007,58370,60379,7616860366,9992,3321,5549,1122,10235,221068,11076,57939,744164,675149,2014,2015
"""Alabama""","""2015-16""",452764,55792,55172,54900,55340,55808,57963,58766,59023,7856051026,10200,3485,5578,1138,10562,222182,13230,55613,743789,674946,2015,2016
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Wyoming""","""2014-15""",58774,6927,6903,7181,7076,7438,7588,7835,7826,1929789830,20855,7697,11874,1284,20515,26732,564,7997,94067,85506,2014,2015
"""Wyoming""","""2015-16""",59453,6902,7191,7155,7451,7551,7839,7704,7660,2020050580,21569,7851,12414,1305,21327,26914,509,7841,94717,86367,2015,2016
"""Wyoming""","""2016-17""",58998,7135,7071,7352,7452,7642,7616,7422,7308,1913258414,20508,7128,12122,1258,20317,26924,664,7584,94170,85922,2016,2017
"""Wyoming""","""2017-18""",58757,7086,7373,7449,7604,7537,7411,7179,7118,1729104112,19320,7102,10983,1236,18344,27361,671,7469,94258,86118,2017,2018


In [3]:
inflation = pl.read_excel(
    "./data/bls/inflation/SeriesReport-20240811173715_e1eda9.xlsx",
    read_options=dict(header_row=11),
)
inflation = inflation.with_columns(
    pl.col("HALF1").shift(-1).alias("HALF1 (Next Year)")
).filter(pl.col("Year") < 2024).sort("Year", descending=True).with_columns(
    (1 + pl.col("HALF1 (Next Year)") / 100).cum_prod().alias("Current to 2024 Dollar Conversion Rate")
).sort("Year")
inflation

Year,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Annual,HALF1,HALF2,HALF1 (Next Year),Current to 2024 Dollar Conversion Rate
i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2010,1.6,1.3,1.1,0.9,0.9,0.9,0.9,0.9,0.8,0.6,0.8,0.8,1.0,1.1,0.8,1.3,1.436149
2011,1.0,1.1,1.2,1.3,1.5,1.6,1.8,2.0,2.0,2.1,2.2,2.2,1.7,1.3,2.0,2.2,1.417718
2012,2.3,2.2,2.3,2.3,2.3,2.2,2.1,1.9,2.0,2.0,1.9,1.9,2.1,2.2,2.0,1.8,1.3872
2013,1.9,2.0,1.9,1.7,1.7,1.6,1.7,1.8,1.7,1.7,1.7,1.7,1.8,1.8,1.7,1.8,1.362672
2014,1.6,1.6,1.7,1.8,2.0,1.9,1.9,1.7,1.7,1.8,1.7,1.6,1.7,1.8,1.7,1.7,1.338578
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2019,2.2,2.1,2.0,2.1,2.0,2.1,2.2,2.4,2.4,2.3,2.3,2.3,2.2,2.1,2.3,1.8,1.211212
2020,2.3,2.4,2.1,1.4,1.2,1.2,1.6,1.7,1.7,1.6,1.6,1.6,1.7,1.8,1.6,2.6,1.189795
2021,1.4,1.3,1.6,3.0,3.8,4.5,4.3,4.0,4.0,4.6,4.9,5.5,3.6,2.6,4.5,6.2,1.159645
2022,6.0,6.4,6.5,6.2,6.0,5.9,5.9,6.3,6.6,6.3,6.0,5.7,6.2,6.2,6.1,5.4,1.091944


In [4]:
student_investment = df.join(
    inflation.select("Year", "Current to 2024 Dollar Conversion Rate"),
    left_on="SCHOOL_YEAR_END",
    right_on="Year",
).with_columns(
    (
        pl.col("Total Expenditures (TE11+E4D+E7A1) per Pupil (MEMBR)")
        * pl.col("Current to 2024 Dollar Conversion Rate")
    ).alias("Total Expenditures (TE11+E4D+E7A1) per Pupil (MEMBR) (2024 Dollars)")
).with_columns(
    pl.col("Total Expenditures (TE11+E4D+E7A1) per Pupil (MEMBR) (2024 Dollars)")
    .rolling_sum(4)
    .over("State Name")
    .alias("Total Expenditures (TE11+E4D+E7A1) per Graduating Pupil (MEMBR) (4-Year Rolling Sum) (2024 Dollars)"),
).select(
    [
        "State Name",
        "SCHOOL_YEAR",
        "Total Expenditures (TE11+E4D+E7A1) per Graduating Pupil (MEMBR) (4-Year Rolling Sum) (2024 Dollars)"
    ]
).filter(
    pl.col("SCHOOL_YEAR").is_in(["2014-15", "2018-19"])
).pivot(
    on="SCHOOL_YEAR",
    index="State Name",
    values="Total Expenditures (TE11+E4D+E7A1) per Graduating Pupil (MEMBR) (4-Year Rolling Sum) (2024 Dollars)",
).with_columns(
    (pl.col("2018-19") - pl.col("2014-15")).alias("Investment Change")
).sort("Investment Change")
student_investment

State Name,2014-15,2018-19,Investment Change
str,f64,f64,f64
"""Alaska""",112089.426043,97380.968735,-14708.457307
"""Wyoming""",103168.939805,97872.292291,-5296.647514
"""Louisiana""",65353.876714,62797.163784,-2556.71293
"""West Virginia""",66193.735749,63698.943066,-2494.792684
"""Arkansas""",59785.216991,58313.792864,-1471.424127
…,…,…,…
"""Hawaii""",71789.843657,81138.560954,9348.717297
"""New York""",118465.234651,128174.444009,9709.209358
"""Oregon""",59834.132795,70567.546915,10733.41412
"""California""",60510.288472,72236.325703,11726.037231


In [5]:
math_performance = pl.concat(
    [
        pl.read_csv(
            "data/nces/geo/nations-report-card/2019/state_grade-8_math_SPCsv202408081556.csv",
            columns=["Jurisdiction", "MN", "AB", "AP"],
        ).with_columns(
            pl.lit(2015).alias("Graduating Year")
        ),
        pl.read_csv(
            "data/nces/geo/nations-report-card/2015/state_grade-8_math_SPCsv202408081407.csv",
            columns=["Jurisdiction", "MN", "AB", "AP"],
        ).with_columns(
            pl.lit(2019).alias("Graduating Year"),
            pl.col("AP").replace("#", None).str.to_integer()
        ),
    ]
).rename(
    {
        "Jurisdiction": "State Name",
        "MN": "Mean Grade 8 Math Score",
        "AB": "At or Above Basic (%)",
        "AP": "At or Above Proficient (%)",
    }
).pivot(
    on="Graduating Year",
    index="State Name",
    values=["Mean Grade 8 Math Score", "At or Above Basic (%)", "At or Above Proficient (%)"],
).with_columns(
    (pl.col("Mean Grade 8 Math Score_2019") - pl.col("Mean Grade 8 Math Score_2015")).alias("Mean Grade 8 Math Score Change"),
    (pl.col("At or Above Basic (%)_2019") - pl.col("At or Above Basic (%)_2015")).alias("At or Above Basic (%) Change"),
    (pl.col("At or Above Proficient (%)_2019") - pl.col("At or Above Proficient (%)_2015")).alias("At or Above Proficient (%) Change"),
).select(
    "State Name",
    "Mean Grade 8 Math Score Change",
    "At or Above Basic (%) Change",
    "At or Above Proficient (%) Change",
).sort("Mean Grade 8 Math Score Change")

math_performance

State Name,Mean Grade 8 Math Score Change,At or Above Basic (%) Change,At or Above Proficient (%) Change
str,i64,i64,i64
"""District of Columbia""",-6,-4,-4
"""Florida""",-4,-2,-5
"""Louisiana""",-4,-4,-5
"""North Carolina""",-3,-2,-4
"""Mississippi""",-3,-2,-2
…,…,…,…
"""Texas""",4,7,2
"""Hawaii""",4,5,2
"""Rhode Island""",5,8,3
"""Alaska""",6,8,3


In [6]:
Result = namedtuple("CorrelationResult", ["result", "r_squared", "p_value", "coef"])

def correlate(df: pl.DataFrame, x: str, y: str, show_summary: bool = True) -> Result:
    """Return the OLS Adjusted R-squared and p-value of the model."""
    result = statsmodels.regression.linear_model.OLS(
        endog=df[y].to_list(),
        exog=df[x].to_list(),
    ).fit()
    if show_summary:
        display(result.summary())
    return Result(
        result=result,
        r_squared=float(result.rsquared_adj),
        p_value=float(result.pvalues[0]),
        coef=float(result.params[0]),
    )

In [7]:
# correlate(df, "Investment Change", "Mean Grade 8 Math Score Change")

In [8]:
def generate_state_investment_and_math_performance_scatter_plot(column: str, yaxis_title: str, exclude_alaska: bool = True) -> go.Figure:
    _df = student_investment.join(
        math_performance,
        on="State Name"
    ).join(
        # Adding the number of students in the 8th grade cohort
        df.filter(pl.col("SCHOOL_YEAR").is_in(["2018-19"])).select("State Name", "Grade 8 Students"),
        on="State Name",
        suffix="_students",
    ).with_columns(
        # Only displaying state names whose values stand out
        # Of the top states in investment/score changes, choose the top ranks among them
        pl.when(
            pl.max_horizontal(
                (pl.col(column) - pl.col(column).mean()).abs().rank(method="max"),
                (pl.col("Investment Change") - pl.col("Investment Change").mean()).abs().rank(method="max")
            ).rank(method="max").is_between(35, 50)
        )
        .then(pl.col("State Name"))
        .otherwise(None)
        .alias("Display Name")
    )
    result = correlate(
        _df.filter(pl.col("State Name") != ("Alaska" if exclude_alaska else None)),
        "Investment Change",
        column,
    )
    return px.scatter(
        _df,
        x="Investment Change",
        y=column,
        size="Grade 8 Students",
        text="Display Name",
        title=(
            "Increasing Student Investment is a bad approach for raising Math Performance"
            "<br>"
            "<sup>"
            "Comparison of 2015 & 2019's 8<sup>th</sup> grade graduating classes across 50 U.S. States"
            "</sup>"
        ),
        labels={
            "2014-15": "Per Pupil Investment (2014-15)",
            column: yaxis_title,
        },
        hover_data=["State Name"],
        hover_name="State Name",
        template="plotly_dark",
        width=1280,
        height=720,
    ).update_layout(
        # coloraxis_cmid=0,
        xaxis_title_font_size=10,
        xaxis_title="Change in Student Investment<br><sup>(2024 Dollars)</sup>",
        xaxis_tickformat="$,.0f",
        coloraxis_colorbar_orientation="h",
        plot_bgcolor="#171717",
        paper_bgcolor="#171717",
        margin=dict(t=50, b=135),
    ).update_traces(
        dict(
            textposition="top center",
            textfont=dict(color="grey"),
            marker=dict(color="#e69138"),
        ),
    ).add_annotation(
        # Right, watermark and source
        x=1.06,
        y=-0.230,
        xref="paper",
        yref="paper",
        text="<br>".join(
            (
                "Chart by Dominic Tarro | 𝕏 @dominictarro",
                "Sources: NAEP Mathematics Assessments (2015, 2019)",
                "CCD National Public Education Financial Survey (2011-2019), Series TE11+E4D+E7A1",
            )
        ),
        align="right",
        showarrow=False,
        font=dict(
            size=10,
            color="grey"
        ),
        opacity=0.7
    ).add_annotation(
        x=-0.058,
        y=-0.230,
        xref="paper",
        yref="paper",
        text="<br>".join(
            (
                "Notes: The NAEP Mathematics scale scores between 0 to 500.",
                "\"Student Investment\" is the sum of real, per-pupil spending from 5th through 8th grade.",
                "Dollar adjustment was made using each school year's ending year.",
                "OLS Adj. R-squared: {:.03f}; P|t|>{:.03f}; {:.2f} {} improvement per additional $1,000 in Student Investment{}".format(
                    result.r_squared,
                    result.p_value,
                    result.coef * 1000,
                    "points" if "mean" in column.lower() else "% points",
                    " (excl. Alaska)" if exclude_alaska else ""
                ),
            )
        ),
        align="left",
        showarrow=False,
        font=dict(
            size=10,
            color="grey"
        ),
        opacity=0.7
    ).add_hline(
        y=math_performance["Mean Grade 8 Math Score Change"].mean(),
        annotation=dict(
            text=f"Mean ({math_performance['Mean Grade 8 Math Score Change'].mean():.2f} points)",
            showarrow=False,
            font=dict(color="#76a5af"),
            yshift=5,
        ),
        annotation_position="top left",
        line=dict(
            color="#76a5af",
            width=1,
            dash="dash"
        ),
    ).add_vline(
        x=student_investment["Investment Change"].mean(),
        annotation=dict(
            text=f"Mean (${student_investment['Investment Change'].mean():,.0f})",
            showarrow=False,
            font=dict(color="#93c47d"),
            yshift=5,
            xshift=5,
        ),
        annotation_position="bottom right",
        line=dict(
            color="#93c47d",
            width=1,
            dash="dash"
        ),
    )

fig_scatter_math_avg = generate_state_investment_and_math_performance_scatter_plot("Mean Grade 8 Math Score Change", "Change in Average Math Score")
fig_scatter_math_avg.write_image(
    "charts/spending-school-roi-scatter-math-mean.svg",
)
fig_scatter_math_avg.show()

0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.107
Model:,OLS,Adj. R-squared (uncentered):,0.088
Method:,Least Squares,F-statistic:,5.724
Date:,"Sat, 28 Sep 2024",Prob (F-statistic):,0.0207
Time:,20:09:27,Log-Likelihood:,-111.57
No. Observations:,49,AIC:,225.1
Df Residuals:,48,BIC:,227.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,0.0002,7.14e-05,2.393,0.021,2.73e-05,0.000

0,1,2,3
Omnibus:,1.229,Durbin-Watson:,2.166
Prob(Omnibus):,0.541,Jarque-Bera (JB):,1.059
Skew:,0.15,Prob(JB):,0.589
Kurtosis:,2.345,Cond. No.,1.0


In [9]:

def generate_state_investment_and_math_performance_bar_chart(
    column: str, legend_title: str, description: str, change_measure: str
) -> go.Figure:
    df = student_investment.join(
        math_performance,
        on="State Name"
    ).sort("Investment Change")
    result = correlate(df, "Investment Change", column)
    return px.bar(
        df,
        x="State Name",
        y="Investment Change",
        color=column,
        title=f"US Primary Education Return on Investment<br><sup><i>{description}</i></sup>",
        template="plotly_dark",
        color_continuous_scale=px.colors.sequential.RdBu,
        labels={
            "State Name": "State",
            "Investment Change": "4-Year Expenditure Change (2024 Dollars)",
            column: f"Change in NAEP {legend_title}",
        },
        width=1000,
        height=600,
    ).update_layout(
        xaxis=dict(showticklabels=False, title=None),
        yaxis_title="Per Pupil, 4-Year Expenditure Change Between 2015 and 2019<br>(2024 Dollars)",
        yaxis_title_font_size=10,
        coloraxis_colorbar_title=f"Change in <i>NAEP {legend_title}</i>",
        coloraxis_colorbar_orientation="h",
        plot_bgcolor="#171717",
        paper_bgcolor="#171717",
    ).update_coloraxes(
        cmid=0,
    ).add_annotation(
        x=1.07,
        y=-0.15,
        xref="paper",
        yref="paper",
        text="<br>".join(
            (
                "Chart by Dominic Tarro | 𝕏 @dominictarro",
                "Sources: NAEP Mathematics Assessments (2015, 2019)",
                "CCD National Public Education Financial Survey (2010-2019)"
            )
        ),
        align="right",
        showarrow=False,
        font=dict(
            size=10,
            color="grey"
        ),
        opacity=0.7
    ).add_annotation(
        x=-0.08,
        y=-0.15,
        xref="paper",
        yref="paper",
        text="<br>".join(
            (
                "Note: The NAEP Mathematics scale ranges from 0 to 500.",
                "<i>Per Pupil, 4-Year Expenditure</i> is an aggregate from 5th-8th grade. Dollar adjustment was made using <br>each school year's ending year.",
                f"OLS Adj. R-squared: {result.r_squared:.03f}; P|t|>{result.p_value:.03f}; {change_measure.title()} change per $1000 change in per student spending={result.coef * 1000:.2f}"
            )
        ),
        align="left",
        showarrow=False,
        font=dict(
            size=10,
            color="grey"
        ),
        opacity=0.7
    )

_fig1 = generate_state_investment_and_math_performance_bar_chart(
    "Mean Grade 8 Math Score Change",
    "Average Math Score",
    "States spending more per student from 5th through 8th grade had a negligible relationship with average 8th grade math performance.",
    "score",
)
_fig1.write_image("charts/spending-school-roi-math-mean.png")
_fig1.show()
_fig2 = generate_state_investment_and_math_performance_bar_chart(
    "At or Above Basic (%) Change",
    "At or Above Basic</i><br><sup>(% Points)</sup>",
    "States spending more per student from 5th through 8th grade had a small relationship with basic 8th grade math performance.",
    "score",
)
_fig2.write_image("charts/spending-school-roi-math-basic.png")
_fig2.show()
_fig3 = generate_state_investment_and_math_performance_bar_chart(
    "At or Above Proficient (%) Change",
    "At or Above Proficient</i><br><sup>(% Points)</sup>",
    "States spending more per student from 5th through 8th grade had a negligible relationship with 8th grade math profieciency.",
    "score",
)
_fig3.write_image("charts/spending-school-roi-math-proficient.png")
_fig3.show()

0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.023
Model:,OLS,Adj. R-squared (uncentered):,0.003
Method:,Least Squares,F-statistic:,1.151
Date:,"Sat, 28 Sep 2024",Prob (F-statistic):,0.289
Time:,20:09:28,Log-Likelihood:,-118.36
No. Observations:,50,AIC:,238.7
Df Residuals:,49,BIC:,240.6
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,7.669e-05,7.15e-05,1.073,0.289,-6.7e-05,0.000

0,1,2,3
Omnibus:,1.095,Durbin-Watson:,2.072
Prob(Omnibus):,0.578,Jarque-Bera (JB):,0.97
Skew:,0.329,Prob(JB):,0.616
Kurtosis:,2.82,Cond. No.,1.0


0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.146
Model:,OLS,Adj. R-squared (uncentered):,0.129
Method:,Least Squares,F-statistic:,8.4
Date:,"Sat, 28 Sep 2024",Prob (F-statistic):,0.0056
Time:,20:09:28,Log-Likelihood:,-133.25
No. Observations:,50,AIC:,268.5
Df Residuals:,49,BIC:,270.4
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,0.0003,9.63e-05,2.898,0.006,8.56e-05,0.000

0,1,2,3
Omnibus:,9.223,Durbin-Watson:,1.112
Prob(Omnibus):,0.01,Jarque-Bera (JB):,9.157
Skew:,0.771,Prob(JB):,0.0103
Kurtosis:,4.42,Cond. No.,1.0


0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.001
Model:,OLS,Adj. R-squared (uncentered):,-0.019
Method:,Least Squares,F-statistic:,0.04962
Date:,"Sat, 28 Sep 2024",Prob (F-statistic):,0.825
Time:,20:09:28,Log-Likelihood:,-124.54
No. Observations:,50,AIC:,251.1
Df Residuals:,49,BIC:,253.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,1.802e-05,8.09e-05,0.223,0.825,-0.000,0.000

0,1,2,3
Omnibus:,1.634,Durbin-Watson:,2.349
Prob(Omnibus):,0.442,Jarque-Bera (JB):,1.61
Skew:,0.378,Prob(JB):,0.447
Kurtosis:,2.553,Cond. No.,1.0


In [10]:
reading_performance = pl.concat(
    [
        pl.read_csv(
            "data/nces/geo/nations-report-card/2019/state_grade-8_reading_SPCsv202408081536.csv",
            columns=["Jurisdiction", "MN", "AB", "AP"],
        ).with_columns(
            pl.lit(2015).alias("Graduating Year")
        ),
        pl.read_csv(
            "data/nces/geo/nations-report-card/2015/state_grade-8_reading_SPCsv202408081449.csv",
            columns=["Jurisdiction", "MN", "AB", "AP"],
        ).with_columns(
            pl.lit(2019).alias("Graduating Year"),
        ),
    ]
).with_columns(
            pl.col("MN").replace(chr(8212), None).str.to_integer(),
            pl.col("AB").replace(chr(8212), None).str.to_integer(),
            pl.col("AP").replace(chr(8212), None).str.to_integer(),

).rename(
    {
        "Jurisdiction": "State Name",
        "MN": "Mean Grade 8 Reading Score",
        "AB": "At or Above Basic (%)",
        "AP": "At or Above Proficient (%)",
    }
).pivot(
    on="Graduating Year",
    index="State Name",
    values=["Mean Grade 8 Reading Score", "At or Above Basic (%)", "At or Above Proficient (%)"],
).with_columns(
    (pl.col("Mean Grade 8 Reading Score_2019") - pl.col("Mean Grade 8 Reading Score_2015")).alias("Mean Grade 8 Reading Score Change"),
    (pl.col("At or Above Basic (%)_2019") - pl.col("At or Above Basic (%)_2015")).alias("At or Above Basic (%) Change"),
    (pl.col("At or Above Proficient (%)_2019") - pl.col("At or Above Proficient (%)_2015")).alias("At or Above Proficient (%) Change"),
).select(
    "State Name",
    "Mean Grade 8 Reading Score Change",
    "At or Above Basic (%) Change",
    "At or Above Proficient (%) Change",
).sort("Mean Grade 8 Reading Score Change")

reading_performance

State Name,Mean Grade 8 Reading Score Change,At or Above Basic (%) Change,At or Above Proficient (%) Change
str,i64,i64,i64
"""Puerto Rico""",,,
"""Mississippi""",-4,-4,-5
"""DoDEA""",-3,-1,-5
"""North Carolina""",-2,0,-3
"""Louisiana""",-2,-2,-4
…,…,…,…
"""Minnesota""",6,7,6
"""Iowa""",6,8,3
"""Alabama""",6,7,2
"""New Hampshire""",7,7,7


In [11]:
def generate_state_investment_and_reading_performance_bar_chart(
    column: str, legend_title: str, description: str, change_measure: str
) -> go.Figure:
    df = student_investment.join(
        reading_performance,
        on="State Name"
    ).sort("Investment Change")
    result = correlate(df, "Investment Change", column)
    return px.bar(
        df,
        x="State Name",
        y="Investment Change",
        color=column,
        title=f"US Primary Education Return on Investment<br><sup><i>{description}</i></sup>",
        template="plotly_dark",
        color_continuous_scale=px.colors.sequential.RdBu,
        labels={
            "State Name": "State",
            "Investment Change": "4-Year Expenditure Change (2024 Dollars)",
            column: f"Change in NAEP {legend_title}",
        },
        width=1000,
        height=600,
    ).update_layout(
        xaxis=dict(showticklabels=False, title=None),
        yaxis_title="Per Pupil, 4-Year Expenditure Change Between 2015 and 2019<br>(2024 Dollars)",
        yaxis_title_font_size=10,
        coloraxis_colorbar_title=f"Change in <i>NAEP {legend_title}</i>",
        coloraxis_colorbar_orientation="h",
        plot_bgcolor="#171717",
        paper_bgcolor="#171717",
    ).update_coloraxes(
        cmid=0,
    ).add_annotation(
        x=1.07,
        y=-0.15,
        xref="paper",
        yref="paper",
        text="<br>".join(
            (
                "Chart by Dominic Tarro | 𝕏 @dominictarro",
                "Sources: NAEP Reading Assessments (2015, 2019)",
                "CCD National Public Education Financial Survey (2010-2019)"
            )
        ),
        align="right",
        showarrow=False,
        font=dict(
            size=10,
            color="grey"
        ),
        opacity=0.7
    ).add_annotation(
        x=-0.08,
        y=-0.15,
        xref="paper",
        yref="paper",
        text="<br>".join(
            (
                "The NAEP Reading scale ranges from 0 to 500.",
                "<i>Per Pupil, 4-Year Expenditure</i> is an aggregate from 5th-8th grade. Dollar adjustment was made using <br>each school year's ending year.",
                f"OLS Adj. R-squared: {result.r_squared:.03f}; P|t|>{result.p_value:.03f}; {change_measure.title()} change per $1000 in per student spending={result.coef * 1000:.2f}"
            )
        ),
        align="left",
        showarrow=False,
        font=dict(
            size=10,
            color="grey"
        ),
        opacity=0.7
    )

_fig4 = generate_state_investment_and_reading_performance_bar_chart(
    "Mean Grade 8 Reading Score Change",
    "Average Reading Score",
    "States spending more per student from 5th through 8th grade had a negligible relationship with average 8th grade reading performance.",
    "% pts",
)
_fig4.write_image("./charts/spending-school-roi-reading-mean.png")
_fig4.show()
_fig5 = generate_state_investment_and_reading_performance_bar_chart(
    "At or Above Basic (%) Change",
    "At or Above Basic</i><br><sup>(% Points)</sup>",
    "States spending more per student from 5th through 8th grade had a small relationship with basic 8th grade reading performance.",
    "score",
)
_fig5.write_image("./charts/spending-school-roi-reading-basic.png")
_fig5.show()
_fig6 = generate_state_investment_and_reading_performance_bar_chart(
    "At or Above Proficient (%) Change",
    "At or Above Proficient</i><br><sup>(% Points)</sup>",
    "States spending more per student from 5th through 8th grade had a negligible relationship with 8th grade reading proficiency.",
    "% pts",
)
_fig6.write_image("./charts/spending-school-roi-reading-proficient.png")
_fig6.show()

0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.059
Model:,OLS,Adj. R-squared (uncentered):,0.04
Method:,Least Squares,F-statistic:,3.065
Date:,"Sat, 28 Sep 2024",Prob (F-statistic):,0.0863
Time:,20:09:29,Log-Likelihood:,-134.9
No. Observations:,50,AIC:,271.8
Df Residuals:,49,BIC:,273.7
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,0.0002,9.95e-05,1.751,0.086,-2.58e-05,0.000

0,1,2,3
Omnibus:,0.683,Durbin-Watson:,0.945
Prob(Omnibus):,0.711,Jarque-Bera (JB):,0.163
Skew:,0.06,Prob(JB):,0.922
Kurtosis:,3.252,Cond. No.,1.0


0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.116
Model:,OLS,Adj. R-squared (uncentered):,0.098
Method:,Least Squares,F-statistic:,6.435
Date:,"Sat, 28 Sep 2024",Prob (F-statistic):,0.0144
Time:,20:09:29,Log-Likelihood:,-143.49
No. Observations:,50,AIC:,289.0
Df Residuals:,49,BIC:,290.9
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,0.0003,0.000,2.537,0.014,6.23e-05,0.001

0,1,2,3
Omnibus:,2.835,Durbin-Watson:,0.626
Prob(Omnibus):,0.242,Jarque-Bera (JB):,2.169
Skew:,0.117,Prob(JB):,0.338
Kurtosis:,3.993,Cond. No.,1.0


0,1,2,3
Dep. Variable:,y,R-squared (uncentered):,0.0
Model:,OLS,Adj. R-squared (uncentered):,-0.02
Method:,Least Squares,F-statistic:,0.01658
Date:,"Sat, 28 Sep 2024",Prob (F-statistic):,0.898
Time:,20:09:29,Log-Likelihood:,-124.5
No. Observations:,50,AIC:,251.0
Df Residuals:,49,BIC:,252.9
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,1.04e-05,8.08e-05,0.129,0.898,-0.000,0.000

0,1,2,3
Omnibus:,0.319,Durbin-Watson:,1.73
Prob(Omnibus):,0.853,Jarque-Bera (JB):,0.228
Skew:,0.158,Prob(JB):,0.892
Kurtosis:,2.9,Cond. No.,1.0


In [12]:
"${:,.2f}".format(
    student_investment.join(
        df.filter(pl.col("SCHOOL_YEAR") == "2018-19").select("State Name", "Grade 8 Students"),
        on="State Name"
    ).with_columns(
        (pl.col("Investment Change") * pl.col("Grade 8 Students")).alias("Total Investment Change"),
    )["Total Investment Change"].sum()
)

'$16,587,376,503.74'

In [13]:
math_performance["Mean Grade 8 Math Score Change"].mean(), reading_performance["Mean Grade 8 Reading Score Change"].mean()

(0.6296296296296297, 2.509433962264151)

In [14]:
math_performance["At or Above Basic (%) Change"].mean(), reading_performance["At or Above Basic (%) Change"].mean()

(2.3518518518518516, 3.6226415094339623)

In [15]:
math_performance["At or Above Proficient (%) Change"].mean(), reading_performance["At or Above Proficient (%) Change"].mean()

(-0.32075471698113206, 0.6415094339622641)