# PISA as a Predictor of Economic and Institutional Performance
How well do PISA scores (as compared to lost wallet reporting rates) explain variation in economic development?

There are four measures of "Economic and Institutional Performance" in the second part of the paper: GDP per capita (`log_gdp`), productivity (`log_tfp`), government effectiveness (`gee`), and letter grade efficiency (`letter_grading`). If PISA scores are considered a fifth measure of the same sort, we find that wallet reporting rates are an equally effective predictor. When combined with any other measure of social capital, the coefficient for wallet reporting rates is always statistically significant with p<0.01, and the resulting $R^2$ is greater than that of most of the other fitted models.

## Regression Results
The table below seeks to replicate part of Table 2 from **Tannenbaum** and adds two new columns (Model 9 and Model 10) that contain the corresponding OLS results with PISA scores as the dependent variable. Models 7 and 8 are recreated here to show that the table is generated with the same process that generated Table 2 in **Tannenbaum**.

In [326]:
import pandas as pd
import statsmodels.api as sm
from great_tables import GT

# ---- Inputs ----------------------------------------------------------------
# Survey-based measures; each one is used as a regressor in its own model.
survey_cols = [
    "general_trust", "GPS_trust",
    "general_morality", "MFQ_genmorality",
    "civic_cooperation",
    "GPS_posrecip", "GPS_altruism",
    "stranger1",
]

# Economic / institutional outcome columns named in the write-up.
econ_cols = ["log_gdp", "log_tfp", "gee", "letter_grading"]

# Base dataset.
df = pd.read_csv("../data/tannenbaum_data.csv")

# PISA scores: expose the score column as `pisa_score`, then attach it to the
# base data by country. A left join keeps every row of `df`, including rows
# whose country has no PISA entry.
pisa = pd.read_csv("../data/pisa_data.csv")
pisa = pisa.rename(columns={"mean_score": "pisa_score"})
df = pd.merge(df, pisa, how="left", on="country")


# p-value stars to award to each parameter coef estimate.
def stars(p):
    """Return the significance-star suffix for a p-value.

    Thresholds: ``p > 0.1`` -> ``""``, ``p > 0.05`` -> ``"*"``,
    ``p > 0.01`` -> ``"**"``, anything smaller -> ``"***"``.

    A NaN p-value returns ``""`` so that an undefined p-value is never
    reported as significant.
    """
    # NaN is the only value that is not equal to itself. Without this guard it
    # fails every `>` comparison below and would fall through to "***".
    if p != p:
        return ""
    if p > 0.1:
        return ""
    if p > 0.05:
        return "*"
    if p > 0.01:
        return "**"
    return "***"


# Shared fit-and-format routine used by both regression helpers below.
def _fit_and_format(survey_measure, econ_measure, extra_predictors=()):
    """Fit one OLS model on country means and return its formatted results.

    Reads the module-level ``df``. The data are averaged per country, rows
    with any missing value among the used columns are dropped, and
    ``econ_measure`` is regressed on the standardized predictors plus a
    constant.

    Parameters
    ----------
    survey_measure : str
        Column used as the first predictor; also written to the ``measure``
        column of the result.
    econ_measure : str
        Column used as the dependent variable (left unstandardized).
    extra_predictors : sequence of str
        Additional predictor columns appended after ``survey_measure``.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with columns ``level_0`` (row label), ``level_1``
        (``"param"``, ``"se"`` or ``""``), ``0`` (formatted value) and
        ``measure``. Each predictor contributes a coefficient row (with
        significance stars) and a standard-error row; two trailing rows hold
        the number of observations and R-squared.
    """
    predictors = [survey_measure, *extra_predictors]
    regression_df = (
        df.groupby("country")[[econ_measure, *predictors]].mean().dropna()
    )

    y = regression_df[econ_measure]
    X = regression_df[predictors]

    # Standardize predictors
    X_std = (X - X.mean()) / X.std()
    X_std = sm.add_constant(X_std)

    model = sm.OLS(y, X_std)
    results = model.fit(cov_type="HC1")  # Robust standard errors same as in Tannenbaum

    # Position 0 is the added constant; only the predictors are reported.
    coefs = results.params.iloc[1:]
    pvalues = results.pvalues.iloc[1:]
    std_errs = results.bse.iloc[1:]

    result_df = (
        pd.DataFrame(
            {
                "param": pd.Series(
                    [f"{v:.3f}{stars(p)}" for v, p in zip(coefs, pvalues)],
                    index=coefs.index,
                ),
                "se": std_errs.apply(lambda x: f"({x:.3f})"),
            }
        )
        # Keep both columns as objects so the stacked Series can also hold the
        # integer N appended below.
        .astype({"param": object, "se": object})
        .stack()
    )
    # Summary rows. The labels are HTML fragments; the italic tags are now
    # properly closed (they were previously "<i>N<i>" and "<i>R<i>...").
    result_df.loc[("<i>N</i>", "")] = X.shape[0]
    result_df.loc[("<i>R</i><sup>2</sup>", "")] = f"{results.rsquared:.3f}"
    result_df = result_df.reset_index()
    result_df["measure"] = survey_measure

    return result_df


# Run regression for each survey measure.
def get_model_results_no_pred(survey_measure, econ_measure):
    """Regress ``econ_measure`` on ``survey_measure`` alone.

    See ``_fit_and_format`` for the shape of the returned frame.
    """
    return _fit_and_format(survey_measure, econ_measure)


# Run regression for each survey measure with predictor variable.
def get_model_results(survey_measure, econ_measure):
    """Regress ``econ_measure`` on ``survey_measure`` and ``"response"``.

    ``"response"`` is presumably the wallet reporting rate -- confirm against
    the dataset. See ``_fit_and_format`` for the shape of the returned frame.
    """
    return _fit_and_format(
        survey_measure, econ_measure, extra_predictors=("response",)
    )


# Fit one model per survey measure for each outcome: odd-numbered models use
# the survey measure alone, even-numbered models add the "response" predictor.
# Column 0 of each result frame holds the formatted values and is renamed to
# the model label.
model_7_results = [
    get_model_results_no_pred(col, "letter_grading") for col in survey_cols
]
model_7 = pd.concat(model_7_results).rename(columns={0: "Model 7"})

model_8_results = [get_model_results(col, "letter_grading") for col in survey_cols]
model_8 = pd.concat(model_8_results).rename(columns={0: "Model 8"})

model_9_results = [get_model_results_no_pred(col, "pisa_score") for col in survey_cols]
model_9 = pd.concat(model_9_results).rename(columns={0: "Model 9"})

model_10_results = [get_model_results(col, "pisa_score") for col in survey_cols]
model_10 = pd.concat(model_10_results).rename(columns={0: "Model 10"})

# Combine results and make pretty.
merge_keys = ["level_0", "level_1", "measure"]
model_cols = ["Model 7", "Model 8", "Model 9", "Model 10"]

# The right joins make the two-predictor models (8 and 10) define the row set,
# so the "response" rows exist even though Models 7 and 9 have no such rows.
display_df = (
    model_7.merge(model_8, on=merge_keys, how="right")
    .merge(model_9, on=merge_keys, how="left")
    .merge(model_10, on=merge_keys, how="right")
    # level_1 ("param" / "se" / "") is only needed as a join key.
    .drop(columns="level_1")
)

# Blank out a row label when it repeats the label directly above it, so each
# standard-error row appears unlabeled under its coefficient row.
row_labels = display_df.loc[:, "level_0"]
display_df.loc[:, "level_0"] = row_labels.where(row_labels != row_labels.shift(), "")

(
    GT(display_df)
    .tab_header(title="TABLE 2.—PREDICTIVE VALUE OF WALLET REPORTING RATES")
    .tab_stub(rowname_col="level_0", groupname_col="measure")
    .tab_spanner(label="Letter grade efficiency", columns=["Model 7", "Model 8"])
    .tab_spanner(label="PISA Score", columns=["Model 9", "Model 10"])
    .tab_options(
        table_body_hlines_style="none",
    )
    # Center every model column; previously only Models 7 and 8 were centered.
    .cols_align(align="center", columns=model_cols)
)

TABLE 2.—PREDICTIVE VALUE OF WALLET REPORTING RATES,TABLE 2.—PREDICTIVE VALUE OF WALLET REPORTING RATES,TABLE 2.—PREDICTIVE VALUE OF WALLET REPORTING RATES,TABLE 2.—PREDICTIVE VALUE OF WALLET REPORTING RATES,TABLE 2.—PREDICTIVE VALUE OF WALLET REPORTING RATES
Unnamed: 0_level_1,Letter grade efficiency,Letter grade efficiency,PISA Score,PISA Score
Unnamed: 0_level_2,Model 7,Model 8,Model 9,Model 10
general_trust,general_trust,general_trust,general_trust,general_trust
general_trust,0.077*,-0.013,26.595***,3.945
,(0.041),(0.040),(4.824),(6.127)
response,,0.148***,,32.851***
,,(0.050),,(5.582)
N,39,39,32,32
R2,0.078,0.263,0.373,0.672
GPS_trust,GPS_trust,GPS_trust,GPS_trust,GPS_trust
GPS_trust,-0.016,-0.018,5.554,5.317
,(0.050),(0.039),(8.277),(4.295)
