In [35]:
import polars as pl
import statsmodels.formula.api as smf
import numpy as np
from stargazer.stargazer import Stargazer
from linearmodels.panel import PanelOLS

In [36]:
# Calendar years retained for the panel; rows from any other year are dropped.
# NOTE(review): these look like the triennial PISA assessment cycles — confirm.
SAMPLE_YEARS = [2003, 2006, 2009, 2012, 2015, 2018]

# Read the combined dataset with polars, then convert to pandas because the
# regression cells below work on a pandas frame.
df = pl.read_parquet("../data/combined_all.parquet")
pd_df = df.to_pandas()

# "year" is still an ordinary column at this point, so it can be tested
# directly; no reset_index() copy of the whole frame is needed for the mask.
pd_df = pd_df[pd_df["year"].isin(SAMPLE_YEARS)]

# (country, year) MultiIndex: first level is the entity, second the time period.
pd_df = pd_df.set_index(["country", "year"])

In [37]:
# Derived regressors, built in one assign() call.
# Every right-hand side is evaluated against the current frame before assign()
# runs, so the per-million ratio is computed from the raw population, not from
# the logged value that replaces the "population" column in the same call.
# Caveat: "gdp_pc" and "population" are overwritten with their own logs, so
# running this cell a second time without reloading applies the log twice.
pd_df = pd_df.assign(
    arwu_ranked_num_pc=pd_df["arwu_ranked_num"] / pd_df["population"] * 1_000_000,
    gdp_pc=np.log(pd_df["gdp_pc"]),
    population=np.log(pd_df["population"]),
)

In [38]:
# Estimation sample 1: keep only rows where every column listed here is present.
# This list includes "math" but not "gdp_pc".
group1_required = [
    "arwu_ranked_num",
    "math",
    "imo_total_score",
    "primary_completion",
    "lower_sec_completion",
    "upper_sec_completion",
    "democracy_eiu",
]
group1 = pd_df.dropna(subset=group1_required)
group1.shape

(109, 15)

In [39]:
# Estimation sample 2: keep only rows where every column listed here is present.
# This list includes "gdp_pc" but does not require "math".
group2_required = [
    "gdp_pc",
    "arwu_ranked_num",
    "imo_total_score",
    "primary_completion",
    "lower_sec_completion",
    "upper_sec_completion",
    "democracy_eiu",
]
group2 = pd_df.dropna(subset=group2_required)
group2.shape

(222, 15)

In [40]:
# group1 sample, time fixed effects only.
# `arwu_ranked_num*gdp_pc` is formula shorthand for both main effects plus
# their interaction, so gdp_pc being listed on its own as well is harmless.
pisa_time_fe = PanelOLS.from_formula(
    (
        "gdp_pc_growth ~ gdp_pc + imo_total_score + math + in_math99"
        " + arwu_ranked_num*gdp_pc"
        " + primary_completion + lower_sec_completion + upper_sec_completion"
        " + population + democracy_eiu"
        " + TimeEffects"
    ),
    group1,
).fit()

In [41]:
# group1 sample, with both time and entity (country) fixed effects.
# `arwu_ranked_num*gdp_pc` is formula shorthand for both main effects plus
# their interaction.
pisa_time_country_fe = PanelOLS.from_formula(
    (
        "gdp_pc_growth ~ gdp_pc + imo_total_score + math + in_math99"
        " + arwu_ranked_num*gdp_pc"
        " + primary_completion + lower_sec_completion + upper_sec_completion"
        " + population + democracy_eiu"
        " + TimeEffects + EntityEffects"
    ),
    group1,
).fit()

In [42]:
# group1 sample, time fixed effects only, with the `math` regressor left out
# of the right-hand side (in_math99 is kept).
# `arwu_ranked_num*gdp_pc` is formula shorthand for both main effects plus
# their interaction.
pisa_no_math_time_fe = PanelOLS.from_formula(
    (
        "gdp_pc_growth ~ gdp_pc + imo_total_score + in_math99"
        " + arwu_ranked_num*gdp_pc"
        " + primary_completion + lower_sec_completion + upper_sec_completion"
        " + population + democracy_eiu"
        " + TimeEffects"
    ),
    group1,
).fit()

In [43]:
# group2 sample, time fixed effects only; no `math` / `in_math99` regressors.
# `arwu_ranked_num*gdp_pc` is formula shorthand for both main effects plus
# their interaction (the summary lists arwu_ranked_num and
# arwu_ranked_num:gdp_pc as separate rows).
# NOTE(review): fit() is called with its defaults and the summary reports
# "Cov. Estimator: Unadjusted" — confirm whether clustered errors are wanted.
non_pisa = PanelOLS.from_formula(
    (
        "gdp_pc_growth ~ gdp_pc + democracy_eiu + imo_total_score"
        " + arwu_ranked_num*gdp_pc"
        " + primary_completion + lower_sec_completion + upper_sec_completion"
        " + population"
        " + TimeEffects"
    ),
    group2,
).fit()

In [44]:
# group2 sample, time fixed effects only; arwu_ranked_num enters as a plain
# main effect with no interaction against gdp_pc.
non_pisa_no_interac = PanelOLS.from_formula(
    (
        "gdp_pc_growth ~ gdp_pc + democracy_eiu + imo_total_score"
        " + arwu_ranked_num"
        " + primary_completion + lower_sec_completion + upper_sec_completion"
        " + population"
        " + TimeEffects"
    ),
    group2,
).fit()

In [45]:
# Display the fitted results of the group2 specification that includes the
# arwu_ranked_num x gdp_pc interaction.
non_pisa

0,1,2,3
Dep. Variable:,gdp_pc_growth,R-squared:,0.1370
Estimator:,PanelOLS,R-squared (Between):,-12.796
No. Observations:,222,R-squared (Within):,0.0354
Date:,"Mon, Apr 01 2024",R-squared (Overall):,-9.9892
Time:,21:27:39,Log-likelihood,-501.81
Cov. Estimator:,Unadjusted,,
,,F-statistic:,3.6868
Entities:,103,P-value,0.0003
Avg Obs:,2.1553,Distribution:,"F(9,209)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
gdp_pc,-0.8488,0.2068,-4.1046,0.0001,-1.2565,-0.4411
democracy_eiu,0.1949,0.1188,1.6411,0.1023,-0.0392,0.4290
imo_total_score,0.0135,0.0039,3.4701,0.0006,0.0058,0.0212
arwu_ranked_num,-0.2286,0.3260,-0.7013,0.4839,-0.8714,0.4141
primary_completion,-0.0204,0.0175,-1.1669,0.2446,-0.0549,0.0141
lower_sec_completion,0.0483,0.0247,1.9570,0.0517,-0.0004,0.0969
upper_sec_completion,-0.0218,0.0194,-1.1219,0.2632,-0.0600,0.0165
population,-0.2250,0.1461,-1.5403,0.1250,-0.5131,0.0630
arwu_ranked_num:gdp_pc,0.0205,0.0298,0.6883,0.4920,-0.0382,0.0791
