In [2]:
import polars as pl
import statsmodels.formula.api as smf
import numpy as np
from stargazer.stargazer import Stargazer

In [3]:
df = pl.read_parquet("../data/combined-2022-xgboost-synthetic-eiu.parquet")
pd_df = df.to_pandas()
prod_columns = {"math": "PISA Math", "in_math99": "PISA Math in global P99","in_math99:gdp_pc": "PISA Math 99 x GDP PC","imo_total_score": "IMO score per log population", "imo_total_score:gdp_pc": "IMO score x GDP pc", "arwu_ranked_num":"ARWU insitutions", "arwu_ranked_num:gdp_pc": "ARWU insitutions x GDP PC", "gdp_pc": "GDP per capita", "primary_completion": "Primary School Completion Rate", "lower_sec_completion": "Lower Sec. Completion Rate", "upper_sec_completion": "Upper Sec. Completion Rate", "democracy_eiu": "Democracy Rating", "population": "Population"}

In [4]:
pd_df.head()

Unnamed: 0,country,year,gdp_pc,gdp_pc_growth,primary_completion,lower_sec_completion,upper_sec_completion,population,arwu_ranked_num,arwu_sum_score,math,read,science,in_math99,imo_total_score,democracy_eiu,xg_primary_completion,xg_lower_sec_completion,xg_upper_sec_completion
0,AFG,2003,199.643228,92.702913,16.996843,15.810551,10.813185,22645130.0,0.0,0.0,,,,,0.0,3.310546,16.996843,15.810551,10.813185
1,AFG,2004,221.830531,-249.725464,15.907422,18.582813,10.807993,23553551.0,0.0,0.0,,,,,0.0,3.015871,15.907422,18.582813,10.807993
2,AFG,2005,254.115274,732.187384,24.646439,8.61373,6.516856,24411191.0,0.0,0.0,,,,,0.0,3.061568,24.646439,8.61373,6.516856
3,AFG,2006,274.015394,108.49882,27.276413,14.70216,10.832012,25442944.0,0.0,0.0,,,,,0.0,3.06,27.276413,14.70216,10.832012
4,AFG,2007,376.318296,1180.338266,30.758774,9.329118,9.298284,25903301.0,0.0,0.0,,,,,0.0,2.632212,30.758774,9.329118,9.298284


In [5]:
# pd_df["arwu_ranked_num"] = pd_df["arwu_ranked_num"] / pd_df["population"] * 1_000_000
# pd_df["imo_total_score"] = pd_df["imo_total_score"] * np.log(pd_df["population"])
# pd_df["arwu_ranked_num"] = np.where(pd_df["year"] < 2017, pd_df["arwu_ranked_num"], pd_df["arwu_ranked_num"] / 2)
# # pd_df["arwu_ranked_num"] = np.maximum(pd_df["arwu_ranked_num"], pd_df["arwu_sum_score"]) / pd_df["population"] * 1_000_000
# pd_df["gdp_pc_growth"] = 100 * pd_df["gdp_pc_growth"]
# pd_df["gdp_pc"] = pd_df["gdp_pc"] / 1000
# pd_df["population"] = np.log(pd_df["population"])
pd_df["high_income"] = np.where(pd_df["gdp_pc"] > 12475, 1, 0)

In [6]:
pd_df[["gdp_pc", "gdp_pc_growth", "math", "primary_completion", "lower_sec_completion", "population", "democracy_eiu"]].describe()

Unnamed: 0,gdp_pc,gdp_pc_growth,math,primary_completion,lower_sec_completion,population,democracy_eiu
count,5126.0,5075.0,441.0,5320.0,5320.0,5300.0,5320.0
mean,15325.512514,203.390942,461.932227,73.545217,57.239016,291047600.0,6.998788
std,23704.041944,539.829646,56.230835,22.245384,23.906633,913896200.0,2.637833
min,114.367007,-5518.868118,315.963154,-1.092728,2.03617,9668.0,0.32
25%,1707.629017,11.920733,416.13717,61.55589,40.423835,1500115.0,4.87
50%,5536.609823,219.518969,478.696484,80.176689,58.348389,9758046.0,7.68
75%,18805.641241,437.328514,502.800431,91.103827,76.156763,59877910.0,9.519624
max,240862.182448,9695.64199,574.66382,102.673096,104.400986,7950947000.0,9.93


In [7]:
pd_df[["in_math99", "arwu_ranked_num", "imo_total_score"]].describe()

Unnamed: 0,in_math99,arwu_ranked_num,imo_total_score
count,442.0,5300.0,5300.0
mean,0.933362,0.064058,1.639827
std,1.457751,0.198723,2.906371
min,0.0,0.0,0.0
25%,0.056695,0.0,0.0
50%,0.486001,0.0,0.0
75%,1.184127,0.0,2.356299
max,14.639147,1.565903,14.306904


In [8]:
# pd_df.groupby("year")["arwu_ranked_num"].sum()

In [9]:
# math = smf.ols("math ~ gdp_pc + imo_total_score + in_math99 + arwu_ranked_num*gdp_pc + primary_completion + lower_sec_completion + upper_sec_completion + population + democracy_eiu", pd_df[pd_df["year"].isin([2003, 2006, 2009, 2012, 2015, 2018])]).fit()
# math.summary()

In [10]:
pisa_df = pd_df[pd_df["year"].isin([2003, 2006, 2009, 2012, 2015, 2018, 2022]) & (pd_df["math"].notnull())]

In [11]:
pisa_no_controls = smf.ols("gdp_pc_growth ~ imo_total_score + arwu_ranked_num + in_math99", pisa_df).fit()
pisa_no_fix = smf.ols("gdp_pc_growth ~ gdp_pc + imo_total_score + math + in_math99 + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + democracy_eiu", pisa_df).fit()
pisa = smf.ols("gdp_pc_growth ~ gdp_pc + imo_total_score + math + in_math99 + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + democracy_eiu + C(year) ", pisa_df).fit()
pisa_country = smf.ols("gdp_pc_growth ~ gdp_pc + imo_total_score + math + in_math99 + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + democracy_eiu + country + C(year)", pisa_df).fit()

In [12]:
non_pisa_pyears = smf.ols("gdp_pc_growth ~ gdp_pc + democracy_eiu + imo_total_score + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + C(year) ", pd_df[pd_df["year"].isin([2003, 2006, 2009, 2012, 2015, 2018, 2022])]).fit()
non_pisa = smf.ols("gdp_pc_growth ~ gdp_pc + democracy_eiu + imo_total_score + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + C(year) ", pd_df).fit()
non_pisa_country = smf.ols("gdp_pc_growth ~ gdp_pc + democracy_eiu + imo_total_score + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + C(year) + country", pd_df).fit()
# non_pisa_no_interac = smf.ols("gdp_pc_growth ~ gdp_pc + democracy_eiu + imo_total_score + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + C(year) ", pd_df).fit()

In [13]:
pisa_sg = Stargazer([pisa_no_controls, pisa_no_fix, pisa, pisa_country])
pisa_sg.covariate_order(["in_math99", "imo_total_score", "arwu_ranked_num"])
pisa_sg.add_line('Time Effects', ['No', 'No', 'Yes', 'Yes'])
pisa_sg.add_line('Fixed Effects', ['No', 'No', 'No', 'Yes'])
pisa_sg.add_line('Controls', ['No', 'Yes', 'Yes', 'Yes'])
pisa_sg.add_line('Entities', [89, 89, 89, 89])
pisa_sg.rename_covariates(prod_columns)
pisa_sg.custom_columns(['Model 1 (base)', 'Model 2', 'Model 3 (Time FE)', 'Model 4 (Time + Entity FE)'], [1, 1, 1, 1])
pisa_sg.show_model_numbers(False)
print(pisa_sg.render_latex())
# pisa_sg

\begin{table}[!htbp] \centering
\begin{tabular}{@{\extracolsep{5pt}}lcccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{4}{c}{\textit{Dependent variable: gdp_pc_growth}} \
\cr \cline{2-5}
\\[-1.8ex] & \multicolumn{1}{c}{Model 1 (base)} & \multicolumn{1}{c}{Model 2} & \multicolumn{1}{c}{Model 3 (Time FE)} & \multicolumn{1}{c}{Model 4 (Time + Entity FE)}  \\
\hline \\[-1.8ex]
 PISA Math in global P99 & -31.228$^{*}$ & -38.928$^{*}$ & -34.569$^{*}$ & -115.492$^{***}$ \\
& (16.314) & (20.802) & (17.850) & (36.599) \\
 IMO score per log population & 10.674$^{*}$ & 4.924$^{}$ & 3.122$^{}$ & -11.158$^{}$ \\
& (6.363) & (7.470) & (6.441) & (15.728) \\
 ARWU insitutions & -166.641$^{**}$ & -162.705$^{*}$ & -176.049$^{**}$ & -159.960$^{}$ \\
& (68.800) & (86.871) & (72.281) & (112.884) \\
 Time Effects & No & No & Yes & Yes \\
 Fixed Effects & No & No & No & Yes \\
 Controls & No & Yes & Yes & Yes \\
 Entities & 89 & 89 & 89 & 89 \\
\hline \\[-1.8ex]
 Observations & 440 & 440 & 440 & 440 \\

In [14]:
# non_pisa_sg = Stargazer([pisa, non_pisa_pyears, non_pisa, non_pisa_country])
# non_pisa_sg.covariate_order(["imo_total_score", "imo_total_score:gdp_pc", "arwu_ranked_num", "arwu_ranked_num:gdp_pc", "gdp_pc", "primary_completion", "lower_sec_completion", "upper_sec_completion", "democracy_eiu", "population"])
# non_pisa_sg.add_line('Time Effects', ['Yes', 'Yes', 'Yes', 'Yes'])
# non_pisa_sg.add_line('Fixed Effects', ['No', 'No', 'No', 'Yes'])
# non_pisa_sg.add_line('Entities', [49, 103, 165, 165])
# non_pisa_sg.custom_columns(['Model 3 (PISA)', 'Model 5 (PISA years)', 'Model 6 (All years)', 'Model 7 (All years, FE)'], [1, 1, 1, 1])
# non_pisa_sg.show_model_numbers(False)
# non_pisa_sg.rename_covariates(prod_columns)
# # print(non_pisa_sg.render_latex())
# non_pisa_sg

In [15]:
group1 = pd_df[(pd_df["arwu_ranked_num"].notnull()) & (pd_df["math"].notnull()) & (pd_df["imo_total_score"].notnull()) & (pd_df["primary_completion"].notnull()) & (pd_df["lower_sec_completion"].notnull()) & (pd_df["upper_sec_completion"].notnull()) & (pd_df["democracy_eiu"].notnull())]
group1.shape

(441, 20)

In [16]:
group2 = pd_df[(pd_df["gdp_pc"].notnull()) & (pd_df["arwu_ranked_num"].notnull()) & (pd_df["imo_total_score"].notnull()) & (pd_df["primary_completion"].notnull()) & (pd_df["lower_sec_completion"].notnull()) & (pd_df["upper_sec_completion"].notnull()) & (pd_df["democracy_eiu"].notnull())]
group2.shape

(5126, 20)

In [17]:
group2[["gdp_pc", "gdp_pc_growth", "math", "primary_completion", "lower_sec_completion", "population", "democracy_eiu"]].describe()

Unnamed: 0,gdp_pc,gdp_pc_growth,math,primary_completion,lower_sec_completion,population,democracy_eiu
count,5126.0,5054.0,441.0,5126.0,5126.0,5126.0,5126.0
mean,15325.512514,203.836783,461.932227,73.451682,57.554116,300711600.0,6.967983
std,23704.041944,532.154108,56.230835,22.506727,24.085865,927746600.0,2.606636
min,114.367007,-5518.868118,315.963154,-1.092728,2.03617,9668.0,0.32
25%,1707.629017,13.534144,416.13717,61.231469,40.426849,1914223.0,4.87
50%,5536.609823,221.466668,478.696484,80.188564,59.013533,10056740.0,7.53
75%,18805.641241,437.627995,502.800431,91.519964,76.698416,66373320.0,9.514626
max,240862.182448,9695.64199,574.66382,102.673096,104.400986,7950947000.0,9.93


In [18]:
group2[["arwu_ranked_num", "imo_total_score"]].describe()

Unnamed: 0,arwu_ranked_num,imo_total_score
count,5126.0,5126.0
mean,0.066215,1.668493
std,0.201713,2.918943
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,0.0,2.557807
max,1.565903,14.306904


In [19]:
group1[["gdp_pc", "gdp_pc_growth", "math", "primary_completion", "lower_sec_completion", "population", "democracy_eiu"]].describe()

Unnamed: 0,gdp_pc,gdp_pc_growth,math,primary_completion,lower_sec_completion,population,democracy_eiu
count,441.0,440.0,441.0,441.0,441.0,441.0,441.0
mean,28463.985814,169.064165,461.932227,90.000496,77.894528,35318600.0,7.246516
std,25255.097112,452.675266,56.230835,10.245705,17.143303,59445620.0,1.772025
min,543.110702,-2292.684929,315.963154,51.353222,29.21385,34000.0,1.93
25%,8495.653081,11.02982,416.13717,82.985069,63.84901,4408581.0,6.412529
50%,19861.69743,190.810046,478.696484,94.491341,83.070015,9799186.0,7.57
75%,42801.908117,396.299473,502.800431,98.383331,91.896492,42187640.0,8.38
max,149461.785571,3303.048777,574.66382,101.947922,101.966637,333287600.0,9.93


In [20]:
group1[["in_math99", "arwu_ranked_num", "imo_total_score"]].describe()

Unnamed: 0,in_math99,arwu_ranked_num,imo_total_score
count,441.0,441.0,441.0
mean,0.935478,0.256697,4.724749
std,1.458726,0.334478,3.486964
min,0.0,0.0,0.0
25%,0.061045,0.0,1.54044
50%,0.486487,0.078525,4.884144
75%,1.185308,0.460051,7.505591
max,14.639147,1.53612,11.786918


In [21]:
import plotly.express as px

In [22]:
fig = px.scatter(group1, y="gdp_pc_growth", x="year", color='gdp_pc', hover_data=["country", "year"])
fig.show()

In [23]:
fig = px.scatter(group2, y="gdp_pc_growth", x="year", color='imo_total_score', hover_data=["country", "year"])
fig.show()

In [24]:
fig = px.scatter(group2, y="gdp_pc_growth", x="year", color='arwu_ranked_num', hover_data=["country", "year"])
fig.show()

In [25]:
fig = px.scatter(pd_df, y="math", x="year", color='arwu_ranked_num', hover_data=["country", "year"])
fig.show()

In [26]:
year_pisa_regs = []
average = smf.ols("gdp_pc_growth ~ gdp_pc + imo_total_score + math + in_math99 + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + democracy_eiu", pisa_df.groupby("country").mean().reset_index()).fit()
years = list(range(2003, 2019, 3)) + [2022]
years
for y in years:
    filtered = pisa_df[pisa_df["year"] == y]
    reg = smf.ols("gdp_pc_growth ~ gdp_pc + imo_total_score + math + in_math99 + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + democracy_eiu", filtered).fit()
    year_pisa_regs.append(reg)
year_pisa_regs.append(pisa_country)

In [27]:
yearly_pisa = Stargazer(year_pisa_regs)
yearly_pisa.covariate_order(["in_math99", "imo_total_score", "arwu_ranked_num"])
yearly_pisa.rename_covariates(prod_columns)
yearly_pisa.custom_columns(list(map(str, years)) + ["Panel FE"], [1] * len(year_pisa_regs))
yearly_pisa.show_model_numbers(False)
# print(pisa_sg.render_latex())
yearly_pisa.add_line('Controls', ['Yes'] * len(year_pisa_regs))
print(yearly_pisa.render_latex())
# yearly_pisa

\begin{table}[!htbp] \centering
\begin{tabular}{@{\extracolsep{5pt}}lcccccccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{8}{c}{\textit{Dependent variable: gdp_pc_growth}} \
\cr \cline{2-9}
\\[-1.8ex] & \multicolumn{1}{c}{2003} & \multicolumn{1}{c}{2006} & \multicolumn{1}{c}{2009} & \multicolumn{1}{c}{2012} & \multicolumn{1}{c}{2015} & \multicolumn{1}{c}{2018} & \multicolumn{1}{c}{2022} & \multicolumn{1}{c}{Panel FE}  \\
\hline \\[-1.8ex]
 PISA Math in global P99 & -52.490$^{}$ & -193.475$^{**}$ & 154.062$^{**}$ & 16.575$^{}$ & -34.309$^{}$ & 3.769$^{}$ & -69.681$^{**}$ & -115.492$^{***}$ \\
& (48.524) & (84.559) & (62.680) & (36.365) & (76.140) & (27.106) & (27.702) & (36.599) \\
 IMO score per log population & 4.478$^{}$ & -9.112$^{}$ & 14.038$^{}$ & 7.368$^{}$ & -16.622$^{}$ & 2.193$^{}$ & -0.174$^{}$ & -11.158$^{}$ \\
& (17.327) & (20.741) & (17.012) & (13.983) & (27.567) & (7.722) & (13.474) & (15.728) \\
 ARWU insitutions & -300.038$^{**}$ & -454.401$^{***}$ & 136.623$^{}$

# Without synthetic data

In [28]:
no_impute_df = pl.read_parquet("../data/combined-2022-included.parquet")
ni_pd_df = no_impute_df.to_pandas()
prod_columns = {"math": "PISA Math", "in_math99": "PISA Math in global P99","in_math99:gdp_pc": "PISA Math 99 x GDP PC","imo_total_score": "IMO score per log population", "imo_total_score:gdp_pc": "IMO score x GDP pc", "arwu_ranked_num":"ARWU insitutions", "arwu_ranked_num:gdp_pc": "ARWU insitutions x GDP PC", "gdp_pc": "GDP per capita", "primary_completion": "Primary School Completion Rate", "lower_sec_completion": "Lower Sec. Completion Rate", "upper_sec_completion": "Upper Sec. Completion Rate", "democracy_eiu": "Democracy Rating", "population": "Population"}
ni_pd_df["arwu_ranked_num"] = ni_pd_df["arwu_ranked_num"] / ni_pd_df["population"] * 1_000_000
ni_pd_df["imo_total_score"] = ni_pd_df["imo_total_score"] / np.log(ni_pd_df["population"])
ni_pd_df["arwu_ranked_num"] = np.where(ni_pd_df["year"] < 2017, ni_pd_df["arwu_ranked_num"], ni_pd_df["arwu_ranked_num"] / 2)
# ni_pd_df["arwu_ranked_num"] = np.maximum(ni_pd_df["arwu_ranked_num"], ni_pd_df["arwu_sum_score"]) / ni_pd_df["population"] * 1_000_000
ni_pd_df["gdp_pc_growth"] = 100 * ni_pd_df["gdp_pc_growth"]
# pd_df["gdp_pc"] = pd_df["gdp_pc"] / 1000
# pd_df["population"] = np.log(pd_df["population"])

In [29]:
ni_pisa_df = ni_pd_df[ni_pd_df["year"].isin([2003, 2006, 2009, 2012, 2015, 2018, 2022]) & (ni_pd_df["math"].notnull())]

In [30]:
pisa_no_controls = smf.ols("gdp_pc_growth ~ imo_total_score + arwu_ranked_num + in_math99", ni_pisa_df).fit()
pisa_no_fix = smf.ols("gdp_pc_growth ~ gdp_pc + imo_total_score + math + in_math99 + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + democracy_eiu", ni_pisa_df).fit()
pisa = smf.ols("gdp_pc_growth ~ gdp_pc + imo_total_score + math + in_math99 + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + democracy_eiu + C(year) ", ni_pisa_df).fit()
pisa_country = smf.ols("gdp_pc_growth ~ gdp_pc + imo_total_score + math + in_math99 + arwu_ranked_num + primary_completion + lower_sec_completion + upper_sec_completion + population + democracy_eiu + country + C(year)", ni_pisa_df).fit()

In [31]:
pisa_sg = Stargazer([pisa_no_controls, pisa_no_fix, pisa, pisa_country])
pisa_sg.covariate_order(["in_math99", "imo_total_score", "arwu_ranked_num"])
pisa_sg.add_line('Time Effects', ['No', 'No', 'Yes', 'Yes'])
pisa_sg.add_line('Fixed Effects', ['No', 'No', 'No', 'Yes'])
pisa_sg.add_line('Controls', ['No', 'Yes', 'Yes', 'Yes'])
pisa_sg.rename_covariates(prod_columns)
pisa_sg.custom_columns(['Model 1 (base)', 'Model 2', 'Model 3 (Time FE)', 'Model 4 (Time + Entity FE)'], [1, 1, 1, 1])
pisa_sg.show_model_numbers(False)
print(pisa_sg.render_latex())
# pisa_sg

\begin{table}[!htbp] \centering
\begin{tabular}{@{\extracolsep{5pt}}lcccc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{4}{c}{\textit{Dependent variable: gdp_pc_growth}} \
\cr \cline{2-5}
\\[-1.8ex] & \multicolumn{1}{c}{Model 1 (base)} & \multicolumn{1}{c}{Model 2} & \multicolumn{1}{c}{Model 3 (Time FE)} & \multicolumn{1}{c}{Model 4 (Time + Entity FE)}  \\
\hline \\[-1.8ex]
 PISA Math in global P99 & -31.228$^{*}$ & 5.985$^{}$ & 8.659$^{}$ & 25.421$^{}$ \\
& (16.314) & (22.468) & (21.493) & (84.291) \\
 IMO score per log population & 10.674$^{*}$ & 6.824$^{}$ & 12.529$^{}$ & 50.506$^{}$ \\
& (6.363) & (9.440) & (8.631) & (32.187) \\
 ARWU insitutions & -166.641$^{**}$ & -190.750$^{}$ & -249.208$^{**}$ & -454.384$^{}$ \\
& (68.800) & (127.229) & (116.476) & (351.559) \\
 Time Effects & No & No & Yes & Yes \\
 Fixed Effects & No & No & No & Yes \\
 Controls & No & Yes & Yes & Yes \\
\hline \\[-1.8ex]
 Observations & 440 & 112 & 112 & 112 \\
 $R^2$ & 0.037 & 0.204 & 0.383 & 0.715 \\


In [132]:
ni_pisa_df[(ni_pisa_df["primary_completion"].notnull()) & (ni_pisa_df["democracy_eiu"].notnull()) & (ni_pd_df["lower_sec_completion"].notnull()) & (ni_pisa_df["upper_sec_completion"].notnull())][["gdp_pc", "math", "arwu_ranked_num", "imo_total_score", "in_math99", "math"]].describe()


Boolean Series key will be reindexed to match DataFrame index.



Unnamed: 0,gdp_pc,math,arwu_ranked_num,imo_total_score,in_math99,math.1
count,112.0,112.0,112.0,112.0,112.0,112.0
mean,25075.596453,456.916979,0.220021,4.640821,0.912355,456.916979
std,20846.730295,55.105992,0.292263,3.19232,1.66272,55.105992
min,1572.79794,327.250491,0.0,0.0,0.0,327.250491
25%,8965.398345,410.003171,0.0,1.736566,0.02798,410.003171
50%,17664.026111,473.00894,0.041274,5.139991,0.368485,473.00894
75%,39246.543188,497.459671,0.365121,6.845541,1.046649,497.459671
max,98041.362238,568.359669,1.211386,11.257523,9.501632,568.359669


In [133]:
pisa_df[["gdp_pc", "math", "arwu_ranked_num", "imo_total_score", "in_math99", "math"]].describe()

Unnamed: 0,gdp_pc,math,arwu_ranked_num,imo_total_score,in_math99,math.1
count,441.0,441.0,441.0,441.0,441.0,441.0
mean,28463.985814,461.932227,0.256697,4.724749,0.935478,461.932227
std,25255.097112,56.230835,0.334478,3.486964,1.458726,56.230835
min,543.110702,315.963154,0.0,0.0,0.0,315.963154
25%,8495.653081,416.13717,0.0,1.54044,0.061045,416.13717
50%,19861.69743,478.696484,0.078525,4.884144,0.486487,478.696484
75%,42801.908117,502.800431,0.460051,7.505591,1.185308,502.800431
max,149461.785571,574.66382,1.53612,11.786918,14.639147,574.66382


In [32]:
ni_pisa_df[(ni_pisa_df["primary_completion"].notnull()) & (ni_pisa_df["democracy_eiu"].notnull()) & (ni_pd_df["lower_sec_completion"].notnull()) & (ni_pisa_df["upper_sec_completion"].notnull())]["country"].value_counts().reset_index()


Boolean Series key will be reindexed to match DataFrame index.



Unnamed: 0,country,count
0,DEU,4
1,URY,4
2,BRA,4
3,COL,4
4,PRT,4
5,USA,4
6,MEX,4
7,ESP,4
8,HKG,3
9,TUR,3
