# Library imports

In [1]:
import numpy as np
import pandas as pd
from linearmodels import PanelOLS
import statsmodels.api as sm

# Data loading

In [2]:
# Load the full panel database from the project-relative CSV file into `df`;
# every regression sample below is carved out of this frame.
full_db_path = './data/panel_data/Full_DB.csv'
df = pd.read_csv(full_db_path)

In [16]:
# Display the column index of the loaded frame, i.e. the variables that the
# regression cells below can select from.
df.columns

Index(['const', 'Country', 'Time', 'NEET', 'Exp_LMP', 'Exp_LMP_2', 'Exp_LMP_3',
       'STR', 'STR_2', 'STR_3', 'GDP', 'LogGDP', 'CPI', 'DEBT',
       'Years_schooling', 'Avg_class_size', 'Exp_educ', 'LogExp_educ',
       'Strictness_of_workers', 'PT_employ', 'Avg_dur_unemployment'],
      dtype='object')

# Regressions

## With explanatory variables only

In [25]:
# Estimation sample for the baseline specification: the dependent variable
# NEET, the constant, and the Exp_LMP / STR terms (levels plus the *_2 and *_3
# columns).
explanatory_cols = [
    'const', 'Country', 'Time', 'NEET',
    'Exp_LMP', 'Exp_LMP_2', 'Exp_LMP_3',
    'STR', 'STR_2', 'STR_3',
]
df_explanatory = (
    df[explanatory_cols]
    .dropna()                        # keep only rows complete on every selected column
    .reset_index(drop=True)
    .set_index(['Country', 'Time'])  # two-level index used by the PanelOLS call below
)

In [27]:
# Regress NEET on every remaining column of df_explanatory with time effects,
# then fit with a clustered covariance estimator (clusters formed on time).
# NOTE(review): clustering on time yields only as many clusters as there are
# periods, and the panel looks short (max 9 observations per entity in the
# summary) — confirm this choice is intended.
explanatory_model = PanelOLS(
    df_explanatory['NEET'],
    df_explanatory.drop(columns='NEET'),
    time_effects=True,
)
panel_explanatory_OLS = explanatory_model.fit(cov_type='clustered', cluster_time=True)

In [28]:
# Print the plain-text estimation summary of the baseline
# (explanatory-variables-only) fit.
print(panel_explanatory_OLS)

                          PanelOLS Estimation Summary                           
Dep. Variable:                   NEET   R-squared:                        0.2477
Estimator:                   PanelOLS   R-squared (Between):              0.1837
No. Observations:                 167   R-squared (Within):               0.1560
Date:                Thu, Apr 01 2021   R-squared (Overall):              0.2588
Time:                        23:31:35   Log-likelihood                   -447.24
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      8.3408
Entities:                          31   P-value                           0.0000
Avg Obs:                       5.3871   Distribution:                   F(6,152)
Min Obs:                       2.0000                                           
Max Obs:                       9.0000   F-statistic (robust):          7.492e+04
                            

## With economic features only

In [89]:
# Estimation sample for the economic specification: the baseline Exp_LMP / STR
# terms plus LogGDP, CPI and DEBT.
eco_cols = [
    'const', 'Country', 'Time', 'NEET',
    'Exp_LMP', 'Exp_LMP_2', 'Exp_LMP_3',
    'STR', 'STR_2', 'STR_3',
    'LogGDP', 'CPI', 'DEBT',
]
df_eco = (
    df[eco_cols]
    .dropna()                        # keep only rows complete on every selected column
    .reset_index(drop=True)
    .set_index(['Country', 'Time'])  # two-level index used by the PanelOLS call below
)

In [30]:
# Regress NEET on every remaining column of df_eco with time effects, then fit
# with a clustered covariance estimator (clusters formed on time).
# NOTE(review): clustering on time yields only as many clusters as there are
# periods, and the panel looks short (max 9 observations per entity in the
# summary) — confirm this choice is intended.
eco_model = PanelOLS(
    df_eco['NEET'],
    df_eco.drop(columns='NEET'),
    time_effects=True,
)
panel_eco_OLS = eco_model.fit(cov_type='clustered', cluster_time=True)

In [31]:
# Print the plain-text estimation summary of the economic-features fit.
print(panel_eco_OLS)

                          PanelOLS Estimation Summary                           
Dep. Variable:                   NEET   R-squared:                        0.5343
Estimator:                   PanelOLS   R-squared (Between):              0.4755
No. Observations:                 161   R-squared (Within):               0.2769
Date:                Thu, Apr 01 2021   R-squared (Overall):              0.5377
Time:                        23:31:44   Log-likelihood                   -395.05
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      18.230
Entities:                          30   P-value                           0.0000
Avg Obs:                       5.3667   Distribution:                   F(9,143)
Min Obs:                       2.0000                                           
Max Obs:                       9.0000   F-statistic (robust):          8.859e+11
                            

## With education features only

In [41]:
# Estimation sample for the education specification: the baseline Exp_LMP / STR
# terms plus Years_schooling, Avg_class_size and LogExp_educ.
edu_cols = [
    'const', 'Country', 'Time', 'NEET',
    'Exp_LMP', 'Exp_LMP_2', 'Exp_LMP_3',
    'STR', 'STR_2', 'STR_3',
    'Years_schooling', 'Avg_class_size', 'LogExp_educ',
]
df_edu = (
    df[edu_cols]
    .dropna()                        # keep only rows complete on every selected column
    .reset_index(drop=True)
    .set_index(['Country', 'Time'])  # two-level index used by the PanelOLS call below
)

In [43]:
# Regress NEET on every remaining column of df_edu with time effects, then fit
# with a clustered covariance estimator (clusters formed on time).
# NOTE(review): the summary printed for this fit reports a negative robust
# F-statistic, which is not a valid F value. This suggests the time-clustered
# covariance is ill-conditioned (few periods relative to regressors) — confirm
# whether clustering on time is appropriate here.
edu_model = PanelOLS(
    df_edu['NEET'],
    df_edu.drop(columns='NEET'),
    time_effects=True,
)
panel_edu_OLS = edu_model.fit(cov_type='clustered', cluster_time=True)

In [44]:
# Print the plain-text estimation summary of the education-features fit.
print(panel_edu_OLS)

                          PanelOLS Estimation Summary                           
Dep. Variable:                   NEET   R-squared:                        0.6932
Estimator:                   PanelOLS   R-squared (Between):              0.6856
No. Observations:                  98   R-squared (Within):               0.3271
Date:                Thu, Apr 01 2021   R-squared (Overall):              0.6947
Time:                        23:36:29   Log-likelihood                   -226.97
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      21.089
Entities:                          21   P-value                           0.0000
Avg Obs:                       4.6667   Distribution:                    F(9,84)
Min Obs:                       3.0000                                           
Max Obs:                       5.0000   F-statistic (robust):          -1.53e+12
                            

## With labor features only

In [103]:
# Estimation sample for the labor specification: Exp_LMP / STR terms plus
# Strictness_of_workers and PT_employ.
# NOTE(review): Exp_LMP_3 is not selected here although the explanatory,
# economic and education samples include it — confirm the omission is
# intentional. Avg_dur_unemployment is available in df but unused; verify.
labor_cols = [
    'const', 'Country', 'Time', 'NEET',
    'Exp_LMP', 'Exp_LMP_2',
    'STR', 'STR_2', 'STR_3',
    'Strictness_of_workers', 'PT_employ',
]
df_labor = (
    df[labor_cols]
    .dropna()                        # keep only rows complete on every selected column
    .reset_index(drop=True)
    .set_index(['Country', 'Time'])  # two-level index used by the PanelOLS call below
)

In [105]:
# Regress NEET on every remaining column of df_labor with time effects, then
# fit with a clustered covariance estimator (clusters formed on time).
# NOTE(review): clustering on time yields only as many clusters as there are
# periods, and the panel looks short (max 9 observations per entity in the
# summary) — confirm this choice is intended.
labor_model = PanelOLS(
    df_labor['NEET'],
    df_labor.drop(columns='NEET'),
    time_effects=True,
)
panel_labor_OLS = labor_model.fit(cov_type='clustered', cluster_time=True)

In [106]:
# Print the plain-text estimation summary of the labor-features fit.
print(panel_labor_OLS)

                          PanelOLS Estimation Summary                           
Dep. Variable:                   NEET   R-squared:                        0.3632
Estimator:                   PanelOLS   R-squared (Between):              0.3119
No. Observations:                 161   R-squared (Within):               0.1690
Date:                Thu, Apr 01 2021   R-squared (Overall):              0.3757
Time:                        23:53:29   Log-likelihood                   -420.64
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      11.813
Entities:                          30   P-value                           0.0000
Avg Obs:                       5.3667   Distribution:                   F(7,145)
Min Obs:                       2.0000                                           
Max Obs:                       9.0000   F-statistic (robust):          2.214e+08
                            

## With education and economic features

In [86]:
# Estimation sample combining the economic (LogGDP, CPI, DEBT) and education
# (Years_schooling, Avg_class_size, LogExp_educ) controls with the Exp_LMP /
# STR terms.
# NOTE(review): Exp_LMP_3 is not selected here although the separate economic
# and education samples include it — confirm the omission is intentional.
eco_edu_cols = [
    'const', 'Country', 'Time', 'NEET',
    'Exp_LMP', 'Exp_LMP_2',
    'STR', 'STR_2', 'STR_3',
    'LogGDP', 'CPI', 'DEBT',
    'Years_schooling', 'Avg_class_size', 'LogExp_educ',
]
df_eco_edu = (
    df[eco_edu_cols]
    .dropna()                        # keep only rows complete on every selected column
    .reset_index(drop=True)
    .set_index(['Country', 'Time'])  # two-level index used by the PanelOLS call below
)

In [87]:
# Regress NEET on every remaining column of df_eco_edu with time effects, then
# fit with a clustered covariance estimator (clusters formed on time).
# NOTE(review): clustering on time yields only as many clusters as there are
# periods, and the panel looks short (max 5 observations per entity in the
# summary) — confirm this choice is intended.
eco_edu_model = PanelOLS(
    df_eco_edu['NEET'],
    df_eco_edu.drop(columns='NEET'),
    time_effects=True,
)
panel_eco_edu_OLS = eco_edu_model.fit(cov_type='clustered', cluster_time=True)

In [88]:
# Print the plain-text estimation summary of the economic + education fit.
print(panel_eco_edu_OLS)

                          PanelOLS Estimation Summary                           
Dep. Variable:                   NEET   R-squared:                        0.7282
Estimator:                   PanelOLS   R-squared (Between):              0.7465
No. Observations:                  98   R-squared (Within):               0.2582
Date:                Thu, Apr 01 2021   R-squared (Overall):              0.7298
Time:                        23:48:03   Log-likelihood                   -221.04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      19.971
Entities:                          21   P-value                           0.0000
Avg Obs:                       4.6667   Distribution:                   F(11,82)
Min Obs:                       3.0000                                           
Max Obs:                       5.0000   F-statistic (robust):          5.223e+14
                            

## With labor, economic and education features

In [107]:
# Estimation sample for the full specification: Exp_LMP / STR terms together
# with the economic, education and labor controls.
# NOTE(review): Exp_LMP_3 is not selected here although the explanatory,
# economic and education samples include it — confirm the omission is
# intentional.
all_cols = [
    'const', 'Country', 'Time', 'NEET',
    'Exp_LMP', 'Exp_LMP_2',
    'STR', 'STR_2', 'STR_3',
    'LogGDP', 'CPI', 'DEBT',
    'Years_schooling', 'Avg_class_size', 'LogExp_educ',
    'Strictness_of_workers', 'PT_employ',
]
df_all = (
    df[all_cols]
    .dropna()                        # keep only rows complete on every selected column
    .reset_index(drop=True)
    .set_index(['Country', 'Time'])  # two-level index used by the PanelOLS call below
)

In [109]:
# Regress NEET on every remaining column of df_all with time effects, then fit
# with a clustered covariance estimator (clusters formed on time).
# NOTE(review): the summary printed for this fit reports a negative robust
# F-statistic, which is not a valid F value. This suggests the time-clustered
# covariance is ill-conditioned (few periods relative to regressors) — confirm
# whether clustering on time is appropriate here.
all_model = PanelOLS(
    df_all['NEET'],
    df_all.drop(columns='NEET'),
    time_effects=True,
)
panel_all_OLS = all_model.fit(cov_type='clustered', cluster_time=True)

In [110]:
# Print the plain-text estimation summary of the full
# (labor + economic + education) fit.
print(panel_all_OLS)

                          PanelOLS Estimation Summary                           
Dep. Variable:                   NEET   R-squared:                        0.7323
Estimator:                   PanelOLS   R-squared (Between):              0.7495
No. Observations:                  98   R-squared (Within):               0.2835
Date:                Thu, Apr 01 2021   R-squared (Overall):              0.7321
Time:                        23:56:36   Log-likelihood                   -220.30
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      16.830
Entities:                          21   P-value                           0.0000
Avg Obs:                       4.6667   Distribution:                   F(13,80)
Min Obs:                       3.0000                                           
Max Obs:                       5.0000   F-statistic (robust):         -2.048e+15
                            