In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from linearmodels import PanelOLS

In [2]:
# Read the full panel database (CSV) into the working frame used by every cell below.
df = pd.read_csv(filepath_or_buffer='./data/panel_data/Full_DB.csv')

# Interaction terms between LogGDP and Exp_LMP

In [3]:
# Interaction regressors: element-wise product of LogGDP with each Exp_LMP term.
# (Exp_LMP_2 is presumably the squared spending term -- confirm against the data dictionary.)
df['LogGDPxExp_LMP'] = df['LogGDP'].mul(df['Exp_LMP'])
df['LogGDPxExp_LMP_2'] = df['LogGDP'].mul(df['Exp_LMP_2'])

In [4]:
# Index the frame by (Country, Time) so the panel models below can identify
# the entity and time dimension of each observation from the index.
df = df.set_index(keys=['Country', 'Time'])

In [5]:
# Remove the columns that are not used as regressors, then keep only the
# rows that are complete in every remaining column.
df = (
    df
    .drop(columns=['Avg_dur_unemployment', 'GDP', 'Exp_educ', 'Exp_LMP_3', 'Avg_class_size'])
    .dropna()
)

In [6]:
# Entity fixed-effects regression of NEET on every other column currently in df,
# with the covariance clustered by entity.
# NOTE: the regressor set is "all columns except NEET", so this cell must be run
# before any further columns are added to df.
panel_interaction_exp_lmp = PanelOLS(
    dependent=df['NEET'],
    exog=df.drop(columns='NEET'),
    entity_effects=True,
).fit(cov_type='clustered', cluster_entity=True)
print(panel_interaction_exp_lmp)

                          PanelOLS Estimation Summary                           
Dep. Variable:                   NEET   R-squared:                        0.5366
Estimator:                   PanelOLS   R-squared (Between):              0.0232
No. Observations:                 125   R-squared (Within):               0.5366
Date:                Sat, Apr 03 2021   R-squared (Overall):             -0.0208
Time:                        11:43:47   Log-likelihood                   -138.42
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      6.9481
Entities:                          30   P-value                           0.0000
Avg Obs:                       4.1667   Distribution:                   F(14,84)
Min Obs:                       0.0000                                           
Max Obs:                       8.0000   F-statistic (robust):             50.223
                            

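# Joint F-tests of the null hypotheses that the LogGDP x Exp_LMP interaction coefficients are zero (Hyp 1), and that they are zero together with the Exp_LMP coefficients (Hyp 2)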

In [7]:
# Joint F-tests on the LogGDP x Exp_LMP terms, run on a plain OLS fit of NEET
# on every other column currently in df.
# NOTE(review): this OLS fit has no entity effects and no clustered covariance,
# unlike the PanelOLS fits in this notebook -- confirm that the pooled
# specification is the one these tests are meant to refer to.
inter_ols = sm.OLS(endog=df['NEET'], exog=df.drop(columns='NEET')).fit()

# Hyp 1 -- H0: both interaction coefficients are zero.
hyp_inter = '(LogGDPxExp_LMP=0), (LogGDPxExp_LMP_2=0)'
# Hyp 2 -- H0: the interaction coefficients and the Exp_LMP / Exp_LMP_2 coefficients are all zero.
hyp = '(LogGDPxExp_LMP=0), (LogGDPxExp_LMP_2=0), (Exp_LMP=0), (Exp_LMP_2=0)'

print('Hyp 1:', inter_ols.f_test(hyp_inter))
print('Hyp 2:', inter_ols.f_test(hyp))

Hyp 1: <F test: F=array([[2.03178601]]), p=0.13599234140514763, df_denom=110, df_num=2>
Hyp 2: <F test: F=array([[6.40121188]]), p=0.00011423437307763265, df_denom=110, df_num=4>


# Interaction terms between STR and LogExp_educ

In [8]:
# Interaction regressors: element-wise product of each STR term with LogExp_educ.
# These columns are added to df itself, so any earlier cell that uses
# "all columns except NEET" will pick them up if it is re-run after this one.
df['STRxLogExp_educ'] = df['STR'].mul(df['LogExp_educ'])
df['STR_2xLogExp_educ'] = df['STR_2'].mul(df['LogExp_educ'])
df['STR_3xLogExp_educ'] = df['STR_3'].mul(df['LogExp_educ'])

In [9]:
# Entity fixed-effects regression of NEET with the STR x LogExp_educ interactions only:
# the two LogGDP x Exp_LMP interaction columns are excluded from the regressors.
# Covariance is clustered by entity.
panel_interaction_str = PanelOLS(
    dependent=df['NEET'],
    exog=df.drop(columns=['NEET', 'LogGDPxExp_LMP', 'LogGDPxExp_LMP_2']),
    entity_effects=True,
).fit(cov_type='clustered', cluster_entity=True)
print(panel_interaction_str)

                          PanelOLS Estimation Summary                           
Dep. Variable:                   NEET   R-squared:                        0.5240
Estimator:                   PanelOLS   R-squared (Between):             -0.5174
No. Observations:                 125   R-squared (Within):               0.5240
Date:                Sat, Apr 03 2021   R-squared (Overall):             -0.3917
Time:                        11:43:48   Log-likelihood                   -140.09
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      6.0925
Entities:                          30   P-value                           0.0000
Avg Obs:                       4.1667   Distribution:                   F(15,83)
Min Obs:                       0.0000                                           
Max Obs:                       8.0000   F-statistic (robust):             96.425
                            

# All interaction terms

In [10]:
# Entity fixed-effects regression of NEET on every other column in df, i.e. with
# both sets of interaction columns included. Covariance is clustered by entity.
panel_inter = PanelOLS(
    dependent=df['NEET'],
    exog=df.drop(columns=['NEET']),
    entity_effects=True,
).fit(cov_type='clustered', cluster_entity=True)
print(panel_inter)

                          PanelOLS Estimation Summary                           
Dep. Variable:                   NEET   R-squared:                        0.5817
Estimator:                   PanelOLS   R-squared (Between):             -0.4017
No. Observations:                 125   R-squared (Within):               0.5817
Date:                Sat, Apr 03 2021   R-squared (Overall):             -0.4099
Time:                        11:43:48   Log-likelihood                   -132.01
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      6.6266
Entities:                          30   P-value                           0.0000
Avg Obs:                       4.1667   Distribution:                   F(17,81)
Min Obs:                       0.0000                                           
Max Obs:                       8.0000   F-statistic (robust):             145.68
                            