# Imports

In [1]:
#Pandas, Numpy importieren
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn as sl
import scipy.stats
import statsmodels.api as sm
import matplotlib.pyplot as plt

#Importeieren der Modelle aus verschiedenen Bibliotheken
from sklearn.linear_model import LinearRegression
from statsmodels.regression.linear_model import GLS
from linearmodels.panel import PooledOLS


# Splitting data into training and testing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold, KFold

from sklearn.model_selection import train_test_split

# No warnings about setting value on copy of slice
pd.options.mode.chained_assignment = None

# Display up to 60 columns of a dataframe
pd.set_option('display.max_columns', 600)

# Matplotlib visualization
%matplotlib inline
# Set default font size
plt.rcParams['font.size'] = 24

# Internal ipython tool for setting figure size
from IPython.core.pylabtools import figsize
#figsize(6, 6)

# Seaborn for visualization
sns.set(font_scale = 2)

# Import der Daten

In [2]:
pd.set_option('display.max_columns', 600)

data = pd.read_csv('../../data/science_imputed_2.csv')
data = data.sample(frac=1)

#data.info()
#data.head()

# Benötigte Funktionen

## Methode zur Modellierung

In [3]:
'''
Params:  
    target: Variable, die vorhergesagt werden soll
    pexog_vars: liste der exogenen Variablen Bsp.: ['GINI', "log(GDP)", "HDI", "TIME", "LOCATION"]
    pX: Datensatz, auf dem die Kreuzvalidierung durchgeführt werden soll
    
'''

def modellieren(pexog_vars, pX, target):
    exog_vars = pexog_vars
    exog = sm.add_constant(pX[exog_vars])
    # data = data.drop(columns={"LOCATION", "TIME", "SUBJECT"})

    mod = PooledOLS(target, exog)

    pooled_res = mod.fit(cov_type='clustered', clusters=pX.LOCATION)
    
    r2_adj = (1-(((1 - pooled_res.rsquared)*(target.shape[0] - 1))/(target.shape[0]-(len(pexog_vars))-1)))
    print("R^2 adj.: ", r2_adj)
    
    return pooled_res

## Kreuzvalidierung

Die Kreuzvalidierung wird durchgeführt mit einer K-Fold Kreuzvalidierung. 

In [4]:
'''
Params: 
    n_splits: Anzahl der folds 
    target
    pexog_vars: liste der exogenen Variablen Bsp.: ['GINI', "log(GDP)", "HDI", "TIME", "LOCATION"]
    pX: Datensatz, auf dem die Kreuzvalidierung durchgeführt werden soll
    
'''



def k_fold_pooledOLS(n_splits, target, pexog_vars, pX):
    
#     #
#     exog_vars = pexog_vars.append("LOCATION", "TIME")
    features = pX
    
    
    # Splitten in der Reihenfolge der Reihen im df
    kf = KFold(n_splits, shuffle=True)
    r2 = np.zeros(n_splits)
    rmse = np.zeros(n_splits)
    r2_adj = np.zeros(n_splits)
    # print(r2)
    i=0

    for train_index, val_index in kf.split(features):
    #     print("%s %s" % (train_index, test_index))
        X_train, X_validate, y_train, y_validate = features.iloc[train_index], features.iloc[val_index], target.iloc[train_index], target.iloc[val_index]
        #print(X_train)
        
#         print(X_train)
        
        r2[i], rmse[i], r2_adj[i] = prediction(pexog_vars, X_train, y_train, X_validate,  y_validate)

    #     print(r2[i])
        i = i+1

    print("--------------------------- TOTAL ---------------------------------")
    print("R^2s: ", r2)
    # calculate mean of metrics
    print("R^2 mean: ", r2.mean())
    print("R^2 std: ", r2.std())
    
    print("\nR^2 adj.: ", r2_adj)
    # calculate mean of metrics
    print("R^2 adj. mean: ", r2_adj.mean())
    print("R^2 adj. std: ", r2_adj.std())
    
    print("\nRMSEs: ", rmse)
    # calculate mean of metrics
    print("RMSE mean: ", rmse.mean())
    print("RMSE std: ", rmse.std())

## Modell Vorhersage

In [5]:
'''
Params: 
    pexog_vars: liste der exogenen Variablen Bsp.: ['GINI', "log(GDP)", "HDI", "TIME", "LOCATION"]
    pX: 
    y_test: Y Werte des Testdatensatzes
    
'''



'''
R^2 ist NICHT out of sample => es ist das R^2 für das Modell auf den Trainingsdaten
R^2 ajd. ist NICHT out of sample => es ist das R^2 adj. für das Modell auf den Trainingsdaten
RSME ist out of sample => es ist das RMSE für das Modell auf den Testdaten
'''

def prediction(pexog_vars, pX, py, pX_test, py_test):

    #pX enthält Loaction Time Subejct
    exog = sm.add_constant(pX[pexog_vars])
    
#     print(py, exog)
    mod = PooledOLS(py, exog)

    pooled_res = mod.fit(cov_type='clustered', clusters=pX.LOCATION)
    #print(mod.predict(params=pooled_res.params, exog=X_test))
    
    
    # Prediction y^= X_test * beta
    x_pred = np.array(sm.add_constant(pX_test[pexog_vars]))
    
    b_pred = np.array(pooled_res.params)[0:len(pexog_vars)+1]

    y_pred = x_pred.dot(b_pred) 
    #print(y_pred)
    #print('\n')
    
    y_test = np.array(py_test)
    #print(y_test)
#     print('\n')
    squares = (y_pred-py_test)**2
    mse = (1/y_pred.shape[0])*(np.sum(squares))
    
#     print('Anzahl exogene Variablen: ', len(pexog_vars))
#     print('Anzahl Beobachtungen: ', y_pred.shape[0])
#     print('\n')
#     r2 = 1 - RSS/TSS
#     RSS = (np.sum(squares)
    
    # r^2 adj in-sample calculation
    r2_adj = (1-(((1 - pooled_res.rsquared)*(py.shape[0] - 1))/(py.shape[0]-(len(pexog_vars))-1)))
    
    
    ### 
   
    print('\nR^2 = ', pooled_res.rsquared)
    print('R^2 adj. = ', r2_adj)
#     print("MSE = ", mse)
    print("RMSE = ", np.sqrt(mse))
    
    return pooled_res.rsquared, np.sqrt(mse), r2_adj

![image.png](attachment:image.png)

# Aufteilung der Daten in Train- & Testdatensatz

In [6]:
time = pd.Categorical(data["TIME"])
location = pd.Categorical(data["LOCATION"])
data = data.set_index(['LOCATION', 'TIME'])
data['TIME'] = time
data['LOCATION'] = location
# print(data)

features = data.drop(["PISA Science", "SUBJECT"], axis=1)
#PISA Math
X, X_test, y, y_test = train_test_split(features, data["PISA Science"], test_size = 0.1, random_state = 42)

#Festlegung der Parameter für die Modellierung methode
pX = X
target = y



# Null Modell

In [7]:
pexog_vars = []

modellieren(pexog_vars, pX, target)

R^2 adj.:  -2.220446049250313e-16


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,-2.22e-16
Estimator:,PooledOLS,R-squared (Between):,0.0000
No. Observations:,176,R-squared (Within):,0.0000
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.0000
Time:,22:00:58,Log-likelihood,-884.27
Cov. Estimator:,Clustered,,
,,F-statistic:,--
Entities:,44,P-value,--
Avg Obs:,4.0000,Distribution:,--
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,490.80,5.4922,89.362,0.0000,479.96,501.64


# 1. Regressor

### Gini-Koeffizient

In [8]:
pexog_vars = ['GINI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.41473626154167


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.4181
Estimator:,PooledOLS,R-squared (Between):,0.5135
No. Observations:,176,R-squared (Within):,-0.2535
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.4181
Time:,22:00:58,Log-likelihood,-836.62
Cov. Estimator:,Clustered,,
,,F-statistic:,125.01
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(1,174)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,614.18,20.949,29.318,0.0000,572.83,655.52
GINI,-375.61,64.618,-5.8127,0.0000,-503.14,-248.07


### BIP

In [9]:
pexog_vars = ['log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.34659270959835276


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.3503
Estimator:,PooledOLS,R-squared (Between):,0.5208
No. Observations:,176,R-squared (Within):,-1.4014
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.3503
Time:,22:00:58,Log-likelihood,-846.31
Cov. Estimator:,Clustered,,
,,F-statistic:,93.827
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(1,174)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,21.344,123.15,0.1733,0.8626,-221.71,264.39
log(GDP),45.124,11.753,3.8395,0.0002,21.928,68.320


### CPI

In [10]:
pexog_vars = ['CPI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.40368335373192166


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.4071
Estimator:,PooledOLS,R-squared (Between):,0.4646
No. Observations:,176,R-squared (Within):,-0.0985
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.4071
Time:,22:00:58,Log-likelihood,-838.27
Cov. Estimator:,Clustered,,
,,F-statistic:,119.47
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(1,174)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,409.16,18.892,21.657,0.0000,371.87,446.45
CPI,1.2438,0.2540,4.8978,0.0000,0.7426,1.7451


### Anteil der 25-64 jährigen mit tertiärer Bildung

In [11]:
pexog_vars = ['log(PCT_EDU_TRY)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.4257007730201967


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.4290
Estimator:,PooledOLS,R-squared (Between):,0.4952
No. Observations:,176,R-squared (Within):,-1.3215
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.4290
Time:,22:00:59,Log-likelihood,-834.96
Cov. Estimator:,Clustered,,
,,F-statistic:,130.72
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(1,174)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,304.05,37.468,8.1150,0.0000,230.10,378.00
log(PCT_EDU_TRY),55.479,10.754,5.1591,0.0000,34.255,76.704


### Migration

In [12]:
pexog_vars = ['log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.37478679362818135


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.3784
Estimator:,PooledOLS,R-squared (Between):,0.4758
No. Observations:,176,R-squared (Within):,-0.1446
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.3784
Time:,22:00:59,Log-likelihood,-842.43
Cov. Estimator:,Clustered,,
,,F-statistic:,105.90
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(1,174)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,456.75,10.659,42.851,0.0000,435.71,477.79
log(MIGRANTS),17.746,4.0539,4.3775,0.0000,9.7447,25.747


### Mordrate

In [13]:
pexog_vars = ['HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.3262872520651492


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.3301
Estimator:,PooledOLS,R-squared (Between):,0.3969
No. Observations:,176,R-squared (Within):,-0.4656
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.3301
Time:,22:00:59,Log-likelihood,-849.01
Cov. Estimator:,Clustered,,
,,F-statistic:,85.755
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(1,174)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,502.48,5.0140,100.22,0.0000,492.59,512.38
HOMICIDES,-3.6792,0.4070,-9.0407,0.0000,-4.4825,-2.8760


### Alkohol Konsum pro Kopf

In [14]:
pexog_vars = ['ALC_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.3686350873353522


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.3722
Estimator:,PooledOLS,R-squared (Between):,0.4712
No. Observations:,176,R-squared (Within):,-0.1195
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.3722
Time:,22:00:59,Log-likelihood,-843.29
Cov. Estimator:,Clustered,,
,,F-statistic:,103.18
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(1,174)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,425.20,14.239,29.863,0.0000,397.10,453.31
ALC_PC,7.4080,1.3992,5.2944,0.0000,4.6464,10.170


### Internet PC

In [15]:
pexog_vars = ['INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.2384259989064057


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.2428
Estimator:,PooledOLS,R-squared (Between):,0.3586
No. Observations:,176,R-squared (Within):,-2.9161
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.2428
Time:,22:00:59,Log-likelihood,-859.79
Cov. Estimator:,Clustered,,
,,F-statistic:,55.787
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(1,174)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,428.98,16.200,26.480,0.0000,397.00,460.95
INTERNET_PC,0.8751,0.1947,4.4938,0.0000,0.4908,1.2595


### Bildungsausgaben im Sekundär Bereich

In [16]:
pexog_vars = ['log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.3421993845011301


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.3460
Estimator:,PooledOLS,R-squared (Between):,0.3686
No. Observations:,176,R-squared (Within):,-0.9364
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.3460
Time:,22:00:59,Log-likelihood,-846.90
Cov. Estimator:,Clustered,,
,,F-statistic:,92.038
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(1,174)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,220.45,56.869,3.8765,0.0001,108.21,332.69
log(EDU_SPENDING),87.917,18.005,4.8829,0.0000,52.380,123.45


### Schüler, Lehrer Verhältnis

In [17]:
pexog_vars = ['STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.13506002471351541


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.1400
Estimator:,PooledOLS,R-squared (Between):,0.1502
No. Observations:,176,R-squared (Within):,-0.0409
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.1400
Time:,22:00:59,Log-likelihood,-870.99
Cov. Estimator:,Clustered,,
,,F-statistic:,28.326
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(1,174)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,539.06,15.000,35.936,0.0000,509.45,568.66
STR_SRY,-3.8806,1.3432,-2.8890,0.0044,-6.5317,-1.2295


# 2. Regressor

### BIP

In [18]:
pexog_vars = ['log(PCT_EDU_TRY)', 'log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.4542419558441062


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.4605
Estimator:,PooledOLS,R-squared (Between):,0.5805
No. Observations:,176,R-squared (Within):,-1.6287
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.4605
Time:,22:00:59,Log-likelihood,-829.96
Cov. Estimator:,Clustered,,
,,F-statistic:,73.827
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(2,173)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,154.61,106.66,1.4496,0.1490,-55.905,365.13
log(PCT_EDU_TRY),40.151,13.643,2.9430,0.0037,13.223,67.078
log(GDP),19.324,12.811,1.5083,0.1333,-5.9628,44.610


### CPI

In [19]:
pexog_vars = ['log(PCT_EDU_TRY)', 'CPI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5178117566883267


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.5233
Estimator:,PooledOLS,R-squared (Between):,0.5917
No. Observations:,176,R-squared (Within):,-0.6529
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.5233
Time:,22:00:59,Log-likelihood,-819.06
Cov. Estimator:,Clustered,,
,,F-statistic:,94.964
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(2,173)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,320.41,33.788,9.4830,0.0000,253.72,387.10
log(PCT_EDU_TRY),36.045,10.397,3.4669,0.0007,15.524,56.567
CPI,0.7474,0.1874,3.9885,0.0001,0.3775,1.1172


### GINI

In [20]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6240569269632052


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6284
Estimator:,PooledOLS,R-squared (Between):,0.7281
No. Observations:,176,R-squared (Within):,-1.0154
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6284
Time:,22:00:59,Log-likelihood,-797.16
Cov. Estimator:,Clustered,,
,,F-statistic:,146.25
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(2,173)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,442.21,34.999,12.635,0.0000,373.13,511.29
log(PCT_EDU_TRY),41.434,8.0637,5.1383,0.0000,25.518,57.350
GINI,-276.69,43.266,-6.3950,0.0000,-362.08,-191.29


### Migration

In [21]:
pexog_vars = ['log(PCT_EDU_TRY)', 'log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.48296455552043316


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.4889
Estimator:,PooledOLS,R-squared (Between):,0.5763
No. Observations:,176,R-squared (Within):,-0.8518
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.4889
Time:,22:00:59,Log-likelihood,-825.21
Cov. Estimator:,Clustered,,
,,F-statistic:,82.734
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(2,173)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,346.99,38.708,8.9643,0.0000,270.59,423.39
log(PCT_EDU_TRY),37.381,13.767,2.7152,0.0073,10.207,64.555
log(MIGRANTS),9.3725,4.9185,1.9056,0.0584,-0.3355,19.081


### Mordrate

In [22]:
pexog_vars = ['log(PCT_EDU_TRY)', 'HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.577402238499031


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.5822
Estimator:,PooledOLS,R-squared (Between):,0.6715
No. Observations:,176,R-squared (Within):,-1.1600
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.5822
Time:,22:00:59,Log-likelihood,-807.46
Cov. Estimator:,Clustered,,
,,F-statistic:,120.55
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(2,173)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,348.65,38.938,8.9541,0.0000,271.80,425.51
log(PCT_EDU_TRY),44.715,10.975,4.0741,0.0001,23.052,66.378
HOMICIDES,-2.6355,0.4034,-6.5336,0.0000,-3.4317,-1.8393


### Alkohol Konsum pro Kopf

In [23]:
pexog_vars = ['log(PCT_EDU_TRY)', 'ALC_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5973196402366594


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6019
Estimator:,PooledOLS,R-squared (Between):,0.6738
No. Observations:,176,R-squared (Within):,-0.5479
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6019
Time:,22:00:59,Log-likelihood,-803.21
Cov. Estimator:,Clustered,,
,,F-statistic:,130.79
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(2,173)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,298.48,29.671,10.060,0.0000,239.92,357.04
log(PCT_EDU_TRY),43.048,8.2101,5.2434,0.0000,26.844,59.253
ALC_PC,5.3545,1.0853,4.9335,0.0000,3.2123,7.4967


### Internet PC

In [24]:
pexog_vars = ['log(PCT_EDU_TRY)', 'INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.42485935315411116


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.4314
Estimator:,PooledOLS,R-squared (Between):,0.5084
No. Observations:,176,R-squared (Within):,-1.6175
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.4314
Time:,22:01:00,Log-likelihood,-834.58
Cov. Estimator:,Clustered,,
,,F-statistic:,65.637
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(2,173)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,309.14,40.492,7.6346,0.0000,229.22,389.06
log(PCT_EDU_TRY),51.390,14.659,3.5058,0.0006,22.457,80.324
INTERNET_PC,0.1228,0.2277,0.5393,0.5904,-0.3267,0.5722


### Bildungsausgaben im Sekundar Bereich

In [25]:
pexog_vars = ['log(PCT_EDU_TRY)', 'log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5478650190834442


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.5530
Estimator:,PooledOLS,R-squared (Between):,0.5969
No. Observations:,176,R-squared (Within):,-1.1447
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.5530
Time:,22:01:00,Log-likelihood,-813.40
Cov. Estimator:,Clustered,,
,,F-statistic:,107.03
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(2,173)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,171.56,41.108,4.1734,0.0000,90.421,252.70
log(PCT_EDU_TRY),42.194,9.2029,4.5849,0.0000,24.030,60.359
log(EDU_SPENDING),57.629,12.743,4.5225,0.0000,32.477,82.780


### Schüler, Lehrer Verhältnis

In [26]:
pexog_vars = ['log(PCT_EDU_TRY)', 'STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.47422564150164714


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.4802
Estimator:,PooledOLS,R-squared (Between):,0.5372
No. Observations:,176,R-squared (Within):,-1.1440
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.4802
Time:,22:01:00,Log-likelihood,-826.68
Cov. Estimator:,Clustered,,
,,F-statistic:,79.921
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(2,173)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,349.72,34.568,10.117,0.0000,281.49,417.95
log(PCT_EDU_TRY),50.836,9.7450,5.2166,0.0000,31.602,70.070
STR_SRY,-2.4158,0.9067,-2.6644,0.0084,-4.2054,-0.6262


# 3. Regressor

### BIP

In [27]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6233730522947456


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6298
Estimator:,PooledOLS,R-squared (Between):,0.7205
No. Observations:,176,R-squared (Within):,-0.9501
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6298
Time:,22:01:00,Log-likelihood,-796.81
Cov. Estimator:,Clustered,,
,,F-statistic:,97.550
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(3,172)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,485.32,92.712,5.2347,0.0000,302.32,668.32
log(PCT_EDU_TRY),44.552,11.987,3.7166,0.0003,20.890,68.213
GINI,-289.48,43.886,-6.5961,0.0000,-376.10,-202.85
log(GDP),-4.7487,10.577,-0.4490,0.6540,-25.626,16.128


### CPI

In [28]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'CPI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6477935627233251


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6538
Estimator:,PooledOLS,R-squared (Between):,0.7422
No. Observations:,176,R-squared (Within):,-0.6787
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6538
Time:,22:01:00,Log-likelihood,-790.91
Cov. Estimator:,Clustered,,
,,F-statistic:,108.29
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(3,172)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,432.64,32.820,13.182,0.0000,367.86,497.42
log(PCT_EDU_TRY),32.535,8.4795,3.8369,0.0002,15.798,49.272
GINI,-239.31,38.657,-6.1906,0.0000,-315.61,-163.01
CPI,0.4152,0.1608,2.5814,0.0107,0.0977,0.7327


### Migration

In [29]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6276756767274363


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6341
Estimator:,PooledOLS,R-squared (Between):,0.7373
No. Observations:,176,R-squared (Within):,-0.8790
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6341
Time:,22:01:00,Log-likelihood,-795.80
Cov. Estimator:,Clustered,,
,,F-statistic:,99.340
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(3,172)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,446.65,35.340,12.639,0.0000,376.89,516.40
log(PCT_EDU_TRY),36.375,11.271,3.2272,0.0015,14.127,58.622
GINI,-256.71,51.269,-5.0072,0.0000,-357.91,-155.52
log(MIGRANTS),3.1451,4.6298,0.6793,0.4979,-5.9934,12.284


### Alkohol Konsum pro Kopf

In [30]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6813086392834096


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6868
Estimator:,PooledOLS,R-squared (Between):,0.7718
No. Observations:,176,R-squared (Within):,-0.5296
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6868
Time:,22:01:00,Log-likelihood,-782.11
Cov. Estimator:,Clustered,,
,,F-statistic:,125.71
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(3,172)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,401.20,38.294,10.477,0.0000,325.61,476.79
log(PCT_EDU_TRY),37.158,6.9535,5.3438,0.0000,23.432,50.883
GINI,-201.81,52.375,-3.8531,0.0002,-305.19,-98.425
ALC_PC,3.4793,1.1824,2.9427,0.0037,1.1455,5.8132


### Mordrate

In [31]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6439663775806035


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6501
Estimator:,PooledOLS,R-squared (Between):,0.7458
No. Observations:,176,R-squared (Within):,-0.9601
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6501
Time:,22:01:00,Log-likelihood,-791.86
Cov. Estimator:,Clustered,,
,,F-statistic:,106.51
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(3,172)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,427.80,33.976,12.591,0.0000,360.73,494.86
log(PCT_EDU_TRY),39.919,8.6941,4.5915,0.0000,22.758,57.080
GINI,-205.09,58.676,-3.4953,0.0006,-320.91,-89.273
HOMICIDES,-1.2607,0.5605,-2.2494,0.0258,-2.3670,-0.1544


### Internet PC

In [32]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6351375432079087


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6414
Estimator:,PooledOLS,R-squared (Between):,0.7242
No. Observations:,176,R-squared (Within):,-0.5854
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6414
Time:,22:01:00,Log-likelihood,-794.02
Cov. Estimator:,Clustered,,
,,F-statistic:,102.54
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(3,172)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,442.90,35.380,12.518,0.0000,373.06,512.73
log(PCT_EDU_TRY),50.161,10.853,4.6217,0.0000,28.738,71.585
GINI,-303.16,43.731,-6.9323,0.0000,-389.47,-216.84
INTERNET_PC,-0.3025,0.1827,-1.6558,0.0996,-0.6630,0.0581


### Bildungsausgaben im Sekundar Bereich

In [33]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6563074497558699


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6622
Estimator:,PooledOLS,R-squared (Between):,0.7365
No. Observations:,176,R-squared (Within):,-0.8371
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6622
Time:,22:01:00,Log-likelihood,-788.76
Cov. Estimator:,Clustered,,
,,F-statistic:,112.39
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(3,172)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,340.33,48.979,6.9485,0.0000,243.65,437.01
log(PCT_EDU_TRY),36.441,6.8270,5.3379,0.0000,22.966,49.917
GINI,-224.89,44.976,-5.0001,0.0000,-313.66,-136.11
log(EDU_SPENDING),33.064,12.132,2.7254,0.0071,9.1172,57.010


### Schüler, Lehrer Verhältnis

In [34]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.622678223940304


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6291
Estimator:,PooledOLS,R-squared (Between):,0.7309
No. Observations:,176,R-squared (Within):,-1.0413
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6291
Time:,22:01:00,Log-likelihood,-796.97
Cov. Estimator:,Clustered,,
,,F-statistic:,97.265
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(3,172)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,441.24,34.727,12.706,0.0000,372.69,509.78
log(PCT_EDU_TRY),41.533,8.0142,5.1824,0.0000,25.714,57.352
GINI,-288.46,61.293,-4.7063,0.0000,-409.44,-167.48
STR_SRY,0.3625,1.1314,0.3204,0.7490,-1.8707,2.5958


# 4. Regressor

### BIP

In [35]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6810779750413385


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6884
Estimator:,PooledOLS,R-squared (Between):,0.7651
No. Observations:,176,R-squared (Within):,-0.4711
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6884
Time:,22:01:00,Log-likelihood,-781.66
Cov. Estimator:,Clustered,,
,,F-statistic:,94.431
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(4,171)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,445.98,98.729,4.5172,0.0000,251.10,640.87
log(PCT_EDU_TRY),40.395,12.866,3.1397,0.0020,14.999,65.791
GINI,-215.03,51.619,-4.1657,0.0000,-316.92,-113.13
ALC_PC,3.4830,1.2169,2.8621,0.0047,1.0808,5.8851
log(GDP),-4.9375,12.242,-0.4033,0.6872,-29.103,19.228


### Mordrate

In [36]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7196435388335056


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.7261
Estimator:,PooledOLS,R-squared (Between):,0.8071
No. Observations:,176,R-squared (Within):,-0.4911
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.7261
Time:,22:01:00,Log-likelihood,-770.32
Cov. Estimator:,Clustered,,
,,F-statistic:,113.30
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(4,171)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,374.76,33.855,11.070,0.0000,307.94,441.59
log(PCT_EDU_TRY),34.385,6.4057,5.3679,0.0000,21.740,47.029
GINI,-91.455,55.467,-1.6488,0.1010,-200.94,18.034
ALC_PC,4.0454,1.0952,3.6938,0.0003,1.8836,6.2072
HOMICIDES,-1.7286,0.4723,-3.6595,0.0003,-2.6610,-0.7962


### Migration

In [37]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6796177297680033


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6869
Estimator:,PooledOLS,R-squared (Between):,0.7730
No. Observations:,176,R-squared (Within):,-0.5152
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6869
Time:,22:01:01,Log-likelihood,-782.07
Cov. Estimator:,Clustered,,
,,F-statistic:,93.806
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(4,171)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,402.59,42.871,9.3906,0.0000,317.96,487.21
log(PCT_EDU_TRY),36.318,11.710,3.1015,0.0023,13.204,59.432
GINI,-199.33,52.932,-3.7658,0.0002,-303.82,-94.849
ALC_PC,3.4288,1.2581,2.7255,0.0071,0.9455,5.9122
log(MIGRANTS),0.5604,4.6104,0.1216,0.9034,-8.5402,9.6610


### CPI

In [38]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'CPI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.711889755684236


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.7185
Estimator:,PooledOLS,R-squared (Between):,0.7943
No. Observations:,176,R-squared (Within):,-0.2650
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.7185
Time:,22:01:01,Log-likelihood,-772.72
Cov. Estimator:,Clustered,,
,,F-statistic:,109.10
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(4,171)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,388.24,31.260,12.420,0.0000,326.54,449.95
log(PCT_EDU_TRY),26.969,7.7523,3.4788,0.0006,11.666,42.271
GINI,-155.89,46.005,-3.3886,0.0009,-246.70,-65.080
ALC_PC,3.6702,1.0467,3.5064,0.0006,1.6040,5.7363
CPI,0.4644,0.1767,2.6292,0.0093,0.1157,0.8131


### Internet PC

In [39]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6823280156338544


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6896
Estimator:,PooledOLS,R-squared (Between):,0.7678
No. Observations:,176,R-squared (Within):,-0.3812
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6896
Time:,22:01:01,Log-likelihood,-781.32
Cov. Estimator:,Clustered,,
,,F-statistic:,94.971
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(4,171)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,403.96,38.139,10.592,0.0000,328.68,479.24
log(PCT_EDU_TRY),41.613,10.712,3.8848,0.0001,20.468,62.757
GINI,-218.98,55.606,-3.9381,0.0001,-328.75,-109.22
ALC_PC,3.2732,1.2657,2.5860,0.0105,0.7748,5.7717
INTERNET_PC,-0.1456,0.1926,-0.7559,0.4508,-0.5259,0.2346


### Bildungsausgaben im Sekundar Bereich

In [40]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6860013668605789


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6932
Estimator:,PooledOLS,R-squared (Between):,0.7682
No. Observations:,176,R-squared (Within):,-0.4951
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6932
Time:,22:01:01,Log-likelihood,-780.29
Cov. Estimator:,Clustered,,
,,F-statistic:,96.582
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(4,171)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,358.23,49.878,7.1821,0.0000,259.77,456.68
log(PCT_EDU_TRY),35.450,6.6873,5.3011,0.0000,22.250,48.650
GINI,-189.45,52.745,-3.5918,0.0004,-293.56,-85.332
ALC_PC,2.8681,1.2960,2.2131,0.0282,0.3100,5.4263
log(EDU_SPENDING),16.284,12.598,1.2926,0.1979,-8.5830,41.150


### Schüler, Lehrer Verhältnis

In [41]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.68397933857245


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.6912
Estimator:,PooledOLS,R-squared (Between):,0.7802
No. Observations:,176,R-squared (Within):,-0.5716
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.6912
Time:,22:01:01,Log-likelihood,-780.86
Cov. Estimator:,Clustered,,
,,F-statistic:,95.690
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(4,171)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,397.09,36.142,10.987,0.0000,325.75,468.43
log(PCT_EDU_TRY),37.209,6.8596,5.4244,0.0000,23.669,50.750
GINI,-226.73,66.094,-3.4305,0.0008,-357.20,-96.267
ALC_PC,3.6302,1.0725,3.3849,0.0009,1.5132,5.7472
STR_SRY,0.8674,1.0147,0.8548,0.3938,-1.1356,2.8704


# 5. Regressor

### BIP

In [42]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'CPI', 'log(GDP)'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7302764347138407


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.7380
Estimator:,PooledOLS,R-squared (Between):,0.7842
No. Observations:,176,R-squared (Within):,-0.0624
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.7380
Time:,22:01:01,Log-likelihood,-766.40
Cov. Estimator:,Clustered,,
,,F-statistic:,95.762
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(5,170)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,560.09,94.022,5.9570,0.0000,374.49,745.69
log(PCT_EDU_TRY),35.541,11.240,3.1621,0.0019,13.354,57.729
GINI,-189.11,47.755,-3.9600,0.0001,-283.38,-94.842
ALC_PC,3.7639,1.0520,3.5780,0.0005,1.6873,5.8405
CPI,0.6577,0.2177,3.0210,0.0029,0.2279,1.0875
log(GDP),-19.542,11.530,-1.6949,0.0919,-42.302,3.2176


### Mordrate

In [43]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'CPI', 'HOMICIDES'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7343684438465637


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.7420
Estimator:,PooledOLS,R-squared (Between):,0.8175
No. Observations:,176,R-squared (Within):,-0.3087
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.7420
Time:,22:01:01,Log-likelihood,-765.06
Cov. Estimator:,Clustered,,
,,F-statistic:,97.761
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(5,170)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,370.26,32.502,11.392,0.0000,306.10,434.42
log(PCT_EDU_TRY),27.379,6.9953,3.9139,0.0001,13.570,41.188
GINI,-78.698,60.552,-1.2997,0.1955,-198.23,40.832
ALC_PC,4.0777,1.0408,3.9180,0.0001,2.0233,6.1322
CPI,0.3437,0.1873,1.8347,0.0683,-0.0261,0.7134
HOMICIDES,-1.3962,0.5330,-2.6193,0.0096,-2.4484,-0.3440


### Migration

In [44]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'CPI', 'log(MIGRANTS)'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7124477125486196


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.7207
Estimator:,PooledOLS,R-squared (Between):,0.7923
No. Observations:,176,R-squared (Within):,-0.2976
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.7207
Time:,22:01:01,Log-likelihood,-772.04
Cov. Estimator:,Clustered,,
,,F-statistic:,87.717
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(5,170)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,381.82,39.102,9.7648,0.0000,304.63,459.01
log(PCT_EDU_TRY),29.251,10.722,2.7282,0.0070,8.0863,50.416
GINI,-161.19,48.294,-3.3377,0.0010,-256.53,-65.858
ALC_PC,3.8792,1.1194,3.4654,0.0007,1.6695,6.0889
CPI,0.5059,0.1852,2.7317,0.0070,0.1403,0.8715
log(MIGRANTS),-2.1309,4.6072,-0.4625,0.6443,-11.226,6.9637


### Internet PC

In [45]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'CPI', 'INTERNET_PC'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7162179744384319


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.7243
Estimator:,PooledOLS,R-squared (Between):,0.7940
No. Observations:,176,R-squared (Within):,-0.1388
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.7243
Time:,22:01:01,Log-likelihood,-770.87
Cov. Estimator:,Clustered,,
,,F-statistic:,89.334
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(5,170)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,391.53,31.127,12.579,0.0000,330.09,452.98
log(PCT_EDU_TRY),32.877,9.6107,3.4209,0.0008,13.906,51.849
GINI,-178.31,51.768,-3.4443,0.0007,-280.50,-76.115
ALC_PC,3.3813,1.1303,2.9915,0.0032,1.1500,5.6125
CPI,0.4902,0.1922,2.5509,0.0116,0.1109,0.8695
INTERNET_PC,-0.2116,0.2008,-1.0540,0.2934,-0.6079,0.1847


### Bildungsausgaben im Sekundar Bereich

In [46]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'CPI', 'log(EDU_SPENDING)'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7156208438154146


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.7237
Estimator:,PooledOLS,R-squared (Between):,0.7898
No. Observations:,176,R-squared (Within):,-0.2326
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.7237
Time:,22:01:01,Log-likelihood,-771.06
Cov. Estimator:,Clustered,,
,,F-statistic:,89.075
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(5,170)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,349.45,44.511,7.8510,0.0000,261.59,437.32
log(PCT_EDU_TRY),25.594,7.4656,3.4282,0.0008,10.857,40.331
GINI,-145.46,45.234,-3.2157,0.0016,-234.75,-56.168
ALC_PC,3.1120,1.1923,2.6101,0.0099,0.7584,5.4655
CPI,0.4565,0.1736,2.6288,0.0094,0.1137,0.7992
log(EDU_SPENDING),14.783,12.716,1.1625,0.2466,-10.319,39.885


### Schüler, Lehrer Verhältnis

In [47]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'CPI', 'STR_SRY'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7107015786517692


0,1,2,3
Dep. Variable:,PISA Science,R-squared:,0.7190
Estimator:,PooledOLS,R-squared (Between):,0.7965
No. Observations:,176,R-squared (Within):,-0.2811
Date:,"Mon, Feb 01 2021",R-squared (Overall):,0.7190
Time:,22:01:01,Log-likelihood,-772.57
Cov. Estimator:,Clustered,,
,,F-statistic:,86.982
Entities:,44,P-value,0.0000
Avg Obs:,4.0000,Distribution:,"F(5,170)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,387.25,31.089,12.456,0.0000,325.88,448.62
log(PCT_EDU_TRY),27.319,7.9693,3.4280,0.0008,11.587,43.051
GINI,-165.97,61.835,-2.6842,0.0080,-288.04,-43.911
ALC_PC,3.7159,1.0037,3.7022,0.0003,1.7346,5.6972
CPI,0.4493,0.1970,2.2812,0.0238,0.0605,0.8381
STR_SRY,0.2988,0.9949,0.3004,0.7643,-1.6651,2.2627


# Validierung des Modells

In [48]:
n_splits = 4
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'CPI', 'log(GDP)']

k_fold_pooledOLS(n_splits, target, pexog_vars, pX)


R^2 =  0.7686165590558987
R^2 adj. =  0.7594346764787517
RMSE =  20.63527470085882

R^2 =  0.7327190005431412
R^2 adj. =  0.7221126116758055
RMSE =  20.21069409663915

R^2 =  0.7045312188559291
R^2 adj. =  0.6928062672232278
RMSE =  18.988384031680535

R^2 =  0.7422124626781457
R^2 adj. =  0.7319827984987071
RMSE =  16.97035841940916
--------------------------- TOTAL ---------------------------------
R^2s:  [0.76861656 0.732719   0.70453122 0.74221246]
R^2 mean:  0.7370198102832786
R^2 std:  0.022909188709162618

R^2 adj.:  [0.75943468 0.72211261 0.69280627 0.7319828 ]
R^2 adj. mean:  0.726584088469123
R^2 adj. std:  0.023818283499208757

RMSEs:  [20.6352747  20.2106941  18.98838403 16.97035842]
RMSE mean:  19.201177812146916
RMSE std:  1.4228135305520706


In [49]:
n_splits = 4
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'CPI']

k_fold_pooledOLS(n_splits, target, pexog_vars, pX)


R^2 =  0.7049746641080685
R^2 adj. =  0.6956825275445431
RMSE =  22.246985585169146

R^2 =  0.7399006737304429
R^2 adj. =  0.7317085689660474
RMSE =  21.161579980842436

R^2 =  0.7174113125854661
R^2 adj. =  0.7085108814857957
RMSE =  19.3928426154856

R^2 =  0.7253996804331918
R^2 adj. =  0.7167508514704577
RMSE =  19.316236400617235
--------------------------- TOTAL ---------------------------------
R^2s:  [0.70497466 0.73990067 0.71741131 0.72539968]
R^2 mean:  0.7219215827142924
R^2 std:  0.012677593071934003

R^2 adj.:  [0.69568253 0.73170857 0.70851088 0.71675085]
R^2 adj. mean:  0.7131632073667109
R^2 adj. std:  0.01307688734191621

RMSEs:  [22.24698559 21.16157998 19.39284262 19.3162364 ]
RMSE mean:  20.529411145528606
RMSE std:  1.2362524395550702


# Anwendung des Modells auf die Testdaten

In [50]:
pexog_vars = ['log(PCT_EDU_TRY)', 'GINI', 'ALC_PC', 'CPI']

prediction(pexog_vars, pX, target, X_test, y_test)


R^2 =  0.7184751326971677
R^2 adj. =  0.711889755684236
RMSE =  15.184592374062342


(0.7184751326971677, 15.184592374062342, 0.711889755684236)