# Imports

In [1]:
#Pandas, Numpy importieren
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn as sl
import scipy.stats
import statsmodels.api as sm
import matplotlib.pyplot as plt

#Importeieren der Modelle aus verschiedenen Bibliotheken
from sklearn.linear_model import LinearRegression
from statsmodels.regression.linear_model import GLS
from linearmodels.panel import PooledOLS


# Splitting data into training and testing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold, KFold

from sklearn.model_selection import train_test_split

# No warnings about setting value on copy of slice
pd.options.mode.chained_assignment = None

# Display up to 60 columns of a dataframe
pd.set_option('display.max_columns', 600)

# Matplotlib visualization
%matplotlib inline
# Set default font size
plt.rcParams['font.size'] = 24

# Internal ipython tool for setting figure size
from IPython.core.pylabtools import figsize
#figsize(6, 6)

# Seaborn for visualization
sns.set(font_scale = 2)

# Import der Daten

In [2]:
pd.set_option('display.max_columns', 600)

data = pd.read_csv('../../data/math_imputed_2.csv')
data = data.sample(frac=1)

data.info()
#data.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 230 entries, 132 to 140
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   LOCATION           230 non-null    object 
 1   SUBJECT            230 non-null    object 
 2   TIME               230 non-null    float64
 3   PISA Math          230 non-null    float64
 4   GINI               230 non-null    float64
 5   STR_SRY            230 non-null    float64
 6   CPI                230 non-null    float64
 7   ALC_PC             230 non-null    float64
 8   INTERNET_PC        230 non-null    float64
 9   HOMICIDES          230 non-null    float64
 10  log(MIGRANTS)      230 non-null    float64
 11  log(GDP)           230 non-null    float64
 12  log(EDU_SPENDING)  230 non-null    float64
 13  log(PCT_EDU_TRY)   230 non-null    float64
dtypes: float64(12), object(2)
memory usage: 27.0+ KB


# Benötigte Funktionen

## Methode zur Modellierung

In [3]:
'''
Params:  
    target: Variable, die vorhergesagt werden soll
    pexog_vars: liste der exogenen Variablen Bsp.: ['GINI', "log(GDP)", "HDI", "TIME", "LOCATION"]
    pX: Datensatz, auf dem die Kreuzvalidierung durchgeführt werden soll
    
'''

def modellieren(pexog_vars, pX, target):
    exog_vars = pexog_vars
    exog = sm.add_constant(pX[exog_vars])
    # data = data.drop(columns={"LOCATION", "TIME", "SUBJECT"})

    mod = PooledOLS(target, exog)

    pooled_res = mod.fit(cov_type='clustered', clusters=pX.LOCATION)
    
    r2_adj = (1-(((1 - pooled_res.rsquared)*(target.shape[0] - 1))/(target.shape[0]-(len(pexog_vars))-1)))
    print("R^2 adj.: ", r2_adj)
    
    return pooled_res

## Kreuzvalidierung

Die Kreuzvalidierung wird durchgeführt mit einer K-Fold Kreuzvalidierung. 

In [4]:
'''
Params: 
    n_splits: Anzahl der folds 
    target
    pexog_vars: liste der exogenen Variablen Bsp.: ['GINI', "log(GDP)", "HDI", "TIME", "LOCATION"]
    pX: Datensatz, auf dem die Kreuzvalidierung durchgeführt werden soll
    
'''



def k_fold_pooledOLS(n_splits, target, pexog_vars, pX):
    
#     #
#     exog_vars = pexog_vars.append("LOCATION", "TIME")
    features = pX
    
    
    # Splitten in der Reihenfolge der Reihen im df
    kf = KFold(n_splits, shuffle=True)
    r2 = np.zeros(n_splits)
    rmse = np.zeros(n_splits)
    r2_adj = np.zeros(n_splits)
    # print(r2)
    i=0

    for train_index, val_index in kf.split(features):
    #     print("%s %s" % (train_index, test_index))
        X_train, X_validate, y_train, y_validate = features.iloc[train_index], features.iloc[val_index], target.iloc[train_index], target.iloc[val_index]
        #print(X_train)
        
#         print(X_train)
        
        r2[i], rmse[i], r2_adj[i] = prediction(pexog_vars, X_train, y_train, X_validate,  y_validate)

    #     print(r2[i])
        i = i+1

    print("--------------------------- TOTAL ---------------------------------")
    print("R^2s: ", r2)
    # calculate mean of metrics
    print("R^2 mean: ", r2.mean())
    print("R^2 std: ", r2.std())
    
    print("\nR^2 adj.: ", r2_adj)
    # calculate mean of metrics
    print("R^2 adj. mean: ", r2_adj.mean())
    print("R^2 adj. std: ", r2_adj.std())
    
    print("\nRMSEs: ", rmse)
    # calculate mean of metrics
    print("RMSE mean: ", rmse.mean())
    print("RMSE std: ", rmse.std())

## Modell Vorhersage

In [5]:
'''
Params: 
    pexog_vars: liste der exogenen Variablen Bsp.: ['GINI', "log(GDP)", "HDI", "TIME", "LOCATION"]
    pX: 
    y_test: Y Werte des Testdatensatzes
    
'''



'''
R^2 ist NICHT out of sample => es ist das R^2 für das Modell auf den Trainingsdaten
R^2 ajd. ist NICHT out of sample => es ist das R^2 adj. für das Modell auf den Trainingsdaten
RSME ist out of sample => es ist das RMSE für das Modell auf den Testdaten
'''

def prediction(pexog_vars, pX, py, pX_test, py_test):

    #pX enthält Loaction Time Subejct
    exog = sm.add_constant(pX[pexog_vars])
    
#     print(py, exog)
    mod = PooledOLS(py, exog)

    pooled_res = mod.fit(cov_type='clustered', clusters=pX.LOCATION)
    #print(mod.predict(params=pooled_res.params, exog=X_test))
    
    
    # Prediction y^= X_test * beta
    x_pred = np.array(sm.add_constant(pX_test[pexog_vars]))
    
    b_pred = np.array(pooled_res.params)[0:len(pexog_vars)+1]

    y_pred = x_pred.dot(b_pred) 
    #print(y_pred)
    #print('\n')
    
    y_test = np.array(py_test)
    #print(y_test)
#     print('\n')
    squares = (y_pred-py_test)**2
    mse = (1/y_pred.shape[0])*(np.sum(squares))
    
#     print('Anzahl exogene Variablen: ', len(pexog_vars))
#     print('Anzahl Beobachtungen: ', y_pred.shape[0])
#     print('\n')
#     r2 = 1 - RSS/TSS
#     RSS = (np.sum(squares)
    
    # r^2 adj in-sample calculation
    r2_adj = (1-(((1 - pooled_res.rsquared)*(py.shape[0] - 1))/(py.shape[0]-(len(pexog_vars))-1)))
    
    
    ### 
   
    print('\nR^2 = ', pooled_res.rsquared)
    print('R^2 adj. = ', r2_adj)
#     print("MSE = ", mse)
    print("RMSE = ", np.sqrt(mse))
    
    return pooled_res.rsquared, np.sqrt(mse), r2_adj

![image.png](attachment:image.png)

# Aufteilung der Daten in Train- & Testdatensatz

In [6]:
time = pd.Categorical(data["TIME"])
location = pd.Categorical(data["LOCATION"])
data = data.set_index(['LOCATION', 'TIME'])
data['TIME'] = time
data['LOCATION'] = location
# print(data)

features = data.drop(["PISA Math", "SUBJECT"], axis=1)
#PISA Math
X, X_test, y, y_test = train_test_split(features, data["PISA Math"], test_size = 0.1, random_state = 42)

#Festlegung der Parameter für die Modellierung methode
pX = X
target = y



# Nullmodell

In [7]:
pexog_vars = []

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.0


  if is_categorical(s):


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.0000
Estimator:,PooledOLS,R-squared (Between):,0.0000
No. Observations:,207,R-squared (Within):,0.0000
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.0000
Time:,11:49:38,Log-likelihood,-1062.8
Cov. Estimator:,Clustered,,
,,F-statistic:,--
Entities:,44,P-value,--
Avg Obs:,4.7045,Distribution:,--
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,487.57,6.2905,77.509,0.0000,475.17,499.98


# 1. Regressor

### Gini-Koeffizient

In [8]:
pexog_vars = ['GINI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5282089137283955


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.5305
Estimator:,PooledOLS,R-squared (Between):,0.5895
No. Observations:,207,R-squared (Within):,-0.1744
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5305
Time:,11:49:39,Log-likelihood,-984.52
Cov. Estimator:,Clustered,,
,,F-statistic:,231.63
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(1,205)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,644.53,21.206,30.394,0.0000,602.72,686.34
GINI,-480.76,67.583,-7.1135,0.0000,-614.00,-347.51


### BIP 

In [9]:
pexog_vars = ['log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.4083853830753581


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.4113
Estimator:,PooledOLS,R-squared (Between):,0.5777
No. Observations:,207,R-squared (Within):,-1.7279
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.4113
Time:,11:49:39,Log-likelihood,-1007.9
Cov. Estimator:,Clustered,,
,,F-statistic:,143.20
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(1,205)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-56.308,123.70,-0.4552,0.6494,-300.19,187.57
log(GDP),52.530,11.784,4.4577,0.0000,29.297,75.764


### CPI

In [10]:
pexog_vars = ['CPI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.39658274895510315


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.3995
Estimator:,PooledOLS,R-squared (Between):,0.4802
No. Observations:,207,R-squared (Within):,-0.4509
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.3995
Time:,11:49:39,Log-likelihood,-1010.0
Cov. Estimator:,Clustered,,
,,F-statistic:,136.39
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(1,205)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,401.34,22.144,18.124,0.0000,357.68,444.99
CPI,1.3031,0.2840,4.5891,0.0000,0.7433,1.8630


### Anteil der 25-64 jährigen mit tertiärer Bildung

In [11]:
pexog_vars = ['log(PCT_EDU_TRY)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.4333628793099932


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.4361
Estimator:,PooledOLS,R-squared (Between):,0.4829
No. Observations:,207,R-squared (Within):,-1.3893
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.4361
Time:,11:49:39,Log-likelihood,-1003.5
Cov. Estimator:,Clustered,,
,,F-statistic:,158.55
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(1,205)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,287.72,41.843,6.8761,0.0000,205.22,370.22
log(PCT_EDU_TRY),59.849,11.941,5.0120,0.0000,36.306,83.393


### Migration

In [12]:
pexog_vars = ['log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.3763289314125904


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.3794
Estimator:,PooledOLS,R-squared (Between):,0.5316
No. Observations:,207,R-squared (Within):,-0.2582
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.3794
Time:,11:49:39,Log-likelihood,-1013.4
Cov. Estimator:,Clustered,,
,,F-statistic:,125.30
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(1,205)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,449.65,13.222,34.007,0.0000,423.58,475.72
log(MIGRANTS),19.911,5.0043,3.9787,0.0001,10.044,29.777


### Mord Rate

In [13]:
pexog_vars = ['HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.3214897206930498


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.3248
Estimator:,PooledOLS,R-squared (Between):,0.3952
No. Observations:,207,R-squared (Within):,-0.6292
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.3248
Time:,11:49:39,Log-likelihood,-1022.1
Cov. Estimator:,Clustered,,
,,F-statistic:,98.606
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(1,205)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,500.38,5.5900,89.514,0.0000,489.36,511.40
HOMICIDES,-4.0512,0.7223,-5.6085,0.0000,-5.4754,-2.6271


### Alkohol Konsum pro Kopf

In [14]:
pexog_vars = ['ALC_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.32506381226901204


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.3283
Estimator:,PooledOLS,R-squared (Between):,0.4349
No. Observations:,207,R-squared (Within):,-0.4885
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.3283
Time:,11:49:39,Log-likelihood,-1021.6
Cov. Estimator:,Clustered,,
,,F-statistic:,100.21
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(1,205)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,419.05,17.625,23.776,0.0000,384.30,453.80
ALC_PC,7.6554,1.6874,4.5368,0.0000,4.3285,10.982


### Internet PC

In [15]:
pexog_vars = ['INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.3195964833225665


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.3229
Estimator:,PooledOLS,R-squared (Between):,0.4632
No. Observations:,207,R-squared (Within):,-3.5088
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.3229
Time:,11:49:39,Log-likelihood,-1022.4
Cov. Estimator:,Clustered,,
,,F-statistic:,97.762
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(1,205)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,418.42,17.669,23.680,0.0000,383.58,453.26
INTERNET_PC,1.0243,0.2147,4.7717,0.0000,0.6011,1.4476


### Bildungsausgaben im Sekundär Bereich

In [16]:
pexog_vars = ['log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.3518974077780398


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.3550
Estimator:,PooledOLS,R-squared (Between):,0.4016
No. Observations:,207,R-squared (Within):,-1.1132
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.3550
Time:,11:49:39,Log-likelihood,-1017.4
Cov. Estimator:,Clustered,,
,,F-statistic:,112.85
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(1,205)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,190.64,60.096,3.1722,0.0017,72.152,309.12
log(EDU_SPENDING),96.592,18.752,5.1509,0.0000,59.620,133.56


### Schüler, Lehrer Verhältnis

In [17]:
pexog_vars = ['STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.1590756196910471


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.1632
Estimator:,PooledOLS,R-squared (Between):,0.2152
No. Observations:,207,R-squared (Within):,-0.1728
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.1632
Time:,11:49:40,Log-likelihood,-1044.3
Cov. Estimator:,Clustered,,
,,F-statistic:,39.969
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(1,205)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,549.01,20.170,27.219,0.0000,509.24,588.77
STR_SRY,-4.9978,1.8447,-2.7092,0.0073,-8.6348,-1.3607


# 2. Regressor

### BIP

In [18]:
pexog_vars = ['GINI', 'log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6105587178639011


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.6143
Estimator:,PooledOLS,R-squared (Between):,0.7271
No. Observations:,207,R-squared (Within):,-0.7731
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6143
Time:,11:49:40,Log-likelihood,-964.16
Cov. Estimator:,Clustered,,
,,F-statistic:,162.48
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(2,204)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,310.11,100.20,3.0949,0.0022,112.55,507.66
GINI,-355.58,51.518,-6.9020,0.0000,-457.15,-254.00
log(GDP),28.353,8.9426,3.1705,0.0018,10.721,45.985


### CPI

In [19]:
pexog_vars = ['GINI', 'CPI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6266705195411044


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.6303
Estimator:,PooledOLS,R-squared (Between):,0.6945
No. Observations:,207,R-squared (Within):,-0.1516
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6303
Time:,11:49:40,Log-likelihood,-959.79
Cov. Estimator:,Clustered,,
,,F-statistic:,173.90
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(2,204)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,556.87,23.761,23.436,0.0000,510.02,603.72
GINI,-363.67,53.293,-6.8239,0.0000,-468.74,-258.59
CPI,0.7470,0.2034,3.6716,0.0003,0.3458,1.1481


### Anteil der 25-64 jährigen mit tertiärer Bildung

In [20]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.707391084710219


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7102
Estimator:,PooledOLS,R-squared (Between):,0.7807
No. Observations:,207,R-squared (Within):,-0.9694
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7102
Time:,11:49:40,Log-likelihood,-934.57
Cov. Estimator:,Clustered,,
,,F-statistic:,250.01
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(2,204)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,470.95,30.492,15.445,0.0000,410.83,531.07
GINI,-371.15,39.939,-9.2928,0.0000,-449.89,-292.40
log(PCT_EDU_TRY),41.264,7.0554,5.8485,0.0000,27.353,55.175


### Migration

In [21]:
pexog_vars = ['GINI', 'log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6107525090257611


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.6145
Estimator:,PooledOLS,R-squared (Between):,0.7134
No. Observations:,207,R-squared (Within):,-0.1783
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6145
Time:,11:49:40,Log-likelihood,-964.11
Cov. Estimator:,Clustered,,
,,F-statistic:,162.61
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(2,204)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,587.72,24.426,24.061,0.0000,539.56,635.88
GINI,-369.92,50.575,-7.3143,0.0000,-469.64,-270.20
log(MIGRANTS),10.830,4.3875,2.4683,0.0144,2.1790,19.480


### Mord Rate

In [22]:
pexog_vars = ['GINI', 'HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5463072546078547


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.5507
Estimator:,PooledOLS,R-squared (Between):,0.6116
No. Observations:,207,R-squared (Within):,-0.2000
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5507
Time:,11:49:40,Log-likelihood,-979.97
Cov. Estimator:,Clustered,,
,,F-statistic:,125.03
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(2,204)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,623.73,29.014,21.498,0.0000,566.52,680.93
GINI,-404.42,101.94,-3.9674,0.0001,-605.41,-203.44
HOMICIDES,-1.3028,0.9732,-1.3386,0.1822,-3.2217,0.6161


### Alkohol Konsum pro Kopf

In [23]:
pexog_vars = ['GINI', 'ALC_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5960393423658124


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.6000
Estimator:,PooledOLS,R-squared (Between):,0.6708
No. Observations:,207,R-squared (Within):,-0.1379
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6000
Time:,11:49:41,Log-likelihood,-967.95
Cov. Estimator:,Clustered,,
,,F-statistic:,152.98
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(2,204)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,578.93,28.555,20.274,0.0000,522.63,635.23
GINI,-388.96,68.720,-5.6600,0.0000,-524.45,-253.47
ALC_PC,3.9812,1.3005,3.0612,0.0025,1.4170,6.5455


### Internet PC

In [24]:
pexog_vars = ['GINI', 'INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5932029171012199


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.5972
Estimator:,PooledOLS,R-squared (Between):,0.6982
No. Observations:,207,R-squared (Within):,-1.3501
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5972
Time:,11:49:41,Log-likelihood,-968.68
Cov. Estimator:,Clustered,,
,,F-statistic:,151.20
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(2,204)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,579.65,23.761,24.395,0.0000,532.80,626.50
GINI,-390.84,59.791,-6.5368,0.0000,-508.73,-272.95
INTERNET_PC,0.5262,0.1546,3.4030,0.0008,0.2213,0.8311


### Bildungsausgaben im Sekundar Bereich

In [25]:
pexog_vars = ['GINI', 'log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5905220074613255


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.5945
Estimator:,PooledOLS,R-squared (Between):,0.6387
No. Observations:,207,R-squared (Within):,-0.3614
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5945
Time:,11:49:41,Log-likelihood,-969.35
Cov. Estimator:,Clustered,,
,,F-statistic:,149.54
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(2,204)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,463.40,72.941,6.3532,0.0000,319.59,607.22
GINI,-378.62,74.113,-5.1087,0.0000,-524.75,-232.50
log(EDU_SPENDING),48.073,18.390,2.6141,0.0096,11.815,84.331


### Schüler, Lehrer Verhältnis

In [26]:
pexog_vars = ['GINI', 'STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5267859173137742


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.5314
Estimator:,PooledOLS,R-squared (Between):,0.5888
No. Observations:,207,R-squared (Within):,-0.1925
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5314
Time:,11:49:41,Log-likelihood,-984.33
Cov. Estimator:,Clustered,,
,,F-statistic:,115.66
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(2,204)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,643.60,21.297,30.220,0.0000,601.61,685.59
GINI,-494.98,82.022,-6.0348,0.0000,-656.70,-333.26
STR_SRY,0.4539,1.3711,0.3310,0.7410,-2.2495,3.1572


# 3. Regressor

### BIP

In [27]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7060181508554435


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7103
Estimator:,PooledOLS,R-squared (Between):,0.7828
No. Observations:,207,R-squared (Within):,-0.9833
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7103
Time:,11:49:41,Log-likelihood,-934.55
Cov. Estimator:,Clustered,,
,,F-statistic:,165.91
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(3,203)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,461.22,73.908,6.2404,0.0000,315.49,606.94
GINI,-368.28,35.235,-10.452,0.0000,-437.76,-298.81
log(PCT_EDU_TRY),40.545,10.033,4.0412,0.0001,20.763,60.327
log(GDP),1.0818,8.7453,0.1237,0.9017,-16.161,18.325


### CPI

In [28]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'CPI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7223844546417904


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7264
Estimator:,PooledOLS,R-squared (Between):,0.7928
No. Observations:,207,R-squared (Within):,-0.6693
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7264
Time:,11:49:42,Log-likelihood,-928.62
Cov. Estimator:,Clustered,,
,,F-statistic:,179.68
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(3,203)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,459.32,27.235,16.865,0.0000,405.62,513.02
GINI,-335.41,35.384,-9.4792,0.0000,-405.18,-265.65
log(PCT_EDU_TRY),34.447,8.2610,4.1698,0.0000,18.158,50.735
CPI,0.3435,0.1672,2.0548,0.0412,0.0139,0.6731


### Migration

In [29]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7086389594180451


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7129
Estimator:,PooledOLS,R-squared (Between):,0.7901
No. Observations:,207,R-squared (Within):,-0.8738
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7129
Time:,11:49:42,Log-likelihood,-933.62
Cov. Estimator:,Clustered,,
,,F-statistic:,168.01
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(3,203)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,473.73,33.030,14.342,0.0000,408.60,538.85
GINI,-356.48,39.957,-8.9217,0.0000,-435.26,-277.70
log(PCT_EDU_TRY),37.647,11.612,3.2419,0.0014,14.750,60.543
log(MIGRANTS),2.3720,4.9029,0.4838,0.6291,-7.2952,12.039


### Alkohol Konsum pro Kopf 

In [30]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7416093403171315


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7454
Estimator:,PooledOLS,R-squared (Between):,0.8086
No. Observations:,207,R-squared (Within):,-0.6504
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7454
Time:,11:49:42,Log-likelihood,-921.19
Cov. Estimator:,Clustered,,
,,F-statistic:,198.08
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(3,203)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,438.09,34.851,12.570,0.0000,369.37,506.80
GINI,-313.92,51.550,-6.0897,0.0000,-415.57,-212.28
log(PCT_EDU_TRY),37.784,6.1907,6.1034,0.0000,25.578,49.991
ALC_PC,2.8827,1.0438,2.7618,0.0063,0.8247,4.9408


### Mord Rate

In [31]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7225894558225823


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7266
Estimator:,PooledOLS,R-squared (Between):,0.8014
No. Observations:,207,R-squared (Within):,-0.9669
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7266
Time:,11:49:42,Log-likelihood,-928.55
Cov. Estimator:,Clustered,,
,,F-statistic:,179.86
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(3,203)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,453.96,31.241,14.531,0.0000,392.36,515.56
GINI,-303.47,55.874,-5.4313,0.0000,-413.63,-193.30
log(PCT_EDU_TRY),40.846,7.4616,5.4742,0.0000,26.134,55.558
HOMICIDES,-1.1741,0.5082,-2.3101,0.0219,-2.1761,-0.1720


### Internet PC

In [32]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7084132544167077


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7127
Estimator:,PooledOLS,R-squared (Between):,0.7734
No. Observations:,207,R-squared (Within):,-0.7465
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7127
Time:,11:49:42,Log-likelihood,-933.70
Cov. Estimator:,Clustered,,
,,F-statistic:,167.83
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(3,203)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,468.74,31.309,14.972,0.0000,407.01,530.48
GINI,-382.71,45.348,-8.4394,0.0000,-472.12,-293.29
log(PCT_EDU_TRY),45.871,10.212,4.4920,0.0000,25.736,66.005
INTERNET_PC,-0.1393,0.2166,-0.6428,0.5211,-0.5664,0.2879


### Bildungsausgaben im Sekundar Bereich

In [33]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.719902600093637


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7240
Estimator:,PooledOLS,R-squared (Between):,0.7840
No. Observations:,207,R-squared (Within):,-0.8806
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7240
Time:,11:49:42,Log-likelihood,-929.54
Cov. Estimator:,Clustered,,
,,F-statistic:,177.49
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(3,203)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,398.95,43.789,9.1108,0.0000,312.61,485.29
GINI,-331.70,46.423,-7.1453,0.0000,-423.24,-240.17
log(PCT_EDU_TRY),37.189,6.3997,5.8111,0.0000,24.571,49.807
log(EDU_SPENDING),23.660,10.826,2.1854,0.0300,2.3135,45.006


### Schüler, Lehrer Verhältnis

In [34]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7077100564503981


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7120
Estimator:,PooledOLS,R-squared (Between):,0.7801
No. Observations:,207,R-squared (Within):,-0.9878
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7120
Time:,11:49:42,Log-likelihood,-933.95
Cov. Estimator:,Clustered,,
,,F-statistic:,167.26
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(3,203)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,469.16,30.820,15.223,0.0000,408.39,529.93
GINI,-390.81,52.961,-7.3792,0.0000,-495.24,-286.39
log(PCT_EDU_TRY),41.378,6.9848,5.9240,0.0000,27.606,55.150
STR_SRY,0.6371,1.2084,0.5272,0.5986,-1.7456,3.0198


# 4. Regressor

### BIP

In [35]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7406093601095194


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7456
Estimator:,PooledOLS,R-squared (Between):,0.8123
No. Observations:,207,R-squared (Within):,-0.6709
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7456
Time:,11:49:42,Log-likelihood,-921.08
Cov. Estimator:,Clustered,,
,,F-statistic:,148.04
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(4,202)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,418.33,82.796,5.0526,0.0000,255.08,581.59
GINI,-307.92,45.764,-6.7284,0.0000,-398.16,-217.68
log(PCT_EDU_TRY),36.321,11.108,3.2697,0.0013,14.418,58.224
ALC_PC,2.8940,1.0033,2.8845,0.0043,0.9157,4.8724
log(GDP),2.1808,10.573,0.2063,0.8368,-18.667,23.029


### Bildungsausgaben

In [36]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7414376329335233


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7465
Estimator:,PooledOLS,R-squared (Between):,0.8069
No. Observations:,207,R-squared (Within):,-0.6415
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7465
Time:,11:49:42,Log-likelihood,-920.75
Cov. Estimator:,Clustered,,
,,F-statistic:,148.68
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(4,202)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,418.24,41.142,10.166,0.0000,337.11,499.36
GINI,-306.72,52.443,-5.8487,0.0000,-410.13,-203.32
log(PCT_EDU_TRY),36.813,6.3230,5.8220,0.0000,24.345,49.280
ALC_PC,2.6126,1.1797,2.2147,0.0279,0.2865,4.9387
log(EDU_SPENDING),7.5349,10.644,0.7079,0.4798,-13.453,28.523


### Migration

In [37]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7403520884120744


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7454
Estimator:,PooledOLS,R-squared (Between):,0.8093
No. Observations:,207,R-squared (Within):,-0.6443
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7454
Time:,11:49:43,Log-likelihood,-921.19
Cov. Estimator:,Clustered,,
,,F-statistic:,147.85
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(4,202)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,438.56,41.445,10.582,0.0000,356.83,520.28
GINI,-312.93,45.302,-6.9076,0.0000,-402.25,-223.60
log(PCT_EDU_TRY),37.470,11.568,3.2391,0.0014,14.661,60.279
ALC_PC,2.8643,1.1752,2.4373,0.0157,0.5471,5.1815
log(MIGRANTS),0.2206,4.9359,0.0447,0.9644,-9.5119,9.9531


### CPI

In [38]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'CPI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7606773775104676


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7653
Estimator:,PooledOLS,R-squared (Between):,0.8247
No. Observations:,207,R-squared (Within):,-0.3882
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7653
Time:,11:49:43,Log-likelihood,-912.75
Cov. Estimator:,Clustered,,
,,F-statistic:,164.69
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(4,202)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,423.34,27.154,15.590,0.0000,369.80,476.88
GINI,-271.02,46.285,-5.8553,0.0000,-362.28,-179.75
log(PCT_EDU_TRY),30.007,7.8774,3.8092,0.0002,14.474,45.539
ALC_PC,3.0410,0.8798,3.4563,0.0007,1.3061,4.7758
CPI,0.3822,0.1746,2.1895,0.0297,0.0380,0.7265


### Internet PC

In [39]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7403620979259655


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7454
Estimator:,PooledOLS,R-squared (Between):,0.8076
No. Observations:,207,R-squared (Within):,-0.6303
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7454
Time:,11:49:43,Log-likelihood,-921.18
Cov. Estimator:,Clustered,,
,,F-statistic:,147.85
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(4,202)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,438.07,35.018,12.510,0.0000,369.02,507.11
GINI,-315.68,60.476,-5.2200,0.0000,-434.93,-196.44
log(PCT_EDU_TRY),38.347,10.372,3.6970,0.0003,17.895,58.799
ALC_PC,2.8620,1.1160,2.5645,0.0111,0.6615,5.0626
INTERNET_PC,-0.0163,0.2270,-0.0716,0.9430,-0.4639,0.4313


### Mordrate

In [40]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7771824741948038


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7815
Estimator:,PooledOLS,R-squared (Between):,0.8461
No. Observations:,207,R-squared (Within):,-0.6032
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7815
Time:,11:49:43,Log-likelihood,-905.35
Cov. Estimator:,Clustered,,
,,F-statistic:,180.63
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(4,202)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,401.72,31.653,12.692,0.0000,339.31,464.14
GINI,-191.55,56.757,-3.3749,0.0009,-303.46,-79.637
log(PCT_EDU_TRY),36.076,5.4120,6.6659,0.0000,25.405,46.747
ALC_PC,3.7619,0.8432,4.4613,0.0000,2.0992,5.4246
HOMICIDES,-1.8200,0.5270,-3.4533,0.0007,-2.8593,-0.7808


### Schüler, Lehrer Verhältnis

In [41]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.746005130948324


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7509
Estimator:,PooledOLS,R-squared (Between):,0.8095
No. Observations:,207,R-squared (Within):,-0.6664
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7509
Time:,11:49:43,Log-likelihood,-918.91
Cov. Estimator:,Clustered,,
,,F-statistic:,152.26
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(4,202)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,432.57,33.671,12.847,0.0000,366.18,498.97
GINI,-345.74,58.946,-5.8653,0.0000,-461.97,-229.51
log(PCT_EDU_TRY),37.753,6.0040,6.2880,0.0000,25.915,49.592
ALC_PC,3.0806,0.9479,3.2498,0.0014,1.2115,4.9497
STR_SRY,1.1579,1.1333,1.0218,0.3081,-1.0766,3.3925


# 5. Regressor

### BIP

In [42]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'HOMICIDES', 'log(GDP)'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7771061072491824


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7825
Estimator:,PooledOLS,R-squared (Between):,0.8410
No. Observations:,207,R-squared (Within):,-0.5645
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7825
Time:,11:49:43,Log-likelihood,-904.87
Cov. Estimator:,Clustered,,
,,F-statistic:,144.64
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(5,201)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,439.29,81.439,5.3941,0.0000,278.70,599.87
GINI,-198.19,54.799,-3.6167,0.0004,-306.25,-90.136
log(PCT_EDU_TRY),38.900,8.8875,4.3769,0.0000,21.375,56.424
ALC_PC,3.7773,0.8642,4.3708,0.0000,2.0732,5.4813
HOMICIDES,-1.8981,0.5919,-3.2068,0.0016,-3.0652,-0.7310
log(GDP),-4.3186,9.2579,-0.4665,0.6414,-22.574,13.936


### Bildungsausgaben

In [43]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'HOMICIDES', 'log(EDU_SPENDING)'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7786252893614801


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7840
Estimator:,PooledOLS,R-squared (Between):,0.8456
No. Observations:,207,R-squared (Within):,-0.5978
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7840
Time:,11:49:43,Log-likelihood,-904.17
Cov. Estimator:,Clustered,,
,,F-statistic:,145.91
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(5,201)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,370.71,40.073,9.2509,0.0000,291.69,449.72
GINI,-177.77,56.377,-3.1532,0.0019,-288.93,-66.601
log(PCT_EDU_TRY),34.559,5.1060,6.7684,0.0000,24.491,44.628
ALC_PC,3.3717,0.9160,3.6808,0.0003,1.5655,5.1780
HOMICIDES,-1.8623,0.5165,-3.6057,0.0004,-2.8807,-0.8439
log(EDU_SPENDING),11.453,8.9122,1.2851,0.2002,-6.1205,29.026


### Migration

In [44]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'HOMICIDES', 'log(MIGRANTS)'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.779240579246968


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7846
Estimator:,PooledOLS,R-squared (Between):,0.8433
No. Observations:,207,R-squared (Within):,-0.6847
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7846
Time:,11:49:43,Log-likelihood,-903.88
Cov. Estimator:,Clustered,,
,,F-statistic:,146.43
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(5,201)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,392.45,38.929,10.081,0.0000,315.69,469.21
GINI,-192.73,54.126,-3.5607,0.0005,-299.46,-86.000
log(PCT_EDU_TRY),39.870,9.0275,4.4165,0.0000,22.069,57.671
ALC_PC,4.0760,1.0074,4.0459,0.0001,2.0895,6.0624
HOMICIDES,-1.9893,0.6826,-2.9141,0.0040,-3.3353,-0.6432
log(MIGRANTS),-2.7762,4.4889,-0.6185,0.5370,-11.628,6.0752


### Internet PC

In [45]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'HOMICIDES', 'INTERNET_PC'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7763427224931879


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7818
Estimator:,PooledOLS,R-squared (Between):,0.8439
No. Observations:,207,R-squared (Within):,-0.5542
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7818
Time:,11:49:43,Log-likelihood,-905.23
Cov. Estimator:,Clustered,,
,,F-statistic:,144.01
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(5,201)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,401.49,31.510,12.742,0.0000,339.36,463.62
GINI,-196.07,66.296,-2.9575,0.0035,-326.80,-65.348
log(PCT_EDU_TRY),37.700,8.8476,4.2611,0.0000,20.254,55.146
ALC_PC,3.7061,0.9564,3.8750,0.0001,1.8202,5.5920
HOMICIDES,-1.8287,0.5234,-3.4935,0.0006,-2.8608,-0.7965
INTERNET_PC,-0.0472,0.2043,-0.2308,0.8177,-0.4499,0.3556


### CPI

In [46]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'HOMICIDES', 'CPI'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7847068599476534


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7899
Estimator:,PooledOLS,R-squared (Between):,0.8503
No. Observations:,207,R-squared (Within):,-0.4256
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7899
Time:,11:49:44,Log-likelihood,-901.28
Cov. Estimator:,Clustered,,
,,F-statistic:,151.17
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(5,201)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,396.85,31.980,12.409,0.0000,333.79,459.91
GINI,-179.71,62.363,-2.8817,0.0044,-302.68,-56.742
log(PCT_EDU_TRY),31.053,7.0207,4.4230,0.0000,17.209,44.896
ALC_PC,3.7454,0.8315,4.5043,0.0000,2.1058,5.3850
HOMICIDES,-1.5643,0.6307,-2.4800,0.0140,-2.8080,-0.3205
CPI,0.2587,0.1870,1.3834,0.1681,-0.1100,0.6274


### Schüler, Lehrer Verhältnis

In [47]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'HOMICIDES', 'STR_SRY'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.7810500810132558


0,1,2,3
Dep. Variable:,PISA Math,R-squared:,0.7864
Estimator:,PooledOLS,R-squared (Between):,0.8516
No. Observations:,207,R-squared (Within):,-0.6091
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7864
Time:,11:49:44,Log-likelihood,-903.03
Cov. Estimator:,Clustered,,
,,F-statistic:,147.97
Entities:,44,P-value,0.0000
Avg Obs:,4.7045,Distribution:,"F(5,201)"
Min Obs:,0.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,396.92,33.542,11.834,0.0000,330.78,463.06
GINI,-222.44,56.391,-3.9447,0.0001,-333.64,-111.25
log(PCT_EDU_TRY),36.063,5.1359,7.0218,0.0000,25.936,46.190
ALC_PC,3.9384,0.8546,4.6082,0.0000,2.2532,5.6237
HOMICIDES,-1.8027,0.5236,-3.4426,0.0007,-2.8352,-0.7701
STR_SRY,1.0820,1.0639,1.0170,0.3104,-1.0159,3.1798


# Validierung des Modells

In [48]:
n_splits = 4
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'HOMICIDES'] 

k_fold_pooledOLS(n_splits, target, pexog_vars, pX)


R^2 =  0.8090593610128459
R^2 adj. =  0.8039676106398551
RMSE =  23.393796802070067

R^2 =  0.7943964683017981
R^2 adj. =  0.7889137074565127
RMSE =  17.955693159031494

R^2 =  0.7387380685498159
R^2 adj. =  0.7317710837111444
RMSE =  17.50160319642322

R^2 =  0.785614299381463
R^2 adj. =  0.7799352079743495
RMSE =  19.62483825469457
--------------------------- TOTAL ---------------------------------
R^2s:  [0.80905936 0.79439647 0.73873807 0.7856143 ]
R^2 mean:  0.7819520493114808
R^2 std:  0.026317912723685753

R^2 adj.:  [0.80396761 0.78891371 0.73177108 0.77993521]
R^2 adj. mean:  0.7761469024454654
R^2 adj. std:  0.02702104578726077

RMSEs:  [23.3937968  17.95569316 17.5016032  19.62483825]
RMSE mean:  19.61898285305484
RMSE std:  2.3183552279563426


# Anwendung des Modells auf die Testdaten

In [49]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'HOMICIDES'] 
prediction(pexog_vars, pX, target, X_test, y_test)


R^2 =  0.7815090280939339
R^2 adj. =  0.7771824741948038
RMSE =  18.932771664086026


(0.7815090280939339, 18.932771664086026, 0.7771824741948038)

Regressoren:
1. Gini
2. log(PCT_EDU_TRY)
3. ALC_PC
4. HOMICIDES

Keine Hinzunahmen weiterer Regressoren, damit die jetzigen Variablen signifikant bleiben.