# Imports

In [1]:
#Pandas, Numpy importieren
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn as sl
import scipy.stats
import statsmodels.api as sm
import matplotlib.pyplot as plt

#Importeieren der Modelle aus verschiedenen Bibliotheken
from sklearn.linear_model import LinearRegression
from statsmodels.regression.linear_model import GLS
from linearmodels.panel import PooledOLS


# Splitting data into training and testing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold, KFold

from sklearn.model_selection import train_test_split

# No warnings about setting value on copy of slice
pd.options.mode.chained_assignment = None

# Display up to 60 columns of a dataframe
pd.set_option('display.max_columns', 600)

# Matplotlib visualization
%matplotlib inline
# Set default font size
plt.rcParams['font.size'] = 24

# Internal ipython tool for setting figure size
from IPython.core.pylabtools import figsize
#figsize(6, 6)

# Seaborn for visualization
sns.set(font_scale = 2)

# Import der Daten

In [2]:
pd.set_option('display.max_columns', 600)

data = pd.read_csv('../../data/read_imputed_2.csv')
data = data.sample(frac=1)

data.info()
#data.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 259 entries, 76 to 87
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   LOCATION           259 non-null    object 
 1   SUBJECT            259 non-null    object 
 2   TIME               259 non-null    float64
 3   PISA Read          259 non-null    float64
 4   GINI               259 non-null    float64
 5   STR_SRY            259 non-null    float64
 6   CPI                259 non-null    float64
 7   ALC_PC             259 non-null    float64
 8   INTERNET_PC        259 non-null    float64
 9   HOMICIDES          259 non-null    float64
 10  log(MIGRANTS)      259 non-null    float64
 11  log(GDP)           259 non-null    float64
 12  log(EDU_SPENDING)  259 non-null    float64
 13  log(PCT_EDU_TRY)   259 non-null    float64
dtypes: float64(12), object(2)
memory usage: 30.4+ KB


# Benötigte Funktionen

## Methode zur Modellierung

In [3]:
'''
Params:  
    target: Variable, die vorhergesagt werden soll
    pexog_vars: liste der exogenen Variablen Bsp.: ['GINI', "log(GDP)", "HDI", "TIME", "LOCATION"]
    pX: Datensatz, auf dem die Kreuzvalidierung durchgeführt werden soll
    
'''

def modellieren(pexog_vars, pX, target):
    exog_vars = pexog_vars
    exog = sm.add_constant(pX[exog_vars])
    # data = data.drop(columns={"LOCATION", "TIME", "SUBJECT"})

    mod = PooledOLS(target, exog)

    pooled_res = mod.fit(cov_type='clustered', clusters=pX.LOCATION)
    
    r2_adj = (1-(((1 - pooled_res.rsquared)*(target.shape[0] - 1))/(target.shape[0]-(len(pexog_vars))-1)))
    print("R^2 adj.: ", r2_adj)
    
    return pooled_res

## Kreuzvalidierung

Die Kreuzvalidierung wird durchgeführt mit einer K-Fold Kreuzvalidierung. 

In [4]:
'''
Params: 
    n_splits: Anzahl der folds 
    target
    pexog_vars: liste der exogenen Variablen Bsp.: ['GINI', "log(GDP)", "HDI", "TIME", "LOCATION"]
    pX: Datensatz, auf dem die Kreuzvalidierung durchgeführt werden soll
    
'''



def k_fold_pooledOLS(n_splits, target, pexog_vars, pX):
    
#     #
#     exog_vars = pexog_vars.append("LOCATION", "TIME")
    features = pX
    
    
    # Splitten in der Reihenfolge der Reihen im df
    kf = KFold(n_splits, shuffle=True)
    r2 = np.zeros(n_splits)
    rmse = np.zeros(n_splits)
    r2_adj = np.zeros(n_splits)
    # print(r2)
    i=0

    for train_index, val_index in kf.split(features):
    #     print("%s %s" % (train_index, test_index))
        X_train, X_validate, y_train, y_validate = features.iloc[train_index], features.iloc[val_index], target.iloc[train_index], target.iloc[val_index]
        #print(X_train)
        
#         print(X_train)
        
        r2[i], rmse[i], r2_adj[i] = prediction(pexog_vars, X_train, y_train, X_validate,  y_validate)

    #     print(r2[i])
        i = i+1

    print("--------------------------- TOTAL ---------------------------------")
    print("R^2s: ", r2)
    # calculate mean of metrics
    print("R^2 mean: ", r2.mean())
    print("R^2 std: ", r2.std())
    
    print("\nR^2 adj.: ", r2_adj)
    # calculate mean of metrics
    print("R^2 adj. mean: ", r2_adj.mean())
    print("R^2 adj. std: ", r2_adj.std())
    
    print("\nRMSEs: ", rmse)
    # calculate mean of metrics
    print("RMSE mean: ", rmse.mean())
    print("RMSE std: ", rmse.std())

## Modell Vorhersage

In [5]:
'''
Params: 
    pexog_vars: liste der exogenen Variablen Bsp.: ['GINI', "log(GDP)", "HDI", "TIME", "LOCATION"]
    pX: 
    y_test: Y Werte des Testdatensatzes
    
'''



'''
R^2 ist NICHT out of sample => es ist das R^2 für das Modell auf den Trainingsdaten
R^2 ajd. ist NICHT out of sample => es ist das R^2 adj. für das Modell auf den Trainingsdaten
RSME ist out of sample => es ist das RMSE für das Modell auf den Testdaten
'''

def prediction(pexog_vars, pX, py, pX_test, py_test):

    #pX enthält Loaction Time Subejct
    exog = sm.add_constant(pX[pexog_vars])
    
#     print(py, exog)
    mod = PooledOLS(py, exog)

    pooled_res = mod.fit(cov_type='clustered', clusters=pX.LOCATION)
    #print(mod.predict(params=pooled_res.params, exog=X_test))
    
    
    # Prediction y^= X_test * beta
    x_pred = np.array(sm.add_constant(pX_test[pexog_vars]))
    
    b_pred = np.array(pooled_res.params)[0:len(pexog_vars)+1]

    y_pred = x_pred.dot(b_pred) 
    #print(y_pred)
    #print('\n')
    
    y_test = np.array(py_test)
    #print(y_test)
#     print('\n')
    squares = (y_pred-py_test)**2
    mse = (1/y_pred.shape[0])*(np.sum(squares))
    
#     print('Anzahl exogene Variablen: ', len(pexog_vars))
#     print('Anzahl Beobachtungen: ', y_pred.shape[0])
#     print('\n')
#     r2 = 1 - RSS/TSS
#     RSS = (np.sum(squares)
    
    # r^2 adj in-sample calculation
    r2_adj = (1-(((1 - pooled_res.rsquared)*(py.shape[0] - 1))/(py.shape[0]-(len(pexog_vars))-1)))
    
    
    ### 
   
    print('\nR^2 = ', pooled_res.rsquared)
    print('R^2 adj. = ', r2_adj)
#     print("MSE = ", mse)
    print("RMSE = ", np.sqrt(mse))
    
    return pooled_res.rsquared, np.sqrt(mse), r2_adj

![image.png](attachment:image.png)

# Aufteilung der Daten in Train- & Testdatensatz

In [6]:
time = pd.Categorical(data["TIME"])
location = pd.Categorical(data["LOCATION"])
data = data.set_index(['LOCATION', 'TIME'])
data['TIME'] = time
data['LOCATION'] = location
# print(data)

features = data.drop(["PISA Read", "SUBJECT"], axis=1)
#PISA Math
X, X_test, y, y_test = train_test_split(features, data["PISA Read"], test_size = 0.1, random_state = 42)

#Festlegung der Parameter für die Modellierung methode
pX = X
target = y



# Null Modell

In [7]:
pexog_vars = []

modellieren(pexog_vars, pX, target)

R^2 adj.:  -2.220446049250313e-16


  if is_categorical(s):


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,-2.22e-16
Estimator:,PooledOLS,R-squared (Between):,0.0000
No. Observations:,233,R-squared (Within):,0.0000
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.0000
Time:,11:48:10,Log-likelihood,-1159.8
Cov. Estimator:,Clustered,,
,,F-statistic:,--
Entities:,44,P-value,--
Avg Obs:,5.2955,Distribution:,--
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,484.45,5.4762,88.465,0.0000,473.66,495.24


# 1. Regressor

### CPI

In [8]:
pexog_vars = ['CPI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.43255542598967733


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.4350
Estimator:,PooledOLS,R-squared (Between):,0.5070
No. Observations:,233,R-squared (Within):,-0.3839
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.4350
Time:,11:48:10,Log-likelihood,-1093.3
Cov. Estimator:,Clustered,,
,,F-statistic:,177.85
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(1,231)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,411.49,17.416,23.627,0.0000,377.17,445.80
CPI,1.1287,0.2279,4.9524,0.0000,0.6796,1.5777


### Gini-Koeffizient

In [9]:
pexog_vars = ['GINI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.4222541422186853


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.4247
Estimator:,PooledOLS,R-squared (Between):,0.5041
No. Observations:,233,R-squared (Within):,0.0525
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.4247
Time:,11:48:10,Log-likelihood,-1095.4
Cov. Estimator:,Clustered,,
,,F-statistic:,170.56
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(1,231)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,601.98,20.631,29.178,0.0000,561.33,642.63
GINI,-355.06,64.584,-5.4976,0.0000,-482.31,-227.81


### BIP

In [10]:
pexog_vars = ['log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.42559278776112885


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.4281
Estimator:,PooledOLS,R-squared (Between):,0.5685
No. Observations:,233,R-squared (Within):,-1.0528
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.4281
Time:,11:48:10,Log-likelihood,-1094.8
Cov. Estimator:,Clustered,,
,,F-statistic:,172.89
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(1,231)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,44.378,97.103,0.4570,0.6481,-146.94,235.70
log(GDP),42.816,9.2991,4.6043,0.0000,24.494,61.137


### Anteil der 25-64 jährigen mit tertiärer Bildung 

In [11]:
pexog_vars = ['log(PCT_EDU_TRY)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.4515500411824467


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.4539
Estimator:,PooledOLS,R-squared (Between):,0.5054
No. Observations:,233,R-squared (Within):,-0.9020
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.4539
Time:,11:48:11,Log-likelihood,-1089.4
Cov. Estimator:,Clustered,,
,,F-statistic:,192.01
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(1,231)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,319.36,34.856,9.1620,0.0000,250.68,388.03
log(PCT_EDU_TRY),50.419,10.123,4.9807,0.0000,30.474,70.364


### Migration

In [12]:
pexog_vars = ['log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.4366219150326629


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.4391
Estimator:,PooledOLS,R-squared (Between):,0.5183
No. Observations:,233,R-squared (Within):,-0.1914
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.4391
Time:,11:48:11,Log-likelihood,-1092.5
Cov. Estimator:,Clustered,,
,,F-statistic:,180.80
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(1,231)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,452.79,9.5457,47.434,0.0000,433.98,471.60
log(MIGRANTS),17.506,3.7536,4.6637,0.0000,10.110,24.902


### Mordrate

In [13]:
pexog_vars = ['HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.30466685504705626


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.3077
Estimator:,PooledOLS,R-squared (Between):,0.3878
No. Observations:,233,R-squared (Within):,-0.2942
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.3077
Time:,11:48:11,Log-likelihood,-1117.0
Cov. Estimator:,Clustered,,
,,F-statistic:,102.65
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(1,231)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,495.48,5.2362,94.625,0.0000,485.16,505.79
HOMICIDES,-3.2659,0.4175,-7.8234,0.0000,-4.0884,-2.4434


### Alkohol Konsum pro Kopf

In [14]:
pexog_vars = ['ALC_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.28299792837652493


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.2861
Estimator:,PooledOLS,R-squared (Between):,0.3842
No. Observations:,233,R-squared (Within):,-0.3162
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.2861
Time:,11:48:11,Log-likelihood,-1120.6
Cov. Estimator:,Clustered,,
,,F-statistic:,92.569
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(1,231)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,432.95,16.517,26.213,0.0000,400.41,465.50
ALC_PC,5.8304,1.6247,3.5886,0.0004,2.6293,9.0315


### Internet PC

In [15]:
pexog_vars = ['INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.2992754831261413


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.3023
Estimator:,PooledOLS,R-squared (Between):,0.3881
No. Observations:,233,R-squared (Within):,-1.6400
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.3023
Time:,11:48:11,Log-likelihood,-1117.9
Cov. Estimator:,Clustered,,
,,F-statistic:,100.09
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(1,231)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,432.39,12.987,33.294,0.0000,406.80,457.98
INTERNET_PC,0.8104,0.1587,5.1070,0.0000,0.4977,1.1230


### Bildungsausgaben im Sekundär Bereich

In [16]:
pexog_vars = ['log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.33530907043103986


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.3382
Estimator:,PooledOLS,R-squared (Between):,0.3712
No. Observations:,233,R-squared (Within):,-0.6923
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.3382
Time:,11:48:11,Log-likelihood,-1111.8
Cov. Estimator:,Clustered,,
,,F-statistic:,118.03
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(1,231)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,254.60,55.936,4.5517,0.0000,144.39,364.81
log(EDU_SPENDING),75.310,17.575,4.2851,0.0000,40.682,109.94


### Schüler, Lehrer Verhältnis

In [17]:
pexog_vars = ['STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.09939792847761197


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.1033
Estimator:,PooledOLS,R-squared (Between):,0.1181
No. Observations:,233,R-squared (Within):,-0.0162
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.1033
Time:,11:48:12,Log-likelihood,-1147.1
Cov. Estimator:,Clustered,,
,,F-statistic:,26.605
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(1,231)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,523.36,13.761,38.031,0.0000,496.24,550.47
STR_SRY,-3.1039,1.3331,-2.3283,0.0208,-5.7305,-0.4772


# 2. Regressor

### BIP

In [18]:
pexog_vars = ['CPI', 'log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.49819681376489455


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.5025
Estimator:,PooledOLS,R-squared (Between):,0.6009
No. Observations:,233,R-squared (Within):,-0.4707
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5025
Time:,11:48:12,Log-likelihood,-1078.5
Cov. Estimator:,Clustered,,
,,F-statistic:,116.17
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(2,230)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,190.15,100.74,1.8876,0.0603,-8.3381,388.63
CPI,0.6705,0.2044,3.2809,0.0012,0.2678,1.0731
log(GDP),24.416,10.271,2.3773,0.0183,4.1796,44.653


### GINI

In [19]:
pexog_vars = ['CPI', 'GINI']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.57320224160376


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.5769
Estimator:,PooledOLS,R-squared (Between):,0.6605
No. Observations:,233,R-squared (Within):,-0.0815
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5769
Time:,11:48:12,Log-likelihood,-1059.6
Cov. Estimator:,Clustered,,
,,F-statistic:,156.79
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(2,230)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,512.88,22.878,22.418,0.0000,467.80,557.96
CPI,0.7659,0.1909,4.0128,0.0001,0.3898,1.1420
GINI,-235.47,48.552,-4.8499,0.0000,-331.14,-139.81


### Anteil der 25-64 jährigen mit tertiärer Bildung

In [20]:
pexog_vars = ['CPI', 'log(PCT_EDU_TRY)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5538807700533267


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.5577
Estimator:,PooledOLS,R-squared (Between):,0.6243
No. Observations:,233,R-squared (Within):,-0.5177
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5577
Time:,11:48:12,Log-likelihood,-1064.8
Cov. Estimator:,Clustered,,
,,F-statistic:,145.02
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(2,230)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,333.38,29.333,11.366,0.0000,275.59,391.18
CPI,0.6856,0.1685,4.0694,0.0001,0.3536,1.0176
log(PCT_EDU_TRY),32.599,8.9312,3.6500,0.0003,15.002,50.196


### Migration

In [21]:
pexog_vars = ['CPI', 'log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5317150041916106


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.5358
Estimator:,PooledOLS,R-squared (Between):,0.6116
No. Observations:,233,R-squared (Within):,-0.1759
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5358
Time:,11:48:12,Log-likelihood,-1070.5
Cov. Estimator:,Clustered,,
,,F-statistic:,132.71
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(2,230)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,420.52,10.447,40.252,0.0000,399.94,441.11
CPI,0.6863,0.1641,4.1812,0.0000,0.3629,1.0097
log(MIGRANTS),10.815,4.2025,2.5735,0.0107,2.5349,19.095


### Mord Rate

In [22]:
pexog_vars = ['CPI', 'HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5030361551446312


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.5073
Estimator:,PooledOLS,R-squared (Between):,0.5874
No. Observations:,233,R-squared (Within):,-0.3225
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5073
Time:,11:48:12,Log-likelihood,-1077.4
Cov. Estimator:,Clustered,,
,,F-statistic:,118.42
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(2,230)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,434.06,24.433,17.766,0.0000,385.92,482.20
CPI,0.8740,0.3082,2.8356,0.0050,0.2667,1.4812
HOMICIDES,-1.8098,0.7964,-2.2725,0.0240,-3.3789,-0.2406


### Alkohol Konsum pro Kopf

In [23]:
pexog_vars = ['CPI', 'ALC_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5596209124330955


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.5634
Estimator:,PooledOLS,R-squared (Between):,0.6697
No. Observations:,233,R-squared (Within):,-0.4033
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5634
Time:,11:48:13,Log-likelihood,-1063.3
Cov. Estimator:,Clustered,,
,,F-statistic:,148.41
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(2,230)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,387.49,15.534,24.944,0.0000,356.88,418.10
CPI,0.9420,0.1581,5.9577,0.0000,0.6305,1.2535
ALC_PC,4.0832,0.9112,4.4813,0.0000,2.2879,5.8785


### Internet PC

In [24]:
pexog_vars = ['CPI', 'INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.4821124798985179


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.4866
Estimator:,PooledOLS,R-squared (Between):,0.5555
No. Observations:,233,R-squared (Within):,-0.5590
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.4866
Time:,11:48:13,Log-likelihood,-1082.2
Cov. Estimator:,Clustered,,
,,F-statistic:,108.99
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(2,230)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,402.17,16.080,25.011,0.0000,370.49,433.85
CPI,0.8761,0.2369,3.6975,0.0003,0.4092,1.3430
INTERNET_PC,0.3992,0.1630,2.4485,0.0151,0.0780,0.7204


### Bildungsausgaben im Sekundar Bereich

In [25]:
pexog_vars = ['CPI', 'log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5365040175401741


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.5405
Estimator:,PooledOLS,R-squared (Between):,0.5972
No. Observations:,233,R-squared (Within):,-0.4679
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5405
Time:,11:48:13,Log-likelihood,-1069.3
Cov. Estimator:,Clustered,,
,,F-statistic:,135.27
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(2,230)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,286.15,41.565,6.8844,0.0000,204.25,368.05
CPI,0.8568,0.1773,4.8316,0.0000,0.5074,1.2063
log(EDU_SPENDING),46.824,13.212,3.5439,0.0005,20.791,72.857


### Schüler, Lehrer Verhältnis

In [26]:
pexog_vars = ['CPI', 'STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.483805595463049


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.4883
Estimator:,PooledOLS,R-squared (Between):,0.5556
No. Observations:,233,R-squared (Within):,-0.3696
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.4883
Time:,11:48:13,Log-likelihood,-1081.8
Cov. Estimator:,Clustered,,
,,F-statistic:,109.72
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(2,230)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,443.34,16.318,27.168,0.0000,411.19,475.49
CPI,1.0725,0.2032,5.2773,0.0000,0.6721,1.4729
STR_SRY,-2.2513,1.1051,-2.0372,0.0428,-4.4286,-0.0739


# 3. Regressor

### BIP

In [27]:
pexog_vars = ['CPI', 'GINI', 'log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.591652217735668


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.5969
Estimator:,PooledOLS,R-squared (Between):,0.6937
No. Observations:,233,R-squared (Within):,-0.1460
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5969
Time:,11:48:14,Log-likelihood,-1054.0
Cov. Estimator:,Clustered,,
,,F-statistic:,113.05
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(3,229)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,371.24,95.655,3.8811,0.0001,182.77,559.72
CPI,0.5494,0.1837,2.9912,0.0031,0.1875,0.9114
GINI,-203.92,47.040,-4.3350,0.0000,-296.61,-111.23
log(GDP),14.125,9.0018,1.5692,0.1180,-3.6116,31.862


### Anteil der Bevölkerung mit tertiärem Bildungsabschluss

In [28]:
pexog_vars = ['CPI','GINI', 'log(PCT_EDU_TRY)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6693949790281521


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.6737
Estimator:,PooledOLS,R-squared (Between):,0.7628
No. Observations:,233,R-squared (Within):,-0.3433
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6737
Time:,11:48:14,Log-likelihood,-1029.4
Cov. Estimator:,Clustered,,
,,F-statistic:,157.58
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(3,229)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,433.92,28.021,15.486,0.0000,378.71,489.13
CPI,0.4030,0.1608,2.5057,0.0129,0.0861,0.7198
GINI,-214.14,35.379,-6.0528,0.0000,-283.85,-144.43
log(PCT_EDU_TRY),29.124,7.5182,3.8738,0.0001,14.310,43.937


### Migration

In [29]:
pexog_vars = ['CPI', 'GINI', 'log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6154154894029106


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.6204
Estimator:,PooledOLS,R-squared (Between):,0.7083
No. Observations:,233,R-squared (Within):,-0.0255
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6204
Time:,11:48:14,Log-likelihood,-1047.0
Cov. Estimator:,Clustered,,
,,F-statistic:,124.75
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(3,229)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,500.34,22.478,22.259,0.0000,456.05,544.63
CPI,0.5266,0.1428,3.6868,0.0003,0.2452,0.8081
GINI,-191.80,44.240,-4.3354,0.0000,-278.97,-104.63
log(MIGRANTS),7.4951,4.0020,1.8728,0.0624,-0.3903,15.380


### Alkohol Konsum pro Kopf

In [30]:
pexog_vars = ['CPI', 'GINI', 'ALC_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6183218957300849


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.6233
Estimator:,PooledOLS,R-squared (Between):,0.7178
No. Observations:,233,R-squared (Within):,-0.1275
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6233
Time:,11:48:14,Log-likelihood,-1046.1
Cov. Estimator:,Clustered,,
,,F-statistic:,126.28
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(3,229)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,468.67,31.575,14.843,0.0000,406.45,530.88
CPI,0.7420,0.1579,4.6977,0.0000,0.4308,1.0532
GINI,-170.03,58.785,-2.8925,0.0042,-285.86,-54.204
ALC_PC,2.7283,1.0817,2.5222,0.0123,0.5969,4.8597


### Mordrate

In [31]:
pexog_vars = ['CPI', 'GINI', 'HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5776155046213484


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.5831
Estimator:,PooledOLS,R-squared (Between):,0.6659
No. Observations:,233,R-squared (Within):,-0.0833
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5831
Time:,11:48:14,Log-likelihood,-1057.9
Cov. Estimator:,Clustered,,
,,F-statistic:,106.75
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(3,229)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,507.20,22.942,22.108,0.0000,462.00,552.41
CPI,0.7258,0.2404,3.0189,0.0028,0.2521,1.1996
GINI,-204.08,75.997,-2.6854,0.0078,-353.83,-54.342
HOMICIDES,-0.6283,0.9770,-0.6431,0.5208,-2.5533,1.2967


### Internet PC

In [32]:
pexog_vars = ['CPI', 'GINI', 'INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.5896020475998514


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.5949
Estimator:,PooledOLS,R-squared (Between):,0.6806
No. Observations:,233,R-squared (Within):,-0.1853
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.5949
Time:,11:48:14,Log-likelihood,-1054.6
Cov. Estimator:,Clustered,,
,,F-statistic:,112.10
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(3,229)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,497.50,20.975,23.719,0.0000,456.17,538.83
CPI,0.6460,0.2210,2.9232,0.0038,0.2106,1.0814
GINI,-213.00,47.613,-4.4736,0.0000,-306.82,-119.19
INTERNET_PC,0.2443,0.1614,1.5141,0.1314,-0.0736,0.5623


### Bildungsausgaben im Sekundar Bereich

In [33]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6533919033074764


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.6579
Estimator:,PooledOLS,R-squared (Between):,0.7449
No. Observations:,233,R-squared (Within):,-0.5447
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6579
Time:,11:48:15,Log-likelihood,-1034.9
Cov. Estimator:,Clustered,,
,,F-statistic:,146.78
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(3,229)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,382.94,50.244,7.6216,0.0000,283.94,481.94
GINI,-217.31,42.176,-5.1526,0.0000,-300.42,-134.21
log(PCT_EDU_TRY),34.102,6.7769,5.0321,0.0000,20.749,47.455
log(EDU_SPENDING),20.241,12.905,1.5684,0.1182,-5.1872,45.670


### Schüler, Lehrer Verhältnis

In [34]:
pexog_vars = ['GINI', 'log(PCT_EDU_TRY)', 'STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6473408135305236


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.6519
Estimator:,PooledOLS,R-squared (Between):,0.7497
No. Observations:,233,R-squared (Within):,-0.5727
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6519
Time:,11:48:15,Log-likelihood,-1036.9
Cov. Estimator:,Clustered,,
,,F-statistic:,142.95
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(3,229)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,443.21,32.331,13.708,0.0000,379.51,506.92
GINI,-293.12,53.932,-5.4350,0.0000,-399.39,-186.86
log(PCT_EDU_TRY),37.766,7.5034,5.0332,0.0000,22.981,52.551
STR_SRY,1.1647,1.0211,1.1407,0.2552,-0.8472,3.1766


# 4. Regressor

### BIP

In [35]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'log(GDP)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6686265354264911


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.6743
Estimator:,PooledOLS,R-squared (Between):,0.7596
No. Observations:,233,R-squared (Within):,-0.3157
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6743
Time:,11:48:15,Log-likelihood,-1029.1
Cov. Estimator:,Clustered,,
,,F-statistic:,118.03
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(4,228)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,460.29,80.504,5.7177,0.0000,301.67,618.92
CPI,0.4308,0.1651,2.6092,0.0097,0.1055,0.7561
GINI,-219.82,37.321,-5.8900,0.0000,-293.36,-146.28
log(PCT_EDU_TRY),30.622,9.4003,3.2575,0.0013,12.099,49.144
log(GDP),-3.0354,8.9362,-0.3397,0.7344,-20.643,14.573


### Homicides

In [36]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'HOMICIDES']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6828660809373455


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.6883
Estimator:,PooledOLS,R-squared (Between):,0.7808
No. Observations:,233,R-squared (Within):,-0.3976
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6883
Time:,11:48:15,Log-likelihood,-1024.0
Cov. Estimator:,Clustered,,
,,F-statistic:,125.89
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(4,228)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,420.99,26.837,15.687,0.0000,368.11,473.87
CPI,0.3218,0.1780,1.8081,0.0719,-0.0289,0.6725
GINI,-164.31,54.446,-3.0178,0.0028,-271.59,-57.028
log(PCT_EDU_TRY),30.642,7.2428,4.2307,0.0000,16.371,44.914
HOMICIDES,-0.9752,0.6676,-1.4608,0.1455,-2.2907,0.3403


### Migration

In [37]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'log(MIGRANTS)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6738542686157988


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.6795
Estimator:,PooledOLS,R-squared (Between):,0.7699
No. Observations:,233,R-squared (Within):,-0.2903
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6795
Time:,11:48:15,Log-likelihood,-1027.3
Cov. Estimator:,Clustered,,
,,F-statistic:,120.83
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(4,228)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,438.58,27.297,16.067,0.0000,384.80,492.37
CPI,0.3500,0.1435,2.4388,0.0155,0.0672,0.6328
GINI,-198.90,39.952,-4.9785,0.0000,-277.62,-120.18
log(PCT_EDU_TRY),25.509,10.021,2.5456,0.0116,5.7642,45.255
log(MIGRANTS),3.0697,4.2515,0.7220,0.4710,-5.3075,11.447


### Alkohol Konsum pro Kopf

In [38]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'ALC_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6954405025642838


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.7007
Estimator:,PooledOLS,R-squared (Between):,0.7872
No. Observations:,233,R-squared (Within):,-0.2385
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7007
Time:,11:48:15,Log-likelihood,-1019.3
Cov. Estimator:,Clustered,,
,,F-statistic:,133.44
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(4,228)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,406.93,31.127,13.073,0.0000,345.60,468.27
CPI,0.4178,0.1835,2.2765,0.0237,0.0562,0.7795
GINI,-165.40,42.469,-3.8946,0.0001,-249.08,-81.717
log(PCT_EDU_TRY),26.442,7.1759,3.6849,0.0003,12.303,40.582
ALC_PC,2.1140,0.9110,2.3205,0.0212,0.3189,3.9090


### Internet PC

In [39]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'INTERNET_PC']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6722982969534508


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.6779
Estimator:,PooledOLS,R-squared (Between):,0.7639
No. Observations:,233,R-squared (Within):,-0.2668
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6779
Time:,11:48:15,Log-likelihood,-1027.8
Cov. Estimator:,Clustered,,
,,F-statistic:,119.99
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(4,228)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,430.84,29.108,14.801,0.0000,373.48,488.19
CPI,0.4188,0.1800,2.3265,0.0209,0.0641,0.7735
GINI,-224.46,35.892,-6.2536,0.0000,-295.18,-153.73
log(PCT_EDU_TRY),33.714,9.8876,3.4097,0.0008,14.231,53.196
INTERNET_PC,-0.1487,0.2016,-0.7378,0.4614,-0.5460,0.2485


### Bildungsausgaben im Sekundar Bereich

In [40]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'log(EDU_SPENDING)']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6801282240266937


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.6856
Estimator:,PooledOLS,R-squared (Between):,0.7672
No. Observations:,233,R-squared (Within):,-0.3539
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6856
Time:,11:48:16,Log-likelihood,-1025.0
Cov. Estimator:,Clustered,,
,,F-statistic:,124.32
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(4,228)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,378.88,45.220,8.3785,0.0000,289.77,467.98
CPI,0.3820,0.1594,2.3958,0.0174,0.0678,0.6961
GINI,-183.29,37.924,-4.8332,0.0000,-258.02,-108.57
log(PCT_EDU_TRY),26.497,6.8863,3.8477,0.0002,12.928,40.066
log(EDU_SPENDING),17.953,12.010,1.4948,0.1364,-5.7128,41.618


### Schüler, Lehrer Verhältnis

In [41]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'STR_SRY']

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6706154369987161


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.6763
Estimator:,PooledOLS,R-squared (Between):,0.7676
No. Observations:,233,R-squared (Within):,-0.3605
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.6763
Time:,11:48:16,Log-likelihood,-1028.4
Cov. Estimator:,Clustered,,
,,F-statistic:,119.09
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(4,228)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,433.51,28.677,15.117,0.0000,377.00,490.01
CPI,0.3703,0.1897,1.9517,0.0522,-0.0035,0.7442
GINI,-238.66,58.901,-4.0519,0.0001,-354.72,-122.60
log(PCT_EDU_TRY),29.920,8.0970,3.6952,0.0003,13.966,45.875
STR_SRY,0.6404,1.1620,0.5512,0.5821,-1.6491,2.9300


# 5. Regressor

### BIP

In [42]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'log(GDP)'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6948103346496892


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.7014
Estimator:,PooledOLS,R-squared (Between):,0.7849
No. Observations:,233,R-squared (Within):,-0.2212
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7014
Time:,11:48:16,Log-likelihood,-1019.0
Cov. Estimator:,Clustered,,
,,F-statistic:,106.64
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(5,227)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,433.81,82.885,5.2339,0.0000,270.49,597.13
CPI,0.4462,0.1806,2.4706,0.0142,0.0903,0.8022
GINI,-171.17,45.301,-3.7785,0.0002,-260.43,-81.906
log(PCT_EDU_TRY),27.968,10.179,2.7476,0.0065,7.9104,48.026
ALC_PC,2.1150,0.9375,2.2561,0.0250,0.2678,3.9622
log(GDP),-3.0945,9.5803,-0.3230,0.7470,-21.972,15.783


### Mordrate

In [43]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'HOMICIDES'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.724356508218179


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.7303
Estimator:,PooledOLS,R-squared (Between):,0.8180
No. Observations:,233,R-squared (Within):,-0.3132
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7303
Time:,11:48:16,Log-likelihood,-1007.2
Cov. Estimator:,Clustered,,
,,F-statistic:,122.93
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(5,227)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,379.90,30.812,12.329,0.0000,319.18,440.61
CPI,0.3025,0.1762,1.7166,0.0874,-0.0447,0.6497
GINI,-77.489,55.527,-1.3955,0.1642,-186.90,31.925
log(PCT_EDU_TRY),27.894,6.1683,4.5222,0.0000,15.739,40.048
ALC_PC,2.7367,0.8783,3.1158,0.0021,1.0060,4.4674
HOMICIDES,-1.4395,0.5539,-2.5988,0.0100,-2.5310,-0.3480


### Migration

In [44]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'log(MIGRANTS)'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6949871356532147


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.7016
Estimator:,PooledOLS,R-squared (Between):,0.7887
No. Observations:,233,R-squared (Within):,-0.2222
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7016
Time:,11:48:16,Log-likelihood,-1019.0
Cov. Estimator:,Clustered,,
,,F-statistic:,106.72
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(5,227)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,410.30,34.023,12.060,0.0000,343.26,477.34
CPI,0.3956,0.1640,2.4116,0.0167,0.0724,0.7189
GINI,-161.90,42.438,-3.8151,0.0002,-245.53,-78.283
log(PCT_EDU_TRY),25.127,10.136,2.4789,0.0139,5.1537,45.100
ALC_PC,1.9982,0.9604,2.0805,0.0386,0.1056,3.8907
log(MIGRANTS),1.2417,4.1872,0.2965,0.7671,-7.0091,9.4925


### Internet PC

In [45]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'INTERNET_PC'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6945920055816031


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.7012
Estimator:,PooledOLS,R-squared (Between):,0.7869
No. Observations:,233,R-squared (Within):,-0.2227
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7012
Time:,11:48:16,Log-likelihood,-1019.1
Cov. Estimator:,Clustered,,
,,F-statistic:,106.53
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(5,227)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,406.87,31.199,13.041,0.0000,345.39,468.34
CPI,0.4228,0.1925,2.1969,0.0290,0.0436,0.8020
GINI,-170.82,45.474,-3.7564,0.0002,-260.42,-81.214
log(PCT_EDU_TRY),28.143,9.4077,2.9915,0.0031,9.6053,46.681
ALC_PC,2.0350,0.9495,2.1431,0.0332,0.1640,3.9060
INTERNET_PC,-0.0519,0.2012,-0.2579,0.7967,-0.4482,0.3445


### Bildungsausgaben im Sekundar Bereich

In [46]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'log(EDU_SPENDING)'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6959232756635146


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.7025
Estimator:,PooledOLS,R-squared (Between):,0.7861
No. Observations:,233,R-squared (Within):,-0.2456
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7025
Time:,11:48:17,Log-likelihood,-1018.6
Cov. Estimator:,Clustered,,
,,F-statistic:,107.19
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(5,227)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,386.68,39.392,9.8162,0.0000,309.06,464.30
CPI,0.4070,0.1780,2.2865,0.0231,0.0563,0.7578
GINI,-158.21,42.443,-3.7277,0.0002,-241.85,-74.582
log(PCT_EDU_TRY),25.648,7.3117,3.5079,0.0005,11.241,40.056
ALC_PC,1.8521,0.9127,2.0293,0.0436,0.0537,3.6505
log(EDU_SPENDING),7.6949,11.002,0.6994,0.4850,-13.984,29.374


### Schüler, Lehrer Verhältnis

In [47]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'ALC_PC', 'STR_SRY'] 

modellieren(pexog_vars, pX, target)

R^2 adj.:  0.6998985943250091


0,1,2,3
Dep. Variable:,PISA Read,R-squared:,0.7064
Estimator:,PooledOLS,R-squared (Between):,0.7952
No. Observations:,233,R-squared (Within):,-0.2508
Date:,"Fri, Feb 05 2021",R-squared (Overall):,0.7064
Time:,11:48:17,Log-likelihood,-1017.1
Cov. Estimator:,Clustered,,
,,F-statistic:,109.21
Entities:,44,P-value,0.0000
Avg Obs:,5.2955,Distribution:,"F(5,227)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,404.55,31.625,12.792,0.0000,342.23,466.86
CPI,0.3704,0.2037,1.8186,0.0703,-0.0309,0.7717
GINI,-198.62,59.646,-3.3300,0.0010,-316.15,-81.093
log(PCT_EDU_TRY),27.449,7.5596,3.6310,0.0003,12.553,42.345
ALC_PC,2.2529,0.8568,2.6294,0.0091,0.5646,3.9412
STR_SRY,0.9514,1.1013,0.8639,0.3886,-1.2187,3.1214


# Validierung des Modells

In [48]:
n_splits = 4
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'ALC_PC']

k_fold_pooledOLS(n_splits, target, pexog_vars, pX)


R^2 =  0.707585513714041
R^2 adj. =  0.7006644607841959
RMSE =  20.28304037957314

R^2 =  0.7126322653152894
R^2 adj. =  0.7058706715580021
RMSE =  21.751759878103886

R^2 =  0.6630681742589384
R^2 adj. =  0.6551403665944429
RMSE =  17.32574973414315

R^2 =  0.7289810334182126
R^2 adj. =  0.7226041165574647
RMSE =  20.658371075361757
--------------------------- TOTAL ---------------------------------
R^2s:  [0.70758551 0.71263227 0.66306817 0.72898103]
R^2 mean:  0.7030667466766204
R^2 std:  0.024409805689226714

R^2 adj.:  [0.70066446 0.70587067 0.65514037 0.72260412]
R^2 adj. mean:  0.6960699038735263
R^2 adj. std:  0.02498227604246061

RMSEs:  [20.28304038 21.75175988 17.32574973 20.65837108]
RMSE mean:  20.004730266795484
RMSE std:  1.6381202628787703


# Anwendung des Modells auf die Testdaten

In [49]:
pexog_vars = ['CPI', 'GINI', 'log(PCT_EDU_TRY)', 'ALC_PC']

prediction(pexog_vars, pX, target, X_test, y_test)


R^2 =  0.7006915283821409
R^2 adj. =  0.6954405025642838
RMSE =  23.973647193625464


(0.7006915283821409, 23.973647193625464, 0.6954405025642838)