In [20]:
import pandas as pd
import statsmodels.formula.api as smf
from patsy.contrasts import Sum

In [21]:
# 1. Define the dataset
# Six observations: a numeric response "y" and a categorical factor "Marke"
# with three levels (Audi, BMW, VW), two observations per level.
df = pd.DataFrame(
    [
        (100, "VW"),
        (110, "Audi"),
        (120, "BMW"),
        (130, "Audi"),
        (140, "BMW"),
        (150, "VW"),
    ],
    columns=["y", "Marke"],
)
# Echo the raw data, followed by a blank separator line.
print("Originaldaten:", df, "", sep="\n")

Originaldaten:
     y Marke
0  100    VW
1  110  Audi
2  120   BMW
3  130  Audi
4  140   BMW
5  150    VW



In [22]:
# A1: Reference (treatment) coding through the formula interface.
# Wrapping the column in C() marks it as categorical, so the formula engine
# builds the dummy columns itself; the printed label names Audi as the
# reference level the other brands are compared against.
model_a1 = smf.ols(formula="y ~ C(Marke)", data=df).fit()
# Label, full regression summary, then a blank separator line.
print("A1: Formel mit C() (Referenz = Audi):", model_a1.summary(), "", sep="\n")

A1: Formel mit C() (Referenz = Audi):
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.057
Model:                            OLS   Adj. R-squared:                 -0.571
Method:                 Least Squares   F-statistic:                   0.09091
Date:                Sat, 21 Jun 2025   Prob (F-statistic):              0.916
Time:                        02:18:01   Log-Likelihood:                -25.364
No. Observations:                   6   AIC:                             56.73
Df Residuals:                       3   BIC:                             56.10
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Inte

  warn("omni_normtest is not valid with less than 8 observations; %i "


In [23]:
# A2: Reference coding with pandas.get_dummies.
# drop_first=True drops the first dummy column, so the remaining BMW and VW
# indicators are measured against the dropped level.
# dtype=int forces numeric 0/1 indicators: recent pandas versions return
# boolean dummies by default, and the formula engine treats boolean columns
# as categorical factors (terms named like BMW[T.True]) instead of plain
# numeric regressors named BMW and VW. Integer dummies keep the coefficient
# names stable across pandas versions and identical to the manual variant.
df_dummies = pd.get_dummies(df["Marke"], drop_first=True, dtype=int)
# Put the response next to the indicator columns so the formula can see both.
df_model = pd.concat([df["y"], df_dummies], axis=1)
model_a2 = smf.ols("y ~ BMW + VW", data=df_model).fit()
print("A2: get_dummies() + Formel (Referenz = Audi):")
print(model_a2.summary())
print()

A2: get_dummies() + Formel (Referenz = Audi):
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.057
Model:                            OLS   Adj. R-squared:                 -0.571
Method:                 Least Squares   F-statistic:                   0.09091
Date:                Sat, 21 Jun 2025   Prob (F-statistic):              0.916
Time:                        02:18:01   Log-Likelihood:                -25.364
No. Observations:                   6   AIC:                             56.73
Df Residuals:                       3   BIC:                             56.10
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
Inte

  warn("omni_normtest is not valid with less than 8 observations; %i "


In [24]:
# A3: Hand-built dummy coding.
# assign() returns a new frame, so the original df stays untouched. Each
# indicator is 1 where the brand matches and 0 otherwise; Audi gets no
# column of its own and therefore acts as the reference level.
df_manual = df.assign(
    BMW=df["Marke"].eq("BMW").astype(int),
    VW=df["Marke"].eq("VW").astype(int),
)
model_a3 = smf.ols(formula="y ~ BMW + VW", data=df_manual).fit()
# Label, full regression summary, then a blank separator line.
print("A3: Manuell erzeugte Dummies (Referenz = Audi):", model_a3.summary(), "", sep="\n")

A3: Manuell erzeugte Dummies (Referenz = Audi):
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.057
Model:                            OLS   Adj. R-squared:                 -0.571
Method:                 Least Squares   F-statistic:                   0.09091
Date:                Sat, 21 Jun 2025   Prob (F-statistic):              0.916
Time:                        02:18:01   Log-Likelihood:                -25.364
No. Observations:                   6   AIC:                             56.73
Df Residuals:                       3   BIC:                             56.10
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Inte

  warn("omni_normtest is not valid with less than 8 observations; %i "


In [25]:
# B1: Contrast coding done by patsy.
# Passing Sum as the second argument of C() requests sum-to-zero (deviation)
# contrasts for the brand factor instead of the default coding.
model_b1 = smf.ols(formula="y ~ C(Marke, Sum)", data=df).fit()
# Label, full regression summary, then a blank separator line.
print("B1: Kontrastkodierung mit patsy (Sum):", model_b1.summary(), "", sep="\n")

B1: Kontrastkodierung mit patsy (Sum):
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.057
Model:                            OLS   Adj. R-squared:                 -0.571
Method:                 Least Squares   F-statistic:                   0.09091
Date:                Sat, 21 Jun 2025   Prob (F-statistic):              0.916
Time:                        02:18:01   Log-Likelihood:                -25.364
No. Observations:                   6   AIC:                             56.73
Df Residuals:                       3   BIC:                             56.10
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------

  warn("omni_normtest is not valid with less than 8 observations; %i "


In [26]:
# B2: Hand-built contrast (sum-to-zero) coding.
# Each contrast column is +1 for its own brand, 0 for the other coded brand
# and -1 for BMW, which has no column of its own (the omitted level).
# NOTE(review): patsy's Sum omits the last level by default, so the model
# with C(Marke, Sum) presumably reports Audi and BMW terms while this one
# reports Audi and VW - confirm whether that difference is intended.
df_contrast = df.assign(
    Kontrast_Audi=df["Marke"].map({"Audi": 1, "VW": 0, "BMW": -1}),
    Kontrast_VW=df["Marke"].map({"VW": 1, "Audi": 0, "BMW": -1}),
)
model_b2 = smf.ols(formula="y ~ Kontrast_Audi + Kontrast_VW", data=df_contrast).fit()
# Label followed by the full regression summary (no trailing blank line).
print("B2: Manuelle Kontrastkodierung (Summe = 0):", model_b2.summary(), sep="\n")

B2: Manuelle Kontrastkodierung (Summe = 0):
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.057
Model:                            OLS   Adj. R-squared:                 -0.571
Method:                 Least Squares   F-statistic:                   0.09091
Date:                Sat, 21 Jun 2025   Prob (F-statistic):              0.916
Time:                        02:18:02   Log-Likelihood:                -25.364
No. Observations:                   6   AIC:                             56.73
Df Residuals:                       3   BIC:                             56.10
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
In

  warn("omni_normtest is not valid with less than 8 observations; %i "
