In [1]:
import numpy as np

In [2]:
def pseudoinverse(A, rcond=None):
    """Compute the Moore-Penrose pseudoinverse of ``A`` via the SVD.

    With ``A = U @ diag(S) @ Vh`` the pseudoinverse is
    ``Vh^H @ diag(S_inv) @ U^H``, where ``S_inv`` holds the reciprocals of the
    singular values that are numerically non-zero and ``0`` for the rest.

    Parameters:
    A (array-like): Matrix of shape (m, n); may be non-square or rank-deficient.
    rcond (float, optional): Relative cutoff. Singular values that are not
        greater than ``rcond * max(S)`` are treated as zero. Defaults to
        ``max(m, n) * machine epsilon`` of the singular-value dtype.

    Returns:
    numpy array: The pseudoinverse of ``A`` with shape (n, m).
    """
    A = np.asarray(A)

    # Reduced SVD: U is (m, r), S is (r,), Vt is (r, n) with r = min(m, n)
    U, S, Vt = np.linalg.svd(A, full_matrices=False)

    if rcond is None:
        rcond = max(A.shape[-2:]) * np.finfo(S.dtype).eps
    # `initial` keeps max() defined even if there are no singular values
    cutoff = rcond * S.max(initial=0.0)

    # Invert only the singular values above the cutoff; inverting (near-)zero
    # values would put inf / huge entries into the result
    keep = S > cutoff
    S_inv = np.zeros_like(S)
    S_inv[keep] = 1.0 / S[keep]

    # Scaling the rows of U^H by S_inv is the same as multiplying by diag(S_inv).
    # The conjugate is a no-op for real input and required for complex input.
    A_pseudo = Vt.conj().T @ (S_inv[:, np.newaxis] * U.conj().T)

    return A_pseudo

# Demo input: a small invertible 2x2 matrix, so that np.linalg.inv below is
# defined and the pseudoinverse can be compared against the true inverse
A = np.array([[1, 2], [3, 4]])

# Pseudoinverse from the SVD-based helper and the reference inverse from numpy
A_pseudo = pseudoinverse(A)
inv_A_real = np.linalg.inv(A)

# Print each matrix below its heading
for label, matrix in (
    ("Original Matrix A:\n", A),
    ("\nPseudoinverse of A:\n", A_pseudo),
    ("\nInverse of A:\n", inv_A_real),
):
    print(label, matrix)


Original Matrix A:
 [[1 2]
 [3 4]]

Pseudoinverse of A:
 [[-2.   1. ]
 [ 1.5 -0.5]]

Inverse of A:
 [[-2.   1. ]
 [ 1.5 -0.5]]


In [3]:
def calculate_regressors(X, y):
    """
    Least-squares regression coefficients β obtained as pinv(X) @ y.

    Parameters:
    X (numpy array): The feature matrix (m x n). No intercept column is added
        here; include a column of ones in X if an intercept is wanted.
    y (numpy array): The target values, one per row of X.

    Returns:
    numpy array: The regression coefficients β, i.e. the product of the
        pseudoinverse of X (n x m) with y.
    """
    return pseudoinverse(X) @ y

# Example usage:
# Single-feature design matrix X of shape (5 x 1). It contains no column of
# ones, so the model fitted here has no intercept term.
X = np.arange(1, 6).reshape(-1, 1)

# Target values y, identical to the feature values
y = np.arange(1, 6)

# Regression coefficients β from the pseudoinverse
beta = calculate_regressors(X, y)

print("Regressors (β):", beta)


Regressors (β): [1.]


In [27]:
#Implementierung des statsmodels (Formatierung muss noch angepasst werden, dass es mit dem unteren stimmt)

import numpy as np
from scipy import stats
from datetime import datetime

def linear_regression_statsmodels_like(X, y):
    """
    Fit an ordinary least squares regression with intercept and print a
    summary laid out like the "OLS Regression Results" table of statsmodels.

    Parameters:
    X (array-like): Feature matrix (n x k) WITHOUT an intercept column; the
        column of ones is added inside this function. A 1-D input is treated
        as a single feature column.
    y (array-like): Target values, n entries (any shape that flattens to n).

    Returns:
    None. The summary is written to stdout.

    Raises:
    ValueError: If X is not 1-D/2-D, if X and y disagree on the number of
        observations, or if there are not more observations than parameters
        (the residual degrees of freedom would be zero or negative).
    """
    # Accept lists and 1-D feature vectors; work in floating point throughout
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    if X.ndim != 2:
        raise ValueError(f"X must be 1-D or 2-D, got {X.ndim} dimensions")

    # Make sure y is a column vector
    y = np.asarray(y, dtype=float).reshape(-1, 1)

    # Number of observations and predictors
    n = X.shape[0]  # number of observations
    k = X.shape[1]  # number of predictors (without intercept)
    p = k + 1       # number of parameters (including intercept)

    if y.shape[0] != n:
        raise ValueError(
            f"X has {n} observations but y has {y.shape[0]}"
        )
    if n <= p:
        # sigma^2 = RSS / (n - p) and everything derived from it is undefined
        raise ValueError(
            f"need more observations than parameters, got n={n} and p={p}"
        )

    # Prepend the intercept column to X
    X = np.hstack((np.ones((n, 1)), X))

    # Coefficients from the normal equations
    XtX_inv = np.linalg.inv(X.T @ X)
    beta_hat = XtX_inv @ X.T @ y  # (p x 1)

    # Fitted values and residuals
    y_hat = X @ beta_hat                  # (n x 1)
    residuals_flat = (y - y_hat).ravel()  # (n,)

    # Degrees of freedom
    df_model = p - 1       # model degrees of freedom
    df_resid = n - p       # residual degrees of freedom

    # Sums of squares
    y_mean = np.mean(y)
    TSS = np.sum((y - y_mean) ** 2)   # total sum of squares
    RSS = np.sum(residuals_flat ** 2) # residual sum of squares
    ESS = TSS - RSS                   # explained sum of squares

    # Residual variance and coefficient standard errors
    sigma2 = RSS / df_resid
    var_beta_hat = sigma2 * XtX_inv
    se_beta_hat = np.sqrt(np.diag(var_beta_hat))  # (p,)

    # t statistics and two-sided p-values for the coefficients.
    # The survival function is used instead of 1 - cdf, which rounds to
    # exactly 0 once the cdf is within machine epsilon of 1.
    coef = beta_hat.ravel()
    t_stats = coef / se_beta_hat
    p_values = 2 * stats.t.sf(np.abs(t_stats), df_resid)

    # R-squared and adjusted R-squared
    R_squared = 1 - RSS / TSS
    adj_R_squared = 1 - (1 - R_squared) * (n - 1) / df_resid

    # Mean squares
    MSR = ESS / df_model
    MSE = RSS / df_resid

    # F statistic and its p-value (upper tail, again via sf)
    F_stat = MSR / MSE
    F_p_value = stats.f.sf(F_stat, df_model, df_resid)

    # Gaussian log-likelihood evaluated at the ML variance estimate RSS / n
    LLF = - (n / 2) * (np.log(2 * np.pi) + np.log(RSS / n) + 1)

    # AIC and BIC
    AIC = -2 * LLF + 2 * p
    BIC = -2 * LLF + p * np.log(n)

    # Durbin-Watson statistic
    diff_resid = np.diff(residuals_flat, n=1)
    DW_stat = np.sum(diff_resid ** 2) / np.sum(residuals_flat ** 2)

    # Skewness and kurtosis of the residuals
    skewness = stats.skew(residuals_flat)
    kurtosis = stats.kurtosis(residuals_flat, fisher=False)  # Pearson's definition

    # Omnibus test. scipy's normaltest relies on skewtest, which is not valid
    # for fewer than 8 samples, so report NaN there instead of failing.
    if n >= 8:
        Omnibus_stat, Omnibus_p_value = stats.normaltest(residuals_flat)
    else:
        Omnibus_stat = Omnibus_p_value = float("nan")

    # Jarque-Bera test
    JB_stat, JB_p_value = stats.jarque_bera(residuals_flat)

    # Condition number of the design matrix: largest / smallest singular
    # value. Only the singular values are needed, so skip computing U and V.
    svals = np.linalg.svd(X, compute_uv=False)
    cond_no = svals[0] / svals[-1]

    # 95% confidence intervals (the table header below hard-codes 0.025/0.975)
    alpha = 0.05
    t_crit = stats.t.ppf(1 - alpha / 2, df_resid)
    ci_lower = coef - t_crit * se_beta_hat
    ci_upper = coef + t_crit * se_beta_hat

    # Current date and time for the header
    now = datetime.now()
    date_str = now.strftime("%a, %d %b %Y")
    time_str = now.strftime("%H:%M:%S")

    # Output in the style of statsmodels
    print("                            OLS Regression Results                            ")
    print("==============================================================================")
    print(f"Dep. Variable:                      y   R-squared:                  {R_squared:>10.4f}")
    print(f"Model:                            OLS   Adj. R-squared:             {adj_R_squared:>10.4f}")
    print(f"Method:                 Least Squares   F-statistic:                {F_stat:>10.4f}")
    print(f"Date:                {date_str}   Prob (F-statistic):         {F_p_value:>10.4f}")
    print(f"Time:                        {time_str}   Log-Likelihood:             {LLF:>10.4f}")
    print(f"No. Observations:          {n:>10}   AIC:                        {AIC:>10.4f}")
    print(f"Df Residuals:              {df_resid:>10}   BIC:                        {BIC:>10.4f}")
    print(f"Df Model:                  {df_model:>10}")
    print("Covariance Type:            nonrobust")
    print("==============================================================================")
    print("                 coef    std err          t      P>|t|      [0.025      0.975]")
    print("------------------------------------------------------------------------------")
    # Row labels: intercept first, then x1..xk
    var_names = ['const'] + [f'x{i}' for i in range(1, p)]

    for i in range(p):
        print(f"{var_names[i]:<10} {coef[i]:>10.4f} {se_beta_hat[i]:>10.4f} {t_stats[i]:>10.4f} {p_values[i]:>10.4f} {ci_lower[i]:>11.4f} {ci_upper[i]:>11.4f}")

    print("==============================================================================")
    print(f"Omnibus:                   {Omnibus_stat:>10.4f}   Durbin-Watson:              {DW_stat:>10.4f}")
    print(f"Prob(Omnibus):             {Omnibus_p_value:>10.4f}   Jarque-Bera (JB):           {JB_stat:>10.4f}")
    print(f"Skew:                      {skewness:>10.4f}   Prob(JB):                   {JB_p_value:>10.4f}")
    print(f"Kurtosis:                  {kurtosis:>10.4f}   Cond. No.                   {cond_no:>10.4f}")
    print("==============================================================================")
    print("Notes:")
    print("[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.")

# Example usage:

# Feature matrix X without intercept column, shape (n x k) = (9 x 1)
X = np.arange(1, 10).reshape(-1, 1)
# Target vector y, shape (n,)
y = np.array([2, 3, 5, 7, 9, 10, 20, 30, 40])

# Run the linear regression; the summary table is printed to stdout
linear_regression_statsmodels_like(X, y)


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                      0.8399
Model:                            OLS   Adj. R-squared:                 0.8171
Method:                 Least Squares   F-statistic:                   36.7318
Date:                Thu, 10 Oct 2024   Prob (F-statistic):             0.0005
Time:                        13:57:11   Log-Likelihood:               -27.2501
No. Observations:                   9   AIC:                           58.5001
Df Residuals:                       7   BIC:                           58.8946
Df Model:                           1
Covariance Type:            nonrobust
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -8.1667     4.1163    -1.9840     0.0877    -17.9002      1.5669
x1             4.4333     0.7315     6.0607     0.0005 

In [11]:
#Zum Überprüfen ob Berechnung korrekt ist
import numpy as np
import statsmodels.api as sm

# Feature matrix X without intercept column, shape (n x k) = (9 x 1)
X = np.arange(1, 10).reshape(-1, 1)
# Target vector y, shape (n,)
y = np.array([2, 3, 5, 7, 9, 10, 20, 30, 40])

# Prepend the constant column so the reference model has an intercept
X_with_intercept = sm.add_constant(X)

# Ordinary least squares fit with statsmodels
model = sm.OLS(y, X_with_intercept)
results = model.fit()

# Reference summary to compare against the hand-written implementation
print(results.summary())


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.840
Model:                            OLS   Adj. R-squared:                  0.817
Method:                 Least Squares   F-statistic:                     36.73
Date:                Thu, 10 Oct 2024   Prob (F-statistic):           0.000511
Time:                        12:56:06   Log-Likelihood:                -27.250
No. Observations:                   9   AIC:                             58.50
Df Residuals:                       7   BIC:                             58.89
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -8.1667      4.116     -1.984      0.0

