In [1]:
import numpy as np
import time

def backward_substitution(A, y):
    """Solve the upper-triangular system ``A @ b = y`` by back substitution.

    Rows are processed from the last to the first. For each row only the
    diagonal entry and the entries to its right are read from ``A``.

    Args:
        A: Two-dimensional NumPy array; ``A.shape[0]`` is the number of
            unknowns.
        y: Right-hand side, indexable by row number.

    Returns:
        A float NumPy array of length ``A.shape[0]`` holding the solution.

    Raises:
        ValueError: If a diagonal entry of ``A`` is exactly zero.
    """
    size = A.shape[0]
    solution = np.zeros(size)

    for row in reversed(range(size)):
        pivot = A[row, row]
        if pivot == 0:
            raise ValueError("Die Matrix A enthält eine Null auf der Diagonale, keine eindeutige Lösung möglich.")

        # Contribution of the unknowns already solved in the rows below.
        resolved = np.dot(A[row, row + 1:], solution[row + 1:])
        solution[row] = (y[row] - resolved) / pivot

    return solution

def gram_schmidt(X):
    """Compute a thin QR factorization ``X = Q @ R`` via modified Gram-Schmidt.

    Each column of ``X`` is orthogonalized against the previously computed
    columns of ``Q`` and then normalized. The projection coefficients are
    taken from the partially orthogonalized vector (modified Gram-Schmidt)
    rather than from the original column (classical Gram-Schmidt). Both agree
    in exact arithmetic, but the modified variant keeps the columns of ``Q``
    far closer to orthogonal in floating point.

    Args:
        X: Two-dimensional NumPy array of shape ``(n, m)``.
            NOTE(review): presumably n >= m with linearly independent
            columns; only an exactly vanishing column is detected here.

    Returns:
        A tuple ``(Q, R)`` where ``Q`` has shape ``(n, m)`` with normalized
        columns and ``R`` is an upper-triangular ``(m, m)`` array.

    Raises:
        ValueError: If a column becomes exactly zero after orthogonalization,
            so it cannot be normalized.
    """
    n, m = X.shape
    Q = np.zeros((n, m))
    R = np.zeros((m, m))

    for j in range(m):
        # A view into X; it is never modified in place because every update
        # below rebinds v to a new array.
        v = X[:, j]

        for i in range(j):
            # Project the *current* remainder v, not the original column:
            # this is what makes the scheme numerically stable.
            R[i, j] = np.dot(Q[:, i], v)
            v = v - R[i, j] * Q[:, i]

        R[j, j] = np.linalg.norm(v)
        if R[j, j] == 0:
            # Dividing by zero would silently fill Q and R with NaN.
            raise ValueError("Die Spalten von X sind linear abhängig, keine QR-Zerlegung mit vollem Rang möglich.")
        Q[:, j] = v / R[j, j]

    return Q, R

def create_data(n, p, beta_true, seed=42):
    """Generate a reproducible synthetic linear-regression data set.

    The design matrix consists of a leading column of ones (intercept)
    followed by ``p`` columns drawn uniformly from ``[0, 1)``. The response is
    ``X @ beta_true`` plus Gaussian noise with standard deviation 0.1.

    A private ``RandomState`` is used instead of reseeding the global NumPy
    generator, so calling this function does not disturb random numbers drawn
    elsewhere. With the default seed it yields the same values as seeding the
    global legacy generator with 42.

    Args:
        n: Number of observations (rows).
        p: Number of random predictor columns (excluding the intercept).
        beta_true: Coefficients of length ``p + 1``, intercept first.
        seed: Seed for the private random number generator.

    Returns:
        A tuple ``(X, y)`` with ``X`` of shape ``(n, p + 1)`` and ``y`` of
        shape ``(n,)``.
    """
    rng = np.random.RandomState(seed)
    X = rng.rand(n, p)
    X = np.column_stack([np.ones(n), X])
    y = X @ beta_true + rng.randn(n) * 0.1
    return X, y

def linear_regression_manual_qr(X, y):
    """Fit regression coefficients via the hand-written QR solver and time it.

    Factorizes ``X`` with ``gram_schmidt`` and solves ``R @ beta = Q.T @ y``
    with ``backward_substitution``.

    Args:
        X: Design matrix passed to ``gram_schmidt``.
        y: Response vector.

    Returns:
        A tuple ``(beta, elapsed_time)``: the coefficient vector and the
        duration of factorization plus solve in seconds.
    """
    # perf_counter is monotonic and uses the highest-resolution clock
    # available, which matters for the sub-millisecond runs measured here;
    # time.time() can jump and is coarse on some platforms.
    start_time = time.perf_counter()
    Q, R = gram_schmidt(X)
    beta = backward_substitution(R, Q.T @ y)
    elapsed_time = time.perf_counter() - start_time
    return beta, elapsed_time

def run_benchmark(n_list, repetitions=5):
    """Time the manual QR regression for several sample sizes.

    For every ``n`` in ``n_list`` a data set is generated once and the fit is
    repeated ``repetitions`` times.

    Args:
        n_list: Iterable of sample sizes (number of rows).
        repetitions: Number of timed fits per sample size; must be >= 1.

    Returns:
        A list with one entry ``[n, avg_time, std_time, beta]`` per sample
        size, where the times are in seconds and ``beta`` is the coefficient
        vector from the last repetition.

    Raises:
        ValueError: If ``repetitions`` is smaller than 1.
    """
    if repetitions < 1:
        # Without a single run there is neither a timing nor a beta to report.
        raise ValueError("repetitions muss mindestens 1 sein.")

    beta_true = [-8, -1.6, 4.1, -10, -9.2, 1.3, 1.6, 2.3]
    # create_data prepends an intercept column, so one coefficient is not a
    # random predictor.
    p = len(beta_true) - 1

    results = []
    for n in n_list:
        # create_data uses a fixed seed and therefore returns identical data
        # on every call; build it once per sample size instead of once per
        # repetition. The solver does not modify X or y.
        X, y = create_data(n, p, beta_true)

        times = []
        for _ in range(repetitions):
            beta, elapsed_time = linear_regression_manual_qr(X, y)
            times.append(elapsed_time)

        results.append([n, np.mean(times), np.std(times), beta])

    return results

# Sample sizes to benchmark, from 100 up to one million rows.
n_values = [100, 500, 1000, 5000, 10000, 50000, 100000, 200000, 500000, 1000000]
benchmark_results = run_benchmark(n_values)

# Each result row starts with the sample size and the mean runtime in seconds;
# the remaining fields are not printed.
for n_rows, mean_runtime, *_ in benchmark_results:
    print("\n Datenzeilen: ", n_rows)
    print("Laufzeit: ", mean_runtime)


 Datenzeilen:  100
Laufzeit:  0.00013756752014160156

 Datenzeilen:  500
Laufzeit:  0.00023126602172851562

 Datenzeilen:  1000
Laufzeit:  0.00021834373474121093

 Datenzeilen:  5000
Laufzeit:  0.000815582275390625

 Datenzeilen:  10000
Laufzeit:  0.0010600090026855469

 Datenzeilen:  50000
Laufzeit:  0.011278200149536132

 Datenzeilen:  100000
Laufzeit:  0.010960769653320313

 Datenzeilen:  200000
Laufzeit:  0.024954557418823242

 Datenzeilen:  500000
Laufzeit:  0.10175361633300781

 Datenzeilen:  1000000
Laufzeit:  0.11514773368835449
