<a href="https://colab.research.google.com/github/helonayala/sysid/blob/main/orthogonal_least_squares.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from numpy.linalg import pinv, solve

# --- Modified Gram-Schmidt (MGS) Orthogonalization Function ---
# This implementation is based on Aguirre 2015.
# It produces an orthogonal matrix Q (columns are orthogonal,
# but not necessarily unit norm) and a unit upper triangular matrix A, such that P = Q @ A.
def MGS(P):
    """
    Performs Modified Gram-Schmidt orthogonalization on matrix P,
    as defined in Aguirre (2015).

    The columns of P are processed left to right. Column i of the working
    copy becomes the i-th orthogonal vector Q[:, i]; its component is then
    removed from every later column, and the projection coefficients are
    stored in row i of A. The result satisfies P = Q @ A.

    Args:
        P (array-like): 2-D input whose columns are the vectors to be
            orthogonalized. It is copied and converted to float; the
            caller's data is never modified.

    Returns:
        dict: A dictionary containing:
            'Q' (np.ndarray): Matrix with the same shape as P whose columns
                are mutually orthogonal (not normalized to unit length).
            'A' (np.ndarray): Unit upper triangular (n_cols x n_cols)
                matrix of projection coefficients.

    Raises:
        ValueError: If P is not 2-dimensional.
    """
    # astype(float) always returns a fresh array, so the updates below
    # never touch the caller's matrix.
    P_curr = np.asarray(P).astype(float)
    if P_curr.ndim != 2:
        raise ValueError(
            f"P must be a 2-D matrix, got an array with {P_curr.ndim} dimension(s)"
        )
    n_cols = P_curr.shape[1]

    A = np.eye(n_cols, dtype=float)  # Unit diagonal; off-diagonal entries filled below
    Q = np.zeros_like(P_curr)

    for i in range(n_cols):
        # After removing the components along Q[:, 0..i-1], what is left of
        # column i is the i-th orthogonal vector.
        Q[:, i] = P_curr[:, i]
        q_i = Q[:, i]

        # The squared norm depends only on i, so compute it once per column.
        q_i_norm_sq = q_i @ q_i

        # A (numerically) zero vector contributes no projection: row i of A
        # keeps its zero off-diagonal entries and later columns stay as they are.
        if q_i_norm_sq > 1e-18:
            # Each later column is updated independently of the others, so the
            # projections can be computed and subtracted for all of them at once.
            A[i, i + 1:] = (q_i @ P_curr[:, i + 1:]) / q_i_norm_sq
            P_curr[:, i + 1:] -= np.outer(q_i, A[i, i + 1:])

    return {'Q': Q, 'A': A}

# --- Example data: Table 3.1 of Billings (2013), Example 3.3 ---
# Same matrix as the 'Mat' variable of the original R code.
# Each row is one observation; the first four columns are the candidate
# regressors and the fifth column is the measured output.
mat_data = np.array([
    [9, -5, 5, -1.53, 9.08],
    [1, -1, 8, -0.39, 7.87],
    [2, -5, 6, -3.26, 3.01],
    [8, -2, 0, 0.36, 5.98],
    [0, 0, 9, 0.13, 9.05]
])

# Split into the regressor matrix (all columns but the last) and the output,
# which is kept as a column vector of shape (n_samples, 1).
P_original, Y = mat_data[:, :-1], mat_data[:, -1:]

# Ordinary least squares through the Moore-Penrose pseudo-inverse, the
# counterpart of 'th_ls = ginv(P) %*% Y' in the R code. For a regressor matrix
# with full column rank this equals theta_hat = (P^T P)^-1 P^T Y.
th_ls = np.matmul(pinv(P_original), Y)

print('--- Ordinary Least Squares (OLS) Solution ---')
print('OLS estimated parameters (th_ls):', th_ls, sep='\n')

# Announce the orthogonal least squares part and list the regressor subsets
# that reproduce the rows of Table 3.2.
print('\n'.join((
    '\n--- Orthogonal Least Squares (OLS) Solution ---',
    'Executing for example 3.3 in billings 2013 book',
    'Select which line (1, 2 or 3) in table 3.2 you want to check:',
    "1: P[:, [2, 0]] (corresponds to R's P[,c(3,1)] - using columns 3 and 1 from original P)",
    "2: P[:, [2, 0, 1]] (corresponds to R's P[,c(3,1,2)] - using columns 3, 1, and 2 from original P)",
    "3: P[:, [2, 0, 1, 3]] (corresponds to R's P[,c(3,1,2,4)] - using columns 3, 1, 2, and 4 from original P)",
)))

# The choice is fixed so the script runs reproducibly without user interaction.
# For an interactive run from a terminal, assign the result of
# input('Enter your choice (1, 2, or 3): ') to 'choice' instead.
choice = '1'  # Change to '2' or '3' to check the other table rows.

# Zero-based column indices for each menu entry; they are the R (one-based)
# indices c(3,1), c(3,1,2) and c(3,1,2,4) shifted down by one. The order of
# the indices is the order in which the regressors enter the model.
regressor_columns = {
    '1': [2, 0],
    '2': [2, 0, 1],
    '3': [2, 0, 1, 3],
}

if choice in regressor_columns:
    P = P_original[:, regressor_columns[choice]]
else:
    # Unknown entry: report it and fall back to the first table row.
    print("Invalid choice. Using default choice '1'.")
    P = P_original[:, regressor_columns['1']]

niter = P.shape[1]  # Number of regressors (columns in the selected P matrix)

# Perform Modified Gram-Schmidt orthogonalization on the selected P matrix,
# giving P = W @ A.
out = MGS(P)
W = out['Q']  # Orthogonalized matrix (columns are orthogonal, not necessarily unit norm)
A = out['A']  # Unit upper triangular matrix from the MGS decomposition

# Calculate Alpha (W.T @ W).
# Since W has orthogonal (not orthonormal) columns, Alpha is diagonal up to
# rounding, with the squared norms of the columns of W on its diagonal.
Alpha = W.T @ W
print('\nAlpha (W.T @ W, should be diagonal for orthogonal W):')
print(Alpha)

# Quantities shared by 'g' and 'ERR', computed for all regressors at once.
# They are deliberately built from 1-D vectors: element-wise assignments such
# as g[i] = (Y.T @ W[:, i]) / ... store a size-1 array with ndim > 0 into a
# scalar slot, which NumPy has deprecated (since 1.25) and will reject.
y_vec = Y.ravel()                  # Output as a 1-D vector (Y is expected to hold one output column)
w_dot_y = W.T @ y_vec              # Inner product of Y with each column of W
w_norm_sq = np.sum(W * W, axis=0)  # Squared norm of each column of W

# Calculate 'g' coefficients in the orthogonal basis: the projection of Y onto
# each orthogonal vector, divided by that vector's squared norm.
g = (w_dot_y / w_norm_sq).reshape(-1, 1)  # Column vector for the matrix operations below
print('\ng (coefficients in orthogonal basis):')
print(g)

# Calculate 'g2' as an alternative verification for 'g'.
# This is equivalent to 'solve(Alpha) %*% t(W) %*% Y' in R.
# Since Alpha is diagonal, solve(Alpha, ...) effectively scales W.T @ Y.
g2 = solve(Alpha, W.T @ Y)
print('\ng2 (alternative calculation of g, should be same as g):')
print(g2)

# Calculate ERR (Error Reduction Ratio) for each regressor:
#   ERR[i] = <Y, w_i>^2 / (<Y, Y> * <w_i, w_i>)
# i.e. the fraction of the output sum of squares explained by the i-th
# orthogonal regressor.
ERR = w_dot_y**2 / ((y_vec @ y_vec) * w_norm_sq)

# Calculate ESR (Error Sum Ratio).
# ESR is 1 minus the sum of ERR, representing the unexplained fraction.
ESR = 1 - np.sum(ERR)

# Calculate the final OLS parameters (th_OLS) from the orthogonal basis.
# This is equivalent to 'th_OLS = solve(A,g)' in R: since P = W @ A, solving
# A @ theta = g converts the coefficients back to the original regressor basis.
th_OLS = solve(A, g)

print('\n--- Final Results ---')
print('OLS estimated parameters (th_OLS from OLS method using Gram-Schmidt):')
print(th_OLS)
print('\nERR (Error Reduction Ratio) for each orthogonal regressor:')
print(ERR)
print('\nESR (Error Sum Ratio, total unexplained variance):')
print(ESR)