In [66]:
if True: # enable folding code
    if False:
        from julia.api import Julia
        jl = Julia(compiled_modules=False)

    import julia; julia.install(quiet=True)
    from julia import Main

    import numpy as np
    import sympy as sp
    from IPython.display import display, Math

    np.set_printoptions(precision=3, suppress=True)

    try:
        from scipy.linalg import norm, inv, cholesky, eig, eigh, qz, ordqz
        HAVE_SCIPY = True
    except Exception as e:
        HAVE_SCIPY = False
        print("SciPy not available; QZ and ordqz demos will be skipped:", e)

In [2]:
%load_ext julia.magic

Initializing Julia interpreter. This may take some time...


In [3]:
%%julia
using Pkg
gla_dir = "../GenLinAlgProblems"
Pkg.activate(gla_dir)
using GenLinAlgProblems, LinearAlgebra, BlockArrays, RowEchelon, LaTeXStrings, Latexify, SymPy, Random

using PyCall
itikz = pyimport("itikz")
nM    = pyimport("itikz.nicematrix");

"""
    is_upper_hessenberg(H; tol = 1e-10) -> Bool

Return `true` when every entry of `H` strictly below the first subdiagonal
(`H[i, j]` with `i > j + 1`) has magnitude at most `tol`.

Only columns that actually exist are inspected, so a tall rectangular `H`
(more rows than columns) is checked without indexing past its last column.
"""
function is_upper_hessenberg(H; tol = 1e-10)
    nrows, ncols = size(H, 1), size(H, 2)
    for i in 3:nrows
        # Row i may only hold nonzeros from column i-1 onwards;
        # clamp to ncols so rows beyond a short matrix stay in bounds.
        for j in 1:min(i - 2, ncols)
            if abs(H[i, j]) > tol
                return false
            end
        end
    end
    return true
end

"""
    is_upper_triangular(T; tol = 1e-10) -> Bool

Return `true` when every entry of `T` strictly below the main diagonal
(`T[i, j]` with `i > j`) has magnitude at most `tol`.

Only columns that actually exist are inspected, so a tall rectangular `T`
(more rows than columns) is checked without indexing past its last column.
"""
function is_upper_triangular(T; tol = 1e-10)
    nrows, ncols = size(T, 1), size(T, 2)
    for i in 2:nrows
        # Row i must vanish in columns 1..i-1; clamp to ncols to stay in bounds.
        for j in 1:min(i - 1, ncols)
            if abs(T[i, j]) > tol
                return false
            end
        end
    end
    return true
end
;

  Activating project at `C:\Users\jeff\NOTEBOOKS\elementary-linear-algebra\GenLinAlgProblems`


<div style="height:2cm;">
<div style="float:center;width:100%;text-align:center;"><strong style="height:100px;color:darkred;font-size:40px;">Generalized Eigenvalue Computation</strong>
</div></div>

# 1. Introduction

This notebook addresses numerical methods for solving the generalized eigenproblem (GEP):  
$\qquad \displaystyle{ A \mathbf{v} = \lambda B \mathbf{v} }$

The focus is on algorithmic strategies for computing eigenvalues and eigenvectors,  
particularly in structured cases where $A$ and $B$ are symmetric, and $B$ is positive definite.  
Structured solvers are contrasted with general-purpose routines suitable for arbitrary pencils.

Topics include:

- Solvers for symmetric-definite pencils
- Treatment of general matrix pencils
- Sensitivity to conditioning and numerical roundoff
- Interpretation of computed eigenpairs

The theoretical foundation, including variational formulations and physical interpretations,  
is presented in the preceding notebook: [**GEP_intro.ipynb**](GEP_intro.ipynb).

____
Below, we introduce relevant algorithms to solve the GEP problem using simple reference implementations.

# 2. Symmetric-Definite Case

Consider the generalized eigenproblem:  
$\qquad \displaystyle A x = \lambda B x$  
with $A = A^T$, $B = B^T \succ 0$ (i.e., real symmetric $A$ and symmetric positive-definite $B$).

**Remark:** If $B$ is invertible, we may write:  
$\qquad A x = \lambda B x \;\Leftrightarrow\; B^{-1} A x = \lambda x$

$\qquad$ However, explicitly forming $B^{-1} A$ is **ill-advised** when $B$ is ill-conditioned,  
$\qquad$ and destroys any symmetry or definiteness.

We therefore use the **Cholesky factorization** of $B$ to transform the problem without explicitly inverting $B$:
$\;\; B = L^T L$

____

Since $B$ is symmetric positive-definite, it admits a **Cholesky decomposition**:  
$\qquad \displaystyle B = L^T L$  
where $L$ is upper triangular with positive diagonal entries.

Substitute into the eigenproblem:  
$\qquad \displaystyle A x = \lambda\, L^T L x$

Let $\;\; \displaystyle{L x = z \Leftrightarrow x = L^{-1} z,\;\; }$ then  
$\qquad \displaystyle{
\begin{aligned}
A x = \lambda B x 
\quad &\Leftrightarrow\quad A L^{-1} z = \lambda L^T L L^{-1} z \\
     &\Leftrightarrow\quad A L^{-1} z = \lambda L^T z \\
     &\Leftrightarrow\quad (L^T)^{-1} A L^{-1} z = \lambda z
\end{aligned}
}$

Define:  
$\qquad \displaystyle \tilde{A} = (L^T)^{-1} A L^{-1}$

Then the problem reduces to the standard eigenproblem:  
$\qquad \displaystyle \tilde{A} z = \lambda z$

Once eigenpairs $(\lambda, z)$ are computed, the original eigenvectors are recovered via  
$\qquad \displaystyle x = L^{-1} z$

**Remark:** **explicitly computing** $\displaystyle{ L^{-1} }$ **or** $\displaystyle{ (L^T)^{-1} }$ **should be avoided in practice**.
- To evaluate $\displaystyle x = L^{-1} z$, solve the triangular system $L x = z$ (back substitution, since $L$ is upper triangular).
- To apply the transformation $\displaystyle \tilde{A} = (L^T)^{-1} A L^{-1}$,  
  use triangular solves instead of forming matrix inverses.

#### **Algorithm Summary**

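1) Compute the upper triangular factor $L$ (via Cholesky factorization of $B = L^T L$).  
2) Form $\tilde A = L^{-T} A L^{-1}$ by triangular solves.  
3) Compute eigenpairs $(\lambda, z)$ of the symmetric matrix $\tilde A$.  
4) Recover $x = L^{-1} z$; optionally normalize in the $B$-norm: $x \leftarrow x/\sqrt{x^T B x}$.

The implementation below uses the equivalent lower-triangular convention $B = L L^T$, for which the same steps read $\tilde A = L^{-1} A L^{-T}$ and $x = L^{-T} z$.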

#### **Example Implementation and Test**

In [4]:
def gep_symmetric_definite(A,B):
    """Solve A x = λ B x for symmetric A and SPD B via Cholesky whitening.

    With the lower Cholesky factor B = L Lᵀ, the pencil is reduced to the
    standard symmetric problem  Ã z = λ z,  Ã = L⁻¹ A L⁻ᵀ,  and the
    eigenvectors are mapped back through  x = L⁻ᵀ z.  Every application of
    L⁻¹ / L⁻ᵀ is a triangular solve; no inverse matrix is ever formed.

    Parameters
    ----------
    A : (n, n) array, real symmetric.
    B : (n, n) array, real symmetric positive definite.

    Returns
    -------
    lam : (n,) array of eigenvalues in ascending order (as returned by eigh).
    X   : (n, n) array whose columns are B-normalized eigenvectors
          (xᵀ B x = 1 for every column with a positive B-norm).

    Raises
    ------
    RuntimeError
        If SciPy cannot be imported.
    """
    # Import locally so the function is self-contained and fails with the
    # same RuntimeError as before when SciPy is missing.
    try:
        from scipy.linalg import cholesky, eigh, solve_triangular
    except ImportError as exc:
        raise RuntimeError("SciPy required here for robust Cholesky/eigh.") from exc

    L = cholesky(B, lower=True)        # B = L Lᵗ

    # At = L^{-1} A L^{-T}, computed with two triangular solves:
    #   W  = L^{-1} A
    #   At = W L^{-T} = (L^{-1} Wᵀ)ᵀ
    W  = solve_triangular(L, A, lower=True)
    At = solve_triangular(L, W.T, lower=True).T
    At = 0.5 * (At + At.T)             # remove rounding-level asymmetry before eigh

    lam, Z = eigh(At)

    # Map back: solve Lᵀ X = Z
    X = solve_triangular(L, Z, lower=True, trans='T')

    # B-normalize each column; columns with a non-positive (or NaN) B-norm are left as is
    nB = np.sqrt(np.einsum("ij,ij->j", X, B @ X))
    X = X / np.where(nB > 0, nB, 1.0)
    return lam, X

In [5]:
def sym_pos_def_demo():
    """Build a random 5x5 symmetric-definite pencil, solve it, and report diagnostics.

    The random generator is not seeded, so every run draws a fresh pencil.
    Prints the eigenvalues, the B-orthonormality error of the eigenvectors,
    and the eigenpair residual.
    """
    dim = 5

    # B: Gram matrix shifted by dim*I, hence symmetric positive definite
    G = np.random.randn(dim, dim)
    B = G @ G.T + dim * np.eye(dim)

    # A: symmetric part of a random matrix
    M = np.random.randn(dim, dim)
    A = 0.5 * (M + M.T)

    lam, X = gep_symmetric_definite(A, B)

    Main.py_show("A = ", np.round(A, 2), r",\quad B = ", np.round(B, 2));

    b_orthonormality_error = np.linalg.norm(X.T @ B @ X - np.eye(dim))
    eigenpair_residual     = np.linalg.norm(A @ X - B @ X @ np.diag(lam))

    print("\nEigenvalues (symmetric-definite route):", lam)
    print("\n    ||X^T B X - I||         = ", b_orthonormality_error)
    print("    ||A X - B X diag(lam)|| = ", eigenpair_residual)
sym_pos_def_demo()

<IPython.core.display.Latex object>


Eigenvalues (symmetric-definite route): [-0.274 -0.049 -0.042 -0.029  0.163]

    ||X^T B X - I||         =  9.963783643130553e-16
    ||A X - B X diag(lam)|| =  2.6965829009509e-15


____
**Observe:**

- The eigenvectors are $B$-orthonormal:  
  $\qquad \|X^T B X - I\| \approx  10^{-15}$,  
  consistent with numerical precision.
- The eigenpair residual:  
  $\qquad \|A X - B X \operatorname{diag}(\lambda)\| \approx 10^{-15}$,  
  consistent with numerical precision.
- All eigenvalues are real, as expected for symmetric-definite problems.

____
<div style="display: flex; margin-top: 1em; margin-bottom: 1em;">
<!-- -------------------- Python box -->
<div style="width: 32em; height: 7.0em; padding-right: 1cm;
            background: #f8f8f8; border: 1px solid #ccc; border-radius: 4px;
            font-family: monospace; font-size: 90%; line-height: 1.2;
            display: flex; align-items: flex-start; padding: 0.6em 0.75em;
            box-sizing: border-box;">
  <div>
    <div style="font-weight: bold; margin-bottom: 0.2em;">Python</div>
    <div># A symmetric, B symmetric positive-definite</div>
    <div>from scipy.linalg import eigh</div>
    <div>lam, X = eigh(A, B)</div>
  </div>
</div>
<!-- -------------------- Julia box -->
<div style="width: 32em; height: 7.0em; margin-left: 1cm;
            background: #f8f8f8; border: 1px solid #ccc; border-radius: 4px;
            font-family: monospace; font-size: 90%; line-height: 1.2;
            display: flex; align-items: flex-start; padding: 0.6em 0.75em;
            box-sizing: border-box;">
  <div>
    <div style="font-weight: bold; margin-bottom: 0.2em;">Julia</div>
    <div># A symmetric, B symmetric positive-definite</div>
    <div>using LinearAlgebra</div>
    <div>lam, X = eigen(Symmetric(A), Symmetric(B))</div>
  </div>
</div>
</div>

# 3. General Case

A solution to the generalized eigenproblem $A x = \lambda B x$ in the **general case**,  
where no symmetry or definiteness is assumed, is obtained using the **QZ algorithm**.

This section introduces:
- The **generalized Schur form**, a triangular representation of the matrix pair $(A, B)$
- **Orthogonal transformations** used to reduce the pair to this form
- The structure of the QZ algorithm, organized into four stages:

1. **Preparation**: Apply initial orthogonal transformations
2. **Reduction**: Convert $(A, B)$ to **Hessenberg–triangular form**
3. **Schur Form**: Reduce to **generalized Schur form** $(S, T)$ via the QZ iteration
4. **Extraction**: Compute eigenvalues $\lambda = \alpha / \beta$ and (optionally) eigenvectors

## 3.1 Matrices in Generalized Schur Form

<div style="background-color:#F2F5A9;color:black;">

**Definition:**  A pair $(S,T)$ is in **(real, quasi-) generalized Schur form**<br>
$\qquad$ if both are block upper triangular with the **same** block pattern<br>
$\qquad$ and each diagonal block is either $1\times1$ or $2\times2$.

The generalized eigenproblem for $(S,T)$ is $\;\; S x=\lambda\,T x$.
</div>

Once the pencil $(A, B)$ has been reduced to generalized Schur form, the eigenproblem reduces to solving the homogeneous system:

$\qquad S x = \lambda T x$

This can be solved efficiently by backward substitution, block by block.

#### Example: Extracting Eigenvalues and Eigenvectors from a Matrix Pair in Generalized Schur Form

Consider the following matrix pair in generalized Schur form, with one block of size $2\times 2$ and 2 blocks of size $1\times 1$<br>
highlighted in red:

$\qquad
S=\left(\begin{array}{rrrr}
\color{red}4&\color{red}7&5&6\\
\color{red}{-4}&\color{red}{-2}&3&8\\
0&0&\color{red}6&2\\
0&0&0&\color{red}{-3}
\end{array}\right),\quad
T=\begin{pmatrix}
\color{red}2&\color{red}1&4&3\\
\color{red}0&\color{red}2&7&5\\
0&0&\color{red}2&1\\
0&0&0&\color{red}1
\end{pmatrix}\;\;
$

**Eigenvalues.**<BR>
The characteristic polynomial $\det(S - \lambda T)$ decomposes as a product of blockwise determinants,  
with each $1 \times 1$ or $2 \times 2$ diagonal block contributing a distinct factor corresponding to one/two eigenvalues respectively.

- Leading $2\times2$ block $\;\;S_{1} = \left(\begin{array}{rr} 4&7 \\ {-4}&{-2}\\ \end{array}\right), \;\;
T_{1} = \begin{pmatrix} 2 & 1 \\ 0 & 2\end{pmatrix}$

$\qquad \det\!\big(S_{1}-\lambda T_{1}\big)=4(\lambda^2-2\lambda+5)=0
\;\Rightarrow\; \lambda=1\pm 2i.$
- $1\times1$ blocks $\;\;S_2=\begin{pmatrix}6\end{pmatrix},\;\; T_2 = \begin{pmatrix}2\end{pmatrix}\;\;$ and
$S_3=\begin{pmatrix}-3\end{pmatrix},\;\; T_3 = \begin{pmatrix}1\end{pmatrix}\;\;$

$\qquad \lambda_3=6/2=3,\quad \lambda_4=-3/1=-3.$

**Right eigenvectors (Schur basis).** For each listed $\lambda$, solve
<br>$\qquad (S-\lambda T) x = 0$
by back substitution as above:

$\qquad
x_{1-2i} =
\left(\begin{array}{c}
-1 + i\\
1\\
0\\
0
\end{array}\right),
\;\;
x_{1+2i} =
\left(\begin{array}{c}
-1 - i\\
1\\
0\\
0
\end{array}\right),
\;\;
x_{3} =
\left(\begin{array}{r}
-16\\
-1\\
4\\
0
\end{array}\right),
\;\;
x_{-3} =
\left(\begin{array}{r}
59\\
-97\\
-20\\
48
\end{array}\right).
$

## 3.2 The A = RQ and the A = QR Factorizations

To transform a square matrix pair to generalized Schur Form, we will use two operations that strategically introduce zeros<br>
in rows/columns of a matrix using orthogonal matrices:

Orthogonal matrices $Q$ from the left or $Z$ from the right can be chosen to annihilate selected entries in a matrix $A$<br>
while preserving numerical stability:
* $Q A = R$ the matrix $Q$ forms linear combinations of the rows of $A$. We can choose $Q$ to introduce 0 in a particular column of the matrix $A$
* $A Z = R$ the matrix $Z$ forms linear combinations of the columns of $A$. We can choose $Z$ to introduce 0 in a particular row of the matrix $A$

In this notebook, these operations are implemented via:
- `QR_step!()`: introduces zeros in a **column**
- `RQ_step!()`: introduces zeros in a **row**

Both operations use **Householder reflections**, which are compact and numerically stable.

Alternatively, **Givens rotations** may be used to accomplish the same effect, typically in sequential two-by-two steps.

### 3.2.1 Householder Reflections

Householder reflections were first introduced in [**HouseholderReflections**](HouseholderReflections)

<div>
<div style="float:left;padding-right:2cm;padding-top:0cm;">

Let $x \in \mathbb{R}^n$, and let $e$ be a unit vector defining the target direction.  
Define $y = x - \|x\| e$, which determines the reflection that maps $x$ onto $\|x\| e$.  
Decompose $x$ along $y$ as $x = x_\parallel + x_\perp$,  
where $x_\parallel = \dfrac{y^T x}{y^T y} y$ is the component of $x$ parallel to $y$.  
The reflected vector is  
$\qquad
\|x\| e = x - 2x_\parallel = x - 2\dfrac{y^T x}{y^T y} y
$

The corresponding orthogonal and symmetric Householder matrix is  
$\qquad
H = I - 2\dfrac{y y^T}{y^T y},
$

which satisfies $H x = \|x\| e$ and preserves vector norms under reflection.
</div>
<div style="float:left;width:30%;"><img src="./Figs/HouseholderReflection_v1.svg" width=600></div>
</div>

In [6]:
%%julia
@doc """
    householder_vector(x::AbstractVector; target::Symbol = :first) -> Vector

Compute a unit-length Householder vector `w` such that the reflection
`Q = I - 2wwᵀ` maps `x` onto the `target` coordinate axis: `Q*x` is zero except in
the first (`:first`) or last (any other `target` value) entry, which becomes
`sign(x[k]) * norm(x)` (or `+norm(x)` when `x[k]` is zero), i.e. the sign of the
pivot entry is kept.

# Arguments
- `x`: the vector to be reflected. It is not modified.
- `target`: `:first` (default) aligns with the first coordinate; `:last` aligns with the last.

# Returns
- A floating-point vector `w` of the same length as `x`. `w` has unit norm, except
  that the zero vector is returned (so that `Q = I`) when `x` is all zeros, empty,
  or already has zeros everywhere but the pivot entry.

# Notes
The pivot entry of `w` is `x[k] - sign(x[k])*norm(x)`. Evaluating that difference
directly cancels catastrophically when `x` is nearly aligned with the target axis,
so it is computed from the algebraically equivalent expression
`-sign(x[k]) * ρ^2 / (abs(x[k]) + norm(x))`, where `ρ` is the norm of the remaining entries.
"""
function householder_vector(x::AbstractVector; target::Symbol = :first)
    T = float(eltype(x))
    n = length(x)
    if all(iszero, x)
        return zeros(T, n)
    end

    # Work on a floating-point copy so that x (possibly a view) is never modified
    # and integer input does not fail on conversion.
    w = copyto!(Vector{T}(undef, n), x)

    k    = target == :first ? 1 : n              # pivot position
    rest = target == :first ? (2:n) : (1:n-1)    # all other positions

    pivot = w[k]
    σ     = norm(w)
    ρ     = norm(view(w, rest))

    # Nothing to annihilate: x already lies on the target axis.
    if iszero(ρ)
        return zeros(T, n)
    end

    # The sign decides which of the two possible reflections is used;
    # a zero pivot is treated as positive.
    s = iszero(pivot) ? one(T) : sign(pivot)

    # Cancellation-free evaluation of  pivot - s*σ
    w[k] = -s * ρ * (ρ / (abs(pivot) + σ))

    wnorm = norm(w)
    if iszero(wnorm)
        return zeros(T, n)
    end

    return w / wnorm
end
;

In [10]:
%%julia
# Demo: build the Householder reflector for x, display x, w, Q and Q*x,
# and print whether Q is orthogonal.
function test_householder_vector(x; target::Symbol=:first)
    # Unit Householder vector of the floating-point copy of x, and Q = I - 2wwᵀ
    w = householder_vector(float.(x), target=target)
    Q = 1.0I - 2w*w'

    # LaTeX label of the coordinate vector that x is reflected onto
    target_label = if target == :first
        L"e_1"
    else
        L_interp( L"e_$(s)", Dict( "s" => length(x)) )
    end

    py_show( "target = ", target_label,
             L",\qquad x=", Int.(x),
             L",\qquad w = ", w,
             L",\quad Q = ", Q,
             L"\qquad Q x = ", Q*x,
             number_formatter=x->round(x,digits=2) )

    flush(stdout)
    println("    (Q'Q ≈ I) = ", Q'Q ≈ I)
    flush(stdout)
end
println( "Reflecting vector [1,1,1] onto e₁ and onto e₃\n\n"); flush(stdout)
test_householder_vector( [ 1,1,1 ] )
test_householder_vector( [ 1,1,1 ]; target=:last  )

Reflecting vector [1,1,1] onto e₁ and onto e₃




<IPython.core.display.Latex object>

    (Q'Q ≈ I) = true


<IPython.core.display.Latex object>

    (Q'Q ≈ I) = true


____
Below, we will use **Householder Reflections to introduce zeros in a row and/or a column** of a matrix.

### 3.2.2 $A = QR$ one Column at a Time, $A = RZ$ one Row at a Time

#### QR_step: Introduce Zeros in a Column

Zeroing out entries in a column can be done by using a Householder reflector matrix $Q$ from the left. The transformation<br>
$\qquad \tilde{A} = Q A$
uses $Q$ to form linear combinations of the rows of $A$, creating zeros in the column targeted by $Q$.

Below a naive reference implementation using **Householder reflections** to zero out entries **one column at a time**.

In [11]:
%%julia
"""
    QR_step!(A::AbstractMatrix{Float64}, i::Int, j::Int) -> Q

Left-multiply `A` in place by a Householder reflector `Q` (`A ← Q * A`) built from
the segment `A[i:end, j]`, so that the entries **below** the pivot `A[i,j]` in
column `j` are annihilated. Rows `1:i-1` of `A` are left untouched, because the
reflector vector is zero there.

Arguments:
- `A`: matrix to modify in place.
- `i`: row index of the pivot.
- `j`: column index of the pivot.

Returns:
- The full-size matrix `Q = I - 2vvᵀ` that was applied, with one row and one
  column per row of `A`.
"""
function QR_step!(A::AbstractMatrix{Float64}, i::Int, j::Int)
    nrows = size(A, 1)

    # Slicing copies, so the reflector is built from a snapshot of the pivot
    # entry and everything below it in column j.
    u_tail = householder_vector(A[i:end, j]; target = :first)

    # Pad with zeros above the pivot row so the reflector acts on rows i:end only
    u = zeros(eltype(A), nrows)
    u[i:end] .= u_tail

    Q = I - 2 * (u * u')
    A .= Q * A

    return Q
end;

#### RQ_step: Introduce Zeros in a Row

Similarly, zeroing out entries in a row can be done by using a Householder reflector matrix $H$ from the right. The transformation<br>
$\qquad \tilde{A} = A H$
uses $H$ to form linear combinations of the columns of $A$, creating zeros in the row targeted by $H$.

Below a naive reference implementation using **Householder reflections** to zero out entries **one row at a time**

In [12]:
%%julia

"""
    RQ_step!(A::AbstractMatrix{Float64}, i::Int, j::Int; len::Int = j) -> Qr

Right-multiply `A` in place by a Householder reflector `Qr` (`A ← A * Qr`) built from
the row segment `A[i, start:j]`, where `start = max(1, j - len + 1)`. The reflector
targets the last entry of that segment, so the entries of row `i` to the **left**
of the pivot `A[i,j]` within the segment are annihilated. Columns outside
`start:j` are left untouched.

Arguments:
- `A`: matrix to modify in place.
- `i`: row index of the pivot.
- `j`: column index of the pivot.
- `len`: number of active entries ending at column `j` (default: all of `1:j`).
  For `len <= 0` nothing is changed and an identity matrix is returned.

Returns:
- The full-size matrix `Qr = I - 2vvᵀ` that was applied, with one row and one
  column per column of `A`.
"""
function RQ_step!(
    A::AbstractMatrix{Float64},
    i::Int,
    j::Int;
    len::Int = j
)
    ncols = size(A, 2)

    # Empty active window: no reflection at all
    len <= 0 && return Matrix{Float64}(I, ncols, ncols)

    # Active column window, clipped at the first column
    first_col = max(1, j - len + 1)
    active    = first_col:j

    # Reflector vector for the row segment, read straight from A
    u_seg = householder_vector(view(A, i, active); target = :last)

    # Pad with zeros outside the active window
    u = zeros(eltype(A), ncols)
    u[active] .= u_seg

    Qr = I - 2 * (u * u')
    A .= A * Qr
    return Qr
end
;

#### Example: RQ and QR Factorization, one Column/Row at a Time

These two operations can be combined to obtain factorizations of $A$
* $A = Q R_l$ consists of $Q$, a product of orthogonal matrices zeroing out one column at a time, from left to right<br>
and an upper triangular matrix $R_l$
* $A = R_r Z$ consists of $Z$, a product of orthogonal matrices zeroing out one row at a time, from bottom to top<br>
and an upper triangular matrix $R_r$

In [13]:
%%julia
# Demo: triangularize a random n×n matrix twice, once from the left with QR_step!
# (one column per iteration, left to right) and once from the right with RQ_step!
# (one row per iteration, bottom to top), displaying both intermediate matrices
# after every iteration and finally checking the factorizations.
function test_qr_and_rq(n::Int = 4)
    A = randn(n, n)
    Rl = copy(A)      # reduced in place by left reflectors
    Rr = copy(A)      # reduced in place by right reflectors
    py_show("Convert A to upper triangular: ", L"A = Q R_l\;\;", "and", L"\;\; A = R_r Z" )

    display(py_show("original matrix A = ", A, number_formatter = x -> round(x, digits=2)))

    Q = Matrix{Float64}(I, n, n)
    Z = Matrix{Float64}(I, n, n)
    for j in 1:n
        Qj     = QR_step!(Rl, j, j)             # zero column j below the diagonal
        Zj     = RQ_step!(Rr, n-j+1, n-j+1)     # zero row n-j+1 left of the diagonal
        Q      = Qj * Q  # newest left reflector is prepended:  Rl = Q * A
        Z      = Z * Zj  # newest right reflector is appended:  Rr = A * Z
        py_show(L"R_l = ",      (m=Rl, per_element_style = (x,ii,jj,fx)->tril_formatter(x,ii,jj,fx; k=-1, c1=1,c2=j)),
                L"\qquad R_r =",(m=Rr, per_element_style = (x,ii,jj,fx)->tril_formatter(x,ii,jj,fx; k=-1, r1=n-j+1,r2=n)),
                        number_formatter = x -> round(x, digits=2))
    end

    # NOTE: @show prints the expression text, so the names below are part of the output.
    @show (A ≈ Q' * Rl)           # Rl = Q*A with Q orthogonal, hence A = Qᵗ*Rl
    @show (A ≈ Rr * Z')           # Rr = A*Z with Z orthogonal, hence A = Rr*Zᵗ
    flush(stdout)
    @show (Q'Q ≈ I)
    @show (Z'Z ≈ I)
    flush(stdout)
end
test_qr_and_rq(4)

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

A ≈ Q' * Rl = true
A ≈ Rr * Z' = true
Q' * Q ≈ I = true
Z' * Z ≈ I = true


* On the left, we form successive Householder reflection matrices using the $x$ vector from the diagonal down,<br>
introducing zeros in each successive column (from left to right).<br>
The reflector is chosen to maintain a diagonal value (QR_step with target=:first)
* On the right, we form successive Householder reflection matrices using the $x$ vector from the left to the current diagonal entry,<br>
introducing zeros in each successive row (from the bottom up).<br>
The reflector is chosen to maintain a diagonal value (RQ_step with target=:last)

## 3.3 Reducing a Pencil to Hessenberg Triangular Form

### 3.3.1 Hessenberg–Triangular Reduction

The **Hessenberg–triangular (HT) reduction** transforms a matrix pair $(A,B)$ into a pair $(H,T)$,<br>

$\qquad
Q^{T} A Z = H, \quad Q^{T} B Z = T.
$

where $H$ is upper Hessenberg and $T$ is upper triangular, and<br>
$Q$, $Z$ are orthogonal matrices applied from the left and right, respectively.

---

#### **Step 1.** Begin by reducing $B$ to upper triangular form using an orthogonal matrix from the left:

> Triangularize $B$ using the QR decomposition:
> 
> $\qquad$   Apply a sequence of $Q_k$ (*QR steps*) to $B$ until it becomes upper triangular.<br>
$\qquad$   The orthogonal matrices are applied to both matrices $A$ and $B$.
> 
>   $\qquad\qquad
   B \leftarrow Q_k B, \qquad  A \leftarrow Q_k A.
   $

#### **Step 2.** Form Hessenberg–triangular pair (optional step):

>   Reduce $(A,B)$ to generalized Hessenberg form
>
> For each column $k = 1, \dots, n-2$:
> - Apply a **left** orthogonal matrix $Q_k$ to zero entries below $A_{k+1,k}$  
  (i.e., reduce column $k$ of $A$ to Hessenberg form):  
  $\qquad A \leftarrow Q_k A, \qquad B \leftarrow Q_k B$
> - This left update introduces a **bulge** in $B$: nonzero elements just below the main diagonal.  
  Eliminate the bulge using a sequence of **right** orthogonal matrices $Z_i$, applied **bottom to top**:
>
>  $\qquad i = n, n{-}1, \dots, k{+}2,\qquad Z_i \text{ acts on row } i \text{ with length } (i - k)$
> - Each $Z_i$ removes one subdiagonal element of $B$ and updates $A$:  
  $\qquad B \leftarrow B Z_i, \qquad A \leftarrow A Z_i$
>
> After $k = n{-}2$ iterations:
> - $A$ is upper Hessenberg
> - $B$ is upper triangular
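> - The accumulated transformations $Q = Q_1 Q_2 \cdots Q_k$ (each $Q_j$ is a symmetric Householder reflector, so $Q^T = Q_k Q_{k-1} \cdots Q_1$ is the product actually applied from the left) and $Z = Z_1 Z_2 \cdots Z_k$ satisfy: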
>
>$\qquad H = Q^T A Z, \qquad T = Q^T B Z$

---
This two-sided process alternates left QR reductions and right RQ *bulge chases*,<br>
progressively constructing the upper‑Hessenberg/upper‑triangular pair<br>
that serves as the starting point of the **QZ (Generalized Schur) algorithm**.

In [None]:
%%julia
"""
Educational Hessenberg–Triangular reduction for a matrix pair (A,B). (Steps 1 and 2)

Uses:
  • QR_step!(A, i, j): left Householder; zeros entries below A[i,j].
  • RQ_step!(A, i, j): right Householder; zeros entries left of A[i,j].

Both functions modify the given matrix in place and return the full N×N orthogonal Q or Z.

Performs:
    Qᵗ * A * Z = H
    Qᵗ * B * Z = T
where H is upper Hessenberg and T is upper triangular.
"""
# A and B are overwritten in place with H and T. The function returns the pair
# (Q, Z) as (Q_total', Z_total), where Q_total is the product of all left
# reflectors (newest first) and Z_total the product of all right reflectors
# (newest last). With debug = true every intermediate pair is displayed via py_show.
function hessenberg_triangular_reduction!(A::Matrix{Float64}, B::Matrix{Float64}; debug::Bool = true)
    n = size(A, 1)
    Q_total = Matrix{Float64}(I, n, n)
    Z_total = Matrix{Float64}(I, n, n)

    # ---------------------------------------------------------
    # Step 1 : Triangularize B (QR-type left transformations)
    # ---------------------------------------------------------
    if debug println("=========== Step 1 : S=A, T=B; Triangularize B from the left (QR_steps)");flush(stdout) end
    for j in 1:n-1
        Q = QR_step!(B, j, j)      # internally: B ← Q B
        A .= Q * A                 # apply Q to A once
        Q_total .= Q * Q_total     # prepend: Q_total is the running product of left reflectors
    end
    if debug py_show("S = ",  (a=A,), L"\quad T = ",
             (b=B, per_element_style = (x,i,j,fx)->tril_formatter(x,i,j,fx; k=-1, c2=n) ),
             number_formatter=x->round(x,digits=2)
    ) end

    # ---------------------------------------------------------
    # Step 2 : Hessenberg–Triangular Reduction
    # ---------------------------------------------------------
    if debug println("\n=========== Step 2 : Hessenberg–Triangular Reduction ===");flush(stdout) end
    for k in 1:n-2
        # ---- Left reflector Qₖ: zero below A[k+1,k]
        if debug println("Make column $k of A upper Hessenberg:");flush(stdout) end
        Qk = QR_step!(A, k+1, k)   # internally: A ← Qₖ A
        B .= Qk * B                # Qₖ mixes rows k+1:n, which fills B below its diagonal (the bulge)
        Q_total .= Qk * Q_total

        if debug py_show( "S = ", (a=A, per_element_style = (x,i,j,fx)->tril_formatter(x,i,j,fx;k=-2, c2=k) ), L"\quad",
                 "T = ", (b=B, per_element_style = (x,i,j,fx)->tril_formatter(x,i,j,fx; k=-1) ),
                     number_formatter=x->round(x,digits=2)
        ) end

        # ---- Right reflectors Z : clear left‑of‑diagonal entries in B
        # Rows are processed from the last one up to row k+2. The reflector for row i
        # acts on columns k+1:i only (len = i - k), so columns 1:k of A, which already
        # hold their Hessenberg zeros, are not touched.
        for i in n:-1:(k+2)
            len_i = i - k         # shrinking active length as we go up
            Z = RQ_step!(B, i, i; len=len_i)   # internally: B ← B Z
            A .= A * Z
            Z_total .= Z_total * Z             # append: running product of right reflectors
            if debug
                println("        Remove bulge in T in row  $i (len=$len_i):");flush(stdout)
                py_show( L"\qquad\qquad S = ", (a=A, per_element_style = (x,i,j,fx)->tril_formatter(x,i,j,fx;k=-2, c2=k) ), L"\quad",
                         "T = ", (b=B, per_element_style = (x,i,j,fx)->tril_formatter(x,i,j,fx; k=-1) ),
                         number_formatter=x->round(x,digits=2)
                )
            end
        end
    end

    # H = Q_total * A_in * Z_total, so the Q of the docstring is the transpose of Q_total.
    return Q_total', Z_total
end
;

In [None]:
%%julia
# Demo: reduce a random n×n pair to Hessenberg–triangular form and verify the result.
# S and T are reduced in place; A and B keep the original matrices for comparison.
function test_hessenberg_triangular_qz(n=4)
    S = randn(n, n)
    T = randn(n, n)
    A = copy(S)
    B = copy(T)

    Q, Z = hessenberg_triangular_reduction!(S, T)   # debug output is on by default

    # NOTE: @show prints the expression text, so the names below are part of the output.
    # The originals must be recovered from the reduced pair.
    @show (A ≈ Q*S*Z')
    @show (B ≈ Q*T*Z')
    flush(stdout)

    # Both accumulated transformations must be orthogonal.
    @show (Q'Q ≈ I)
    @show (Z'Z ≈ I)
    flush(stdout)

    # Structure of the reduced pair.
    @show is_upper_hessenberg(S)
    @show is_upper_triangular(T)
    flush(stdout)
end

test_hessenberg_triangular_qz(4);

____
**Observe:**
* Introducing zeros in a column of $A$ (step 2) to drive it to upper Hessenberg form<br>
reintroduces non-zero entries in $B$
* these non-zero entries are removed from the bottom up, using orthogonal matrices that do not affect the zero columns to the right of the bulge.<br>
This keeps the zeros in $A$ intact.

### 3.3.2 QZ Iteration

The QZ algorithm reduces a matrix pencil $(A,B)$ to its generalized (quasi-)Schur form $(S,T)$,<br>
$\qquad$ in which the generalized eigenvalues are read directly from the diagonal blocks.<br>
$\qquad$ It preserves the spectrum and records transformations so that eigenvectors can be recovered from $(S,T)$.

The **QZ algorithm** generalizes the [**Schur decomposition**](Schur_Decomposition.ipynb) by repeatedly applying a similarity transform<br>
$\qquad$ obtained from the QR decomposition of $A = Q R$, i.e., computing $Q^T A Q = R Q,\;\; Q^T B Q$.<br>
$\qquad$ Each iteration step reintroduces a bulge in the $B$ matrix that is removed as before.

____
It therefore obtains two orthogonal (or unitary) matrices $Q$ and $Z$ such that<br>
$\qquad S = Q^H A Z$ and $T = Q^H B Z$, where $(S,T)$ is in generalized (quasi-)Schur form.

$\qquad$ The spectrum is preserved, and the transformations $Q,Z$ allow recovery of eigenvectors from $(S,T)$ (Section 3.4)

In [17]:
%%julia

"""
    qz_eigen_iteration!(A, B, nsteps = 1; debug = true, show_step = 100) -> (A, B, Q, Z)

Perform basic unshifted QZ eigenvalue iterations on a matrix pair (A,B) (optionally in Hessenberg–triangular form).

Each step executes:
    A = Q*R
    A ← R*Q  (so A ← Qᵀ*A*Q)
    B ← Qᵀ*B*Q
then applies a bottom‑up RQ sweep (rows n down to 2) that restores B to
upper‑triangular form; every right reflector of the sweep is applied to A as well.

Args:
    A, B : square Float64 matrices (A Hessenberg‑like, B upper‑triangular); both are overwritten
    nsteps : number of outer iterations
    debug : print intermediate results
    show_step : iteration after which the current pair is displayed once, independently
                of `debug`. The default 100 reproduces the previously hard-coded behaviour;
                a value outside 1:nsteps (e.g. 0) disables this display.

Returns:
    A, B, Q, Z : the updated matrices S = A and T = B together with the accumulated
                 orthogonal transforms, such that  A_in = Q*S*Zᵀ  and  B_in = Q*T*Zᵀ
"""
function qz_eigen_iteration!(A::Matrix{Float64}, B::Matrix{Float64},
                             nsteps::Int=1; debug::Bool=true, show_step::Int=100)
    n = size(A,1)
    Q_total = Matrix{Float64}(I,n,n)   # running product of everything applied from the left
    Z_total = Matrix{Float64}(I,n,n)   # running product of everything applied from the right

    for step in 1:nsteps
        if debug println("=========== QZ Eigen Iteration step $step ============="); flush(stdout) end

        # ---------------------------------------------------------
        # Step 1 : Left QR similarity transform
        if debug println("\n-- Left QR similarity update (A = Q*R; A ← R*Q, B ← Qᵀ B Q)"); flush(stdout) end

        F = qr(A)
        Q = Matrix(F.Q)
        R = Matrix(F.R)

        A .= R * Q
        B .= Q' * B * Q
        Q_total .= Q' * Q_total     # new Q acts on the left  → prepend
        Z_total .= Z_total * Q     # same Q acts on the right → append

        if debug
            py_show("S = ",
                (a=A, per_element_style=(x,i,j,fx)->tril_formatter(x,i,j,fx;k=-2)),
                L"\quad","T = ",
                (b=B, per_element_style=(x,i,j,fx)->tril_formatter(x,i,j,fx;k=-1)),
                number_formatter=x->round(x,digits=2)
            ); flush(stdout)
        end

        # ---------------------------------------------------------
        # Step 2 : Right RQ steps to restore B to upper‑triangular form
        if debug println("\n-- Right RQ sweep to restore T (upper triangular)"); flush(stdout) end

        for i in n:-1:2
            Z = RQ_step!(B, i, i; len=i)   # internally: B ← B Z, zeroing row i left of the diagonal
            A .= A * Z
            Z_total .= Z_total * Z
            if debug
                println("   Remove bulge from T in row $i (len=$i):"); flush(stdout)
                py_show(
                    L"\qquad S=", (a=A, per_element_style=(x,p,q,fx)->tril_formatter(x,p,q,fx;k=-2)),
                    L"\quad T=", (b=B, per_element_style=(x,p,q,fx)->tril_formatter(x,p,q,fx;k=-1)),
                    number_formatter=x->round(x,digits=2)
                ); flush(stdout)
            end
        end

        # One-off snapshot of the pair, shown even when debug is off
        if step == show_step
            py_show(L"S = ",
                (a=A, per_element_style=(x,i,j,fx)->tril_formatter(x,i,j,fx;k=-2)),
                L"\quad T = ",
                (b=B, per_element_style=(x,i,j,fx)->tril_formatter(x,i,j,fx;k=-1)),
                number_formatter=x->round(x,digits=2)
            ); flush(stdout)
        end
    end

    # S = Q_total * A_in * Z_total, hence the returned left factor is the transpose of Q_total
    return A, B, Q_total', Z_total
end
;

In [18]:
%%julia
# Demo: show the reference generalized eigenvalues of a fixed 4×4 pair, display one
# QZ iteration step in detail, then rerun 100 steps silently and verify the result.
function test_qz_iteration()
    n = 4    # NOTE(review): not used below
    A = [1.0 2 3 4;
         5 6 7 8;
         0.1 9 10 11;
         1   0.1 12 13]

    B = [4.0 0.5 0.1 0;
         0.0 3.0 2.1 0.3;
         2.0 0.0 2.0 0.2;
         1.0 -1.0 0.0 1.0]

    py_show("Initial A = ",A, L"\quad B = ",B)
    # Reference values from the library solver (the eigenvectors X are not used)
    λ, X = eigen(A, B)
    py_show( "Generalized eigenvalues: ", round.(λ,digits=2)' )
    println()

    # First pass: a single step with debug output (Qc, Zc are not used afterwards)
    S = copy(A)
    T = copy(B)
    S, T, Qc, Zc = qz_eigen_iteration!(S, T, 1)

    println( "\n.... iterate....\n\n")
    # Second pass: restart from the original pair and run 100 steps without debug output
    S = copy(A)
    T = copy(B)
    S, T, Q, Z = qz_eigen_iteration!(S, T, 100; debug=false)

    # --- Reconstruction check: the original pair must be recovered from (S, T, Q, Z)
    # NOTE: @show prints the expression text, so the names below are part of the output.
    flush(stdout)
    @show (A ≈ Q*S*Z')
    @show (B ≈ Q*T*Z')
    flush(stdout)
    # --- Orthogonality check for accumulated Q and Z
    @show (Q' * Q ≈ I)
    @show (Z' * Z ≈ I);
end
test_qz_iteration();

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>



-- Left QR similarity update (A = Q*R; A ← R*Q, B ← Qᵀ B Q)

<IPython.core.display.Latex object>



-- Right RQ sweep to restore T (upper triangular)
   Remove bulge from T in row 4 (len=4):


<IPython.core.display.Latex object>

   Remove bulge from T in row 3 (len=3):


<IPython.core.display.Latex object>

   Remove bulge from T in row 2 (len=2):

<IPython.core.display.Latex object>



.... iterate....



<IPython.core.display.Latex object>


A ≈ Q * S * Z' = true
B ≈ Q * T * Z' = true
Q' * Q ≈ I = true
Z' * Z ≈ I = true


----
**Observe**:
* The code shows the first step of the QZ iteration, followed by the final result<br>
The optional step 2 reduction to generalized Hessenberg form has been omitted for clarity.
* A similarity transform based on $A$ destroys the zeros in $B$,<br>
consequently each such step is followed by a bulge chasing, applying orthogonal matrices from the right to restore $B$
to upper triangular form.

## 3.4 Eigenvalue and Eigenvector Computation

<div style="background-color:#F2F5A9;color:black;">

**Thm: Eigenvalue Invariance under Orthogonal Reduction**

Let $Q^T A Z = S$ and $Q^T B Z = T$, with $Q$, $Z$ orthogonal matrices.  
$\qquad$ Then the generalized eigenvalues of $(A, B)$ and $(S, T)$ are the same.  
$\qquad$ Moreover, if $(\lambda, y)$ is a generalized eigenpair of $(S, T)$,  
$\qquad$ then $x = Z y$ is the corresponding eigenvector of $(A, B)$.
</div>

Let $(\lambda, y)$ be a generalized eigenpair of $(S, T)$. Then

$\qquad
\begin{align}
S y = \lambda T y \;\;
& \Leftrightarrow \; && Q^T A Z y = \lambda Q^T B Z y \quad &&\text{(definition of } S = Q^T A Z, \; T = Q^T B Z) \\
& \Leftrightarrow \; && A Z y = \lambda B Z y \quad &&\text{(left-multiply both sides by } Q) \\
& \Rightarrow \;     && A x = \lambda B x \quad &&\text{(change of variables)}\;\; x = Z y
\end{align}
$

$\qquad$ Hence $(\lambda, x)$ is a generalized eigenpair of $(A, B)$.

____
The reverse direction follows similarly:  
Assume $A x = \lambda B x$ and define $y = Z^T x$, so that $x = Z y$. Then

$\qquad
\begin{align}
A x = \lambda B x \;\;
& \Rightarrow \;     && Q^T A x = \lambda Q^T B x      \quad &&\text{(apply } Q^T \text{ to both sides)} \\
& \Leftrightarrow \; && Q^T A Z y = \lambda Q^T B Z y  \quad &&\text{(substitute } x = Z y) \\
& \Rightarrow \;     && S y = \lambda T y              \quad &&\text{(by definitions)}
\end{align}
$

$\qquad$ Hence  $(\lambda, y)$ is a generalized eigenpair of $(S, T)$.

### 3.4.1 Eigenvalues

> The eigenvalue estimation for matrix $S$ is mostly identical to the estimation for the QR eigenvalue problem:
> * detecting $2\times 2$ blocks in $A$ by checking if the off-diagonal element is sufficiently large
> * For each such block, calculating the eigenvalues using the block in $A$ and the corresponding block in $B$,<br>
which might be real or complex depending on the discriminant of the corresponding quadratic equation.<br>
The divisor $\lambda_t$ in this case is the product of the corresponding entries on the diagonal of $T$.
> * If there’s no $2\times 2$ block, it’s a single real eigenvalue.

For the $T$ matrix, the corresponding eigenvalues are the diagonal entries.

In [19]:
%%julia
"""
    estimate_generalized_eigenvalue(A, B) -> (eigenvalues, blocks)

Estimate the generalized eigenvalues of a quasi-upper-triangular matrix pair `(A, B)`,
such as the `(S, T)` pair produced by a real QZ (generalized Schur) iteration, and
report the diagonal block structure that was detected.

The diagonal is scanned from the top. A subdiagonal entry `A[i+1, i]` whose magnitude
exceeds the tolerance `1e-12` marks a 2×2 block; its eigenvalues are the roots of
`det(A_blk - λ B_blk) = α λ² + β λ + γ`. Otherwise the entry is a 1×1 block with
eigenvalue `A[i, i] / B[i, i]`.

# Arguments
- `A::Matrix{T}`, `B::Matrix{T}`: quasi-upper-triangular matrices with the same
  number of rows.

# Returns
- `eigenvalues::Vector{Complex{float(T)}}`: one entry per row of `A`
  (identical to `Vector{Complex{T}}` for floating-point `T`). An eigenvalue at
  infinity is reported as `Inf`; an undetermined one (`0 λ² + 0 λ + 0 = 0`) as `NaN`.
- `blocks::Vector{Int}`: block sizes in diagonal order, e.g. `[1, 2, 1]`. A complex
  conjugate pair contributes a single `2`; a 2×2 block with real roots contributes
  `1, 1`; a 1×1 block contributes `1`.
"""
function estimate_generalized_eigenvalue(A::Matrix{T}, B::Matrix{T}) where T
    n = size(A, 1)
    @assert n == size(B, 1) "A and B must have same dimensions"

    # Quotients and square roots are floating point even for integer input,
    # so store the results in the floating-point complex type.
    C = Complex{float(T)}
    λ = C[]
    blocks = Int[]
    tol = 1e-12
    i = 1

    while i <= n
        # --- 2×2 block?
        if i < n && abs(A[i+1, i]) > tol
            a11, a12, a21, a22 = A[i,i], A[i,i+1], A[i+1,i], A[i+1,i+1]
            b11, b12, b21, b22 = B[i,i], B[i,i+1], B[i+1,i], B[i+1,i+1]

            # Coefficients of det(A - λB) = α λ² + β λ + γ = 0
            α = b11*b22 - b12*b21
            β = -(a11*b22 + a22*b11 - a12*b21 - a21*b12)
            γ = a11*a22 - a12*a21
            Δ = β^2 - 4*α*γ

            if abs(α) < tol
                # Singular B block: the quadratic degenerates to β λ + γ = 0,
                # so at least one eigenvalue of the block is infinite.
                if abs(β) >= tol
                    λ1 = C(-γ / β)
                    λ2 = C(Inf)
                elseif abs(γ) >= tol
                    λ1 = λ2 = C(Inf)
                else
                    # All coefficients vanish: the block pencil is singular.
                    λ1 = λ2 = C(NaN)
                end
                append!(blocks, [1, 1])
            elseif abs(Δ) < tol
                # Double real root
                λ1 = C(-β / (2*α))
                λ2 = λ1
                append!(blocks, [1, 1])
            elseif Δ >= 0
                # Two distinct real roots
                λ1 = C((-β + sqrt(Δ)) / (2*α))
                λ2 = C((-β - sqrt(Δ)) / (2*α))
                append!(blocks, [1, 1])
            else
                # Complex conjugate pair
                realpart = -β / (2*α)
                imagpart = sqrt(-Δ) / (2*α)
                λ1 = C(realpart, imagpart)
                λ2 = C(realpart, -imagpart)
                push!(blocks, 2)
            end

            push!(λ, λ1, λ2)
            i += 2
        else
            # --- 1×1 real block
            a_ii, b_ii = A[i,i], B[i,i]
            if abs(b_ii) > tol
                push!(λ, a_ii / b_ii)
            else
                # B[i,i] ≈ 0: eigenvalue at infinity
                push!(λ, C(Inf))
            end
            push!(blocks, 1)
            i += 1
        end
    end

    return λ, blocks
end
;

In [20]:
%%julia
"""
    test_qz_eigenproblem()

Demo: run the naive QZ iteration on a fixed 4×4 pencil `(A, B)`, estimate the
generalized eigenvalues from the resulting pair `(S, T)`, and display them next to
the eigenvalues returned by `eigen(A, B)`.

Everything is displayed through `py_show`. Eigenvalue vectors are turned into rows
with `permutedims` rather than `'`, because `'` is the adjoint and would conjugate
the complex eigenvalues before they are shown.
"""
function test_qz_eigenproblem()
    A = [1.0 2 3 4;
         5 6 7 8;
         0.1 9 10 11;
         1   0.1 12 13]

    B = [4.0 0.5 0.1 0;
         0.0 3.0 2.1 0.3;
         2.0 0.0 2.0 0.2;
         1.0 -1.0 0.0 1.0]

    py_show("Initial A = ",A, L"\quad B = ",B)

    # Reference eigenvalues from the library routine
    λ = eigen(A, B).values

    # Iterate on copies so A and B stay intact; 99 steps avoids the printout at step 100
    S, T = qz_eigen_iteration!(copy(A), copy(B), 99; debug=false)

    λₛ, blocks = estimate_generalized_eigenvalue(S, T)

    py_show("S = ",
        (a=S,
         per_element_style=(x,i,j,fx) ->
             diagonal_blocks_formatter(x,i,j,fx; blocks=blocks)),
        L"\quad T = ",
        (a=T,
         per_element_style=(x,i,j,fx) ->
             diagonal_blocks_formatter(x,i,j,fx; blocks=blocks)),

        number_formatter = x -> round(x, digits = 2)
    )

    # The reference values are reversed so that, for this example, they appear in
    # the same order as the naive estimates.
    py_show( "Naive QZ estimates:",L"\qquad", permutedims(round.(λₛ, digits=2)) )
    py_show( "Generalized eigenvalues: ", permutedims(reverse(round.(λ, digits=2))) )
end
test_qz_eigenproblem()

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

____
**Observe:**
* The QZ algorithm reduces the original matrix pair $(A,B)$ to generalized Schur form $(S,T)$.
* The eigenvalue estimation step reveals two blocks of size $1\times 1$ (real eigenvalues) and a $2\times 2$ block
* The eigenvalue computations agree with the values obtained using a library routine (implementing a far more sophisticated version of the QZ algorithm)

### 3.4.2 Eigenvectors

<div style="background-color:#F2F5A9;color:black;">

**Thm:**
Let $(S, T)$ be a pair of real $n \times n$ matrices in *generalized Schur form*, i.e.
$
S = Q^T A Z, \quad T = Q^T B Z,
$

<div style="padding-left:0.8cm;">
where $S$ and $T$ are block upper triangular with identical $1\times 1$ or $2\times 2$ diagonal block structure.

Then, for any eigenvalue $\lambda$ associated with one diagonal block $(S_{kk}, T_{kk})$,  
the system $\;\;
(S - \lambda T)\,y = 0
$
admits a nontrivial solution obtained by setting all $y_j = 0$ for $j$ below the active block  
and solving upward by block back substitution.
</div></div>

Write the matrix difference in block form

$\qquad
S - \lambda T =
\begin{pmatrix}
B_{11} & B_{12} & \cdots & B_{1m} \\
0      & B_{22} & \cdots & B_{2m} \\
\vdots & \vdots & \ddots & \vdots \\
0      & 0      & \cdots & B_{mm}
\end{pmatrix},\;\;
$
where each $B_{kk} = S_{kk} - \lambda T_{kk}$ is a $1\times1$ or $2\times2$ block.

Since $(S-\lambda T)$ is block upper triangular, the equation
$\;\;
(S - \lambda T) y = 0
$
expands blockwise as

$\qquad
B_{kk}\ y_{k} + \sum_{j>k} B_{kj} y_{j} = 0, \qquad k = 1, \dots, m.
$

Let $\lambda$ correspond to the diagonal block $(S_{pp}, T_{pp})$.
Set all $y_{j} = 0$ for $j > p$ (lower blocks).  
Then the $p$‑th block equation reduces to

$\qquad
B_{pp} y_{p} = (S_{pp} - \lambda T_{pp}) y_{p} = 0.
$

By the definition of $\lambda,\;$ $\det(S_{pp} - \lambda T_{pp}) = 0$,  
so this homogeneous system admits a nontrivial solution $y_{p}$ (unique up to scale).

For all $k < p$, the equations
$\;\;
B_{kk} y_{k} + \sum_{j>k} B_{kj} y_{j} = 0
$  
now involve only known quantities $y_{j}$ for $j > k$.  
Since each $B_{kk}$ is nonsingular for $k \ne p$, each $y_{k}$ is uniquely determined by back substitution.

Thus a complete nontrivial vector $y$ is obtained by:
1. Setting $y_{j} = 0$ for $j > p$,
2. Solving $(S_{pp} - \lambda T_{pp}) y_{p} = 0$ for the active block,
3. Computing $y_{k}$ for $k < p$ recursively.

Therefore, block back substitution produces a valid eigenvector $y$
satisfying $(S - \lambda T)y = 0$.

<div style="background-color:#F2F5A9;color:black;">

**Corollary:** If all diagonal blocks of $(S, T)$ are nondefective (i.e., each $S_{kk} - \lambda T_{kk}$ has a one‑dimensional null space),  
$\qquad$ then this procedure yields one linearly independent eigenvector per block,  
$\qquad$ and hence a complete eigenbasis for the diagonalizable pencil $(A, B)$.
</div>

**Remark.**  
<div style="padding-left:0.8cm;">

If a block $(S_{pp}, T_{pp})$ is *defective*,
then $\dim \ker(S_{pp} - \lambda T_{pp}) < \text{algebraic multiplicity of } \lambda$.  
In this case, the system $(S - \lambda T) y = 0$ has fewer independent solutions than the eigenvalue multiplicity.

The back substitution procedure remains formally valid but produces linearly dependent vectors,  
as all constructed solutions lie within the same one‑dimensional null space.  
A complete set of generalized eigenvectors would then require extending the procedure to higher‑order equations  
$\qquad
(S - \lambda T) y_2 = T y_1, \quad (S - \lambda T) y_3 = T y_2, \; \text{etc.},
$

forming a **Jordan chain**
</div>

In [52]:
%%julia
"""
    schur_eigenvectors(S, T, blocks, λs)

Compute the (right) eigenvectors of a generalized Schur pair (S, T) by block back
substitution on `(S - λ T) y = 0`,
given:
- `blocks`: compressed block structure (e.g. [1,2,1])
- `λs`: corresponding eigenvalues, one per column (a 2×2 block contributes two
  consecutive entries)

The columns are processed from last to first. For the eigenvalue(s) of the active
diagonal block, the entries of `y` below the block are zero, the block entries are
fixed by normalising the last one to 1, and the entries above are obtained by solving
upward one diagonal block at a time (a scalar division for a 1×1 block, a 2×2 solve
for a 2×2 block). `S - λ T` is assumed to be block upper triangular with respect to
`blocks`. Every solve is displayed with `py_show`.

Returns a `ComplexF64` matrix `Y` whose columns satisfy `S * Y ≈ T * Y * Λ`.
The eigenvectors of the original pencil (A, B) are `X = Z * Y`.

Raises an error if `blocks` contains a size other than 1 or 2, or if the block sizes
do not add up to the matrix dimension.
"""
function schur_eigenvectors(S, T, blocks::Vector{Int}, λs::Vector)
    # Colour the entries of y from row `row` downward in the display.
    function red(x, i, j, formatted_x; row=1, color="red")
        return i >= row ? "\\textcolor{$color}{$formatted_x}" : formatted_x
    end

    n = size(S, 1)
    Y = zeros(ComplexF64, n, n)

    # --- Expand blocks to column-wise format (e.g. [1,2,1] → [1,2,2,1])
    block_sizes = Int[]
    for b in blocks
        if b == 1
            push!(block_sizes, 1)
        elseif b == 2
            push!(block_sizes, 2, 2)
        else
            error("Unsupported block size $b (only 1 or 2 allowed)")
        end
    end

    if length(block_sizes) != n
        error("Expanded block_sizes has length $(length(block_sizes)), but matrix size is $n")
    end

    # Fill y[1:top-1] from the already known entries y[top:bottom], where
    # M = S - λ T and rows top:bottom form the active block.
    # Products use `transpose`/matrix multiplication rather than `dot`, because
    # `dot` conjugates its first argument and M is complex for complex λ.
    function back_substitute!(y, M, top, bottom)
        i = top - 1
        while i ≥ 1
            known = (i + 1):bottom
            if block_sizes[i] == 1
                y[i] = -(transpose(M[i, known]) * y[known]) / M[i, i]
                i -= 1
            else
                # Rows i-1 and i are coupled through the subdiagonal entry M[i, i-1],
                # so solve the 2×2 system for both unknowns at once (Cramer's rule).
                r = -(M[(i-1):i, known] * y[known])
                m11, m12 = M[i-1, i-1], M[i-1, i]
                m21, m22 = M[i, i-1], M[i, i]
                d = m11 * m22 - m12 * m21
                y[i-1] = (r[1] * m22 - m12 * r[2]) / d
                y[i]   = (m11 * r[2] - m21 * r[1]) / d
                i -= 2
            end
        end
        return y
    end

    # --- Back substitution loop
    col     = n
    eig_idx = length(λs)

    while col ≥ 1
        bsize = block_sizes[col]

        if bsize == 1
            # ---- 1×1 real block ----
            λ = λs[eig_idx]
            M = S - λ * T
            y = zeros(ComplexF64, n)
            y[col] = 1

            # Back substitution for column col
            back_substitute!(y, M, col, col)

            py_show( "block row ",
                L"%$(col),\quad \lambda = %$(round(λ,digits=2)),\qquad (S - λ T) = ",
                (s=M, per_element_style=(x,i,j,fx)->block_formatter(x,i,j,fx;r1=col,r2=col, c1=col, c2=col)),
                L",\qquad y = ", (y, per_element_style=(x,i,j,fx)->red(x,i,j,fx;row=col)),
                number_formatter = x -> round(x, digits=2)
            )

            Y[:, col] = y
            col -= 1
            eig_idx -= 1

        elseif bsize == 2
            # ---- 2×2 real block → complex conjugate pair ----
            λ₂ = λs[eig_idx]
            λ₁ = λs[eig_idx - 1]

            for (j, λ) in enumerate((λ₁, λ₂))
                M = S - λ * T
                y = zeros(ComplexF64, n)
                # Null vector of the singular 2×2 block, taken from its first row
                y[col] = 1.0
                y[col-1] = -M[col-1, col] / M[col-1, col-1]

                back_substitute!(y, M, col - 1, col)

                py_show( "block rows",
                    L" %$(col-1,col),\quad \lambda = %$(round(λ,digits=2)),\qquad (S - λ T) = ",
                    (s=M, per_element_style=(x,i,j,fx)->block_formatter(x,i,j,fx;r1=col-1,r2=col, c1=col-1, c2=col)),
                    L",\qquad y = ", (y, per_element_style=(x,i,j,fx)->red(x,i,j,fx;row=col-1)),
                    number_formatter = x -> round(x, digits=2)
                )

                # j = 1 → column col-1, j = 2 → column col
                Y[:, col - (2 - j)] = y
            end

            col -= 2
            eig_idx -= 2

        else
            error("Unexpected expanded block size $bsize at column $col")
        end
    end

    return Y
end
;

In [53]:
%%julia
# Demo: reduce a fixed 4×4 pencil (A, B) with the naive QZ iteration, estimate the
# eigenvalues and block structure of (S, T), compute the Schur eigenvectors Y and
# return the eigenvectors of the original pencil, Zc * Y.
function test_full_qz_eigenproblem()
    A = [1.0  2.0  3.0  4.0;
         5.0  6.0  7.0  8.0;
         0.1  9.0 10.0 11.0;
         1.0  0.1 12.0 13.0]

    B = [4.0  0.5  0.1  0.0;
         0.0  3.0  2.1  0.3;
         2.0  0.0  2.0  0.2;
         1.0 -1.0  0.0  1.0]

    # Iterate on copies of A and B; 99 steps avoids the printout at step 100
    S, T, _, Zc = qz_eigen_iteration!(copy(A), copy(B), 99; debug=false)

    λ, blocks = estimate_generalized_eigenvalue(S, T)

    # Map the eigenvectors of (S, T) back to eigenvectors of (A, B)
    return Zc * schur_eigenvectors(S, T, blocks, λ)
end
py_show(
    "\nEigenvectors X = ",
    test_full_qz_eigenproblem(),
    number_formatter = x -> round(x, digits=3),
)

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

**Observe:**
* each block $k$ is solved by setting $y_k = 1,\; y_{k+1} = \dots = y_n = 0$
* the remaining $y$ values are obtained by back substitution for $(S-\lambda T) y = 0$
* the eigenvectors $X$ of the original pencil are $X = Z Y$.

#### **Limitations: Degeneracy and Defectiveness**

Block back substitution assumes that each eigenvalue $\lambda$ gives rise to a corresponding eigenvector $y$ via:
$\qquad
(S - \lambda T) y = 0.
$

This process works when the matrix pencil $(A, B)$ (or $(S, T)$) is **diagonalizable** — i.e., has a full set of linearly independent eigenvectors.

---

##### What if $(A, B)$ is defective?

- A **defective** eigenvalue has **algebraic multiplicity greater than geometric multiplicity**.
- In this case, the homogeneous system $(S - \lambda T) y = 0$ may have only one linearly independent solution, even if $\lambda$ appears multiple times.
- Block back substitution will still **solve one vector per eigenvalue**, but the resulting matrix $Y$ will have **linearly dependent columns**.
- This means the method will not recover a **complete eigenvector basis**.

---

##### What does this look like in practice?

- The matrix $S - \lambda T$ may have a **null space of dimension < multiplicity of $\lambda$**.
- Solving the system for each $\lambda$ will yield **fewer than $n$ independent eigenvectors**.
- If you try to verify $X^T B X = I$ (symmetric-definite case) or to diagonalize the pencil via $X^{-1} B^{-1} A X = \Lambda$, it will fail because $X$ is singular.

____
<div style="display: flex; margin-top: 1em; margin-bottom: 1em;">
<!-- -------------------- Python box -->
<div style="width: 40em; height: 7.5em; padding-right: 1cm;
            background: #f8f8f8; border: 1px solid #ccc; border-radius: 4px;
            font-family: monospace; font-size: 90%; line-height: 1.2;
            display: flex; align-items: flex-start; padding: 0.6em 0.75em;
            box-sizing: border-box;">
  <div>
    <div style="font-weight: bold; margin-bottom: 0.2em;">Python</div>
    <div># A, B arbitrary square matrices</div>
    <div>from scipy.linalg import qz, eig</div>
    <div>S, T, Q, Z = qz(A, B, output='real')  # or output='complex'</div>
    <div>lam, X = eig(A, B)</div>
  </div>
</div>
<!-- -------------------- Julia box -->
<div style="width: 40em; height: 7.5em; margin-left: 1cm;
            background: #f8f8f8; border: 1px solid #ccc; border-radius: 4px;
            font-family: monospace; font-size: 90%; line-height: 1.2;
            display: flex; align-items: flex-start; padding: 0.6em 0.75em;
            box-sizing: border-box;">
  <div>
    <div style="font-weight: bold; margin-bottom: 0.2em;">Julia</div>
    <div># A, B arbitrary square matrices</div>
    <div>using LinearAlgebra</div>
    <div>S, T, Q, Z = schur(A, B)</div>
    <div>λ, X = eigen(A, B)</div>
  </div>
</div>
</div>

<strong style="margin-top: 0.5em;">
It is the user's responsibility to check for degeneracy:</strong> if the matrix pair is defective, returned eigenvectors may be linearly dependent.

## 3.5 Diagonalization

Let $A, B$ be a pair of non-degenerate square matrices of size $n \times n$  
with generalized eigenvalue matrix $\Lambda$ (diagonal) and corresponding eigenvector matrix $X$.

$\qquad A x = \lambda B x \Rightarrow A X = B X \Lambda \Rightarrow X^{-1} A X = X^{-1} B X \Lambda$

In [88]:
%%julia
"""
    diagonalize_pencil()

Demo: build a random 4×4 pencil `(A, B)` with `B` symmetric positive definite,
compute its generalized eigenpairs with `eigen(A, B)`, express `A` and `B` in the
eigenvector basis, and display the results with `py_show`. Finally print the
residual norm of `A X - B X Λ`.

Reseeds the global random number generator with `Random.seed!(22)`.
Returns `nothing`.
"""
function diagonalize_pencil()
    Random.seed!(22)
    n = 4
    A = randn(n, n)

    # Make B symmetric positive definite
    R = randn(n, n)
    B = R*R' + n*I

    # Step 1: Compute generalized eigenvalues and right eigenvectors
    λ, X = eigen(A, B)  # A x = λ B x

    # Step 2: A and B in eigenvector basis/ naive inverse computation
    X_inv   = inv(X)
    A_tilde = X_inv * A * X
    B_tilde = X_inv * B * X

    # Step 3: Show results.
    # permutedims (not ') makes the row: ' is the adjoint and would conjugate
    # complex eigenvalues before they are displayed.
    py_show("Generalized eigenvalues (λ): ", permutedims(round.(λ, digits=3)))
    py_show( L"\tilde{A} = ", A_tilde, L",\quad \tilde{B} = ", B_tilde, number_formatter=x->round(x,digits=3))

    # B̃⁻¹ Ã via a linear solve instead of forming the inverse explicitly
    py_show( L"\tilde{B}^{-1} \tilde{A} = ", B_tilde \ A_tilde, " is diagonal", number_formatter=x->round(x,digits=3))

    # Verify: A X ≈ B X Λ
    residual = norm(A * X - B * X * Diagonal(λ))
    println("\n‖A X - B X Λ‖ = ", residual); flush(stdout)
    return nothing
end
diagonalize_pencil()

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>


4.790100905289266e-15


# 4. Detecting $\lambda=\infty$ in practice

In QZ form, $T_{ii}\approx 0$ indicates $\lambda_i=\infty$ (since $\lambda_i=S_{ii}/T_{ii}$).

Equivalently, flip to the **reciprocal pencil**
$\;\ B - \mu A \; $
and look for eigenvalues at $\mu=0$.


In [59]:
%%julia
# Demo: a regular pencil with singular B has one eigenvalue at infinity.
# Shows the eigenpairs of (A, B) and then those of the swapped pencil (B, A).
function infinite_eigenvalue_example()
    A = [1.0 0.0; 0.0 2.0]
    B = [1.0 0.0; 0.0 0.0]
    two_digits = x -> round(x, digits=2)

    # Pencil (A, B)
    forward = eigen(A, B)
    py_show(
        "A = ", A, L",\quad B = ", B,
        L",\qquad \lambda = ", forward.values', L",\quad X = ", forward.vectors,
        number_formatter=two_digits,
    )

    # Swapped pencil (B, A)
    swapped = eigen(B, A)
    py_show(
        "B = ", B, L",\quad A = ", A,
        L",\qquad \lambda = ", swapped.values', L",\quad X = ", swapped.vectors,
        number_formatter=two_digits,
    )
end
infinite_eigenvalue_example()

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

# 5. Take Away

Practical recipes and pitfalls
- **If $A=A^T$, $B=B^T \succ 0$**: use the **Cholesky route** (symmetric-definite). You get real eigenvalues and $B$-orthonormal eigenvectors.  
- **Otherwise (regular pencils)**: use **QZ**; read eigenvalues from $S_{ii}/T_{ii}$, detect $\lambda=\infty$ via $T_{ii}\approx 0$.  
- **Never form $B^{-1}A$ explicitly**; prefer triangular solves or Schur/QZ.  
- **Scale/equilibrate** poorly scaled inputs before solving (see Notebook 3).  
- **Always check residuals**; when in doubt, verify with multiple methods (e.g., Cholesky vs QZ) in cases where both apply.