#### Load Libraries and Demo Code

In [1]:
import numpy as np
from scipy.linalg import eig
from scipy.interpolate import CubicSpline
from scipy.interpolate import PchipInterpolator
from scipy.optimize import minimize_scalar

import param
import holoviews as hv
import panel as pn
hv.extension('bokeh', logo=False)
pn.extension('katex')

In [2]:
class GEPRayleighViewer(pn.viewable.Viewer, param.Parameterized):
    """Interactive view of the generalized Rayleigh quotient of a 2x2 pair (A, B).

    The quotient ``rho(theta) = (x^T A x) / (x^T B x)`` is sampled for the unit
    vectors ``x(theta) = (cos theta, sin theta)`` with ``theta`` in ``[0, pi]``.
    The entry ``B[1, 1]`` (0-based indexing) is driven by the ``alpha`` slider.
    The real generalized eigenvalues of ``(A, B)`` are listed next to the plot
    and marked on the curve at the angle of their eigenvector.

    Parameters
    ----------
    A : array_like, shape (2, 2)
        Numerator matrix of the quotient (real-valued).
    B_template : array_like, shape (2, 2), optional
        Denominator matrix; its ``[1, 1]`` entry is overwritten by ``alpha``
        on every update.  Defaults to the identity matrix.
    **params
        Forwarded to ``param.Parameterized`` (e.g. ``alpha=2.0``).
    """

    alpha = param.Number(default=1.5, bounds=(0.1, 10.0), doc="B[2,2] entry")

    def __init__(self, A, B_template=None, **params):
        super().__init__(**params)
        self.A = np.asarray(A, dtype=float)
        if B_template is None:
            B_template = np.eye(self.A.shape[0])
        # Force a float dtype: an integer template would silently truncate
        # alpha when it is written into B[1, 1].
        self.B_template = np.asarray(B_template, dtype=float)

    @pn.depends('alpha')
    def view(self):
        """Return the plot, the ``alpha`` slider and a text summary for the current ``alpha``."""
        B = self.B_template.copy()
        B[1, 1] = self.alpha

        theta_rad = np.linspace(0, np.pi, 600)
        theta_deg = np.degrees(theta_rad)

        # Evaluate both quadratic forms for all directions at once;
        # column n of X is the unit vector x(theta_n).
        X = np.stack([np.cos(theta_rad), np.sin(theta_rad)])
        num = np.einsum('in,ij,jn->n', X, self.A, X)
        den = np.einsum('in,ij,jn->n', X, B, X)

        # Directions with a (numerically) vanishing denominator are left as NaN.
        usable = np.abs(den) > 1e-14
        rhos = np.full(theta_rad.shape, np.nan)
        rhos[usable] = num[usable] / den[usable]

        finite = np.isfinite(rhos)
        theta_valid = theta_deg[finite]
        rho_valid = rhos[finite]

        i_min = np.argmin(rho_valid)
        i_max = np.argmax(rho_valid)
        theta_min, rho_min = theta_valid[i_min], rho_valid[i_min]
        theta_max, rho_max = theta_valid[i_max], rho_valid[i_max]

        # Generalized eigenpairs of (A, B).  Keep only real, finite eigenvalues:
        # a singular B produces infinite ones that cannot be placed on the curve.
        vals, vecs = eig(self.A, B)
        is_real = np.isreal(vals) & np.isfinite(vals)
        real_vals = vals[is_real].real
        vecs = vecs[:, is_real].real
        # Fold the eigenvector angle into [0, 180) since x and -x give the same quotient.
        theta_eig = np.degrees(np.arctan2(vecs[1], vecs[0]) % np.pi)
        # At an eigenvector the quotient equals the eigenvalue, so these points lie on the curve.
        rho_eig = np.array([(v @ self.A @ v) / (v @ B @ v) for v in vecs.T])

        curve = hv.Curve((theta_valid, rho_valid), 'θ (°)', 'ρ(θ)').opts(width=400, height=300)
        # Same dimensions as the curve so both share one pair of axes.
        eig_pts = hv.Scatter(list(zip(theta_eig, rho_eig)), 'θ (°)', 'ρ(θ)').opts(
            color='green', marker='diamond', size=8)

        hlines = [hv.HLine(rho_min).opts(color='red', line_dash='dotted'),
                  hv.HLine(rho_max).opts(color='red', line_dash='dotted')]
        vlines = [hv.VLine(theta_min).opts(color='red', line_dash='dotted'),
                  hv.VLine(theta_max).opts(color='red', line_dash='dotted')]

        plot = curve * hv.Overlay(hlines + vlines)
        # The eigenvalue markers were previously built but never drawn.
        if len(real_vals) > 0:
            plot = plot * eig_pts
        title = f"Rayleigh Quotient & Eigenvalues (α = {self.alpha:.2f})"

        return pn.Row(
            pn.pane.HoloViews(plot.opts(title=title, show_grid=True)),
            pn.Column(
                pn.Param(self, parameters=["alpha"]),
                pn.pane.Markdown(
                    f"- Found {len(real_vals)} real eigenvalue(s): {np.round(real_vals, 4)}\n\n"
                    f"- **ρ_min** = {rho_min:.4f} at **θ** = {theta_min:.2f}°\n"
                    f"- **ρ_max** = {rho_max:.4f} at **θ** = {theta_max:.2f}°"
                )
            )
        )

    def __panel__(self):
        """Return the Panel layout that is shown when the viewer is displayed or served."""
        return pn.Row(self.view)


In [3]:
class LDAViewer(pn.viewable.Viewer, param.Parameterized):
    """Interactive two-class Linear Discriminant Analysis demo in 2D.

    Two Gaussian point clouds of 50 samples each are drawn, the LDA direction
    is obtained from the generalized eigenproblem ``S_B w = lambda S_W w``, and
    the data, the projection direction, the projected histograms and the
    resulting classification errors are displayed.

    The samples are drawn from a generator seeded with ``seed``, so moving a
    slider changes only the quantity it controls instead of also redrawing
    unrelated random data.
    """

    spread1 = param.Number(default=1.0, bounds=(0.1, 2.0), doc="Class 1 spread")
    spread2 = param.Number(default=1.0, bounds=(0.1, 2.0), doc="Class 2 spread")
    r = param.Number(default=4.0, bounds=(0.0, 10.0), doc="Distance between class centers")
    theta = param.Number(default=45.0, bounds=(0.0, 180.0), doc="Direction angle (°)")
    seed = param.Integer(default=0, bounds=(0, None), doc="Seed of the random sample generator")

    @pn.depends('spread1', 'spread2', 'r', 'theta', 'seed')
    def view(self):
        """Return the full layout (plots, statistics, sliders) for the current parameters."""
        center1 = np.array([2.0, 3.0])
        rad = np.radians(self.theta)
        center2 = center1 + self.r * np.array([np.cos(rad), np.sin(rad)])

        # A freshly seeded generator per update keeps the demo reproducible.
        # The spread parameters scale the covariance matrix (i.e. they are variances).
        rng = np.random.default_rng(self.seed)
        X1 = rng.multivariate_normal(center1, self.spread1*np.eye(2), 50)
        X2 = rng.multivariate_normal(center2, self.spread2*np.eye(2), 50)

        # Compute means, scatter matrices
        mean1, mean2 = X1.mean(axis=0), X2.mean(axis=0)
        mean_total = 0.5*(mean1 + mean2)
        # Within-class part: sum of the two (biased) class covariance matrices.
        S_W = np.cov(X1.T, bias=True) + np.cov(X2.T, bias=True)
        # Between-class part: rank-one matrix built from the difference of the class means.
        mean_diff = (mean1 - mean2).reshape(-1, 1)
        S_B = mean_diff @ mean_diff.T

        # The eigenvector of the largest eigenvalue maximizes w^T S_B w / w^T S_W w.
        eigvals, eigvecs = eig(S_B, S_W)
        idx = np.argmax(np.real(eigvals))
        lambda_max = eigvals[idx].real
        w_opt = eigvecs[:, idx].real
        w_opt /= np.linalg.norm(w_opt)

        # Project data
        proj1_vals = (X1 - mean_total) @ w_opt
        proj2_vals = (X2 - mean_total) @ w_opt

        # Classification threshold and errors
        # The sign of w_opt is arbitrary, so check which class lies on which side.
        thresh = 0.5*(proj1_vals.mean() + proj2_vals.mean())
        if proj1_vals.mean() < proj2_vals.mean():
            err1 = np.mean(proj1_vals > thresh)
            err2 = np.mean(proj2_vals < thresh)
        else:
            err1 = np.mean(proj1_vals < thresh)
            err2 = np.mean(proj2_vals > thresh)
        total_error = 0.5 * (err1 + err2)

        # Main scatter + projection line + centers
        line = np.array([mean_total - 3*w_opt, mean_total + 3*w_opt])
        pts1 = hv.Scatter(X1, 'x', 'y').opts(color='blue', alpha=0.6)
        pts2 = hv.Scatter(X2, 'x', 'y').opts(color='red', alpha=0.6)
        # Dashed, as described in the caption that accompanies the demo.
        direction = hv.Curve((line[:,0], line[:,1]), 'x', 'y').opts(color='black', line_dash='dashed')
        centers = hv.Scatter([center1], 'x', 'y').opts(color='blue', marker='diamond', size=10) * \
                  hv.Scatter([center2], 'x', 'y').opts(color='red', marker='diamond', size=10)
        main_plot = (pts1 * pts2 * direction * centers).opts(
            width=450, height=350,
            title=f"LDA Projection (λ = {lambda_max:.2f}, Ave Error = {100*total_error:.1f}%)",
            show_legend=False, show_grid=True
        )

        # Combined histogram with transparency and legend
        h1 = hv.Histogram(np.histogram(proj1_vals, bins=20), label='Class 1').opts(
            alpha=0.4, color='blue'
        )
        h2 = hv.Histogram(np.histogram(proj2_vals, bins=20), label='Class 2').opts(
            alpha=0.4, color='red'
        )

        hist = (h1 * h2).opts(
            width=400, height=220,
            show_legend=True, legend_position='bottom',
            xlabel='LDA Projection Score',
            title='Projected Class Distributions along LDA Direction'
        )

        # Stats pane
        stats = pn.pane.Markdown(f"""
**λ (max eigenvalue):** {lambda_max:.2f}<br>
**Error Class 1:** {err1*100:.1f}%<br>
**Error Class 2:** {err2*100:.1f}%<br>
**Average Error:** {total_error*100:.1f}%
""")

        return pn.Column(
            pn.Row(hv.render(main_plot, backend='bokeh'),
                   pn.Column( pn.Row(hv.render(hist, backend='bokeh')), pn.Row( pn.Spacer(width=40), stats,))
            ),
            pn.Row(
                pn.Param(self, parameters=["spread1", "spread2"], name="Class Spread"),
                pn.Param(self, parameters=["r", "theta"], name="Class Position")
            )
        )

    def __panel__(self):
        """Return the Panel layout that is shown when the viewer is displayed or served."""
        return pn.Column(self.view)


# 
<div style="height:2cm;">
<div style="width:100%;text-align:center;">
<strong style="height:100px;color:darkred;font-size:40px;">
The Generalized Eigenvalue Problem
</strong>
</div></div>

# 1. Motivation

In many advanced engineering and data science problems, we encounter *pairs* of matrices $A$ and $B$ acting on the same space. This leads us beyond the familiar territory of eigenvalues and eigenvectors of a single matrix, into the realm of the **generalized eigenvalue problem (GEP)**

$\qquad
A x = \lambda B x
$

This notebook introduces the GEP through the lens of **Rayleigh quotients**, offering geometric intuition, variational interpretations, and practical applications. We’ll explore how this framework reveals optimal directions in mechanical systems, statistical classification, and signal separation — laying the groundwork for the **Generalized Singular Value Decomposition (GSVD)** in the next notebook.

The pair $(A, B)$ defines a **matrix pencil**
$\;\; \color{blue}{A - \lambda B}$

This is a parameterized family of matrices. The scalar values $\lambda$ for which $A - \lambda B$ becomes singular<br>
$\qquad$ (i.e., $\det(A - \lambda B) = 0$) are called **generalized eigenvalues**.

Such problems naturally arise in contexts involving **two quadratic forms** acting on the same space:
$\;\; x^H A\ x \;\; \text{vs.} \;\; x^H B\ x$

At a **generalized eigenvector** $x$, these forms are in exact balance:<br>
$\qquad A x = \lambda\ B x \quad \Rightarrow \quad x^H A\ x = \lambda\ x^H B\ x$

# 2. The Generalized Rayleigh Quotient

To better understand the directional behavior of matrix pairs $(A, B)$, we introduce a scalar quantity<br>
that encodes their relative action along a given vector: the **generalized Rayleigh quotient**

<div style="float:left;width:100%;background-color:#F2F5A9;color:black;">

**Definition (Generalized Rayleigh Quotient):**
Let $A$ and $B$ be real symmetric (more generally, Hermitian) matrices, and let $x \in \mathbb{R}^n$ (or $\mathbb{C}^n$).
<div style="float:left;padding-left:1cm;background-color:#F2F5A9;color:black;">

The **generalized Rayleigh quotient** of $x$ with respect to $(A, B)$ is defined as

$\qquad \rho(x) = \displaystyle\frac{x^H\ A\ x}{x^H\ B\ x}$

provided $x^H B x \neq 0$.
</div>
</div>

This quotient expresses how strongly the quadratic form $A$ acts relative to $B$ in the direction of a complex vector $x \in \mathbb{C}^n$.

**When both $A$ and $B$ are Hermitian,** the Rayleigh quotient $\rho(x)$ is real-valued and lends itself to variational analysis.<br>
Since $\rho(x)$ is homogeneous of degree zero (i.e., $\rho(c\,x) = \rho(x)$ for every scalar $c \neq 0$), we may normalize the denominator and consider

$\qquad \text{maximize} \quad x^H A\ x \quad \text{subject to} \quad x^H B\ x = 1$

This leads to the Lagrangian

$\qquad L(x, \lambda) = x^H A\ x - \lambda\ (x^H B\ x - 1)$

$\qquad \therefore \quad \nabla_x L = 2 A x - 2 \lambda B x = 0 \quad \Rightarrow \quad A x = \lambda B x$

Thus, the **stationary values** of $\rho(x)$ occur exactly at the **generalized eigenvectors** of the matrix pair $(A, B)$,<br>
$\qquad$ with $\rho(x) = \lambda$ at those vectors.

> **Note:**  
> • If $B$ is **positive definite**, the constraint surface is compact, and $\rho(x)$ attains well-defined maximum and minimum values.  
> • If $B$ is **not positive definite**, the constraint surface may be unbounded or degenerate, and $\rho(x)$ may not attain finite extrema.

---

<details>
<summary><strong>What does the Rayleigh quotient mean in the complex case?</strong></summary>

- If both $A$ and $B$ are **Hermitian**, then $x^H A x$ and $x^H B x$ are **real-valued** for all $x \in \mathbb{C}^n$.<br>
  The generalized Rayleigh quotient
  $\;\; \rho(x) = \dfrac{x^H A x}{x^H B x}$<br>
  is therefore real-valued and meaningful to optimize.<br>
  Stationary values of $\rho(x)$ correspond to generalized eigenvectors of the pair $(A, B)$,<br>
  with $\rho(x) = \lambda$ at such vectors.
    - If **$B$ is also positive definite**, the constraint $x^H B x = 1$ defines a compact manifold (generalized unit sphere).<br>
      The quotient $\rho(x)$ is bounded and guaranteed to attain minimum and maximum values.
    - If **$B$ is not positive definite**, then the constraint set may be non-compact or degenerate.<br>
      In this case, $\rho(x)$ may become unbounded or undefined on some directions, and extrema may not exist.
- If either $A$ or $B$ is **not Hermitian**, then $x^H A x$ or $x^H B x$ may be **complex-valued**,<br>
  making the Rayleigh quotient $\rho(x)$ complex.<br>
  In such cases, the notion of "maximizing" $\rho(x)$ becomes ambiguous, and additional structure<br>
  (e.g., real part, modulus, or optimization in $\mathbb{C}$) is needed to interpret or use it.
____
<details>
<summary><strong>When is the Rayleigh quotient still useful?</strong></summary>

Even when $A$ is not Hermitian and/or $B$ is not positive definite, the Rayleigh quotient<br>
remains algebraically meaningful. While extremal values may no longer exist or be real,<br>
$\rho(x)$ still captures the **relative action** of $A$ vs. $B$ in direction $x$.

It is useful in
- Non-Hermitian spectral analysis,
- Indefinite energy problems,
- Control and stability diagnostics.

Interpretation depends on context — especially if $\rho(x)$ becomes complex or unbounded.
</details>
</details>

## 2.1. Numerical Example

Let’s visualize how the Rayleigh quotient behaves in a simple 2D case.

Consider $A = \begin{pmatrix} 3 & 2 \\ 1 & 4 \end{pmatrix}$ and
$B = \begin{pmatrix} 2 & 0.5 \\ 0.1 & \alpha \end{pmatrix}$, where $\alpha$ is a parameter.

The following plot shows the Rayleigh quotient $\rho(x)$ as $x$ ranges over unit vectors
$x(\theta) = (\cos\theta, \sin\theta)^T$, with $\theta \in [0^\circ, 180^\circ]$,<br>
$\qquad$ with a slider controlling the value of $\alpha$.

**Remark:** Numerical issues (e.g., when the denominator of the Rayleigh quotient approaches 0) are handled only crudely in this demo code: directions with a near-zero denominator are simply masked out.

#### Interactive Exploration: Rayleigh Quotient Behavior

Use the slider below to vary the parameter $\alpha$ in matrix $B$.
- Observe how the Rayleigh quotient curve $\rho(\theta)$ changes.
- Red lines show the minimum and maximum values of $\rho(x)$ over directions $x(\theta)$.

**Observe** how increasing $\alpha$ affects the number and position of the real eigenvalues reported next to the plot.

In [4]:
# Deliberately non-symmetric pair; the slider overwrites the lower-right entry of B.
A = np.array([
    [3.0, 2.0],
    [1.0, 4.0],
])
B_template = np.array([
    [2.0, 0.5],
    [0.1, 1.5],
])
GEPRayleighViewer(A, B_template=B_template).servable()

**Generalized Rayleigh Quotient**

The plot shows the Rayleigh quotient
$\;\; \rho(x) = \frac{x^T A x}{x^T B x}\;\;$
as a function of direction $x = (\cos\theta, \sin\theta)^T$.<br>
Red lines mark the min and max values of $\rho(x)$.<br>
Numerical sensitivity may affect accuracy near singular configurations.

> **Numerical Note:**<br>
> The Rayleigh quotient $\rho(x)$ becomes unstable when the denominator $x^T B x$ approaches zero.<br>
> This happens in directions $x$ where the quadratic form of $B$ (nearly) vanishes, which requires the symmetric part of $B$ to be singular or indefinite; there the quotient may spike or become undefined.<br>
> In the plot, such values are replaced by NaN and omitted to avoid misleading artifacts.

#### **Limitations of the Rayleigh Quotient Viewpoint**

This variational approach works well **only under certain assumptions**, namely
- $A$ is Hermitian (i.e., $A^H = A$)
- $B$ is Hermitian positive definite (i.e., $x^H B x > 0$ for all $x \ne 0$)

Under these conditions, $\rho(x)$ is real-valued and meaningful as an optimization objective.<br>
The constraint surface $x^H B x = 1$ is compact, and $\rho(x)$ attains a maximum and minimum.

However, these conditions are not always satisfied:
- If $B$ is not positive definite, the constraint set may be unbounded or empty.
- If $A$ or $B$ are not Hermitian, then $\rho(x)$ may be **complex-valued**, and its optimization is ill-posed.

#### Toward a More Complete Picture

The Rayleigh quotient framework focuses on **optimal directions**<br>
$\qquad$ those that maximize or minimize the ratio $\rho(x) = \dfrac{x^H A x}{x^H B x}$.<br>
$\qquad$ But in many applications, we care about **how $A$ and $B$ behave in *all* directions**, not just the extremal ones.

For example:
- In **signal processing**, we may want to decompose signals based on their relative response to two systems.
- In **multivariate analysis**, it's not enough to find the best discriminant — we may need a full basis that respects both $A$ and $B$.

However, the Rayleigh approach offers only a 1D view — one direction at a time.

---

To uncover the **full relationship** between $A$ and $B$, we need a deeper tool: the **Generalized Singular Value Decomposition (GSVD)**.
- It works for general (even non-square, non-Hermitian) matrices.
- It constructs a **shared coordinate system** for both matrices.
- It reveals how $A$ and $B$ act **jointly** across the entire space.

We will introduce the GSVD and show how it generalizes both eigenvalue problems and the standard SVD in the next notebooks.

## 2.2 Summary: Generalized Eigenvalue Problem and Rayleigh Quotient

<div style="float:left;padding-left:1cm;">

| Concept                          | Expression / Interpretation                                                                 |
|:---------------------------------|:------------------------------------------------------------------------------------------- |
| **Generalized Eigenvalue Problem (GEP)**| $A x = \lambda B x \quad$ where $x \ne 0$                                  |
| **Matrix Pencil**                | $A - \lambda B$ is singular when $\det(A - \lambda B) = 0$                                  |
| **Generalized Rayleigh Quotient**| $\rho(x) = \dfrac{x^H A x}{x^H B x}$ (defined when $x^H B x \ne 0$)                         |
| **Variational Characterization** ($A$ Hermitian, $B$ positive definite)  $\quad$ | $\max \; x^H A x \quad \text{subject to} \quad x^H B x = 1$                                 |
| **Stationary Point Condition**   | $\nabla_x \rho(x) = 0 \quad \Rightarrow \quad A x = \lambda B x$                            |
</div>

# 3. GEP Applications

## 3.1. Vibration Analysis

The concepts above are not merely theoretical — the GEP arises naturally in physical and engineering systems.

A classic application of the generalized eigenvalue problem arises in **mechanical vibrations**, particularly in systems of coupled oscillators.

#### **Physical Setup**

Consider a system of $n$ masses connected by springs.<br>
The motion of the system can be described by Newton's second law, leading to a second-order matrix differential equation

$\qquad
M \ddot{x}(t) + K x(t) = 0
$
- $M$ is the **mass matrix** — symmetric and positive definite.
- $K$ is the **stiffness matrix** — symmetric and typically positive semi-definite.
- $x(t) \in \mathbb{R}^n$ describes the displacement of the masses from equilibrium.

#### **Modal Analysis**

Assuming a solution of the form $x(t) = v e^{i \omega t}$, we substitute into the equation

$\qquad
M (-\omega^2 v) + K v = 0 \quad \Rightarrow \quad K v = \omega^2 M v
$

This is a **generalized eigenvalue problem** of the form

$\qquad
K x = \lambda M x \qquad \text{with } \lambda = \omega^2
$

The eigenvalues $\lambda$ yield squared natural frequencies $\omega^2$, and the eigenvectors are the **mode shapes** of the system.

#### **Example: Two Coupled Masses**

We take two masses $m_1 = 2$ and $m_2 = 1$ coupled by springs, with

$\qquad M = \begin{pmatrix} 2 & 0 \\ 0 & 1 \end{pmatrix}, \qquad K = \begin{pmatrix} 6 & -2 \\ -2 & 4 \end{pmatrix}$

and solve $K v = \omega^2 M v$ numerically.
- The eigenvalues are $\omega^2 = 2$ and $\omega^2 = 5$, i.e. the natural frequencies are $\omega = \sqrt{2} \approx 1.414$ and $\omega = \sqrt{5} \approx 2.236$.
- The mode shape of the lower frequency is proportional to $(1, 1)^T$: both masses move in phase.
- The mode shape of the higher frequency is proportional to $(1, -2)^T$: the masses move in opposite directions.

In [5]:
# Two-degree-of-freedom system: diagonal mass matrix and coupled stiffness matrix.
M = np.diag([2.0, 1.0])
K = np.array([
    [ 6.0, -2.0],
    [-2.0,  4.0],
])

# Solve K v = ω² M v.  eig returns complex arrays, so only the real parts are used below.
w2, modes = eig(K, M)
ω = np.sqrt(w2.real)

print('Natural frequencies (ω):', ω)
print('Mode shapes:', modes.real, sep='\n')

Natural frequencies (ω): [1.41421356 2.23606798]
Mode shapes:
[[ 0.70710678 -0.4472136 ]
 [ 0.70710678  0.89442719]]


**Reference:**  [**MIT Open Courseware** Modal Analysis: Orthogonality, Mass Stiffness, Damping Matrix](https://www.youtube.com/watch?v=OxcCPTc_bXw)

## 3.2 Multivariate Statistics: Linear Discriminant Analysis (LDA)

#### Problem Statement and Objective

Project high-dimensional data onto a line that best separates two classes,
where
- Each class is modeled as a multivariate **Gaussian distribution**.
- Classes differ in their **means**, but may share a similar **covariance** structure.
- The optimal projection maximizes **between-class variance** and minimizes **within-class variance**.

#### Generalized Eigenvalue Formulation

Let<br>
$\qquad C_i$ = samples from class $i$<br>
$\qquad \mu_i$ = mean of class $i$<br>
$\qquad \mu$ = overall mean<br>
$\qquad N_i$ = number of samples in class $i$

and define
- **Within-class scatter matrix:**<br>
  $\qquad S_W = \sum_{i=1}^2 \sum_{x \in C_i} (x - \mu_i)(x - \mu_i)^T$<br>
  It captures the spread **within** each class.
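- **Between-class scatter matrix:**<br>
  $\qquad S_B = \sum_{i=1}^2 N_i (\mu_i - \mu)(\mu_i - \mu)^T$<br>
  It captures the separation **between** class means.<br>
  For two classes this reduces to the rank-one matrix $S_B = \frac{N_1 N_2}{N_1 + N_2}\,(\mu_1 - \mu_2)(\mu_1 - \mu_2)^T$.

> **Note:** The demo code below uses the unweighted form $(\mu_1 - \mu_2)(\mu_1 - \mu_2)^T$ and the class covariance matrices in place of the scatter sums.<br>
> With equal class sizes this only rescales $\lambda$; the optimal direction $w$ is unchanged.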

---

LDA selects a projection direction $w \in \mathbb{R}^n$ that maximizes the class separation ratio:

$\qquad J(w) = \displaystyle\frac{w^T S_B\ w}{w^T S_W\ w}$

This variational objective leads directly to the **generalized eigenvalue problem**

$\qquad S_B\ w = \lambda\ S_W\ w$

The optimal direction $w$ corresponds to the **largest eigenvalue** $\lambda$, which gives the maximum value of $J(w)$:

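- Larger values of $\lambda$ imply clearer class separation.
- $\lambda$ quantifies the ratio of between-class to within-class variance — a signal-to-noise measure in the projected space.

**Interactive exploration:** adjust the sliders below to change the class spreads, the distance between the class centers, and the orientation.
- The black line shows the direction of maximal class separation $w$.
- Blue and red dots are data from two Gaussian-distributed classes.
- The histograms show how well-separated the classes are after projection.

**Try this:** Increase both spreads — what happens to the classification error? What about when you change the angle?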

In [6]:
# Create the LDA demo with its default parameters and mark it as servable;
# the last expression is the viewer itself, so the notebook displays it.
lda_viewer = LDAViewer()
lda_viewer.servable()

**LDA via Generalized Eigenvalue Problem**<br>
The plot shows two classes of 2D data (blue and red), each generated from a Gaussian distribution with adjustable spread.<br>
The dashed black line indicates the optimal projection direction $w$ found by solving the generalized eigenproblem<br>
$\qquad S_B w = \lambda S_W w$<br>
which maximizes class separability in Linear Discriminant Analysis (LDA).<br>
The diamond shapes show the cluster centers.<br>
The side histograms show the projected values of each class along $w$, highlighting the degree of separation.


**References:**
* [**StatQuest: Linear Discriminant Analysis (LDA) clearly explained.**](https://youtu.be/azXCzI57Yfc?t=33)
* [**Sebastian Raschka, "Linear Discriminant Analysis"**](https://sebastianraschka.com/Articles/2014_python_lda.html)

# 4. Take Away

- The **generalized eigenvalue problem**<br>
  $\qquad A x = \lambda B x$<br>
  extends the standard eigenproblem by introducing a second matrix $B$.
- The **generalized Rayleigh quotient**<br>
  $\qquad \rho(x) = \dfrac{x^H A x}{x^H B x}$<br>
  captures the balance of action between $A$ and $B$ along direction $x$.
- If $A$ and $B$ are Hermitian and $B$ is positive definite:
  - Eigenvalues $\lambda$ are real.
  - Eigenvectors belonging to distinct eigenvalues are $B$-orthogonal ($x_i^H B\, x_j = 0$).
  - Stationary values of $\rho(x)$ occur at generalized eigenvectors with $\rho(x) = \lambda$.

- The GEP arises in
  - **Vibrations**: $K x = \omega^2 M x$
  - **Stability**, **PDEs**, **optimization**, and **data analysis**