#### Initialization

In [1]:
import numpy as np
from scipy.linalg import eigh

import param
import holoviews as hv
import panel as pn
from holoviews.operation import contours

# Select Bokeh as the HoloViews plotting backend; logo=False hides the startup banner.
hv.extension('bokeh', logo=False)
# Initialise Panel with the 'katex' extension, which the pn.pane.LaTeX panes in this notebook rely on.
pn.extension('katex')

In [2]:
def interactive_regularized_contours(A, b, B, xlim=(-1.5, 2.5), ylim=(-1.5, 2.5), λ_range=(0, 10), λ0=1.0):
    """
    Interactive visualization of the regularized least squares objective:
        f(x) = ||Ax - b||^2 + λ ||Bx||^2
    Plots contours for varying λ using holoviews and panel.

    Parameters:
    - A, B : 2×2 numpy arrays (array-likes are converted to float arrays)
    - b : 2D target vector (1-D, length 2)
    - xlim, ylim : x/y axis limits; also define the extent of the evaluation grid
    - λ_range : tuple for lambda slider (min, max)
    - λ0 : initial lambda value

    Returns:
    - pn.Column holding the heading, the λ-dependent contour plot, the λ slider
      and the captions.

    The plot marks b (red point) and the minimizer x(λ) of the normal equation
    (AᵀA + λ BᵀB) x = Aᵀb (blue triangle). When that system is singular the
    minimizer is drawn at NaN, i.e. it is not shown.
    """
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    b = np.asarray(b, dtype=float)

    # Everything below is independent of λ, so it is computed once here rather
    # than on every slider move.
    xs = np.linspace(*xlim, 200)
    ys = np.linspace(*ylim, 200)
    xx, yy = np.meshgrid(xs, ys)
    grid_points = np.vstack((xx.ravel(), yy.ravel()))        # shape (2, n_points)

    # f(x) = fit_sq(x) + λ * reg_sq(x); both terms are evaluated on the whole
    # grid at once instead of looping over the 200 × 200 points in Python.
    residual = A @ grid_points - b[:, np.newaxis]
    fit_sq = np.sum(residual ** 2, axis=0).reshape(xx.shape)
    reg_sq = np.sum((B @ grid_points) ** 2, axis=0).reshape(xx.shape)

    # Ingredients of the normal equation (AᵀA + λ BᵀB) x = Aᵀb.
    ATA = A.T @ A
    BTB = B.T @ B
    rhs = A.T @ b

    def objective_surface(lmbda):
        """Build the contour overlay (contours, b, x(λ), axes) for one value of λ."""
        Z = fit_sq + lmbda * reg_sq

        # Use the caller's limits; the original hard-coded (-2, 2) here.
        img = hv.Image((xs, ys, Z)).opts(xlim=xlim, ylim=ylim)
        levels = np.round(np.linspace(Z.min(), Z.max(), 10), 2)
        contour_plot = contours(img, levels=levels).opts(
            xlim=xlim, ylim=ylim,
            cmap='plasma',
            colorbar=True,
            width=550, height=350,
            line_width=1.2,
            # 1-based labels, matching the x_1 / x_2 notation of the text.
            xlabel='x₁', ylabel='x₂',
            title=r"|Ax - b|² + λ |Bx|²"
        )

        try:
            x_lambda = np.linalg.solve(ATA + lmbda * BTB, rhs)
        except np.linalg.LinAlgError:
            x_lambda = np.array([np.nan, np.nan])  # Handle singular case gracefully

        b_point = hv.Points([b]).opts(
            color='red',
            size=8,
            xlim=xlim, ylim=ylim
        )

        x_point = hv.Points([x_lambda]).opts(
            color='blue',
            size=8,
            marker='triangle',
            xlim=xlim, ylim=ylim
        )

        axes = hv.HLine(0).opts(line_width=0.5) * hv.VLine(0).opts(line_width=0.5)
        return (contour_plot * b_point * x_point * axes).opts(legend_position="right")

    λ_slider = pn.widgets.FloatSlider(name='λ (lambda)', start=λ_range[0], end=λ_range[1], step=0.1, value=λ0)

    @pn.depends(λ_slider)
    def plot_callback(lmbda):
        return objective_surface(lmbda)

    caption_md = pn.pane.Markdown(
        """**Figure:** Contour plot of the objective function.""",
        styles={'font-size': '12pt'},
    )
    # Trailing double spaces are Markdown hard line breaks.
    caption_md_2 = pn.pane.Markdown(
        "The red point is the vector b.  \n"
        "The blue triangle is the solution x for a given λ.  \n"
        "As λ increases, observe how the solution shifts away  \n"
        "from the direction penalized by B.",
        styles={'font-size': '12pt'},
    )

    caption_eq = pn.pane.LaTeX(
        r"$\qquad f(x) = \Vert Ax - b\Vert^2 + \lambda \Vert Bx\Vert^2$", styles={'font-size': '12pt'}
    )

    return pn.Column(
        "## Regularized Objective: Directional Penalty",
        pn.Row(plot_callback, pn.Column(λ_slider, caption_md, caption_eq, caption_md_2))
    )

In [51]:
class RayleighQuotientViewer(pn.viewable.Viewer):
    """
    Panel viewer for the generalized Rayleigh quotient
        ρ(θ) = ||A x(θ)||² / ||B_α x(θ)||²,   x(θ) = (cos θ, sin θ),  θ ∈ [0, π],
    where B_α is B with its second row scaled by the `alpha` parameter.

    The plot shows ρ(θ) together with the generalized eigenpairs (μ_i, x_i) of
    AᵀA x = μ B_αᵀB_α x. The side panel lists the eigenpairs and the values
    σ_i = sqrt(μ_i) with their ratio σ_max / σ_min.

    Parameters:
    - A, B : 2×2 array-likes; stored as float copies, the inputs are not modified
    - **params : forwarded to the Viewer / Parameterized constructor (e.g. alpha)
    """
    alpha = param.Number(1.0, bounds=(1e-12, 1.0), step=0.01, doc="B rank control α")

    def __init__(self, A, B, **params):
        # Float copies: the in-place row scaling in _update_data raises a
        # casting error on integer arrays.
        self.A = np.array(A, dtype=float)
        self.B_base = np.array(B, dtype=float)
        self.theta = np.linspace(0, np.pi, 500)

        # Initialize internal data (filled in by _update_data)
        self.rho = None
        self.mu = None
        self.xs = None
        self.sigma_min = 0
        self.sigma_max = 0

        super().__init__(**params)
        self._update_data()

    @staticmethod
    def _direction_angle(x):
        """Angle of the line spanned by x, folded into [0, π) since ρ(x) = ρ(-x)."""
        return np.arctan2(x[1], x[0]) % np.pi

    @param.depends('alpha', watch=True)
    def _update_data(self, *events):
        """Recompute rho(θ), eigenvalues, and eigenvectors."""
        B = self.B_base.copy()
        B[1, :] *= self.alpha  # rank control via alpha

        # ρ on all sampled directions at once; x is NaN where ||Bx||² is
        # numerically zero (same 1e-12 threshold as the per-point rule).
        directions = np.vstack((np.cos(self.theta), np.sin(self.theta)))
        num = np.sum((self.A @ directions) ** 2, axis=0)
        den = np.sum((B @ directions) ** 2, axis=0)
        rho = np.full(den.shape, np.nan)
        np.divide(num, den, out=rho, where=den > 1e-12)
        self.rho = rho

        try:
            # Symmetric-definite generalized eigenproblem AᵀA x = μ BᵀB x.
            # eigh returns real eigenvalues in ascending order and avoids
            # forming inv(BᵀB); it raises LinAlgError when BᵀB is not
            # (numerically) positive definite.
            eigvals, eigvecs = eigh(self.A.T @ self.A, B.T @ B)
            # eigh normalizes so that xᵀ BᵀB x = 1; rescale to unit Euclidean
            # length so the displayed vectors lie on the unit circle.
            eigvecs = eigvecs / np.linalg.norm(eigvecs, axis=0)
            self.mu = eigvals
            self.xs = eigvecs.T
            # Clamp at 0 so rounding noise cannot put a negative under the root.
            self.sigma_max = np.sqrt(max(self.mu[-1], 0.0) + 1e-12)
            self.sigma_min = np.sqrt(max(self.mu[0], 0.0) + 1e-12)
        except np.linalg.LinAlgError:
            self.mu = np.array([])
            self.xs = np.array([])
            self.sigma_min = 0
            self.sigma_max = 0

    @param.depends('alpha')
    def _get_plot(self):
        """Curve of ρ(θ) with the generalized eigenpairs marked in red."""
        curve = hv.Curve((self.theta, self.rho), 'θ', 'ρ(θ)').opts(line_width=2)

        angles = [self._direction_angle(x) for x in self.xs]
        pts = list(zip(angles, self.mu))

        markers = hv.Scatter(pts, 'θ', 'ρ').opts(color='red', size=10)
        plot = curve * markers
        for t in angles:
            plot *= hv.VLine(t).opts(color='red', line_width=1.5)

        return plot.opts(
            width=550, height=300,
            title='Generalized Rayleigh Quotient and Stationary Directions',
            show_grid=True
        )

    @param.depends('alpha')
    def _get_markdown(self):
        """Side panel: σ = sqrt(μ) summary followed by one entry per eigenpair."""
        if self.sigma_max != 0:
            kappa = self.sigma_max / self.sigma_min
            # .3g instead of .0g: the latter printed a single significant digit.
            singularity_md = pn.pane.Markdown(
                f"σ = ({self.sigma_max:.2g}, {self.sigma_min:.2g}) &nbsp; Condition number = {kappa:.3g}",
                styles={'font-size': '11pt'}
            )
        else:
            # No eigen-decomposition available (BᵀB singular).
            singularity_md = pn.pane.Markdown(
                "σ = NaN, &nbsp; Condition number = NaN",
                styles={'font-size': '11pt'}
            )

        items = []
        for i, (x, mu) in enumerate(zip(self.xs, self.mu), start=1):
            t = self._direction_angle(x)
            text_md = f'**μ_{i}** = {mu:.4f},&nbsp;&nbsp;&nbsp; **θ** = {t:.4f} rad'
            vec_latex = (
                r'$$\qquad x_' + f'{i} = \\begin{{pmatrix}} ' +
                f'{x[0]:.4f} \\\\ {x[1]:.4f}' +
                r'\end{pmatrix}$$'
            )
            items.append(
                pn.Column(
                    pn.pane.Markdown(text_md, styles={'font-size': '11pt'}),
                    pn.pane.LaTeX(vec_latex, styles={'font-size': '11pt'})
                )
            )
        return pn.Column(singularity_md, *items, width=350)

    def __panel__(self):
        """Required by pn.viewable.Viewer for .servable() to work."""
        return pn.Column(
            """## Generalized Rayleigh Quotient Viewer""",
            pn.Row(
                pn.Column(
                    self.param.alpha,
                    self._get_plot
                ),
                pn.Spacer(width=20),
                pn.Column(
                    pn.Spacer(height=30),
                    pn.Row(pn.Spacer(width=23), self._get_markdown)
                )
            ))


# 

<div style="float:center;width:100%;text-align:center;"><strong style="height:100px;color:darkred;font-size:40px;">Regularized Least Squares</strong><br>
    <strong style="height:100px;color:darkred;font-size:30px;">and the Generalized Rayleigh Quotient</strong>
</div>

# 1. Preliminaries and Notation

This notebook presents a variational example that leads to a [**generalized eigenvalue problem**](GEP_intro.ipynb).  
The setting arises in regularized least squares problems, where directional penalties induce generalized eigenstructure.

____
Let $A \in \mathbb{R}^{m \times n}$, $B \in \mathbb{R}^{p \times n}$, with $\operatorname{rank}(A) = r_A, \operatorname{rank}(B) = r_B$.  

**Consider the regularized least squares problem:**  
$\qquad\displaystyle{
\operatorname{argmin}_x \Vert A x - b\Vert^2 + \lambda \Vert B x\Vert^2
}$

- The first term enforces data fidelity.
- The second term imposes a penalty in directions selected by $B$.
- The parameter $\lambda \geq 0$ balances the two terms.

This formulation includes standard Tikhonov regularization when $B = I$,  
and more general structural or smoothing priors when $B \neq I$.

**Example:**  
Let $n = 2$, and let  
$\qquad
B = \begin{pmatrix} 1 & 0 \\ 0 & 0 \end{pmatrix} \quad \Rightarrow \quad \|B x\|^2 = x_1^2
$

The penalty affects $x_1$, but not $x_2$.  
As $\lambda$ increases, the optimizer avoids directions that are heavily penalized.  
The tradeoff between fitting $A x \approx b$ and penalizing $x_1$ depends on the relative orientation of $A$ and $B$.

In [3]:
# Demo from the example above: A = I, b = (1, 1), and B penalizes only the first coordinate.
interactive_regularized_contours(
    A=np.eye(2),
    b=np.array([1.0, 1.0]),
    B=np.array([[1, 0], [0, 0]]),
)

# 2. Variational Formulation

## 2.1 Normal Equation

The objective function is

$\qquad
\begin{align}
f(x) &= \Vert A x - b \Vert^2 + \lambda \Vert B x \Vert^2 \\
     &= (A x - b)^T (A x - b) + \lambda (B x)^T (B x) \\
     &= x^T A^T A x - 2 b^T A x + b^T b + \lambda x^T B^T B x
\end{align}
$

The minimum occurs at a **critical point.** Taking the gradient with respect to $x$ yields:

$\qquad
\nabla_x f(x) = 2 A^T A\ x - 2 A^T b + 2 \lambda B^T B\ x
$

Setting $\nabla_x f(x) = 0$ gives the normal equation:

$\qquad
(A^T A + \lambda B^T B)\ x = A^T\ b
$

---

The matrix $A^T A + \lambda B^T B$ governs the solution. The parameter $\lambda$ controls the relative influence of $A^T A$ (data fit) and $B^T B$ (penalty).

To analyze this interaction, consider the associated [**generalized eigenvalue problem**](GEP_intro.ipynb):

$\qquad
A^T A x = \mu B^T B\ x
\quad \Leftrightarrow \quad
(A^T A - \mu B^T B)\ x = 0
$

The pair $(A^T A, B^T B)$ defines a **matrix pencil**:

$\qquad
A^T A - \mu B^T B
$

#### Observations

- The solution depends on the interaction between $A^T A$ and $B^T B$.
- The matrix $B^T B$ introduces directional penalties not visible from $A$ alone.
- Understanding how these quadratic forms compete across directions in $\mathbb{R}^n$ motivates analyzing their relative action on vectors.
- This leads to the generalized [**Rayleigh quotient**](RayleighQuotients.ipynb).

## 2.2 Generalized Rayleigh Quotient

The **generalized Rayleigh quotient** is defined by

$\qquad
\rho(x) = \Large{\frac{ \Vert A x \Vert^2 }{ \Vert B x \Vert^2 }} = \Large{\frac{ x^T A^T A\ x }{ x^T B^T B\ x }},\quad x \ne 0.
$

This quotient is invariant under positive scaling of $x$.  
It is therefore natural to impose a normalization constraint and consider the constrained maximization problem

$\qquad
\max_x \rho(x) 
= \;\; \max_{x} \; x^T A^T A x \quad \text{subject to} \quad x^T B^T B\ x = 1
$

The associated Lagrangian is

$\qquad
L(x, \mu) = x^T A^T A x - \mu (x^T B^T B\ x - 1)
$

Setting $\nabla_x L = 0$ gives the condition

$\qquad
A^T A x = \mu B^T B x
$

This is the symmetric positive semidefinite generalized eigenvalue problem associated with the matrix pair $(A^T A, B^T B)$.

**The generalized eigenvectors are the stationary points of $\rho(x)$, and the generalized eigenvalues $\mu$ are the corresponding stationary values.**

**Remark**  
The matrices $A^T A$ and $B^T B$ are always symmetric and positive semidefinite, since  
$\qquad
x^T A^T A x = \Vert Ax\Vert^2 \ge 0,
\quad
x^T B^T B x = \Vert Bx\Vert^2 \ge 0.
$

The matrix pair $(A^T A,\; B^T B)$ therefore defines a **symmetric positive–semidefinite pencil**.  
It is *not* a symmetric-definite pair unless $B^T B$ is positive definite, which requires $B$ to have full column rank.

When $B^T B$ is singular, the generalized eigenvalue problem  $\;\;A^T A x = \mu\, B^T B x$  
may have infinite eigenvalues corresponding to vectors in $\mathscr{N}(B)$, and the spectrum generally splits into finite and infinite parts.

#### Example

Consider the generalized Rayleigh quotient

$\qquad\displaystyle{
\rho(x) = \frac{ \Vert A x \Vert^2 }{ \Vert B x \Vert^2 },
\quad A = \begin{pmatrix} 3 & 2\\ 1 & 4 \end{pmatrix},\quad
B_\alpha = \begin{pmatrix} 2.0 & 0.5 \\ 0.1 \alpha & 1.5 \alpha \end{pmatrix}
}.$  
The parameter $\alpha \in [0, 1]$ interpolates between a full-rank matrix ($\alpha = 1$) and a rank-deficient matrix ($\alpha = 0$),  
where the second row of $B_\alpha$ is zero.


To visualize how $\rho(x)$ varies with direction, evaluate it on the unit circle in $\mathbb{R}^2$

$\qquad
x(\theta) = \begin{pmatrix} \cos \theta \\ \sin \theta \end{pmatrix}, \quad \theta \in [0, \pi]
$

Since $\rho(x)$ is homogeneous and satisfies $\rho(x) = \rho(-x)$, it suffices to consider $\theta \in [0, \pi]$.

The stationary values of $\rho(x)$ correspond to the generalized eigenvalues $\mu$ satisfying

$\qquad
A^T A x = \mu B^T B x
$

These eigenvalues represent the extremal values of the quotient and are marked on the plot  
to illustrate the connection between the variational formulation and the generalized eigenvalue problem.

In [52]:
# Matrices A and B from the example above; the viewer scales B's second row by the α slider.
rq_viewer = RayleighQuotientViewer(
    A=np.array([[3.0, 2.0], [1.0, 4.0]]),
    B=np.array([[2.0, 0.5], [0.1, 1.5]]),
)
rq_viewer.servable()

**Plot of the generalized Rayleigh quotient** $\displaystyle{ \rho(x) = \frac{\|A x\|^2}{\|B x\|^2}}$ evaluated on unit vectors $x(\theta) = \begin{pmatrix} \cos \theta \\ \sin \theta \end{pmatrix}$ for $\theta \in [0, \pi]$.  
<div style="padding-left:1cm;">

Stationary values of $\rho(x)$ correspond to the generalized eigenvalues $\mu_i$ of the pencil $(A^T A, B^T B)$, and are marked in red.

Use the slider to vary the matrix $B_\alpha$, interpolating from full rank ($\alpha = 1$) to rank-deficient ($\alpha = 0$).  
As $B_\alpha$ becomes nearly singular, the denominator $\Vert B_\alpha x\Vert^2$ becomes small in certain directions,  
causing peaks in $\rho(x)$ and unstable or infinite eigenvalues.  
The generalized singular values $\sigma_i = \sqrt{\mu_i}$ and their ratio $\sigma_{\max}/\sigma_{\min}$ (shown as the condition number) are displayed to quantify proximity to singularity.
</div>

# 3. Take Away

- The solution to the **normal equation**
  $\;\;
  (A^T A + \lambda B^T B)\, x = A^T b
  $  
  depends on the interaction between $A^T A$ and $B^T B$ across all directions in $\mathbb{R}^n$.
- The **generalized Rayleigh quotient**
  $\;\;\displaystyle{
  \rho(x) = \frac{\|A x\|^2}{\|B x\|^2}
  }$  
  measures the relative action of $A$ and $B$ along a given direction.
- The **generalized eigenvalue problem**
  $\;\;
  A^T A\, x = \mu\, B^T B\, x
  $  
  identifies directions where this ratio is stationary.  
  The associated eigenvalues $\mu_i$ are the stationary values of $\rho(x)$; the largest and smallest are its extremal values.
- These eigenvectors reveal **dominant directions**, but do not provide a full basis for understanding the joint action of $A$ and $B$.
- The [**generalized singular value decomposition (GSVD)**](GSVD_intro.ipynb) will construct a basis in which both $A^T A$ and $B^T B$ are diagonal, clarifying their structure across all directions.