# Householder Similarity Transforms

In [1]:
import numpy as np
import numpy.linalg as la

# Compact array display: two decimals per entry and wide lines so that
# matrix rows are not wrapped when printed.
np.set_printoptions(linewidth=150, precision=2)

In [2]:
n = 8

# First two canonical basis vectors of R^n; the Householder vectors built
# further down reflect onto multiples of these.
e1 = np.zeros(n); e1[0] = 1
e2 = np.zeros(n); e2[1] = 1

# Draw the test matrix from a seeded generator so that a fresh
# "Restart Kernel -> Run All" reproduces exactly the same A (and therefore
# the same outputs in every cell below).
rng = np.random.default_rng(seed=0)
A = rng.standard_normal((n, n))
A

array([[-0.72,  1.  ,  0.42, -0.18,  0.73, -0.92, -1.34, -0.47],
       [ 1.63, -1.19,  1.12,  0.9 , -0.27,  0.96, -0.95,  0.24],
       [-1.49,  0.18,  0.92,  1.93, -1.76,  0.63,  1.07, -0.48],
       [ 1.05,  1.21, -0.61,  0.99, -2.11, -0.77, -0.85,  0.58],
       [ 2.09, -0.89,  0.25, -0.43, -0.03,  1.24, -1.89, -0.19],
       [-0.79, -0.05, -1.59,  1.03, -0.95, -2.14,  1.07,  0.69],
       [ 0.88, -1.95,  2.2 ,  1.05,  0.21, -0.86, -0.31,  1.57],
       [ 0.07, -0.87,  0.32, -0.78, -0.83,  0.52, -0.26,  0.65]])

Now let's try to zero out the entries of the first column below the diagonal using a similarity transform.

## Starting with the first row

Let's first try to proceed as in Householder QR, creating a transformation
$$H=I-2\frac{vv^T}{v^Tv}$$
where $v = a_1 - ||a_1||_2e_1$ with $a_1$ being the first column of $A$.

In [3]:
# Householder vector from the first column of A: v = a - ||a||_2 * e1.
# `a` is a copy, so A itself is left untouched.
a = np.array(A[:, 0])
v = a - la.norm(a) * e1

# Reflector H1 = I - 2 v v^T / (v^T v).
H1 = np.identity(n) - 2 * np.outer(v, v) / np.dot(v, v)

We can apply the transformation from the left as in QR to reduce the first column to a multiple of the first elementary vector.

In [5]:
# Left-multiplication only: the first column becomes a multiple of e1.
np.round(H1 @ A, decimals=4)

array([[ 3.5 , -1.5 ,  0.93, -0.31,  0.07,  1.16, -2.32,  0.72],
       [-0.  , -0.23,  0.92,  0.95, -0.02,  0.16, -0.57, -0.22],
       [ 0.  , -0.69,  1.1 ,  1.88, -1.99,  1.36,  0.73, -0.06],
       [-0.  ,  1.83, -0.74,  1.02, -1.95, -1.29, -0.6 ,  0.28],
       [-0.  ,  0.35, -0.  , -0.36,  0.3 ,  0.21, -1.4 , -0.78],
       [ 0.  , -0.52, -1.49,  1.01, -1.07, -1.75,  0.89,  0.91],
       [-0.  , -1.43,  2.1 ,  1.08,  0.35, -1.29, -0.1 ,  1.32],
       [-0.  , -0.83,  0.32, -0.78, -0.82,  0.49, -0.24,  0.63]])

However, to ensure we do not perturb the eigenvalues of $A$, we must also apply the inverse of the matrix from the right, resulting in a similarity transformation. Since $H$ is symmetric and orthogonal, $H^{-1} = H^T = H$.

In [6]:
# Two-sided (similarity) transform with H1.
np.round(H1 @ A @ H1.T, decimals=4)

array([[-2.7 ,  0.89, -1.25,  1.23,  3.14, -0.01, -1.03,  0.82],
       [-0.41, -0.07,  0.78,  1.05,  0.18,  0.09, -0.48, -0.22],
       [-1.54, -0.1 ,  0.56,  2.27, -1.23,  1.07,  1.05, -0.04],
       [ 0.45,  1.66, -0.58,  0.91, -2.17, -1.2 , -0.69,  0.27],
       [-0.18,  0.42, -0.07, -0.32,  0.39,  0.18, -1.36, -0.78],
       [ 0.69, -0.79, -1.25,  0.83, -1.41, -1.62,  0.74,  0.9 ],
       [-0.73, -1.15,  1.84,  1.26,  0.71, -1.43,  0.05,  1.33],
       [-1.4 , -0.29, -0.18, -0.43, -0.13,  0.23,  0.05,  0.65]])

Note that applying the Householder transformation from the right filled in the elements annihilated by applying it from the left.

## Starting in the second row

To avoid this, we define the Householder transformation to annihilate only the elements below the first subdiagonal. That way, the transformation does not affect the first row when applied from the left, and consequently does not affect the first column when applied from the right, preserving the zeros we have just created.

In [7]:
# Same construction as for H1, but the leading entry of the column is zeroed
# first and the target is e2. The resulting v has v[0] == 0, so the reflector
# acts as the identity on the first row/column.
a = np.array(A[:, 0])
a[0] = 0
v = a - la.norm(a) * e2

# Reflector H2 = I - 2 v v^T / (v^T v).
H2 = np.identity(n) - 2 * np.outer(v, v) / np.dot(v, v)

In [8]:
# Left-multiplication only: row 0 of A is unchanged, column 0 is zero below the subdiagonal.
np.round(H2 @ A, decimals=4)

array([[-0.72,  1.  ,  0.42, -0.18,  0.73, -0.92, -1.34, -0.47],
       [ 3.43, -1.32,  1.04, -0.36,  0.22,  0.99, -2.66,  0.64],
       [-0.  ,  0.07,  0.86,  0.9 , -1.34,  0.66, -0.34, -0.15],
       [ 0.  ,  1.29, -0.56,  1.71, -2.4 , -0.79,  0.15,  0.34],
       [-0.  , -0.73,  0.34,  1.02, -0.61,  1.21,  0.1 , -0.66],
       [ 0.  , -0.11, -1.62,  0.48, -0.73, -2.13,  0.32,  0.87],
       [ 0.  , -1.89,  2.24,  1.67, -0.04, -0.87,  0.53,  1.37],
       [-0.  , -0.86,  0.33, -0.73, -0.85,  0.52, -0.2 ,  0.63]])

In [9]:
# Two-sided (similarity) transform with H2: the zeros in column 0 survive.
np.round(H2 @ A @ H2.T, decimals=4)

array([[-0.72,  0.55,  0.04,  0.09,  1.26, -1.12, -1.11, -0.45],
       [ 3.43, -1.95,  0.52,  0.01,  0.96,  0.71, -2.35,  0.66],
       [-0.  , -1.12, -0.13,  1.6 ,  0.05,  0.13,  0.25, -0.11],
       [ 0.  ,  0.14, -1.51,  2.38, -1.07, -1.29,  0.71,  0.38],
       [-0.  , -0.82,  0.27,  1.07, -0.51,  1.17,  0.14, -0.65],
       [ 0.  ,  0.95, -0.75, -0.14, -1.96, -1.67, -0.2 ,  0.83],
       [ 0.  , -1.01,  2.96,  1.16, -1.05, -0.49,  0.1 ,  1.34],
       [-0.  , -1.45, -0.16, -0.39, -0.16,  0.26,  0.09,  0.65]])

To generalize this process, we continue by eliminating everything below the subdiagonal in each subsequent column and applying the corresponding two-sided transformation, finally resulting in an upper Hessenberg matrix.

-----

Why does post-multiplying with `H2` not destroy the zeros?

In [19]:
# Inspect the reflector itself: its first row and first column are those of the identity.
np.round(H2, decimals=4)

array([[ 1.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.47, -0.43,  0.31,  0.61, -0.23,  0.26,  0.02],
       [ 0.  , -0.43,  0.64,  0.25,  0.5 , -0.19,  0.21,  0.02],
       [ 0.  ,  0.31,  0.25,  0.82, -0.36,  0.14, -0.15, -0.01],
       [ 0.  ,  0.61,  0.5 , -0.36,  0.29,  0.27, -0.3 , -0.02],
       [ 0.  , -0.23, -0.19,  0.14,  0.27,  0.9 ,  0.11,  0.01],
       [ 0.  ,  0.26,  0.21, -0.15, -0.3 ,  0.11,  0.87, -0.01],
       [ 0.  ,  0.02,  0.02, -0.01, -0.02,  0.01, -0.01,  1.  ]])