In [2]:
import torch 
import numpy as np 
import matplotlib.pyplot as plt
import torch.optim as optim
torch.set_default_tensor_type(torch.DoubleTensor) 

Consider: $A_\epsilon u=g$, ($A_\epsilon = A_0+\epsilon I$)
$$
A_0 =
\left(
\begin{array}{rrr}
1  &  -1   &  0\\
-1 &   2   &  -1\\
0  &  -1   &  1
\end{array}
\right),\quad
g=
\left(
\begin{array}{r}
-1 \\
-1 \\
2  \\
\end{array}
\right)\in R(A_0), \quad
p=
\begin{pmatrix}
1\\
1\\
1
\end{pmatrix}
\in N(A_0).
$$
<br><br><br>

For $f(u) = \frac{1}{2}u^T A u -g^T u$

Gradient descent method: 

$$
u^{k+1} = u^{k} - \eta \nabla f(u^{k})
 =u^{k} - \eta (Au^{k}-g)
$$

Scaled gradient descent
$$
u^{k+1} =u^{k} - \eta [{\rm diag}(A)]^{-1}(Au^{k}-g)
$$

<br>
<br>




In [None]:
## GD for 3by3 system 
print("Plain GD: number of iterations needed for 3 by 3 system")
for eps in [0.1,0.01,0.001,1e-4,1e-5,1e-9, 0.]: 
    A3 = torch.tensor([[1+eps,-1,0],[-1,2+eps,-1],[0,-1,1+eps]])
    x = torch.zeros(3)
    x = x.view(3,1)
    b = torch.tensor([[-1.],[-1.],[2.]]) # must be in kernel is eps = 0
    x.data = torch.tensor([[1.0],[2.],[3.0]])
    tol = 1e-8 # tolerance for residual norm 
    residual_norm = torch.norm(torch.matmul(A3,x) -b,2)
    iters = 0 
    while residual_norm > tol: 
        gd = torch.matmul(A3,x) - b 
        x = x - 0.5*gd 
        residual_norm = torch.norm(gd,2)
        iters += 1 
        if iters > 1000000: 
            break
        assert torch.isnan(residual_norm)!=True, "norm is nan, reset learning rate" #somehow nan>tol returns false
    if iters > 1000000: 
        print("eps = "+str(eps)+": over 1,000,000")
    else:
        print("eps = "+str(eps)+": ", iters)

Expanded system:
    
Write $u\in \mathbb{R}^3=u_1e_1+u_2e_2+u_3e_3$ as
$$
u=\tilde u_1 e_1+\tilde u_2e_2+\tilde u_3e_3+\tilde
    u_4 p =P\tilde u,
$$
where 
$$
P=\begin{pmatrix}
    1 & 0 & 0 & 1\\
    0 & 1 & 0 & 1\\
    0 & 0 & 1 & 1
\end{pmatrix}, \quad p=
\begin{pmatrix}
    1 \\ 1 \\ 1
\end{pmatrix}
\in {\rm ker}(A_0). 
$$

The equation $A_{\epsilon}u=g$ becomes
$$
A_{\epsilon}P\tilde u=g \Longleftrightarrow
(P^TA_{\epsilon}P)\tilde u=P^Tg,
$$

This leads to a semi-definite system:
$$
\begin{pmatrix}
    1+\epsilon  &  -1   &  0&\epsilon\\
    -1 &   2+\epsilon   &  -1&\epsilon\\
    0  &  -1   &  1+\epsilon&\epsilon\\
    \epsilon&\epsilon&\epsilon&3\epsilon
\end{pmatrix}
\tilde u=
\begin{pmatrix}
      -1 \\
    -1 \\
    2  \\
    0\\
\end{pmatrix}.
$$

In [3]:
## GD for 4by4 system, GD
print("GD: 4 by 4 system")
P = torch.tensor([[1.,0.,0.,1.],[0.,1.,0.,1.],[0.,0.,1.,1.]])
for eps in [0.1,0.01,0.001,1e-4,1e-5,1e-9,0.]: 
    A3 = torch.tensor([[1+eps,-1,0],[-1,2+eps,-1],[0,-1,1+eps]])
    A4 = torch.tensor([[1+eps,-1.0,0,eps],[-1,2+eps,-1,eps],[0,-1,1+eps,eps],[eps,eps,eps,3*eps]])
    x = torch.rand(4)
    x = x.view(4,1)
    b = torch.tensor([[-1.],[-1.],[2.],[0.]]) #
    tol = 1e-8
    residual_norm = torch.norm(A3@(P@x)-P@b,2)
    iters = 0 
    while residual_norm > tol: 
        gd = torch.matmul(A4,x) - b 
        x.data = x.data - 0.5*gd 
        residual_norm = torch.norm(A3@(P@x)-P@b,2)
        iters += 1 
        if iters > 100000: 
            break
    assert torch.isnan(residual_norm)!=True, "norm is nan, reset learning rate"
    print("eps = "+str(eps)+": ", iters)
print()

GD: 4 by 4 system
eps = 0.1:  71
eps = 0.01:  714
eps = 0.001:  6264
eps = 0.0001:  48098
eps = 1e-05:  100001
eps = 1e-09:  28
eps = 0.0:  29



In [4]:
#GD for 4by4 system, modified Jacobi preconditioner
print("Scaled GD: 4 by 4 system")
P = torch.tensor([[1.,0.,0.,1.],[0.,1.,0.,1.],[0.,0.,1.,1.]])
for eps in [0.1,0.01,0.001,1e-4,1e-5,1e-9]: 
    A3 = torch.tensor([[1+eps,-1,0],[-1,2+eps,-1],[0,-1,1+eps]])
    A4 = torch.tensor([[1+eps,-1.0,0,eps],[-1,2+eps,-1,eps],[0,-1,1+eps,eps],[eps,eps,eps,3*eps]])
    D = torch.diag(torch.diag(A4))
    x = torch.rand(4)
    x = x.view(4,1)
    b = torch.tensor([[-1.],[-1.],[2.],[0.]]) 
    tol = 1e-8
    residual_norm = torch.norm(A3@(P@x)-P@b,2)
    iters = 0 
    while residual_norm > tol: 
        gd = torch.matmul(A4,x) - b 
        x.data = x.data - 0.7*torch.matmul(torch.linalg.inv(D),gd)
        residual_norm = torch.norm(A3@(P@x)-P@b,2)
        iters += 1 
        if iters > 100000: 
            break
    assert torch.isnan(residual_norm)!=True, "norm is nan, reset learning rate"
    print("eps = "+str(eps)+": ", iters)

    

Scaled GD: 4 by 4 system
eps = 0.1:  17
eps = 0.01:  21
eps = 0.001:  21
eps = 0.0001:  21
eps = 1e-05:  21
eps = 1e-09:  21
