# LEFT PRECONDITIONED GMRES

In [18]:
import torch
import numpy as np
from torch import linalg as lg
import time

In [19]:
# # Define the numpy array
# A = np.array([[1, 1, 4, 9], [3, 4, 6, 9], [4, 1, 1, 3], [3, 2, 1, 1]])

# # Convert to COO format
# coo = torch.sparse_coo_tensor(indices=torch.tensor(np.nonzero(A)).long(), values=torch.tensor(A[np.nonzero(A)]).float(), size=A.shape)

# # Convert COO to CSR format
# csr = coo.to_sparse_csr()

# print(coo)

In [20]:
def fb_solve(L, U, r):
    y = L.solve_triangular(upper=False, unit=False, b=r)
    z = U.solve_triangular(upper=True, unit=False, b=y)
    return z

In [21]:
# It works fine, I have tested it comparing it with the results of arnoldi_one_iter.ipynb

def arnoldi_one_iter(prec, A, V, k, tol=1e-12):
    """
    Computes the new vectors of the Arnoldi iteration for both V_{k+1} and H_{k + 1, k}

    Input parameters:
    -----------------
    A: array_like
         An (n x n) array.
          
    V: array_like
        An (n x (k + 1)) array. The current Krylov orthonormal basis.
      
    k: int
        One less than the step we are obtaining in the Arnoldi's algorithm to increase
        the dimension of the Krylov subspace. Must be >= 0.
    
    precondition: PreconditionEnum or None
        An enumeration representing the preconditioning method to be applied.
        
    M: scipy.sparse matrix or None
        The preconditioning matrix if applicable, otherwise None.    
      
    epsilon : float, optional
        Tolerance for convergence.
    
    Output:
    -------
      h_k: 
          
      v_new:
          
    """
    # Note that to obtain the first column of H ((k + 1) x k) we need 2 vectors in V. Later in the GMRES algorithm
    # we will use the notation H[: k + 2, : k + 1] as k starts at 0 and we select the first two rows and first column.
    
    # Here h_k respresents the column k + 1 in H. (k starts at 0)
    
    # Inialize k + 2 nonzero elements of H along column k. (k starts at 0)
    h_k = torch.zeros((k + 2, ))

    v_new = prec(A@V[:, k])
    
    # Calculate first k elements of the kth Hessenberg column
    for j in range(k + 1): # Here k is from 0 to k 
        h_k[j] = torch.dot(v_new, V[:, j])
        v_new = v_new - h_k[j] * V[:, j]
    
    # Add the k+1 element
    h_k[k + 1] = torch.norm(v_new, p = 2)

    if h_k[k + 1] <= tol:
        # None for v to check in gmres (early termination with EXACT SOLUTION)
        return h_k, None
    
    else:
        # Find the new orthogonal vector in the basis of the Krylov subspace
        v_new = v_new / h_k[k + 1]

    return h_k, v_new

Verifying the algorithm works fine

In [22]:
# n = 4
# A = torch.tensor([[1, 1, 4, 9], [3, 4, 6, 9], [4, 1, 1, 3], [3, 2, 1, 1]]).float()
# b = torch.tensor([3, 2, 2, -3]).float()
# x0 = torch.zeros_like(b).float()
# r0 = b - A@x0
# r0 = r0
# # Apply initial preconditioning to the residual
# r0 = prec(r0)
# p0 = torch.norm(r0, p=2)


# V = torch.zeros((n, 1))
# V[:, 0] = r0 / p0
# V = torch.cat((V, torch.zeros((n, 1))), axis=1)

# H = torch.zeros((n + 1, 1))
# H = torch.cat((H, torch.zeros((n + 1, 1))), axis=1)
# k = 0

# H[:(k + 2), k], v_new  = arnoldi_one_iter(prec, A, V, k)
# V[:, k + 1] = v_new
# V, H
# Q, R = lg.qr(H[:k + 2, :k + 1], mode = 'complete') # this does not support csr torch format

In [23]:
def back_substitution(A, b):
    """
    Solve a linear system using back substitution.
    
    Args:
    ----------
        A: torch.Tensor
            Coefficient matrix (must be upper triangular).
        
        b: torch.Tensor
            Column vector of constants.
    
    Returns:
    --------
        torch.Tensor: Solution vector.
        
    Raises:
        ValueError: If the matrix A is not square or if its dimensions are incompatible with the vector b.
    """
    
    n = b.size(0)
    
    # Check if A is a square matrix
    if A.size(0) != n or A.size(1) != n:
        raise ValueError("Matrix A must be square.")
    
    # Check if dimensions of A and b are compatible
    if A.size(0) != b.size(0):
        raise ValueError("Dimensions of A and b are incompatible.")
    
    x = torch.zeros(n, dtype=b.dtype)
    
    for i in range(n - 1, -1, -1):
        x[i] = (b[i] - torch.sum(A[i, i+1:] * x[i+1:])) / A[i, i]
    
    return x


In [25]:
def precon_GMRES_restarted(prec, A, b, x0 = None, k_max = None, restart = None, epsilon = 1e-12):
    """
    Generalized Minimal RESidual method for solving linear systems. With both restart and left preconditioning options.
    
    Parameters:
    -----------
    prec: preconditioner function
    
    A : torch.Tensor
        Coefficient matrix of the linear system.
        
    b : torch.Tensor
        Right-hand side vector of the linear system.
        
    x0 : torch.Tensor
        Initial guess for the solution.
        
    k_max : int, optional
        Maximum number of iterations. Defaults to None, which sets it to the dimension of A.
        
    restart : int, optional
        Number of iterations before restart. If None, the method will not restart.
    
    epsilon : float, optional
        Tolerance for convergence. Defaults to 1e-12.
    
    Returns:
    --------
    xk : torch.Tensor
        Approximate solution to the linear system.
    
    error_list : list
        List containing the error at each iteration.
    
    total_k : int
        Total number of iterations performed.
        
    total_precondition_time : float
        Total time spent on preconditioning.
    """
    
    x0 = x0 if x0 is not None else torch.zeros_like(b)
    
    n = A.shape[0]
    
    if k_max is None or k_max > n:
        k_max = n
    
    r0 = b - A@x0
    r0 = r0.float()
    
    # Apply initial preconditioning to the residual
    r0 = prec(r0)

    p0 = torch.norm(r0)
    beta = p0.clone()
    pk = p0.clone()
    k = 0
    total_k = 0
    
    # Save list of errors at each iteration
    error_list = [pk]
    
    # Initialize the V basis of the Krylov subspace (concatenate as iteration continues). May terminate early.
    V = torch.zeros((n, 1))
    V[:, 0] = r0 / beta
    
    # Hessenberg matrix
    H = torch.zeros((n + 1, 1))        
    
    while pk > epsilon * p0 and total_k < k_max: 

        # Arnoldi iteration
        V = torch.cat((V, torch.zeros((n, 1))), dim=1)
        H = torch.cat((H, torch.zeros((n + 1, 1))), dim=1)
        
        
        # Minv_A will be A if precondition is None
        H[:k + 2, k], v_new = arnoldi_one_iter(prec, A, V, k)

        if v_new is None:
            print("ENCOUNTER EXACT SOLUTION")
            # Append 0 for plots...
            error_list.append(0)
        
        else:
            V[:, k + 1] = v_new
        
        Q, R = lg.qr(H[:k + 2, :k + 1], mode = 'complete') # this does not support csr torch format
        
        pk = abs(beta * Q[0, k])  # Compute norm of residual vector
        error_list.append(pk)  # Add new error at current iteration       
        
        yk = back_substitution(R[:-1, :], beta * Q[0][:-1])
        # yk = torch.triangular_solve(beta * Q[0][:-1], R[:-1, :], upper = True)
        xk = x0 + V[:, :k + 1]@yk  # Compute the new approximation x0 + V_{k}y

        k += 1
        total_k += 1
        
        if restart is not None and k == restart:
            x0 = xk
            r0 = b - A@x0
            
            r0 = prec(r0)
            
            p0 = torch.norm(r0)
            beta = p0
            pk = p0
            k = 0
            
            V = torch.zeros((n, 1))
            V[:, 0] = r0 / beta
            H = torch.zeros((n + 1, 1))
  
    return xk, error_list, total_k


In [26]:
method = "baseline"

time_function = lambda: time.perf_counter()

# Define the numpy array
A = torch.tensor([[1, 1, 4, 9], [3, 4, 6, 9], [4, 1, 1, 3], [3, 2, 1, 1]]).float()
b = torch.tensor([3, 2, 2, -3]).float()

# Convert to COO format
# coo = torch.sparse_coo_tensor(indices=torch.tensor(np.nonzero(A)).long(), values=torch.tensor(A[np.nonzero(A)]).float(), size=A.shape)

# Start timing
start = time_function()
            
if method == "jacobi":
    p_start = time_function()
    
    data = 1 / torch.sqrt(torch.Tensor(A.diagonal()))
    indices = torch.vstack((torch.arange(A.shape[0]), torch.arange(A.shape[0])))
    M = torch.sparse_coo_tensor(indices, data, size = A.shape)
    
    M = M.to_sparse_csr() # optimized format for matrix multiplication
                
    # construct preconditioner function
    prec = lambda x: M@x
    
    p_stop = time_function()
            
elif method == "baseline":
    
    p_start, p_stop = 0, 0

    prec = lambda x: x
            
else:
    raise NotImplementedError(f"Preconditioner {method} not implemented!")
                
stop = time_function()
p_time = (p_stop - p_start)
overhead = (stop - start) - (p_time)
            
x, _, _ = precon_GMRES_restarted(prec, A, b)
x

tensor([ 2.9630, -9.4445, 10.7037, -3.7037])

In [27]:
import scipy.sparse.linalg as spla

A = np.array([[1, 1, 4, 9], [3, 4, 6, 9], [4, 1, 1, 3], [3, 2, 1, 1]])
b = np.array([3, 2, 2, -3])
x0 = np.array([0, 0, 0, 0])

print(spla.gmres(A, b, x0, restart = None)[0])

A = torch.tensor([[1, 1, 4, 9], [3, 4, 6, 9], [4, 1, 1, 3], [3, 2, 1, 1]]).float()
b = torch.tensor([3, 2, 2, -3]).float()
x0 = torch.tensor([0, 0, 0, 0]).float()

precon_GMRES_restarted(prec, A, b, x0, 4)[0] # Converges in n = 4 iterations! God

[ 2.96296296 -9.44444444 10.7037037  -3.7037037 ]


tensor([ 2.9630, -9.4445, 10.7037, -3.7037])

In [28]:
from scipy.sparse import coo_matrix

def discretise_poisson(N):
    """Generate the matrix and rhs associated with the discrete Poisson operator."""
    
    nelements = 5 * N**2 - 16 * N + 16
    
    row_ind = np.empty(nelements, dtype=np.float64)
    col_ind = np.empty(nelements, dtype=np.float64)
    data = np.empty(nelements, dtype=np.float64)
    
    f = np.empty(N * N, dtype=np.float64)
    
    count = 0
    for j in range(N):
        for i in range(N):
            if i == 0 or i == N - 1 or j == 0 or j == N - 1:
                row_ind[count] = col_ind[count] = j * N + i
                data[count] =  1
                f[j * N + i] = 0
                count += 1
                
            else:
                row_ind[count : count + 5] = j * N + i
                col_ind[count] = j * N + i
                col_ind[count + 1] = j * N + i + 1
                col_ind[count + 2] = j * N + i - 1
                col_ind[count + 3] = (j + 1) * N + i
                col_ind[count + 4] = (j - 1) * N + i
                                
                data[count] = 4 * (N - 1)**2
                data[count + 1 : count + 5] = - (N - 1)**2
                f[j * N + i] = 1
                
                count += 5
                                                
    return coo_matrix((data, (row_ind, col_ind)), shape=(N**2, N**2)), f

In [29]:
# Step 1: Extract row indices, column indices, and data

N = 40

A, b = discretise_poisson(N)

row_indices = torch.tensor(A.row, dtype=torch.long)
col_indices = torch.tensor(A.col, dtype=torch.long)
values = torch.tensor(A.data, dtype=torch.float)

# Step 2: Create COO tensor in PyTorch
A = torch.sparse_coo_tensor(torch.stack((row_indices, col_indices)), values, A.shape)

# Step 3: Convert COO tensor to dense PyTorch tensor
dense_tensor = A.to_dense().float()
b = torch.tensor(b).float()
x0 = torch.zeros_like(b).float()
type(A), type(b), type(x0)

(torch.Tensor, torch.Tensor, torch.Tensor)

In [30]:
from time import time
from scipy.sparse.linalg import gmres

maxiter = 500
restart = 10


start_time = time()
x, _,_ = precon_GMRES_restarted(prec , A, b, x0, k_max = maxiter, restart = None)
residual_calculated2 = np.linalg.norm(A@x - b)
end_time = time()
print("Optimized GMRES_restarted Time:", end_time - start_time)

print(f"Our implementation residual with Ax-b (max_iterations = {maxiter}, restart = {restart}): {residual_calculated2}")

Optimized GMRES_restarted Time: 79.91808819770813
Our implementation residual with Ax-b (max_iterations = 500, restart = 10): 0.002920696046203375


IT TAKES CRAZY LONG