# CS111 Lecture 4
## Spring 2023, Z. Matni

## `A = LU` Factorization with PIVOTING

### Again, we'll start off by importing numpy and the linear algebra class (linalg) from numpy

In [9]:
import numpy as np
import numpy.linalg as npla

### Back to LU Factorization...!

In [10]:
def LUfactorNoPiv(A):
    """Factor a square matrix, A == L @ U (no partial pivoting)
    Parameters: 
      A: the matrix.
    Outputs (in order):
      L: the lower triangular factor, same dimensions as A, with ones on the diagonal
      U: the upper triangular factor, same dimensions as A
    """
    
    # Check the input - matrix A has to be square
    m, n = A.shape
    assert m == n, 'input matrix A must be square'
    
    # Make a copy of the matrix that we will transform into L and U
    # This is to ensure we can operate with floating-point numbers 
    LU = A.astype(np.float64).copy()
    
    # Go through the algorithm:
    # Eliminate each column in turn
    for piv_col in range(n):
            
        # Update the rest of the matrix
        # This routine creates a combination of the L and U matrices in one matrix (called LU here)
        # Then L and U are separated from LU
        pivot = LU[piv_col, piv_col]
        assert pivot != 0., "pivot is zero, can't continue"
        
        for row in range(piv_col + 1, n):
            multiplier = LU[row, piv_col] / pivot
            LU[row, piv_col] = multiplier
            LU[row, (piv_col+1):] -= multiplier * LU[piv_col, (piv_col+1):]
            
    # Separate L and U in the result
    # .triu() makes the lower-triangle half of a matrix all zeros
    U = np.triu(LU)
    L = LU - U + np.eye(n)
        
    return (L, U)

In [11]:
A = np.array([
    [ 2. ,  7. ,  1. ,  8. ],
    [ 1. ,  5.5,  8.5,  5. ],
    [ 0. ,  1. , 12. ,  2.5],
    [-1. , -4.5, -4.5,  3.5]])

#Alternative to try:
A = np.array([[1, 2, 3], [1,1,1], [-1,1,2]])

L,U = LUfactorNoPiv(A)
print("\nA\n", A, "\n\nL\n", L, "\n\nU\n", U)


A
 [[ 1  2  3]
 [ 1  1  1]
 [-1  1  2]] 

L
 [[ 1.  0.  0.]
 [ 1.  1.  0.]
 [-1. -3.  1.]] 

U
 [[ 1.  2.  3.]
 [ 0. -1. -2.]
 [ 0.  0. -1.]]


In [12]:
import numpy as np
A = np.array([[5,3,3],[3,5,3],[3,3,5]])
L,U = LUfactorNoPiv(A)
print("\nA\n", A, "\n\nL\n", L, "\n\nU\n", U)
print(U @L)
print(L @ U)


A
 [[5 3 3]
 [3 5 3]
 [3 3 5]] 

L
 [[1.    0.    0.   ]
 [0.6   1.    0.   ]
 [0.6   0.375 1.   ]] 

U
 [[5.   3.   3.  ]
 [0.   3.2  1.2 ]
 [0.   0.   2.75]]
[[8.6     4.125   3.     ]
 [2.64    3.65    1.2    ]
 [1.65    1.03125 2.75   ]]
[[5. 3. 3.]
 [3. 5. 3.]
 [3. 3. 5.]]


*Note how the results of the 4x4 compare perfectly to the manual calculation we had done earlier in lecture.*

*Let's try it with a "special case" where the pivot A[0,0] = 0*

In [13]:
# LU factorization (without pivoting) fails if it encounters a zero pivot

A = np.array([[0, 1], [1, 2]])
print(A)
print( npla.det(A) )

[[0 1]
 [1 2]]
-1.0


In [14]:
# So, matrix A is NOT singular - let's try to factor it into L and U:
L,U = LUfactorNoPiv(A)

AssertionError: pivot is zero, can't continue

### We need to edit our function to deal with these cases!!

*We need to ammend our `LUfactorNoPiv()` function to a more general `LUfactor()` function*

*Please review this code carefully to understand the role of permuation matrices in pivoting*


In [15]:
def LUfactor(A, pivoting = True):
    """Factor a square matrix with partial pivoting, A[p,:] == L @ U
    Parameters: 
      A: the matrix.
      pivoting: whether or not to do partial pivoting
    Outputs (in order):
      L: the lower triangular factor, same dimensions as A, with ones on the diagonal
      U: the upper triangular factor, same dimensions as A
      p: the permutation vector that permutes the rows of A by partial pivoting
    """
    
    # Check the input
    m, n = A.shape
    assert m == n, 'input matrix A must be square'
    
    # Initialize p to be the identity permutation
    p = np.array(range(n))
    
    # Make a copy of the matrix that we will transform into L and U
    LU = A.astype(np.float64).copy()
    
    # Eliminate each column in turn
    for piv_col in range(n):
     
        # Choose the pivot row and swap it into place
        if pivoting:
            piv_row = piv_col + np.argmax(np.abs(LU[piv_col:, piv_col]))   # Added np.abs() to fix bug
            assert LU[piv_row, piv_col] != 0., "can't find nonzero pivot, matrix is singular"
            # print("Before:")
            # print(LU)

            LU[[piv_col, piv_row], :]  = LU[[piv_row, piv_col], :]
            p[ [piv_col, piv_row] ]      = p[[piv_row, piv_col]]
            
            # print("After: ")
            # print(LU)
        # Update the rest of the matrix
        pivot = LU[piv_col, piv_col]
        assert pivot != 0., "pivot is zero, can't continue"
        
        # This is the standard "core" of the algorithm (same as in LUfactorNoPiv)
        for row in range(piv_col + 1, n):
            multiplier = LU[row, piv_col] / pivot
            LU[row, piv_col] = multiplier
            LU[row, (piv_col+1):] -= multiplier * LU[piv_col, (piv_col+1):]
            # print("Did add on row: " + str(row) + "Column: " + str(piv_col))
            
    # Separate L and U in the result
    U = np.triu(LU)
    L = LU - U + np.eye(n)
    
    # This will return the L, U, AS WELL AS p (the permutation vector).
    return (L, U, p)

In [None]:
A = np.array([
    [ 1. , 0. , 0. ],   # Note: I changed the first element to zero for this demo
    [ 2. , 1. , 0. ],
    [ 3. , 5. , 1. ]]) 

A = A + A.T
print(A)
print(npla.det(A))

d,v = npla.eig(A)
print(d)



[[2. 2. 3.]
 [2. 2. 5.]
 [3. 5. 2.]]
-7.999999999999998
[ 8.83908632  0.28930841 -3.12839474]


In [None]:
A = np.array([
    [ 0. ,  7. ,  1. ,  8. ],   # Note: I changed the first element to zero for this demo
    [ 1. ,  5.5,  8.5,  5. ],
    [ 0. ,  1. , 12. ,  2.5],
    [-1. , -4.5, -4.5,  3.5]]) 

#L,U = LUfactorNoPiv(A)
#print("\nA\n", A, "\n\nL\n", L, "\n\nU\n", U)

L,U,p = LUfactor(A)
print("\nA\n", A, "\n\nL\n", L, "\n\nU\n", U, "\n\np\n", p)


A
 [[ 0.   7.   1.   8. ]
 [ 1.   5.5  8.5  5. ]
 [ 0.   1.  12.   2.5]
 [-1.  -4.5 -4.5  3.5]] 

L
 [[ 1.          0.          0.          0.        ]
 [ 0.          1.          0.          0.        ]
 [ 0.          0.14285714  1.          0.        ]
 [-1.          0.14285714  0.3253012   1.        ]] 

U
 [[ 1.          5.5         8.5         5.        ]
 [ 0.          7.          1.          8.        ]
 [ 0.          0.         11.85714286  1.35714286]
 [ 0.          0.          0.          6.91566265]] 

p
 [1 0 2 3]


In [None]:
import numpy as np 

A = np.array([[0,2,3],[1,1,1],[-1,1,0]])
L,U,p = LUfactor(A)
print("\nA\n", A, "\n\nL\n", L, "\n\nU\n", U, "\n\np\n", p)


A
 [[ 0  2  3]
 [ 1  1  1]
 [-1  1  0]] 

L
 [[ 1.  0.  0.]
 [ 0.  1.  0.]
 [-1.  1.  1.]] 

U
 [[ 1.  1.  1.]
 [ 0.  2.  3.]
 [ 0.  0. -2.]] 

p
 [1 0 2]


### Using L.U Factorization to help solve Ax = b problems
#### Assuming we have L.U, how can we solve b?

Here's a function called Lsolve(L, b) that takes an L and b, and returns a vector y, such that Ly = b.

y is an "in-between" solution for what we ultimately want (vector x). We then use y and U to give us x in *another* function called Usolve(U, b).

In [17]:
def Lsolve(L, b):
    """Forward solve a unit lower triangular system Ly = b for y
    Parameters: 
      L: the matrix, must be square, lower triangular, with ones on the diagonal
      b: the right-hand side vector
    Output:
      y: the solution vector to L @ y == b
    """
    
    # Check the input
    m, n = L.shape
    assert m == n, "matrix L must be square"
    assert np.all(np.tril(L) == L), "matrix L must be lower triangular"
    assert np.all(np.diag(L) == 1), "matrix L must have ones on the diagonal"
    
    # Make a copy of b that we will transform into the solution
    y = b.astype(np.float64).copy()
    
    # Forward solve
    for col in range(n):
        y[col+1:] -= y[col] * L[col+1:, col]
        
    return y

### The following is a missing piece of this puzzle - it's also a question on your homework this week!

In [51]:
def Usolve(U, y):
    """Backward solve an upper triangular system Ux = y for x
    Parameters: 
      U: the matrix, must be square, upper triangular, with nonzeros on the diagonal
      y: the right-hand side vector
    Output:
      x: the solution vector to U @ x == y
    """
    m, n = U.shape
    assert m == n, "matrix U must be square"
    assert np.all(np.triu(U) == U), "matrix U must be upper triangular"
    for i in range(n):
        assert U[i][i] != 0
    
    x = y.astype(np.float64).copy()

    for row in range(n):
        print(x[n-row-1], " divided by ", U[n-row-1, n-row-1] )
        x[n-row-1] = x[n-row-1]  / U[n-row-1, n-row-1] 
        print(x[:n-row-1], "subtracted by ",x[n-row-1] * U[:n-row-1, n-row-1] )
        x[:n-row-1] -= x[n-row-1] * U[:n-row-1, n-row-1]
    return x

In [None]:
def Usolve(U, y):
    # Check the input
    m, n = U.shape
    assert m == n, "matrix U must be square"
    assert np.all(np.triu(U) == U), "matrix U must be upper triangular"
    assert np.all(np.diag(U) != 0), "matrix U must have nonzeros on the diagonal"
    # Make a copy of y that we will transform into the solution
    x = y.astype(np.float64).copy()
    # Backward solve
    for row in reversed(range(n)):
        x[row] /= U[row, row]
        x[:row] -= x[row] * U[:row, row]
    return x


### Some more examples to play with...

In [None]:
# A larger example of LU with partial pivoting

A = np.round(20*np.random.rand(5,5))
print('matrix A:\n', A)
xorig = np.round(10*np.random.rand(5))
print('\noriginal x:', xorig)
b = A @ xorig
print('\nright-hand side b:', b)

matrix A:
 [[ 6. 17.  0. 13. 15.]
 [17. 12. 17. 11. 12.]
 [ 8.  2.  3.  8.  5.]
 [16.  7. 11.  5. 18.]
 [ 2. 10. 13.  8. 17.]]

original x: [1. 5. 5. 7. 5.]

right-hand side b: [257. 299. 114. 231. 258.]


In [None]:
# Factor the larger example

L, U, p = LUfactor(A)
print(L,"\n\n",U,"\n\n",p,"\n")

print("norm of difference between L times U and permuted A:", npla.norm( L@U - A[p,:]))
print("RELATIVE norm of difference between L times U and permuted A:", npla.norm( L@U - A[p,:])/npla.norm(A[p, :]))

[[ 1.          0.          0.          0.          0.        ]
 [ 0.35294118  1.          0.          0.          0.        ]
 [ 0.11764706  0.67281106  1.          0.          0.        ]
 [ 0.47058824 -0.28571429 -0.44652161  1.          0.        ]
 [ 0.94117647 -0.33640553 -0.46674839 -0.35522485  1.        ]] 

 [[17.         12.         17.         11.         12.        ]
 [ 0.         12.76470588 -6.          9.11764706 10.76470588]
 [ 0.          0.         15.03686636  0.57142857  8.34562212]
 [ 0.          0.          0.          5.68372663  6.15507202]
 [ 0.          0.          0.          0.         16.40892915]] 

 [1 0 4 2 3] 

norm of difference between L times U and permuted A: 0.0
RELATIVE norm of difference between L times U and permuted A: 0.0


In [None]:
for r in range(3):
    print(3-r)

3
2
1


In [52]:
# Solve with the larger example - check this after your homework is done! :)

y = Lsolve(L,b[p])
print("y:", y)
print(U)
x = Usolve(U,y)
print("\nx:", x)
print("\nresidual norm:", npla.norm(b - A @ x))

y: [3.     1.9998]
[[1.     1.    ]
 [0.     0.9999]]
1.9998000000000002  divided by  0.9999
[3.] subtracted by  [2.]
0.9999999999999996  divided by  1.0
[] subtracted by  []

x: [1. 2.]

residual norm: 4.440892098500626e-16


In [53]:
#Test USolve
U = np.array([[1,2,3],[0,5,6],[0,0,9]])
y = np.array([1,2,3])
x = Usolve(U,y)
print(U @ x)
print(y)
print("\nresidual norm:", npla.norm(y - U @ x))

3.0  divided by  9
[1. 2.] subtracted by  [1. 2.]
0.0  divided by  5
[0.] subtracted by  [0.]
0.0  divided by  1
[] subtracted by  []
[1. 2. 3.]
[1 2 3]

residual norm: 0.0


In [None]:
#Test BOTH
A = np.array([[0,2,3], [1,1,1], [-1,1,0]])
b = np.array([1,2,3])
L, U, p = LUfactor(A)
y = Lsolve(L, b[p])
print("Y: " + str(y))
x = Usolve(U, y)
print("U: " + str(U))
print("X: " + str(x))
print("\nresidual norm:", npla.norm(b - A @ x))

Y: [2. 1. 4.]
U: [[ 1.  1.  1.]
 [ 0.  2.  3.]
 [ 0.  0. -2.]]
X: [ 0.5  3.5 -2. ]


NameError: name 'npla' is not defined

In [49]:
import numpy as np
import numpy.linalg as npla
t = 10**-4
A = np.array([[t,1],[1,1]])
b = np.array([t+2,3])
L, U, p = LUfactor(A, True)
y = Lsolve(L, b[p])
x = Usolve(U, y)
t_predict = np.array([[1],[2]])
print(x)
print("\nrelative residual norm:", npla.norm(t_predict - x)/npla.norm(t_predict))

[1. 2.]

relative residual norm: 0.6324555320336761
