In [1]:
import numpy as np
from scipy.io import loadmat

---
# Question 2
---

In [2]:
# Parse the .mat file a single time and pull every variable out of that one
# result, instead of re-reading the file from disk once per variable.
mat_contents = loadmat("incomplete.mat")
# Xtrue is the reference matrix; Y1, Y2, Y3 are the copies with NaN-marked
# missing entries that get completed below.
Xtrue, Y1, Y2, Y3 = (mat_contents[key] for key in ("Xtrue", "Y1", "Y2", "Y3"))

In [3]:
### DO NOT change
def ItSingValThresh(Y, r):
    """
    Iterative Singular Value Thresholding function for Matrix Completion.

    Starting from ``Y`` with its missing entries set to zero, each iteration
    replaces the current estimate by its rank-``r`` truncated SVD and then
    writes the observed entries of ``Y`` back in. Iteration stops when two
    successive estimates differ by at most 1e-3 in Frobenius norm, or after
    100 iterations.

    Parameters
    ----------
    Y : array_like, 2-D
        Partially observed matrix; missing entries are marked with NaN.
        ``Y`` is not modified.
    r : int
        Number of leading singular values kept in each low-rank projection.

    Returns
    -------
    numpy.ndarray
        Array with the shape of ``Y`` whose observed entries equal those of
        ``Y`` and whose missing entries come from the last rank-``r``
        approximation.
    """
    tol = 10**(-3)  # difference between iterates at termination
    max_its = 100

    Y = np.asarray(Y)
    # The set of observed entries never changes, so compute the mask once
    # instead of re-testing every element inside the iteration loop.
    observed = ~np.isnan(Y)

    X = np.array(Y)  # make a copy so operations do not mutate the original
    X[~observed] = 0  # fill in missing entries with zeros

    err = 10**6  # larger than tol so the loop body runs at least once
    itt = 0

    while err > tol and itt < max_its:
        U, s, VT = np.linalg.svd(X, full_matrices=False)
        # Rank-r reconstruction: scale the first r left singular vectors by
        # their singular values (same product as U_r @ diag(s_r) @ V_r^T,
        # without building the dense diagonal matrix).
        Xnew = (U[:, :r] * s[:r]) @ VT[:r, :]
        Xnew[observed] = Y[observed]  # replace Xnew with known entries
        err = np.linalg.norm(X - Xnew, 'fro')
        X = Xnew
        itt += 1
    return X

In [4]:
# Dump each partially observed matrix under its own heading; the NaN entries
# are the values that will be filled in by the completion routine.
print("\nY1:\n", Y1, sep="\n")
print("\nY2:\n", Y2, sep="\n")
print("\nY3:\n", Y3, sep="\n")


Y1:

[[nan nan 24.  8. 12. 14. nan nan 22. nan nan 10. 14. 24. nan nan]
 [nan 21. nan nan nan nan nan nan nan 15. 17. nan  8. 15. 23. nan]
 [nan nan nan nan  9. nan nan nan nan  9.  8. nan  5. nan nan  6.]
 [nan nan nan 10. nan nan nan nan 23. nan 17. nan nan 26. 24. nan]
 [nan  9.  9. nan nan nan nan nan nan  9.  8. nan nan  9. 11. nan]
 [nan 11. 19.  7. nan 13. 10. 11. nan 19. nan  9. 11. 19. nan nan]
 [45. nan nan nan nan nan 18. nan nan nan nan 19. 17. 30. 32. nan]
 [nan nan 21. nan 15. 18. nan 15. 18. nan 15. nan 12. 21. nan 12.]
 [nan 11. nan  7. nan 13. 10. 11. 17. 19. nan  9. nan 19. nan nan]
 [nan 13. nan  7. nan 16. nan nan  8. 11. 11. nan nan nan 15.  8.]
 [45. 24. 30. 14. 24. 29. 18. nan 25. nan 23. nan nan nan 32. 18.]
 [nan 15. nan nan nan nan 12. 15. 18. 21. 15. 12. nan 21. 21. nan]
 [25. nan 13. nan 17. nan 10. nan nan 13. nan 13.  7. 13. 19. 10.]
 [nan nan nan 12. nan nan 16. nan nan nan nan nan 16. nan 28. 16.]
 [nan nan 18. 10. nan 22. 12. nan nan nan 16. nan 10. na

In [5]:
# Complete all three partially observed matrices with a rank-2 model.
completedY1, completedY2, completedY3 = [
    ItSingValThresh(partial, 2) for partial in (Y1, Y2, Y3)
]

In [6]:
# Display the reference matrix that the completed matrices are compared against.
Xtrue

array([[30, 12, 24,  8, 12, 14, 12, 12, 22, 24, 14, 10, 14, 24, 20, 12],
       [30, 21, 15, 11, 21, 26, 12, 21, 10, 15, 17, 16,  8, 15, 23, 12],
       [15,  9,  9,  5,  9, 11,  6,  9,  7,  9,  8,  7,  5,  9, 11,  6],
       [35, 16, 26, 10, 16, 19, 14, 16, 23, 26, 17, 13, 15, 26, 24, 14],
       [15,  9,  9,  5,  9, 11,  6,  9,  7,  9,  8,  7,  5,  9, 11,  6],
       [25, 11, 19,  7, 11, 13, 10, 11, 17, 19, 12,  9, 11, 19, 17, 10],
       [45, 24, 30, 14, 24, 29, 18, 24, 25, 30, 23, 19, 17, 30, 32, 18],
       [30, 15, 21,  9, 15, 18, 12, 15, 18, 21, 15, 12, 12, 21, 21, 12],
       [25, 11, 19,  7, 11, 13, 10, 11, 17, 19, 12,  9, 11, 19, 17, 10],
       [20, 13, 11,  7, 13, 16,  8, 13,  8, 11, 11, 10,  6, 11, 15,  8],
       [45, 24, 30, 14, 24, 29, 18, 24, 25, 30, 23, 19, 17, 30, 32, 18],
       [30, 15, 21,  9, 15, 18, 12, 15, 18, 21, 15, 12, 12, 21, 21, 12],
       [25, 17, 13,  9, 17, 21, 10, 17,  9, 13, 14, 13,  7, 13, 19, 10],
       [40, 20, 28, 12, 20, 24, 16, 20, 24, 28, 20,

In [7]:
# Element-wise error (completion minus Xtrue) for each rank-2 result.
# Y1 is rounded to 1 decimal, Y2 and Y3 to 3 decimals.
print("Difference between rank 2 ISVT of Y1 and Xtrue matrix: \n",
      (completedY1 - Xtrue).round(1), sep="\n")
print("\nDifference between rank 2 ISVT of Y2 and Xtrue matrix: \n",
      (completedY2 - Xtrue).round(3), sep="\n")
print("\nDifference between rank 2 ISVT of Y3 and Xtrue matrix: \n",
      (completedY3 - Xtrue).round(3), sep="\n")

Difference between rank 2 ISVT of Y1 and Xtrue matrix: 

[[-40.7  -1.3   0.    0.    0.    0.   -0.3   9.7   0.    0.5   0.    0.
    0.    0.   -0.3   0.1]
 [ 27.8   0.    2.   -0.6  -0.9  -1.5   0.4 -25.7   2.5   0.    0.   -0.8
    0.    0.    0.    0.2]
 [  2.1  -0.5   0.7  -0.2   0.   -0.7   0.   -6.9   0.8   0.    0.   -0.4
    0.    0.1  -0.2   0. ]
 [-30.3  -1.1   0.4   0.    0.1  -0.3  -0.1   2.7   0.    0.2   0.   -0.3
    0.5   0.    0.    0.2]
 [  3.5   0.    0.   -0.2  -0.2  -0.4   0.1  -7.4   0.6   0.    0.   -0.3
    0.3   0.    0.    0.1]
 [-18.7   0.    0.    0.    0.9   0.    0.    0.   -0.9   0.    0.4   0.
    0.    0.    0.2   0.2]
 [  0.    0.8   0.3   0.2   1.5   1.4   0.  -14.6  -0.2  -1.4   0.8   0.
    0.    0.    0.    0.5]
 [-20.4  -1.6   0.   -0.4   0.    0.   -0.2   0.    0.    0.4   0.   -0.7
    0.    0.   -0.5   0. ]
 [-18.9   0.   -0.3   0.    0.9   0.    0.    0.    0.    0.    0.4   0.
   -0.1   0.    0.2   0.2]
 [ 14.7   0.    0.7   0.   -0.1   0.  

We see that the accuracy of the completion depends on the number of missing values. Y1 has the most missing entries, and the completed Y1 differs substantially from Xtrue in many individual elements. Y2 has fewer missing entries, and the differences between the completed Y2 and Xtrue are much smaller, almost 0. Y3 has the fewest missing entries, and the difference between the completed Y3 and Xtrue is practically negligible in every element.

# 2b

In [8]:
# Repeat the completion with a rank-3 model.
# NOTE: this rebinds completedY1/2/3, so the rank-2 results computed earlier
# are no longer available after this cell runs.
completedY1, completedY2, completedY3 = [
    ItSingValThresh(partial, 3) for partial in (Y1, Y2, Y3)
]

In [9]:
# Display the reference matrix again for comparison with the rank-3 completions.
Xtrue

array([[30, 12, 24,  8, 12, 14, 12, 12, 22, 24, 14, 10, 14, 24, 20, 12],
       [30, 21, 15, 11, 21, 26, 12, 21, 10, 15, 17, 16,  8, 15, 23, 12],
       [15,  9,  9,  5,  9, 11,  6,  9,  7,  9,  8,  7,  5,  9, 11,  6],
       [35, 16, 26, 10, 16, 19, 14, 16, 23, 26, 17, 13, 15, 26, 24, 14],
       [15,  9,  9,  5,  9, 11,  6,  9,  7,  9,  8,  7,  5,  9, 11,  6],
       [25, 11, 19,  7, 11, 13, 10, 11, 17, 19, 12,  9, 11, 19, 17, 10],
       [45, 24, 30, 14, 24, 29, 18, 24, 25, 30, 23, 19, 17, 30, 32, 18],
       [30, 15, 21,  9, 15, 18, 12, 15, 18, 21, 15, 12, 12, 21, 21, 12],
       [25, 11, 19,  7, 11, 13, 10, 11, 17, 19, 12,  9, 11, 19, 17, 10],
       [20, 13, 11,  7, 13, 16,  8, 13,  8, 11, 11, 10,  6, 11, 15,  8],
       [45, 24, 30, 14, 24, 29, 18, 24, 25, 30, 23, 19, 17, 30, 32, 18],
       [30, 15, 21,  9, 15, 18, 12, 15, 18, 21, 15, 12, 12, 21, 21, 12],
       [25, 17, 13,  9, 17, 21, 10, 17,  9, 13, 14, 13,  7, 13, 19, 10],
       [40, 20, 28, 12, 20, 24, 16, 20, 24, 28, 20,

In [10]:
# Element-wise error (completion minus Xtrue) for each rank-3 result.
# Y1 is rounded to 1 decimal, Y2 to 2 decimals and Y3 to 3 decimals.
print("Difference between rank 3 ISVT of Y1 and Xtrue matrix: \n",
      (completedY1 - Xtrue).round(1), sep="\n")
print("\nDifference between rank 3 ISVT of Y2 and Xtrue matrix: \n",
      (completedY2 - Xtrue).round(2), sep="\n")
print("\nDifference between rank 3 ISVT of Y3 and Xtrue matrix: \n",
      (completedY3 - Xtrue).round(3), sep="\n")

Difference between rank 3 ISVT of Y1 and Xtrue matrix: 

[[-50.   -3.    0.    0.    0.    0.   -0.7   9.9   0.    1.    0.5   0.
    0.    0.   -0.3   0.5]
 [ 13.7   0.   -0.1  -0.9  -3.  -10.5   0.6 -23.8   4.8   0.    0.   -1.9
    0.    0.    0.    0.2]
 [ -2.   -3.1   4.   -0.3   0.    4.   -0.1  -7.8   1.1   0.    0.   -0.8
    0.   -1.7  -0.5   0. ]
 [-35.9   1.5  -6.4   0.    0.9  -9.9  -0.3   1.5   0.   -1.6   0.    0.
    0.3   0.    0.    0.5]
 [ -3.    0.    0.   -0.2  -0.7  -3.5   0.2  -6.4   1.6   0.    0.   -0.5
    0.8   0.    0.    0.3]
 [-24.1   0.    0.    0.    1.6   0.    0.    0.   -0.3   0.    1.1   0.
    0.    0.    0.9   0.8]
 [  0.   19.  -35.3  -1.1   1.  -54.4   0.  -17.8  -2.7 -13.2  -2.6   0.
    0.    0.    0.   -0.4]
 [-31.8  -4.    0.   -0.8   0.    0.   -0.7   0.    0.    0.9   0.   -1.4
    0.    0.   -1.1   0. ]
 [-23.9   0.   -0.8   0.    1.6   0.    0.    0.    0.    0.    1.1   0.
    0.1   0.    0.9   0.7]
 [ 12.5   0.    1.8   0.   -0.9   0.   

Xtrue is a rank 2 matrix, and we see that completing a given Y matrix with rank 2 gives less error than completing the same Y matrix with rank 3. Individual elements have much larger errors with rank 3 than with rank 2, regardless of how many values were originally missing from the Y matrix. This is expected: the extra singular component allowed by rank 3 is not present in Xtrue, so it can only fit the zero-filled missing entries rather than the true structure. As an example, for rank 2 the completed Y3 can safely be said to equal Xtrue, since the difference between the two matrices is negligible. For rank 3, however, the completed Y3 and Xtrue differ by a non-zero amount in several elements.