In [None]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy.linalg as la

In [None]:
np.set_printoptions(formatter={'float': '{: 0.1f}'.format})

In [None]:
np.random.seed(123)

In [None]:
%load_ext rpy2.ipython

In [None]:
import pandas as pd

# Applications of SVD

## Reconstruction

In [None]:
iris = %R iris

In [None]:
# Feature matrix as a plain ndarray: every column of `iris` except the last one
# (presumably the Species label, which the scatter plots read by name).
X = iris.iloc[:, :-1].to_numpy()

In [None]:
X[:5]

In [None]:
# Thin (economy) SVD of the raw feature matrix. With full_matrices=False,
# U keeps only min(n, p) columns and Vt only min(n, p) rows, and
# X == U @ diag(s) @ Vt (checked in the reconstruction cell below).
U, s, Vt = la.svd(X, full_matrices=False)

In [None]:
U.shape, s.shape, Vt.shape

In [None]:
# Rebuild X from its factors and show the first rows for comparison with X[:5].
Sigma = np.diag(s)
X_rebuilt = U @ Sigma @ Vt
X_rebuilt[:5]

## PCA

Center the data

In [None]:
# Subtract each column's mean so every feature has zero mean.
column_means = X.mean(axis=0)
Xc = X - column_means

In [None]:
Xc[:5]

Find SVD

In [None]:
U, s, Vt = la.svd(Xc, full_matrices=False)

PCA is $U \Sigma$

In [None]:
# Scores on the first two principal components: leading two columns of U
# scaled by their singular values.
n_components = 2
U_lead = U[:, :n_components]
Sigma_lead = np.diag(s[:n_components])
Y = U_lead @ Sigma_lead

In [None]:
# Scatter of the U*Sigma scores, coloured by the integer code of each species.
species_codes = iris['Species'].astype('category').cat.codes
fig, ax = plt.subplots()
ax.scatter(Y[:, 0], Y[:, 1], c=species_codes)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
pass

PCA is also $XV$, where $X$ is the centered data matrix

In [None]:
# Project the *centered* data onto the first two right singular vectors.
# Vt comes from the SVD of Xc, so Xc @ V == U @ diag(s) and Z matches Y.
# Projecting the raw X instead would shift every score by the constant
# X.mean(0) @ V.
Z = Xc @ Vt.T[:, :2]

In [None]:
# Scatter of the X*V scores, coloured by the integer code of each species.
species_codes = iris['Species'].astype('category').cat.codes
fig, ax = plt.subplots()
ax.scatter(Z[:, 0], Z[:, 1], c=species_codes)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
pass

Check with PCA routine. It should be the same (eigenvectors may flip)

In [None]:
from sklearn.decomposition import PCA

In [None]:
pca = PCA(n_components=2)

In [None]:
# Fit the 2-component PCA on the centered data and return the projected scores.
# NOTE: scikit-learn's PCA centers its input itself, so the pre-centering is
# harmless; passing X should give the same scores.
Y1 = pca.fit_transform(Xc)

In [None]:
# Scatter of the scikit-learn PCA scores, coloured by species code.
species_codes = iris['Species'].astype('category').cat.codes
fig, ax = plt.subplots()
ax.scatter(Y1[:, 0], Y1[:, 1], c=species_codes)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
pass

Flip directions for the second eigenvector

In [None]:
# Same scatter with the sign of the second component negated, to line it up
# with the SVD-based plots (singular vectors are only defined up to sign).
species_codes = iris['Species'].astype('category').cat.codes
pc1_scores = Y1[:, 0]
pc2_scores_flipped = -Y1[:, 1]
fig, ax = plt.subplots()
ax.scatter(pc1_scores, pc2_scores_flipped, c=species_codes)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
pass

## Data compression (Low rank approximations)

In [None]:
# Toy 25x15 image: white background (1), a black rectangle (0) inset by
# 5 rows / 2 columns, and a white bar inside it inset by 8 rows / 6 columns.
X = np.ones((25, 15))
X[5:20, 2:13] = 0
X[8:17, 6:9] = 1

In [None]:
plt.imshow(X, cmap='gray')
pass

In [None]:
U, s, Vt = la.svd(X, full_matrices=False)

Note that there are only 3 types of columns, and so 3 singular values suffice to capture all the information.

In [None]:
np.cumsum(s)/s.sum()

In [None]:
X1 = U[:, :3] @ np.diag(s[:3]) @ Vt[:3, :]

In [None]:
# Show the rank-3 reconstruction X1 (not the original X again) so it can be
# compared visually with the image plotted before the SVD.
plt.imshow(X1, cmap='gray')
pass

Using MNIST example

In [None]:
# The pjreddie MNIST CSV has no header row: every line is
# `label, pix-11, pix-12, ...`. header=None stops pandas from consuming the
# first digit as column names.
mnist = pd.read_csv('https://pjreddie.com/media/files/mnist_test.csv', header=None)

In [None]:
mnist.shape

In [None]:
# In this CSV the digit label is the FIRST column and the 784 pixels follow,
# so skip column 0. Slicing `:-1` would keep the label as a pixel and drop the
# last real pixel, shifting the image by one position.
img = mnist.iloc[0, 1:].values.reshape((28,28))

In [None]:
plt.imshow(img, cmap='gray')
pass

In [None]:
U, s, Vt = la.svd(img, full_matrices=False)

In [None]:
img1 = U[:, :1] @ np.diag(s[:1]) @ Vt[:1, :]

In [None]:
plt.imshow(img1, cmap='gray')
pass

In [None]:
np.cumsum(s)/s.sum()

In [None]:
# Rank-k approximation of the digit: keep only the k leading singular triplets.
k = 6
U_k, s_k, Vt_k = U[:, :k], s[:k], Vt[:k, :]
imgk = U_k @ np.diag(s_k) @ Vt_k

In [None]:
plt.imshow(imgk, cmap='gray')
pass

We get slightly more than 50% compression with $k=6$. Note that there are better methods for image compression.

In [None]:
# Number of stored values in each truncated factor (U_k, s_k, Vt_k).
sizes = tuple(factor.size for factor in (U[:, :k], s[:k], Vt[:k, :]))

In [None]:
sizes

In [None]:
img.size

In [None]:
sum(sizes)

## Denoising

SVD by itself can do some denoising, but effective use requires more sophisticated algorithms such as [k-SVD](https://en.wikipedia.org/wiki/K-SVD)

In [None]:
# Add zero-mean Gaussian noise (standard deviation 30) to every pixel, then
# clamp the result to the range [0, 255].
noise = np.random.normal(loc=0, scale=30, size=img.shape)
img_noise = np.clip(img + noise, 0, 255)

In [None]:
plt.imshow(img_noise, cmap='gray')
pass

In [None]:
U, s, Vt = la.svd(img_noise, full_matrices=False)

In [None]:
np.cumsum(s)/s.sum()

In [None]:
# Rank-k reconstruction of the noisy image from its k leading singular triplets.
k = 6
leading = slice(None, k)
imgk_noise = U[:, leading] @ np.diag(s[leading]) @ Vt[leading, :]

In [None]:
plt.imshow(imgk_noise, cmap='gray')
pass

## Recommender system

Based on toy example from this [blog post](https://hackernoon.com/introduction-to-recommender-system-part-1-collaborative-filtering-singular-value-decomposition-44c9659c5e75)

In [None]:
from collections import OrderedDict

We have a database of movies and user ratings, but since most users watch and rate only a small subset of all possible movies, there is a lot of missing data. Our job is to predict what other movies a user might like, based on the movies that the user has rated. 

Recall that SVD gives the optimal (in terms of Frobenius norm) low rank reconstruction for a matrix. This is true even for sparse matrices, and we make use of this to make predictions about user movie preferences.

Note: Real world recommender systems based on SVD calculate an approximate SVD using iterative methods for computational efficiency, but the idea is the same - we assume that the data can be modeled by $k$ latent factors, then reconstruct the rank-$k$ matrix. You'd also normalize the data in a real-use case.

In [None]:
# Toy user-by-movie rating table; None marks a movie the user has not rated.
movie_titles = ['The Avengers', 'Sherlock', 'Transformers',
                'Matrix', 'Titanic', 'Me Before You']
user_ids = list('ABCDEF')
rating_rows = [
    [2, None, 2, 4, 5, None],
    [5, None, 4, None, None, 1],
    [None, None, 5, None, 2, None],
    [None, 1, None, 5, None, 4],
    [None, None, 4, None, None, 2],
    [4, 5, None, 1, None, None],
]
ratings = pd.DataFrame(rating_rows, index=user_ids, columns=movie_titles)

In [None]:
# Store the ratings as a sparse float frame with NaN as the fill value, so that
# only the observed ratings are kept as explicit entries.
ratings = ratings.astype(pd.SparseDtype("float", np.nan))
ratings

We need to deal with the sparsity.

In [None]:
from scipy.sparse.linalg import svds

In [None]:
# Convert to a SciPy COO matrix holding only the observed (non-NaN) ratings.
# NOTE: unrated entries become implicit zeros in the matrix that svds
# factorises below.
X = ratings.sparse.to_coo()

In [None]:
print(X)

In [None]:
# Truncated sparse SVD. With the default solver svds requires
# k < min(n_rows, n_cols), hence the "- 1": this requests the largest k allowed.
U, s, Vt = svds(X, k=min(ratings.shape)-1)

In [None]:
s

svds gives singular values in ascending order, so we need to perform a permutation to get it in the familiar form.

In [None]:
# Reorder the factors so the singular values are in descending order.
# Sorting explicitly (rather than blindly reversing) does not depend on the
# order svds happens to return, and it makes this cell idempotent: a plain
# reversal would flip the order back to ascending if the cell were re-run.
perm = np.argsort(s)[::-1]
U = U[:, perm]    # permute the columns (left singular vectors)
s = s[perm]
Vt = Vt[perm, :]  # permute the rows (right singular vectors)

In [None]:
# Rank-k reconstruction of the rating matrix: the filled-in entries serve as
# predicted ratings for the movies a user has not rated.
k = 3
U_k, s_k, Vt_k = U[:, :k], s[:k], Vt[:k, :]
Y = U_k @ np.diag(s_k) @ Vt_k
Y

In [None]:
# Side-by-side table of one user's observed ratings and the rank-k predictions.
user = 'E'
user_row = ratings.index.get_loc(user)  # positional row of this user in Y
pd.DataFrame({
    'Observed': ratings.loc[user].sparse.to_dense(),
    'Predicted': Y[user_row],
})