In [2]:
from scipy.sparse.linalg import svds
import numpy as np
import pandas as pd

In [14]:
# Low-rank reconstruction of a small ratings-style matrix via truncated SVD.
#
# Steps: centre every row on its own mean, keep the N_LATENT_FACTORS largest
# singular triplets of the centred matrix, rebuild the matrix from them and
# add the row means back.

# Seed NumPy's global RNG. The sample matrix below is fixed, so the seed is
# not needed for the data itself. NOTE(review): depending on the SciPy
# version, svds may draw its ARPACK starting vector from this global RNG, so
# the seed is kept -- confirm against the installed SciPy.
np.random.seed(seed = 1)

# Number of singular triplets kept by the truncated decomposition.
N_LATENT_FACTORS = 2

# Fixed sample data (5 rows x 4 columns). The earlier np.random.randint draw
# was overwritten immediately and never used, so it has been removed.
data = np.array([[5,3,0,1],
     [4,0,0,1],
     [1,1,0,5],
     [1,0,0,4],
     [0,1,5,4]])

print('original: {}'.format(data))

# Per-row mean, 1-D with one entry per row of `data`
mean = np.mean(data, axis=1)

# Column-vector view of the row means so it broadcasts across the columns
mean_column = mean.reshape(-1, 1)

# Demean the data (so that every row has zero mean)
data_demeaned = data - mean_column

# svds requires k to be strictly smaller than both matrix dimensions
assert 0 < N_LATENT_FACTORS < min(data_demeaned.shape), \
    'N_LATENT_FACTORS must be in [1, min(data.shape) - 1]'

# Truncated Singular Value Decomposition; `sigma` comes back as a 1-D array
# (the recorded output of this cell lists the smaller singular value first).
U, sigma, Vt = svds(data_demeaned, k = N_LATENT_FACTORS)

# Promote the singular values to a diagonal matrix for the matrix products
sigma = np.diag(sigma)

print('U: \n{}\n'.format(U))
print('sigma: \n{}\n'.format(sigma))
print('Vt: \n{}\n'.format(Vt))

# Rank-N_LATENT_FACTORS approximation of the centred data, shifted back by
# the row means to return to the original scale.
predicted_ratings = np.dot(np.dot(U, sigma), Vt) + mean_column
print('predicted_ratings: \n{}\n'.format(predicted_ratings))


original: [[5 3 0 1]
 [4 0 0 1]
 [1 1 0 5]
 [1 0 0 4]
 [0 1 5 4]]
U: 
[[-0.18540429 -0.59393667]
 [-0.30892111 -0.3747321 ]
 [-0.69499063  0.25779275]
 [-0.60844446  0.21147617]
 [ 0.13029336  0.62899588]]

sigma: 
[[ 4.91892905  0.        ]
 [ 0.          6.2698682 ]]

Vt: 
[[-0.20568993  0.27108679  0.63140444 -0.6968013 ]
 [-0.71493407 -0.21981658  0.42453521  0.51021544]]

predicted_ratings: 
[[ 5.0999336   2.82134739  0.09323647  0.98548255]
 [ 3.24231091  1.35453072 -0.70691189  1.11007025]
 [ 1.29760663  0.46796473  0.27766226  4.95676639]
 [ 0.91765749  0.14720479 -0.07682426  4.01196198]
 [-0.45132814  1.80684524  4.57891801  4.06556489]]

