In [12]:
import scipy.io
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import linalg
from time import time

In [21]:
def sharpNuc(Z, kappa):
    """Sharp operator for the nuclear norm ball constraint.

    Computes ``-kappa * u @ vt``, where ``u`` and ``vt`` are the left and
    right singular vectors of ``Z`` for its largest singular value.

    Parameters
    ----------
    Z : sparse matrix or ndarray, shape (M, N)
        Input matrix. On the ``svds`` path it must be something ``svds``
        accepts (e.g. a floating-point sparse matrix).
    kappa : float
        Scaling factor (radius of the nuclear norm ball).

    Returns
    -------
    ndarray, shape (M, N)
        Dense rank-one matrix ``-kappa * u @ vt``.
    """
    if min(Z.shape) < 2:
        # svds with its default ARPACK solver needs k < min(Z.shape), so a
        # single-row/column input would raise. Such an input is tiny along
        # one axis, so a dense SVD is cheap and gives the same leading pair.
        dense = Z.toarray() if hasattr(Z, "toarray") else np.asarray(Z, dtype=float)
        u, _, vt = np.linalg.svd(dense, full_matrices=False)
        u, vt = u[:, :1], vt[:1, :]
    else:
        # Only the leading singular triplet is needed; the singular value
        # itself is not used.
        u, _, vt = linalg.svds(Z, k=1)

    # Scale the (M, 1) vector before the outer product, as the original did.
    return (-kappa * u) @ vt

In [28]:
# Read the 100k dataset from its MATLAB file.
data = scipy.io.loadmat('./dataset/ml-100k/ub_base')

# Ids in the file are 1-based (MATLAB); shift them to 0-based for Python.
MovID = data['MovID'].flatten() - 1
UserID = data['UserID'].flatten() - 1
Rating = data['Rating'].flatten()

# Matrix dimensions are the largest (still 1-based) ids found in the file.
nM, nU = (np.amax(data[field]) for field in ('MovID', 'UserID'))

# Sparse float matrix with rows indexed by MovID and columns by UserID.
kappa = 5000
Z = csr_matrix((Rating, (MovID, UserID)), shape=(nM, nU), dtype=float)

# Time one evaluation of the sharp operator.
tstart = time()
Z_proj = sharpNuc(Z, kappa)
elapsed = time() - tstart
print('sharp of 100k data takes {} sec'.format(elapsed))

sharp of 100k data takes 0.04544949531555176 sec


In [29]:
# Show the first 20 entries of the flattened Rating array.
Rating[0:20]

array([5, 3, 4, 3, 3, 5, 4, 1, 5, 3, 2, 5, 5, 5, 5, 5, 4, 5, 4, 1],
      dtype=uint8)

In [30]:
# Show the first 20 rows of column 0 of the sharp-operator output.
Z_proj[:20, 0]

array([-31.84960272, -12.19246172,  -6.69408692, -20.24625317,
        -7.5317583 ,  -1.65817068, -27.54142438, -21.66204169,
       -21.46827497,  -7.20190624, -22.47228126, -27.02582488,
       -13.09306912, -12.509695  , -18.86603159,  -2.6786904 ,
        -7.55149913,  -0.87429869,  -4.13837519,  -5.16173825])

In [31]:
# NOTE: This one can take a few minutes!
# Read the 1M dataset from its MATLAB file.
data = scipy.io.loadmat('./dataset/ml-1m/ml1m_base')

# Ids in the file are 1-based (MATLAB); shift them to 0-based for Python.
MovID = data['MovID'].flatten() - 1
UserID = data['UserID'].flatten() - 1
Rating = data['Rating'].flatten()

# Matrix dimensions are the largest (still 1-based) ids found in the file.
nM, nU = (np.amax(data[field]) for field in ('MovID', 'UserID'))

# Sparse float matrix with rows indexed by MovID and columns by UserID.
kappa = 5000
Z = csr_matrix((Rating, (MovID, UserID)), shape=(nM, nU), dtype=float)

# Time one evaluation of the sharp operator.
tstart = time()
Z_proj = sharpNuc(Z, kappa)
elapsed = time() - tstart
print('sharp of 1M data takes {} sec'.format(elapsed))

sharp of 1M data takes 0.369107723236084 sec


In [32]:
# Show the first 20 entries of the flattened Rating array.
Rating[0:20]

array([5, 3, 3, 4, 5, 3, 5, 5, 4, 4, 5, 4, 4, 5, 4, 3, 4, 5, 4, 3],
      dtype=uint8)

In [33]:
# Show the first 20 rows of column 0 of the sharp-operator output.
Z_proj[:20, 0]

array([-1.74490371, -0.59442132, -0.34728259, -0.12315799, -0.24135477,
       -0.91886848, -0.40419198, -0.05231632, -0.08135042, -0.79167959,
       -0.89482339, -0.11179658, -0.08815798, -0.17709768, -0.10574404,
       -0.68334762, -0.69126119, -0.18484565, -0.25043436, -0.13396691])