In [39]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

In [48]:
def read_data(path):      #read data from path
  df = pd.read_csv(path, sep="\t")
  df.columns = ["User_ID", "Movie_ID", "Rating", "Timestamp"]
  mat = np.zeros([943, 1682])
  for idx, data in df.iterrows():
    user = data["User_ID"]
    movie = data["Movie_ID"]
    rating = data["Rating"]
    mat[user-1, movie-1]=rating
  return mat

In [49]:
#R matrix is the same as M matrix given in the paper

def create_r(y):      #create the R matrix as mentioned in class notes
  r = np.zeros_like(y)      #has the same dimensions as ratings matrix
  for i in range(y.shape[0]):
    for j in range(y.shape[1]):
      if y[i,j]!=0:
        r[i,j]=1 
  return r

In [50]:
def nmae(test_path, x):
  test = pd.read_csv(test_path, sep='\t')   #read test data
  test.columns = ["User_ID", "Movie_ID", "Rating", "Timestamp"]

  error = 0
  for idx, data in test.iterrows():
    user = data["User_ID"]
    movie = data["Movie_ID"]
    rating = data["Rating"] 
    error+=abs(x[user-1, movie-1]-rating)   #calculate error

  mae = error/test.shape[0]   #calculate NMAE
  n_mae=mae/4
  return n_mae

In [51]:
file_path = "/content/gdrive/MyDrive/ml-100k/"
train_path = file_path + "u" +str(1) + ".base"
test_path = file_path + "u" +str(1) + ".test"

y = read_data(train_path)
x = np.random.randint(1, 6, [y.shape[0], y.shape[1]])
r = create_r(y)

In [54]:
import scipy

lamb = 2      #setting hyperparameters
epochs = 50
p = 1


for i in range(epochs):
  b = x + y - np.multiply(r, x)     #Create b as mentioned in the algorithm
  u, sigma, v = scipy.sparse.linalg.svds(b)   #apply SVD on b
  sigma = np.diag(sigma)      #diagonalise the singular values

  #create a new diagonal matrix for the Schatten-p norm
  sigma_p = np.zeros_like(sigma) 

  for i in range(sigma.shape[0]):
    for j in range(sigma.shape[1]):
      #define threshold for the soft threshold function as mentioned in the paper
      if sigma[i, j] != 0:
        threshold = (lamb/2)*p*(abs(sigma[i, j])**(p-1))  
      else:
        threshold = 0 #only defined when singular value != 0

      #apply soft thresholding to singular values with above threshold
      sigma_p[i, j] = np.sign(sigma[i, j])*max(abs(sigma[i, j])-threshold, 0)

  #Uptate x
  x = u@sigma_p@v

error = nmae(test_path, x)
print("NMAE = ", error)

NMAE =  0.1866134973429396
