In [1]:
# Colab-only setup: mount Google Drive so the dataset folder is reachable.
from google.colab import drive
drive.mount('/content/gdrive')
# Make the dataset folder the working directory; the fold files are later
# opened by bare file name (e.g. 'u1.base'), so this cell must run first.
%cd '/content/gdrive/MyDrive/CFDataset/ml-100k'
# List the folder contents as a quick check that the files are present.
%ls
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation/numerical warnings from the libraries used below.
import warnings
warnings.filterwarnings('ignore')

Mounted at /content/gdrive
/content/gdrive/MyDrive/CFDataset/ml-100k
allbut.pl  u1.base  u2.test  u4.base  u5.test  ub.base  u.genre  u.occupation
mku.sh     u1.test  u3.base  u4.test  ua.base  ub.test  u.info   u.user
README     u2.base  u3.test  u5.base  ua.test  u.data   u.item


In [2]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
from scipy.sparse.linalg import lsqr
from scipy.sparse.linalg import cg

In [3]:
def readData(name):
  """Load one tab-separated ratings file into a DataFrame.

  The file is read without a header row; its columns are labelled
  userId, movieId, rating and timestamp, and every column is cast to int.

  Args:
    name: path (or other source accepted by ``pd.read_csv``) of the file.

  Returns:
    DataFrame with the four integer columns listed above.
  """
  column_names = ['userId', 'movieId', 'rating', 'timestamp']
  frame = pd.read_csv(name, sep="\t", header=None)
  frame.columns = column_names
  # One dict-based cast instead of four column-by-column assignments.
  return frame.astype({column: int for column in column_names})

In [4]:
def userItemMatrix(data, n_users=943, n_items=1682):
  """Build a dense user x item rating matrix from a ratings table.

  Ids in ``data`` are 1-based: the rating of user ``u`` for movie ``m`` is
  stored at ``matrix[u - 1, m - 1]``. Cells without a rating stay 0.

  Args:
    data: DataFrame with 'userId', 'movieId' and 'rating' columns. Rows are
      read by position, so the frame's index labels do not matter.
    n_users: number of rows of the matrix (default 943, the value that was
      previously hard-coded).
    n_items: number of columns of the matrix (default 1682, likewise).

  Returns:
    np.ndarray of shape (n_users, n_items).

  Raises:
    IndexError: if a userId is outside 1..n_users or a movieId is outside
      1..n_items (an id of 0 used to wrap around silently to the last
      row/column).
  """
  user_idx = data['userId'].to_numpy(dtype=int) - 1
  item_idx = data['movieId'].to_numpy(dtype=int) - 1
  ratings = data['rating'].to_numpy()

  if user_idx.size:
    if user_idx.min() < 0 or user_idx.max() >= n_users:
      raise IndexError(f"userId values must lie in 1..{n_users}")
    if item_idx.min() < 0 or item_idx.max() >= n_items:
      raise IndexError(f"movieId values must lie in 1..{n_items}")

  # Integer matrix unless the ratings themselves are floating point, which
  # mirrors what np.array() produced from the former list-of-lists.
  dtype = ratings.dtype if np.issubdtype(ratings.dtype, np.floating) else int
  matrix = np.zeros((n_users, n_items), dtype=dtype)
  # Single vectorized scatter instead of a Python loop over every row.
  # NOTE(review): for a repeated (user, movie) pair one rating survives;
  # current NumPy keeps the last one, as the loop did — confirm if relied on.
  matrix[user_idx, item_idx] = ratings
  return matrix

In [5]:
def MAEScore(actualR,predR):
  """Return the mean absolute error between actual and predicted ratings.

  Thin wrapper: both sequences are forwarded unchanged to
  ``mean_absolute_error`` (actual values first, predictions second).
  """
  score = mean_absolute_error(actualR, predR)
  return score

In [10]:
class LFM:
  """Latent factor model fitted by alternating least squares.

  The rating matrix is approximated by ``user_factor @ item_factor`` with
  ``user_factor`` of shape (n_users, n_factors) and ``item_factor`` of shape
  (n_factors, n_items). A rating of 0 is treated as "not observed": before
  each round of solves the unobserved cells are filled with the current
  prediction, so only observed ratings pull the factors.
  """

  def __init__(self, n_factor,  n_iteration, ratings, n_inner=10, seed=None):
    """Store the hyper-parameters and the training matrix.

    Args:
      n_factor: number of latent factors.
      n_iteration: number of outer iterations (re-imputation rounds).
      ratings: 2-D array (users x items); 0 marks a missing rating.
      n_inner: alternating solves per outer iteration (default 10, the
        value that was previously hard-coded).
      seed: optional seed for the random initial factors. ``None`` keeps
        the previous behaviour of drawing from NumPy's global random state.
    """
    self.n_factors = n_factor
    self.n_iterations = n_iteration
    self.ratings = ratings
    self.n_users = ratings.shape[0]
    self.n_items  = ratings.shape[1]
    self.n_inner = n_inner
    self.seed = seed

  def mask(self, X):
    """Zero, in place, every cell of X whose training rating is 0.

    Args:
      X: array with at least n_users rows and n_items columns.

    Returns:
      The same array object X, after modification.
    """
    # Slicing yields a view, so the boolean assignment writes into X itself.
    observed_region = X[:self.n_users, :self.n_items]
    observed_region[self.ratings == 0] = 0
    return X

  def ALS(self):
    """Fit the factors from a random start and return the predictions.

    Returns:
      Array of shape (n_users, n_items) with the predicted ratings.
    """
    # np.random.random is an alias of random_sample, so the unseeded path
    # consumes the global stream exactly as before.
    rng = np.random if self.seed is None else np.random.RandomState(self.seed)
    user_factor = rng.random_sample((self.n_users, self.n_factors))
    item_factor = rng.random_sample((self.n_factors, self.n_items))
    user_factor, item_factor = self.start_iterations(user_factor,item_factor)
    pred_ratings = user_factor.dot(item_factor)
    return pred_ratings

  def start_iterations(self, user_factor, item_factor):
    """Run the alternating least-squares updates.

    Args:
      user_factor: initial (n_users, n_factors) array.
      item_factor: initial (n_factors, n_items) array.

    Returns:
      Tuple ``(user_factor, item_factor)`` after all iterations.

    Raises:
      numpy.linalg.LinAlgError: if a Gram matrix of the factors is singular.
    """
    for _ in range(self.n_iterations):
      approx = user_factor.dot(item_factor)
      # Target: the current prediction everywhere, corrected on observed
      # cells by (rating - prediction). The product is computed once; mask()
      # works on a copy because it modifies its argument.
      B = approx + self.ratings - self.mask(approx.copy())
      for _ in range(self.n_inner):
        # Items given users: normal equations (U^T U) V = U^T B.
        item_factor = np.linalg.solve(user_factor.T@user_factor,user_factor.T@B)
        # Users given items: (V V^T) U^T = V B^T, transposed back.
        user_factor = np.linalg.solve(item_factor@item_factor.T,item_factor@B.T).T
    return user_factor,item_factor

In [11]:
def prediction(ratings_pred, testData):
  """Score a predicted rating matrix against held-out ratings.

  For every row of ``testData`` the prediction is read at
  ``ratings_pred[userId - 1, movieId - 1]`` and compared with the row's
  'rating'; the comparison is delegated to ``MAEScore``.

  Args:
    ratings_pred: 2-D array-like of predicted ratings (users x items).
    testData: DataFrame with 'userId', 'movieId' and 'rating' columns. Rows
      are read by position, so a non-default index (e.g. after filtering)
      no longer raises or selects the wrong rows.

  Returns:
    The value returned by ``MAEScore`` for the gathered pairs.
  """
  pred_matrix = np.asarray(ratings_pred)
  user_idx = testData['userId'].to_numpy() - 1
  movie_idx = testData['movieId'].to_numpy() - 1
  # One vectorized gather replaces the per-row Python loop.
  predicted = pred_matrix[user_idx, movie_idx]
  actual = testData['rating'].to_numpy()
  return MAEScore(actual, predicted)

In [12]:
def normalize(ratings_pred):
  """Turn raw predictions into ratings on the 1..5 scale, in place.

  Each value is replaced by its absolute value, rounded to the nearest
  integer (ties go to the even neighbour) and then clamped to [1, 5].

  Args:
    ratings_pred: 2-D NumPy array of predictions, or a nested list of rows.

  Returns:
    The same object that was passed in, after modification. Empty input is
    returned unchanged instead of raising IndexError.
  """
  if isinstance(ratings_pred, np.ndarray):
    # Whole-array operations replace the per-element Python loop; writing
    # through [...] keeps the update in place for callers holding the array.
    ratings_pred[...] = np.clip(np.rint(np.abs(ratings_pred)), 1, 5)
    return ratings_pred

  # Nested lists (or other row sequences) keep the element-wise path.
  for row in ratings_pred:
    for j in range(len(row)):
      row[j] = min(5, max(1, round(abs(row[j]))))
  return ratings_pred

In [21]:
# Evaluation over folds 1..5 with 10 latent factors and 30 ALS iterations.
for fold in range(1, 6):
  trainData = readData(f'u{fold}.base')
  testData = readData(f'u{fold}.test')

  matrix = userItemMatrix(trainData)
  LFM_class = LFM(n_factor=10, n_iteration=30, ratings=matrix)
  # Fit, then snap the raw predictions onto the 1..5 rating scale.
  ratings_pred = normalize(LFM_class.ALS())
  # The MAE is divided by 4, the width of the [1, 5] range normalize clamps to.
  NMAE = prediction(ratings_pred, testData) / 4
  print(f"Fold {fold} NMAE score: {NMAE}")

Fold 1 NMAE score: 0.185
Fold 2 NMAE score: 0.18375
Fold 3 NMAE score: 0.1817875
Fold 4 NMAE score: 0.18235
Fold 5 NMAE score: 0.1857125


In [29]:
# Same evaluation over folds 1..5, now with 7 latent factors (30 iterations).
for fold in range(1, 6):
  trainData = readData(f'u{fold}.base')
  testData = readData(f'u{fold}.test')

  matrix = userItemMatrix(trainData)
  LFM_class = LFM(n_factor=7, n_iteration=30, ratings=matrix)
  # Fit, then snap the raw predictions onto the 1..5 rating scale.
  ratings_pred = normalize(LFM_class.ALS())
  # The MAE is divided by 4, the width of the [1, 5] range normalize clamps to.
  NMAE = prediction(ratings_pred, testData) / 4
  print(f"Fold {fold} NMAE score: {NMAE}")

Fold 1 NMAE score: 0.18745
Fold 2 NMAE score: 0.182375
Fold 3 NMAE score: 0.1822875
Fold 4 NMAE score: 0.1830875
Fold 5 NMAE score: 0.1857875


In [30]:
# Same evaluation over folds 1..5, with 10 latent factors and 45 iterations.
for fold in range(1, 6):
  trainData = readData(f'u{fold}.base')
  testData = readData(f'u{fold}.test')

  matrix = userItemMatrix(trainData)
  LFM_class = LFM(n_factor=10, n_iteration=45, ratings=matrix)
  # Fit, then snap the raw predictions onto the 1..5 rating scale.
  ratings_pred = normalize(LFM_class.ALS())
  # The MAE is divided by 4, the width of the [1, 5] range normalize clamps to.
  NMAE = prediction(ratings_pred, testData) / 4
  print(f"Fold {fold} NMAE score: {NMAE}")

Fold 1 NMAE score: 0.1841
Fold 2 NMAE score: 0.1812875
Fold 3 NMAE score: 0.179875
Fold 4 NMAE score: 0.1829125
Fold 5 NMAE score: 0.1825375
