In [1]:
# Import libraries

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Class Recommend for loadind data, dimensionality reduction and prediction methods

In [4]:
class Recommend():
    def __init__(self,similarity,user,N):
        self.similiarity = similarity
        self.user = user
        self.N = N
        self.threshold = 0.9
        
    def loadData(self,path):
        self.ratings_matrix = pd.read_csv(path, index_col=0)
        self.data = np.mat(self.ratings_matrix)      
        
    def processOriginalData(self,path):
        pageId = []
        description = []
        url = []
        with open(path, 'r') as file:
            lines = file.read().splitlines()
            for line in lines:
                items = line.split(',')
                if items[0] == 'A':
                    pageId.append('X' + str(items[1]))
                    description.append(items[3])
                    url.append(items[4])
        data_table = pd.DataFrame({'Description': description, 'url': url}, index=pageId)
        return data_table
    
    def processSVD(self):
        U, Sigma, VT = np.linalg.svd(self.data, full_matrices=False)
        return U,Sigma,VT
    
    def processSingularValue(self,Sigma):
        square  = 0
        energy = 0
        for si in Sigma:
            square = si*si
            energy = energy+square 
        threshold =energy*self.threshold
        energy = 0
        for i, si in enumerate(Sigma):
            sqaure = si * si
            energy += sqaure
            if energy > threshold:
                break
        return i
    
    def processNewSVD(self,U,Sigma,VT,index):
        return U[:, :index], Sigma[:index], VT[:index, :]
    
    def getUnRatedItems(self):
        return np.nonzero(self.data[self.user, :].A == 0)[1]
    
    def getDiagonalMatrix(self,Sigma_new):
        return np.mat(np.eye(len(Sigma_new)) * Sigma_new)
    
    def getTransformedItems(self,U,diag):
        return self.data.T * U * diag.I
    
    def cosineSimilarity(self,A, B):
        num = float(A.T*B)
        denom = np.linalg.norm(A) * np.linalg.norm(B)
        return 0.5 + 0.5 * (num/denom)
    
    def pearsonCorrelationSimilarity(self,A, B):
        if len(A) < 3: return 1
        else: return 0.5 + 0.5 * np.corrcoef(A, B, rowvar=0)[0][1]
        
    def estimateSVD(self,item,transformItem):
        numberOfItems = np.shape(self.data)[1]
        similarityTotal = 0; ratingSimilarityTotal = 0
        if self.similiarity == "cosine": 
            for j in range(numberOfItems):
                userRating = self.data[self.user, j]
                if not (userRating == 0 or j == item):
                    similarity = self.cosineSimilarity(transformItem[item, :].T, transformItem[j, :].T)
                    similarityTotal += similarity
                    ratingSimilarityTotal += similarity * userRating
            if similarityTotal == 0: 
                return 0
            else: 
                return ratingSimilarityTotal / similarityTotal
        else:
            for j in range(numberOfItems):
                userRating = self.data[self.user, j]
                if not (userRating == 0 or j == item):
                    similarity = self.pearsonCorrelationSimilarity(transformItem[item, :].T, transformItem[j, :].T)
                    similarityTotal += similarity
                    ratingSimilarityTotal += similarity * userRating
            if similarityTotal == 0: 
                return 0
            else: 
                return ratingSimilarityTotal / similarityTotal
    
    
    def predictRecommendation(self,itemToRate,transformItem):
        pageScores = []
        for item in itemToRate:
            predictedScore = self.estimateSVD(item,transformItem)
            pageScores.append((item, predictedScore))
        recommendations = sorted(pageScores, key=lambda jj: jj[1], reverse=True)
        if not self.N:
            return recommendations
        else:
            return recommendations[:self.N]
        
        

In [5]:
# Get user ID and get number of predictions for user

In [6]:
userID = 200
predictionNumber = 5
c_recommend = Recommend("cosine",userID,predictionNumber)

In [7]:
# Load the data

In [8]:
c_recommend.loadData("./datasets/MS_ratings_matrix.csv")

In [9]:
# Do Dimensionality reduction using SVD

In [10]:
U,sigma,VT = c_recommend.processSVD()
index = c_recommend.processSingularValue(sigma)
U,sigma,VT = c_recommend.processNewSVD(U,sigma,VT,index)
diag = c_recommend.getDiagonalMatrix(sigma)

In [11]:
# Get unrated items for the user

In [12]:
itemsToRate = c_recommend.getUnRatedItems()

In [13]:
# Construct matrix with reduced data

In [14]:
transforMatrix = c_recommend.getTransformedItems(U,diag)

In [15]:
# Predict recommendations using COSINE SIMILARITY

In [16]:
recommendations = c_recommend.predictRecommendation(itemsToRate,transforMatrix)



In [17]:
ids = []
table = c_recommend.processOriginalData("./datasets/anonymous-msweb.csv")
print("Top 5 Recommended URLs for userID : ", userID)
for item, score in recommendations:
    page_id = c_recommend.ratings_matrix.columns[item]
    ids.append(page_id)
    print('Recommended URL - %s with Recommendation Score - %s' % (page_id, score))
    print(table.loc[page_id])
  
print("-------end------------")
print((table.loc[page_id][1]))

Top 5 Recommended URLs for userID :  200
Recommended URL - X1001 with Recommendation Score - 1.0
Description    Support Desktop
url                   /support
Name: X1001, dtype: object
Recommended URL - X1002 with Recommendation Score - 1.0
Description    End User Produced View
url                           /athome
Name: X1002, dtype: object
Recommended URL - X1003 with Recommendation Score - 1.0
Description    Knowledge Base
url                       /kb
Name: X1003, dtype: object
Recommended URL - X1004 with Recommendation Score - 1.0
Description    Microsoft.com Search
url                         /search
Name: X1004, dtype: object
Recommended URL - X1005 with Recommendation Score - 1.0
Description    Norway
url            /norge
Name: X1005, dtype: object
-------end------------
/norge


In [18]:
# Predict recommendations using Pearson Correlation SIMILARITY

In [19]:
userID = 200
predictionNumber = 5
p_recommend = Recommend("pearson",userID,predictionNumber)
p_recommend.loadData("./datasets/MS_ratings_matrix.csv")
U,sigma,VT = p_recommend.processSVD()
index = p_recommend.processSingularValue(sigma)
U,sigma,VT = p_recommend.processNewSVD(U,sigma,VT,index)
diag = p_recommend.getDiagonalMatrix(sigma)
itemsToRate = p_recommend.getUnRatedItems()
transforMatrix = p_recommend.getTransformedItems(U,diag)
recommendations = p_recommend.predictRecommendation(itemsToRate,transforMatrix)
ids = []
table = p_recommend.processOriginalData("./datasets/anonymous-msweb.csv")
print("Top 5 Recommended URLs for userID : ", userID)
for item, score in recommendations:
    page_id = p_recommend.ratings_matrix.columns[item]
    ids.append(page_id)
    print('Recommended URL - %s with Recommendation Score - %s' % (page_id, score))
    print(table.loc[page_id])

Top 5 Recommended URLs for userID :  200
Recommended URL - X1001 with Recommendation Score - 1.0
Description    Support Desktop
url                   /support
Name: X1001, dtype: object
Recommended URL - X1002 with Recommendation Score - 1.0
Description    End User Produced View
url                           /athome
Name: X1002, dtype: object
Recommended URL - X1003 with Recommendation Score - 1.0
Description    Knowledge Base
url                       /kb
Name: X1003, dtype: object
Recommended URL - X1004 with Recommendation Score - 1.0
Description    Microsoft.com Search
url                         /search
Name: X1004, dtype: object
Recommended URL - X1005 with Recommendation Score - 1.0
Description    Norway
url            /norge
Name: X1005, dtype: object


  c /= stddev[:, None]
  c /= stddev[None, :]
