In [1]:
import random
import math
import numpy as np
import pandas as pd
import warnings
import copy
import sys
from scipy.sparse import csc_matrix, linalg, eye
warnings.filterwarnings('ignore')

In [2]:
class Dataset:
    """User-item interactions parsed from a ``::``-delimited ratings file.

    Only the first two fields of every line (user id and item id) are kept;
    any further fields on the line are ignored.
    """

    def __init__(self, path='movielens_data/ratings.dat'):
        """Load all ``(user, item)`` pairs found in ``path`` into ``self.data``."""
        self.data = self.load_data(path)

    def load_data(self, path):
        """Parse ``path`` into a list of integer tuples.

        Args:
            path: Location of a text file whose lines look like
                ``user::item::...``.

        Returns:
            A list with one tuple per non-blank line, holding the first two
            ``::``-separated fields converted to ``int``.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If one of the first two fields is not an integer.
        """
        data = []
        # The context manager guarantees the handle is closed even when a
        # malformed line makes int() raise half-way through the file.
        with open(path) as ratings_file:
            for raw_line in ratings_file:
                line = raw_line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) carry no record;
                    # skip them instead of failing on int('').
                    continue
                data.append(tuple(int(field) for field in line.split('::')[:2]))

        return data

    def split_data(self, M=5, k=0, seed=1):
        """Randomly split the interactions into a train and a test mapping.

        Every interaction draws an integer in ``[0, M-1]``; those that draw
        ``k`` go to the test set, all others to the training set.

        Args:
            M: Number of folds.
            k: Index of the fold used as the test set.
            seed: Seed passed to ``random.seed`` so the split is reproducible.
                Note that this reseeds the module-level generator.

        Returns:
            ``(train, test)``, each a dict mapping a user id to the list of
            distinct item ids of that user in the respective part.
        """
        test = []
        train = []
        random.seed(seed)

        for user, item in self.data:
            if random.randint(0, M - 1) == k:
                test.append((user, item))
            else:
                train.append((user, item))

        def convert_dict(pairs):
            """Group ``(user, item)`` pairs into ``{user: [distinct items]}``."""
            grouped = {}
            for user, item in pairs:
                grouped.setdefault(user, set()).add(item)
            return {user: list(items) for user, items in grouped.items()}

        return convert_dict(train), convert_dict(test)

In [3]:
class PersonalRank:
    """Random-walk-with-restart recommender on the user-item bipartite graph.

    Users and items are the graph nodes. Users occupy node ids
    ``0 .. len(users)-1`` and items follow at ``len(users) ..``. A walk
    continues along an edge with probability ``alpha`` and restarts at the
    query user with probability ``1 - alpha``.
    """

    def __init__(self, train_data, alpha, N):
        """Index the nodes and build the transition matrix.

        Args:
            train_data: Dict mapping a user id to the list of item ids the
                user interacted with.
            alpha: Probability of continuing the walk. ``recommend`` requires
                ``0 <= alpha < 1``.
            N: Maximum number of items returned by ``recommend``.
        """
        self.items, self.users = self.set_index(train_data)
        self.M = self.transition_martix(train_data)
        self.alpha = alpha
        self.N = N

    def set_index(self, train_data):
        """Assign a node id to every user and every item.

        Also stores ``self.id2item``, the list that maps an item's position
        (its node id minus the number of users) back to the item id.

        Returns:
            ``(items, users)`` where ``users`` maps user id -> node id and
            ``items`` maps item id -> node id (offset by ``len(users)``).
        """
        all_items = []
        for user in train_data:
            all_items.extend(train_data[user])

        self.id2item = list(set(all_items))
        users = {u: i for i, u in enumerate(train_data.keys())}
        items = {v: i + len(users) for i, v in enumerate(self.id2item)}

        return items, users

    def transition_martix(self, train_data):
        """Build the out-degree-normalised transition matrix of the graph.

        Entry ``[i, j]`` is ``1 / out_degree(i)`` when nodes ``i`` and ``j``
        share an edge. The matrix is square with one row/column per node
        (users + items). The list of edge weights is kept in ``self.data``.

        Returns:
            A ``csc_matrix`` of shape ``(n_nodes, n_nodes)``.
        """
        # Inverted table: item -> users who interacted with it.
        item_user = dict()
        for user, items in train_data.items():
            for item in items:
                item_user.setdefault(item, []).append(user)

        data, row, col = [], [], []
        for u, items in train_data.items():
            for v in items:
                # A user's out-degree is the number of items in their list.
                data.append(1 / len(items))
                row.append(self.users[u])
                col.append(self.items[v])

        for v, users in item_user.items():
            for u in users:
                # An item's out-degree is the number of users linked to it.
                data.append(1 / len(users))
                row.append(self.items[v])
                col.append(self.users[u])

        # One row/column per graph node. Sizing by the number of edges would
        # pad the matrix with empty nodes that later show up as bogus
        # candidate indices and inflate the linear system.
        n_nodes = len(self.users) + len(self.items)
        M = csc_matrix((data, (row, col)), shape=(n_nodes, n_nodes), dtype=float)

        self.data = data

        return M

    # Correctly spelled alias; the original (misspelled) name is kept so that
    # existing callers continue to work.
    transition_matrix = transition_martix

    def recommend(self, user, train_data):
        """Return up to ``self.N`` unseen items for ``user``, best first.

        Solves ``(I - alpha * M.T) r = (1 - alpha) * r0`` where ``r0`` is the
        indicator vector of ``user``, then ranks the item part of ``r``.

        Args:
            user: Id of a user present in the training data.
            train_data: Dict user id -> list of item ids; the items listed
                for ``user`` are excluded from the result.

        Returns:
            A list of ``(item_id, score)`` tuples sorted by descending score.

        Raises:
            KeyError: If ``user`` is missing from ``train_data`` or was not
                part of the data used to build the model.
            ValueError: If ``alpha`` is outside ``[0, 1)``.
        """
        if not 0 <= self.alpha < 1:
            # alpha == 1 zeroes the restart term and makes I - M.T singular,
            # so no meaningful ranking exists.
            raise ValueError('alpha must satisfy 0 <= alpha < 1, got %r' % (self.alpha,))

        seen_items = set(train_data[user])
        n_users = len(self.users)
        n_nodes = self.M.shape[0]

        # r = (1-a) r0 + a (M.T) r
        r0 = np.zeros(n_nodes)
        r0[self.users[user]] = 1.0

        # Solve the sparse system directly instead of forming the (dense)
        # inverse: same solution, far less memory and time.
        system = (eye(n_nodes, format='csc') - self.alpha * self.M.T).tocsc()
        r = np.asarray(linalg.spsolve(system, (1 - self.alpha) * r0)).ravel()

        item_scores = r[n_users:]
        recs = []
        for ii in np.argsort(-item_scores, kind='stable'):
            if len(recs) >= self.N:
                break
            item = self.id2item[ii]
            if item in seen_items:
                # Never recommend something the user already interacted with.
                continue
            recs.append((item, item_scores[ii]))

        return recs

In [4]:
# Read the ratings file and hold out fold 0 of 5 as the test set.
dataset = Dataset(path='movielens_data/ratings.dat')
train_data, test_data = dataset.split_data(M=5, k=0, seed=1)

In [5]:
# alpha is the probability of continuing the random walk. It must be < 1:
# with alpha == 1 the restart weight (1 - alpha) is zero and I - alpha * M.T
# is singular, so no ranking can be computed.
pr = PersonalRank(train_data, alpha=0.8, N=1)

In [None]:
# Top-N (item, score) recommendations for user id 1.
pr.recommend(user=1, train_data=train_data)

In [None]:
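# Note: list.extend() adds every element of an iterable to the list,
# whereas list.append() adds its argument as one single element.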