In [2]:
import numpy as np

In [3]:
data = np.loadtxt("../ml-1m/ratings.dat", skiprows=1, delimiter='::', usecols=[0,1], dtype=int)
N_USER = np.max(data[:,0])
N_ITEM = np.max(data[:,1])

In [4]:
def generate_train_from_local(path, n_user, n_item):
    data = np.loadtxt(fname=path, delimiter="\t", skiprows=1, dtype=int)
    train_matrix = np.zeros((n_user, n_item), dtype = np.int8)
    for line in data:
        train_matrix[line[0],line[1]] = 1
    return train_matrix

def generate_test_from_local(path):
    data = np.loadtxt(fname=path, delimiter="\t", skiprows=1, dtype=int)
    return data

In [5]:
class SVD():
    def __init__(self, train_matrix, d, alpha, _lambda, neg_num):
        self.d = d
        self.alpha = alpha
        self._lambda = _lambda 
        self.neg_num = neg_num
        self.train_matrix = train_matrix
        self.n_user = train_matrix.shape[0]
        self.n_item = train_matrix.shape[1]
        self.P = np.random.rand(self.n_user,d)
        self.Q = np.random.rand(self.n_item,d)
        return
    
    def predict(self, user, item):
        score = np.dot(self.P[user], self.Q[item])
        return 1.0 / (1.0 + np.exp(-score))
    
    def recommand(self, user):
        items = np.nonzero(self.train_matrix[user]-1)[0]
        rank = dict()
        for item in items:
            rank[item] = np.dot(self.P[user], self.Q[item])
        rank = sorted(rank.items(), key=lambda x:x[1], reverse=True)
        res  = list()
        for item, score in rank:
            res.append(item)
        return res
    
    def sample(self, vec):
        pos_items = np.nonzero(vec)[0]
        neg_items = np.nonzero(vec-1)[0]
        n = len(pos_items)
        n_sample = n * self.neg_num
        if n_sample > len(neg_items):
            n_sample = len(neg_items)
        neg_items = np.random.choice(neg_items, size = n_sample, replace=False)
        items = np.hstack((pos_items, neg_items))
        ones  = vec[pos_items]
        zeros = vec[neg_items]
        real  = np.hstack((ones, zeros))
        data  = np.hstack((items.reshape(-1,1), real.reshape(-1,1)))
        np.random.shuffle(data)
        return data
    
    def train(self):
        for user, vec in enumerate(self.train_matrix):
            samples = self.sample(vec)
            for record in samples:
                item = record[0]
                real = record[1]
                res  = self.predict(user, item)
                eui  = real - res
                self.P[user] += self.alpha * (eui * self.Q[item] - self._lambda * self.P[user])
                self.Q[item] += self.alpha * (eui * self.P[user] - self._lambda * self.Q[item])
        self.alpha *= 0.9
        return

In [6]:
train_matrix = generate_train_from_local("../ml-1m/ml.train.txt", n_user=N_USER, n_item=N_ITEM)
test = generate_test_from_local("../ml-1m/ml.test.txt")

In [6]:
def getHitRatio(ranklist, gtItem):
    #HR击中率，如果topk中有正例ID即认为正确
    if gtItem in ranklist:
        return 1
    return 0

def getNDCG(ranklist, gtItem):
    #NDCG归一化折损累计增益
    for i in range(len(ranklist)):
        item = ranklist[i]
        if item == gtItem:
            return np.log(2) / np.log(i+2)
    return 0

def movieEval(rank_all_users, test, topK):
    hit_list = list()
    undcg_list = list()
    for u, rlist in enumerate(rank_all_users):
        pos_item = test[u][1]
        hit_list.append(getHitRatio(rlist[:topK], pos_item))
        undcg_list.append(getNDCG(rlist[:topK], pos_item))
    hr = np.mean(hit_list)
    ndcg = np.mean(undcg_list)
    print('HR@', topK, ' = %.4f' %  hr)
    print('NDCG@', topK, ' = %.4f' % ndcg)
    return hr, ndcg

In [7]:
def train_eval(train_matrix, test, n_factors, alpha, epoch, neg_num, _lambda, topK):
    hr_list = [.0]
    ndcg_list = [.0]
    svd = SVD(train_matrix, n_factors, alpha, _lambda, neg_num)
    for e in range(epoch):
        print("-----第",str(e+1),"个epoch-----")
        svd.train()
        if (e+1)%5==0:
            rank_all_users = list()
            for user in range(train_matrix.shape[0]):
                rank_all_users.append(svd.recommand(user))
            hr, ndcg = movieEval(rank_all_users, test, topK)
            hr_list.append(hr)
            ndcg_list.append(ndcg)
    np.savetxt("./evalres/svd/hr_list_"+str(epoch)+"epoch.txt", hr_list)
    np.savetxt("./evalres/svd/ndcg_list_"+str(epoch)+"epoch.txt", ndcg_list)    
    print('------Finished------')
    return 

N_FACTORS = 64
ALPHA = 0.01
EPOCH = 200
NEG_NUM = 4
LAMBDA = 0.01
TOPK  = 100
train_eval(train_matrix, test, n_factors=N_FACTORS, alpha=ALPHA, epoch=EPOCH, neg_num=NEG_NUM, _lambda=LAMBDA, topK=TOPK)

-----第 1 个epoch-----
-----第 2 个epoch-----
-----第 3 个epoch-----
-----第 4 个epoch-----
-----第 5 个epoch-----
HR@ 100  = 0.1977
NDCG@ 100  = 0.0470
-----第 6 个epoch-----
-----第 7 个epoch-----
-----第 8 个epoch-----
-----第 9 个epoch-----
-----第 10 个epoch-----
HR@ 100  = 0.2331
NDCG@ 100  = 0.0580
-----第 11 个epoch-----
-----第 12 个epoch-----
-----第 13 个epoch-----
-----第 14 个epoch-----
-----第 15 个epoch-----
HR@ 100  = 0.2528
NDCG@ 100  = 0.0595
-----第 16 个epoch-----
-----第 17 个epoch-----
-----第 18 个epoch-----
-----第 19 个epoch-----
-----第 20 个epoch-----
HR@ 100  = 0.2641
NDCG@ 100  = 0.0627
-----第 21 个epoch-----
-----第 22 个epoch-----
-----第 23 个epoch-----
-----第 24 个epoch-----
-----第 25 个epoch-----
HR@ 100  = 0.2775
NDCG@ 100  = 0.0660
-----第 26 个epoch-----
-----第 27 个epoch-----
-----第 28 个epoch-----
-----第 29 个epoch-----
-----第 30 个epoch-----
HR@ 100  = 0.2790
NDCG@ 100  = 0.0659
-----第 31 个epoch-----
-----第 32 个epoch-----
-----第 33 个epoch-----
-----第 34 个epoch-----
-----第 35 个epoch-----
HR@ 100  = 

In [8]:
def train_eval_d(train_matrix, test, n_factors, alpha, epoch, neg_num, _lambda, topK):
    hr_list = list()
    ndcg_list = list()
    for d in n_factors:
        svd = SVD(train_matrix, d, alpha, _lambda, neg_num)
        for e in range(epoch):
            print("-----第",str(e+1),"个epoch-----")
            svd.train()
        rank_all_users = list()
        for user in range(train_matrix.shape[0]):
            rank_all_users.append(svd.recommand(user))
        hr, ndcg = movieEval(rank_all_users, test, topK)
        hr_list.append(hr)
        ndcg_list.append(ndcg)
    np.savetxt("./evalres/svd/hr_list_d.txt", hr_list)
    np.savetxt("./evalres/svd/ndcg_list_d.txt", ndcg_list) 
    print('------Finished------')
    return

N_FACTORS = [8, 16, 32, 64]
ALPHA = 0.01
EPOCH = 50
NEG_NUM = 4
LAMBDA = 0.01
TOPK  = 100
train_eval_d(train_matrix, test, n_factors=N_FACTORS, alpha=ALPHA, epoch=EPOCH, neg_num=NEG_NUM, _lambda=LAMBDA, topK=TOPK)

-----第 1 个epoch-----
-----第 2 个epoch-----
-----第 3 个epoch-----
-----第 4 个epoch-----
-----第 5 个epoch-----
-----第 6 个epoch-----
-----第 7 个epoch-----
-----第 8 个epoch-----
-----第 9 个epoch-----
-----第 10 个epoch-----
-----第 11 个epoch-----
-----第 12 个epoch-----
-----第 13 个epoch-----
-----第 14 个epoch-----
-----第 15 个epoch-----
-----第 16 个epoch-----
-----第 17 个epoch-----
-----第 18 个epoch-----
-----第 19 个epoch-----
-----第 20 个epoch-----
-----第 21 个epoch-----
-----第 22 个epoch-----
-----第 23 个epoch-----
-----第 24 个epoch-----
-----第 25 个epoch-----
-----第 26 个epoch-----
-----第 27 个epoch-----
-----第 28 个epoch-----
-----第 29 个epoch-----
-----第 30 个epoch-----
-----第 31 个epoch-----
-----第 32 个epoch-----
-----第 33 个epoch-----
-----第 34 个epoch-----
-----第 35 个epoch-----
-----第 36 个epoch-----
-----第 37 个epoch-----
-----第 38 个epoch-----
-----第 39 个epoch-----
-----第 40 个epoch-----
-----第 41 个epoch-----
-----第 42 个epoch-----
-----第 43 个epoch-----
-----第 44 个epoch-----
-----第 45 个epoch-----
-----第 46 个epoch---

In [9]:
def train_eval_topk(train_matrix, test, n_factors, alpha, epoch, neg_num, _lambda, topK):
    hr_list = list()
    ndcg_list = list()
    svd = SVD(train_matrix, n_factors, alpha, _lambda, neg_num)
    for e in range(epoch):
        print("-----第",str(e+1),"个epoch-----")
        svd.train()
    rank_all_users = list()
    for user in range(train_matrix.shape[0]):
        rank_all_users.append(svd.recommand(user))
    for k in topK:
        hr, ndcg = movieEval(rank_all_users, test, k)
        hr_list.append(hr)
        ndcg_list.append(ndcg)
    np.savetxt("./evalres/svd/hr_list_topk.txt", hr_list)
    np.savetxt("./evalres/svd/ndcg_list_topk.txt", ndcg_list)    
    print('------Finished------')
    return

N_FACTORS = 64
ALPHA = 0.01
EPOCH = 50
NEG_NUM = 4
LAMBDA = 0.01
TOPK  = [50,100,200]
train_eval_topk(train_matrix, test, n_factors=N_FACTORS, alpha=ALPHA, epoch=EPOCH, neg_num=NEG_NUM, _lambda=LAMBDA, topK=TOPK)

-----第 1 个epoch-----
-----第 2 个epoch-----
-----第 3 个epoch-----
-----第 4 个epoch-----
-----第 5 个epoch-----
-----第 6 个epoch-----
-----第 7 个epoch-----
-----第 8 个epoch-----
-----第 9 个epoch-----
-----第 10 个epoch-----
-----第 11 个epoch-----
-----第 12 个epoch-----
-----第 13 个epoch-----
-----第 14 个epoch-----
-----第 15 个epoch-----
-----第 16 个epoch-----
-----第 17 个epoch-----
-----第 18 个epoch-----
-----第 19 个epoch-----
-----第 20 个epoch-----
-----第 21 个epoch-----
-----第 22 个epoch-----
-----第 23 个epoch-----
-----第 24 个epoch-----
-----第 25 个epoch-----
-----第 26 个epoch-----
-----第 27 个epoch-----
-----第 28 个epoch-----
-----第 29 个epoch-----
-----第 30 个epoch-----
-----第 31 个epoch-----
-----第 32 个epoch-----
-----第 33 个epoch-----
-----第 34 个epoch-----
-----第 35 个epoch-----
-----第 36 个epoch-----
-----第 37 个epoch-----
-----第 38 个epoch-----
-----第 39 个epoch-----
-----第 40 个epoch-----
-----第 41 个epoch-----
-----第 42 个epoch-----
-----第 43 个epoch-----
-----第 44 个epoch-----
-----第 45 个epoch-----
-----第 46 个epoch---

In [10]:
def train_eval_neg(train_matrix, test, n_factors, alpha, epoch, neg_num, _lambda, topK):
    hr_list = list()
    ndcg_list = list()
    for n in neg_num:
        svd = SVD(train_matrix, n_factors, alpha, _lambda, n)
        for e in range(epoch):
            print("-----第",str(e+1),"个epoch-----")
            svd.train()
        rank_all_users = list()
        for user in range(train_matrix.shape[0]):
            rank_all_users.append(svd.recommand(user))
        hr, ndcg = movieEval(rank_all_users, test, topK)
        hr_list.append(hr)
        ndcg_list.append(ndcg)
    np.savetxt("./evalres/svd/hr_list_neg.txt", hr_list)
    np.savetxt("./evalres/svd/ndcg_list_neg.txt", ndcg_list) 
    print('------Finished------')
    return

N_FACTORS = 64
ALPHA = 0.01
EPOCH = 50
NEG_NUM = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
LAMBDA = 0.01
TOPK  = 100
train_eval_neg(train_matrix, test, n_factors=N_FACTORS, alpha=ALPHA, epoch=EPOCH, neg_num=NEG_NUM, _lambda=LAMBDA, topK=TOPK)

-----第 1 个epoch-----
-----第 2 个epoch-----
-----第 3 个epoch-----
-----第 4 个epoch-----
-----第 5 个epoch-----
-----第 6 个epoch-----
-----第 7 个epoch-----
-----第 8 个epoch-----
-----第 9 个epoch-----
-----第 10 个epoch-----
-----第 11 个epoch-----
-----第 12 个epoch-----
-----第 13 个epoch-----
-----第 14 个epoch-----
-----第 15 个epoch-----
-----第 16 个epoch-----
-----第 17 个epoch-----
-----第 18 个epoch-----
-----第 19 个epoch-----
-----第 20 个epoch-----
-----第 21 个epoch-----
-----第 22 个epoch-----
-----第 23 个epoch-----
-----第 24 个epoch-----
-----第 25 个epoch-----
-----第 26 个epoch-----
-----第 27 个epoch-----
-----第 28 个epoch-----
-----第 29 个epoch-----
-----第 30 个epoch-----
-----第 31 个epoch-----
-----第 32 个epoch-----
-----第 33 个epoch-----
-----第 34 个epoch-----
-----第 35 个epoch-----
-----第 36 个epoch-----
-----第 37 个epoch-----
-----第 38 个epoch-----
-----第 39 个epoch-----
-----第 40 个epoch-----
-----第 41 个epoch-----
-----第 42 个epoch-----
-----第 43 个epoch-----
-----第 44 个epoch-----
-----第 45 个epoch-----
-----第 46 个epoch---

-----第 15 个epoch-----
-----第 16 个epoch-----
-----第 17 个epoch-----
-----第 18 个epoch-----
-----第 19 个epoch-----
-----第 20 个epoch-----
-----第 21 个epoch-----
-----第 22 个epoch-----
-----第 23 个epoch-----
-----第 24 个epoch-----
-----第 25 个epoch-----
-----第 26 个epoch-----
-----第 27 个epoch-----
-----第 28 个epoch-----
-----第 29 个epoch-----
-----第 30 个epoch-----
-----第 31 个epoch-----
-----第 32 个epoch-----
-----第 33 个epoch-----
-----第 34 个epoch-----
-----第 35 个epoch-----
-----第 36 个epoch-----
-----第 37 个epoch-----
-----第 38 个epoch-----
-----第 39 个epoch-----
-----第 40 个epoch-----
-----第 41 个epoch-----
-----第 42 个epoch-----
-----第 43 个epoch-----
-----第 44 个epoch-----
-----第 45 个epoch-----
-----第 46 个epoch-----
-----第 47 个epoch-----
-----第 48 个epoch-----
-----第 49 个epoch-----
-----第 50 个epoch-----
HR@ 100  = 0.3180
NDCG@ 100  = 0.0782
-----第 1 个epoch-----
-----第 2 个epoch-----
-----第 3 个epoch-----
-----第 4 个epoch-----
-----第 5 个epoch-----
-----第 6 个epoch-----
-----第 7 个epoch-----
-----第 8 个epoch-----
--

In [8]:
import time
def train_eval_time(train_matrix, test, n_factors, alpha, epoch, neg_num, _lambda):
    svd = SVD(train_matrix, n_factors, alpha, _lambda, neg_num)
    for e in range(epoch):
        time_start, time_end = 0, 0
        svd.train()
        time_end=time.time()
        print('time cost:', time_end-time_start)
    np.savetxt("./evalres/svd/single_time.txt", [time_end-time_start])  
    print('------Finished------')
    return 

N_FACTORS = 64
ALPHA = 0.01
EPOCH = 1
NEG_NUM = 4
LAMBDA = 0.01
train_eval_time(train_matrix, test, n_factors=N_FACTORS, alpha=ALPHA, epoch=EPOCH, neg_num=NEG_NUM, _lambda=LAMBDA)

time cost: 1616772082.5085318
------Finished------
