# 基于用户的协同过滤算法

In [1]:
# 导入包
import random
import math
import time
from tqdm import tqdm

## 一. 通用函数定义

In [2]:
# 定义装饰器，监控运行时间
def timmer(func):
    def wrapper(*args, **kwargs):
        start_time = time.time()
        res = func(*args, **kwargs)
        stop_time = time.time()
        print('Func %s, run time: %s' % (func.__name__, stop_time - start_time))
        return res
    return wrapper

### 1. 数据处理相关
1. load data
2. split data

数据集详情：https://grouplens.org/datasets/movielens/ 查阅readme file

In [3]:
class Dataset():
    
    def __init__(self, fp):
        # fp: data file path
        self.data = self.loadData(fp)
    
    @timmer
    def loadData(self, fp):
        data = []
        #尽量使用with来处理文件
        with open(fp) as f:
            for l in f:
                data.append(tuple(map(int, l.strip().split('::')[:2])))
        return data
    
    @timmer
    def splitData(self, M, k, seed=1):
        '''
        :params: data, 加载的所有(user, item)数据条目
        :params: M, 划分的数目，最后需要取M折的平均
        :params: k, 本次是第几次划分，k~[0, M)
        :params: seed, random的种子数，对于不同的k应设置成一样的
        :return: train, test
        '''
        train, test = [], []
        random.seed(seed)
        for user, item in self.data:
            # 这里与书中的不一致，本人认为取M-1较为合理，因randint是左右都覆盖的
            if random.randint(0, M-1) == k:  
                test.append((user, item)) #随机取一份作为test
            else:
                train.append((user, item))

        # 处理成字典的形式，user->set(items)
        def convert_dict(data):
            data_dict = {}
            for user, item in data:
                if user not in data_dict:
                    data_dict[user] = set() #物品集合，去重
                data_dict[user].add(item)
            data_dict = {k: list(data_dict[k]) for k in data_dict} #物品集合转为列表
            return data_dict

        return convert_dict(train), convert_dict(test)

### 2. 评价指标
1. Precision
2. Recall
3. Coverage
4. Popularity(Novelty)

In [4]:
#TopN推荐，不关心用户具体的评分，只预测用户是否会对某部电影评分
class Metric():
    
    def __init__(self, train, test, GetRecommendation):
        '''
        :params: train, 训练数据，在定义覆盖率、新颖度时会用到
        :params: test, 测试数据
        :params: GetRecommendation, 为某个用户获取推荐物品的接口函数
        '''
        self.train = train
        self.test = test
        self.GetRecommendation = GetRecommendation
        self.recs = self.getRec()
        
    # 为test中的每个用户进行推荐
    def getRec(self):
        recs = {}
        for user in self.test:
            rank = self.GetRecommendation(user) #推荐列表
            recs[user] = rank
        return recs
        
    # 定义精确率指标计算方式
    def precision(self):
        total, hit = 0, 0
        for user in self.test:
            test_items = set(self.test[user]) #true list
            rank = self.recs[user]            #recommend list
            for item, score in rank:
                if item in test_items:
                    hit += 1                  #命中
            total += len(rank)
        return round(hit / total * 100, 2)
    
    # 定义召回率指标计算方式
    def recall(self):
        total, hit = 0, 0
        for user in self.test:
            test_items = set(self.test[user])
            rank = self.recs[user]
            for item, score in rank:
                if item in test_items:
                    hit += 1
            total += len(test_items)
        return round(hit / total * 100, 2)
    
    # 定义覆盖率指标计算方式：覆盖率反映了推荐算法发掘长尾的能力
    def coverage(self):
        all_item, recom_item = set(), set()
        for user in self.test:
            for item in self.train[user]:
                all_item.add(item)       #注意all_item只能累计训练集的item
            rank = self.recs[user]
            for item, score in rank:
                recom_item.add(item)
        return round(len(recom_item) / len(all_item) * 100, 2)
    
    # 定义新颖度指标计算方式:平均流行度越低，新颖性越高(物品的流行度是指 对物品产生过行为的用户总数)
    def popularity(self):
        # 计算物品的流行度
        item_pop = {}
        for user in self.train:
            for item in self.train[user]:
                if item not in item_pop:
                    item_pop[item] = 0
                item_pop[item] += 1

        num, pop = 0, 0
        for user in self.test:
            rank = self.recs[user]
            for item, score in rank:
                # 物品的流行度满足长尾分布，取对数后流行度的平均值更加稳定，防止被流行物品所主导（削弱流行物品的影响）
                pop += math.log(1 + item_pop[item])
                num += 1 #对推荐列表计数
        return round(pop / num, 6) #平均流行度
    
    def eval(self):
        metric = {'Precision': self.precision(),
                  'Recall': self.recall(),
                  'Coverage': self.coverage(),
                  'Popularity': self.popularity()}
        print('Metric:', metric)
        return metric

## 二. 算法实现
1. Random
2. MostPopular
3. UserCF：UserCollaborationFilter
4. UserIIF

In [5]:
# 1. 随机推荐
def Random(train, K, N):
    '''
    :params: train, 训练数据集
    :params: K, 可忽略
    :params: N, 超参数，设置取TopN推荐物品数目
    :return: GetRecommendation，推荐接口函数
    '''
    items = {}
    for user in train:
        for item in train[user]:
            items[item] = 1
    
    def GetRecommendation(user):
        # 随机推荐N个未见过的
        user_items = set(train[user])
        rec_items = {k: items[k] for k in items if k not in user_items}
        rec_items = list(rec_items.items())
        random.shuffle(rec_items)
        return rec_items[:N]
    
    return GetRecommendation

In [6]:
# 2. 热门推荐
def MostPopular(train, K, N):
    '''
    :params: train, 训练数据集
    :params: K, 可忽略
    :params: N, 超参数，设置取TopN推荐物品数目
    :return: GetRecommendation, 推荐接口函数
    '''
    items = {}
    for user in train:
        for item in train[user]:
            if item not in items:
                items[item] = 0
            items[item] += 1  #统计物品频率
        
    def GetRecommendation(user):
        # 随机推荐N个没见过的最热门的
        user_items = set(train[user])
        rec_items = {k: items[k] for k in items if k not in user_items}
        rec_items = list(sorted(rec_items.items(), key=lambda x: x[1], reverse=True))
        return rec_items[:N] #topN最热门
    
    return GetRecommendation

In [7]:
# 3. 基于用户余弦相似度的推荐
def UserCF(train, K, N):
    '''
    :params: train, 训练数据集
    :params: K, 超参数，设置取TopK相似用户数目
    :params: N, 超参数，设置取TopN推荐物品数目
    :return: GetRecommendation, 推荐接口函数
    '''
    # 计算item->user的倒排索引
    item_users = {}
    for user in train:
        for item in train[user]:
            if item not in item_users:
                item_users[item] = set()  #集合，去重
            item_users[item].add(user)
    item_users = {k: list(v) for k, v in item_users.items()}

    # 计算用户相似度矩阵：calculate co-rated items between users
    sim = {}
    num = {}
    for item in item_users:
        users = item_users[item]
        for i in range(len(users)):
            u = users[i]
            if u not in num:
                num[u] = 0
            num[u] += 1
            if u not in sim:
                sim[u] = {}
            for j in range(len(users)):
                if j == i: continue
                v = users[j]
                if v not in sim[u]:
                    sim[u][v] = 0
                sim[u][v] += 1
    for u in sim:
        for v in sim[u]:
            sim[u][v] /= math.sqrt(num[u] * num[v])

    # 按照相似度排序
    sorted_user_sim = {k: list(sorted(v.items(), \
                               key=lambda x: x[1], reverse=True)) \
                       for k, v in sim.items()}

    # 获取接口函数：给user推荐与其最相似的K个用户喜欢的物品i(排除掉user已见的)，按照喜欢物品i的用户u与user的累计相似度排序
    def GetRecommendation(user):
        items = {}
        seen_items = set(train[user])
        for u, _ in sorted_user_sim[user][:K]:
            for item in train[u]:
                # 要去掉用户见过的
                if item not in seen_items:
                    if item not in items:
                        items[item] = 0
                    items[item] += sim[user][u]  #累计用户相似度
        recs = list(sorted(items.items(), key=lambda x: x[1],
                           reverse=True))[:N]
        return recs

    return GetRecommendation

In [8]:
# 4. 基于改进的用户余弦相似度的推荐：两个用户对冷门物品采取过同样的行为更能说明他们兴趣的相似度，按物品的流行度进行惩罚
# IIF：inverse item frequency
def UserIIF(train, K, N):
    '''
    :params: train, 训练数据集
    :params: K, 超参数，设置取TopK相似用户数目
    :params: N, 超参数，设置取TopN推荐物品数目
    :return: GetRecommendation, 推荐接口函数
    '''
    # 计算item->user的倒排索引
    item_users = {}
    for user in train:
        for item in train[user]:
            if item not in item_users:
                item_users[item] = set()  #集合，去重
            item_users[item].add(user)
    item_users = {k: list(v) for k, v in item_users.items()}
    
    # 计算用户相似度矩阵
    sim = {}
    num = {}
    for item in item_users:
        users = item_users[item]
        for i in range(len(users)):
            u = users[i]
            if u not in num:
                num[u] = 0
            num[u] += 1
            if u not in sim:
                sim[u] = {}
            for j in range(len(users)):
                if j == i: continue
                v = users[j]
                if v not in sim[u]:
                    sim[u][v] = 0
                # 相比UserCF，主要是改进了这里, len(users)表示u,v共同爱好的物品一共有多少人喜欢(流行度)
                #如果该物品本身就很热门，则无法说明u,v的相似性
                #反之，如果该物品很冷门，则更能说明u,v的相似性
                sim[u][v] += 1 / math.log(1 + len(users))
    for u in sim:
        for v in sim[u]:
            sim[u][v] /= math.sqrt(num[u] * num[v])
    
    # 按照相似度排序
    sorted_user_sim = {k: list(sorted(v.items(), \
                               key=lambda x: x[1], reverse=True)) \
                       for k, v in sim.items()}
    
    # 获取接口函数
    def GetRecommendation(user):
        items = {}
        seen_items = set(train[user])
        for u, _ in sorted_user_sim[user][:K]:
            for item in train[u]:
                # 要去掉用户见过的
                if item not in seen_items:
                    if item not in items:
                        items[item] = 0
                    items[item] += sim[user][u]
        recs = list(sorted(items.items(), key=lambda x: x[1], reverse=True))[:N]
        return recs
    
    return GetRecommendation

## 三. 实验
1. Random实验
2. MostPopular实验
3. UserCF实验，K=[5, 10, 20, 40, 80, 160]
4. UserIIF实验, K=80

M=8, N=10

In [9]:
class Experiment():
    
    def __init__(self, M, K, N, fp='../dataset/ml-1m/ratings.dat', rt='UserCF'):
        '''
        :params: M, 进行多少次实验
        :params: K, TopK相似用户的个数
        :params: N, TopN推荐物品的个数
        :params: fp, 数据文件路径
        :params: rt, 推荐算法类型
        '''
        self.M = M
        self.K = K
        self.N = N
        self.fp = fp
        self.rt = rt
        self.alg = {'Random': Random, 'MostPopular': MostPopular, \
                    'UserCF': UserCF, 'UserIIF': UserIIF}
    
    # 定义单次实验
    @timmer
    def worker(self, train, test):
        '''
        :params: train, 训练数据集
        :params: test, 测试数据集
        :return: 各指标的值
        '''
        getRecommendation = self.alg[self.rt](train, self.K, self.N)
        metric = Metric(train, test, getRecommendation)
        return metric.eval()
    
    # 多次实验取平均
    @timmer
    def run(self):
        metrics = {'Precision': 0, 'Recall': 0, 
                   'Coverage': 0, 'Popularity': 0}
        dataset = Dataset(self.fp)
        for ii in range(self.M):
            train, test = dataset.splitData(self.M, ii)
            print('Experiment {}:'.format(ii))
            metric = self.worker(train, test)
            metrics = {k: metrics[k]+metric[k] for k in metrics}
        metrics = {k: metrics[k] / self.M for k in metrics}
        print('Average Result (M={}, K={}, N={}): {}'.format(\
                              self.M, self.K, self.N, metrics))

In [10]:
# 1. random实验：precision和recall很低，覆盖率100%
M, N = 8, 10
K = 0 # 为保持一致而设置，随便填一个值
random_exp = Experiment(M, K, N, rt='Random')
random_exp.run() #注意随机推荐的覆盖率应该是100，实验结果中有的超过了100是因为在all_items中只统计了训练集

Func loadData, run time: 0.7724082469940186
Func splitData, run time: 1.1011993885040283
Experiment 0:
Metric: {'Precision': 0.61, 'Recall': 0.29, 'Coverage': 100.0, 'Popularity': 4.395145}
Func worker, run time: 11.038302898406982
Func splitData, run time: 1.087388277053833
Experiment 1:
Metric: {'Precision': 0.6, 'Recall': 0.29, 'Coverage': 100.03, 'Popularity': 4.384132}
Func worker, run time: 10.903393745422363
Func splitData, run time: 1.105825424194336
Experiment 2:
Metric: {'Precision': 0.64, 'Recall': 0.3, 'Coverage': 100.0, 'Popularity': 4.38877}
Func worker, run time: 11.02796983718872
Func splitData, run time: 1.1083877086639404
Experiment 3:
Metric: {'Precision': 0.61, 'Recall': 0.29, 'Coverage': 100.0, 'Popularity': 4.392672}
Func worker, run time: 10.904098749160767
Func splitData, run time: 1.1099495887756348
Experiment 4:
Metric: {'Precision': 0.62, 'Recall': 0.3, 'Coverage': 100.0, 'Popularity': 4.394046}
Func worker, run time: 10.994879961013794
Func splitData, run ti

In [11]:
# 2. MostPopular实验：precision和recall较高，但覆盖率很低，流行度很高
M, N = 8, 10
K = 0 # 为保持一致而设置，随便填一个值
mp_exp = Experiment(M, K, N, rt='MostPopular')
mp_exp.run()

Func loadData, run time: 0.7593598365783691
Func splitData, run time: 1.0881989002227783
Experiment 0:
Metric: {'Precision': 12.85, 'Recall': 6.17, 'Coverage': 2.47, 'Popularity': 7.724273}
Func worker, run time: 5.46601128578186
Func splitData, run time: 1.0951519012451172
Experiment 1:
Metric: {'Precision': 13.07, 'Recall': 6.26, 'Coverage': 2.28, 'Popularity': 7.721385}
Func worker, run time: 5.478949308395386
Func splitData, run time: 1.0996294021606445
Experiment 2:
Metric: {'Precision': 12.89, 'Recall': 6.16, 'Coverage': 2.44, 'Popularity': 7.722067}
Func worker, run time: 5.462384462356567
Func splitData, run time: 1.0899097919464111
Experiment 3:
Metric: {'Precision': 12.81, 'Recall': 6.15, 'Coverage': 2.49, 'Popularity': 7.723152}
Func worker, run time: 5.465885162353516
Func splitData, run time: 1.1052076816558838
Experiment 4:
Metric: {'Precision': 12.7, 'Recall': 6.11, 'Coverage': 2.47, 'Popularity': 7.724644}
Func worker, run time: 5.579876899719238
Func splitData, run tim

In [12]:
# 3. UserCF实验：注意K值的影响
M, N = 8, 10
for K in [5, 10, 20, 40, 80, 160]:
    cf_exp = Experiment(M, K, N, rt='UserCF')
    cf_exp.run()

Func loadData, run time: 0.7352418899536133
Func splitData, run time: 1.0684223175048828
Experiment 0:
Metric: {'Precision': 16.91, 'Recall': 8.12, 'Coverage': 52.36, 'Popularity': 6.819144}
Func worker, run time: 108.72494435310364
Func splitData, run time: 1.1765117645263672
Experiment 1:
Metric: {'Precision': 17.05, 'Recall': 8.16, 'Coverage': 52.03, 'Popularity': 6.815604}
Func worker, run time: 107.48157739639282
Func splitData, run time: 1.0871975421905518
Experiment 2:
Metric: {'Precision': 16.91, 'Recall': 8.09, 'Coverage': 51.69, 'Popularity': 6.818911}
Func worker, run time: 107.46169400215149
Func splitData, run time: 1.0935592651367188
Experiment 3:
Metric: {'Precision': 16.95, 'Recall': 8.15, 'Coverage': 52.11, 'Popularity': 6.817778}
Func worker, run time: 108.54300141334534
Func splitData, run time: 1.113144874572754
Experiment 4:
Metric: {'Precision': 17.06, 'Recall': 8.2, 'Coverage': 52.14, 'Popularity': 6.821389}
Func worker, run time: 107.53886890411377
Func splitDat

Func splitData, run time: 1.096573829650879
Experiment 7:
Metric: {'Precision': 25.11, 'Recall': 12.04, 'Coverage': 20.4, 'Popularity': 7.295009}
Func worker, run time: 125.15021443367004
Average Result (M=8, K=80, N=10): {'Precision': 25.10875, 'Recall': 12.055, 'Coverage': 20.24625, 'Popularity': 7.28811625}
Func run, run time: 1006.8813600540161
Func loadData, run time: 0.7275311946868896
Func splitData, run time: 1.0544140338897705
Experiment 0:
Metric: {'Precision': 24.9, 'Recall': 11.96, 'Coverage': 15.34, 'Popularity': 7.37001}
Func worker, run time: 142.05634140968323
Func splitData, run time: 1.0724847316741943
Experiment 1:
Metric: {'Precision': 25.07, 'Recall': 12.0, 'Coverage': 15.43, 'Popularity': 7.359474}
Func worker, run time: 140.68221473693848
Func splitData, run time: 1.1082525253295898
Experiment 2:
Metric: {'Precision': 24.94, 'Recall': 11.92, 'Coverage': 15.51, 'Popularity': 7.365722}
Func worker, run time: 142.43383049964905
Func splitData, run time: 1.0824670791

In [13]:
# 4. UserIIF实验
M, N = 8, 10
K = 80 # 与书中保持一致
iif_exp = Experiment(M, K, N, rt='UserIIF')
iif_exp.run()

Func loadData, run time: 0.7448866367340088
Func splitData, run time: 1.0915324687957764
Experiment 0:
Metric: {'Precision': 25.36, 'Recall': 12.18, 'Coverage': 21.33, 'Popularity': 7.26129}
Func worker, run time: 281.34474444389343
Func splitData, run time: 1.192413091659546
Experiment 1:
Metric: {'Precision': 25.5, 'Recall': 12.21, 'Coverage': 21.39, 'Popularity': 7.248747}
Func worker, run time: 284.4900622367859
Func splitData, run time: 1.1491913795471191
Experiment 2:
Metric: {'Precision': 25.39, 'Recall': 12.14, 'Coverage': 21.33, 'Popularity': 7.255987}
Func worker, run time: 282.44178104400635
Func splitData, run time: 1.133812665939331
Experiment 3:
Metric: {'Precision': 25.08, 'Recall': 12.05, 'Coverage': 21.4, 'Popularity': 7.259753}
Func worker, run time: 283.64695477485657
Func splitData, run time: 1.1447200775146484
Experiment 4:
Metric: {'Precision': 24.92, 'Recall': 11.98, 'Coverage': 21.25, 'Popularity': 7.261206}
Func worker, run time: 288.9579725265503
Func splitDat

## 四. 实验结果

1. Random实验

    Running time: 185.54872608184814
    
    Average Result (M=8, K=0, N=10): 
    {'Precision': 0.61, 'Recall': 0.29, 
     'Coverage': 100.0, 'Popularity': 4.38958}
 
2. MostPopular实验

    Running time: 103.3697898387909
    
    Average Result (M=8, K=0, N=10): 
    {'Precision': 12.83, 'Recall': 6.16, 
    'Coverage': 2.43, 'Popularity': 7.72326}

3. UserCF实验

    Running time: 1456.9617431163788
    
    Average Result (M=8, K=5, N=10): 
    {'Precision': 16.89, 'Recall': 8.11,
     'Coverage': 52.09, 'Popularity': 6.8192915}
     
    Running time: 1416.0529160499573
    
    Average Result (M=8, K=10, N=10): 
    {'Precision': 20.46, 'Recall': 9.83, 
     'Coverage': 41.64, 'Popularity': 6.979140375}
     
    Running time: 1463.8790090084076
    
    Average Result (M=8, K=20, N=10): 
    {'Precision': 22.99, 'Recall': 11.04, 
     'Coverage': 32.78, 'Popularity': 7.102363}
     
    Running time: 1540.0677690505981
    
    Average Result (M=8, K=40, N=10):
    {'Precision': 24.54, 'Recall': 11.78, 
     'Coverage': 25.89, 'Popularity': 7.20221475}
     
    Running time: 1643.4831750392914
    
    Average Result (M=8, K=80, N=10): 
    {'Precision': 25.11, 'Recall': 12.06, 
     'Coverage': 20.25, 'Popularity': 7.288118125}
     
    Running time: 1891.5019328594208
    
    Average Result (M=8, K=160, N=10): 
    {'Precision': 24.81, 'Recall': 11.91, 
     'Coverage': 15.39, 'Popularity': 7.367559}
     
4. UserIIF实验
    
    Running time: 3006.6924328804016
    
    Average Result (M=8, K=80, N=10): 
    {'Precision': 25.22, 'Recall': 12.11, 
     'Coverage': 21.32, 'Popularity': 7.258887}

## 五. 总结
1. 数据集分割的小技巧，用同样的seed
2. 各个指标的实现，要注意
3. 为每个用户推荐的时候是推荐他们**没有见过**的，因为测试集里面是这样的
4. 倒排物品-用户索引，可进行时间优化
5. 推荐的时候K和N各代表什么意思，要分开设置，先取TopK，然后取TopN

## 附：运行日志（请双击看）

1. Random实验
Func loadData, run time: 1.40358304977417
Func splitData, run time: 2.1179611682891846
Experiment 0:
Metric: {'Precision': 0.61, 'Recall': 0.29, 'Coverage': 100.0, 'Popularity': 4.3952}
Func worker, run time: 20.80728793144226
Func splitData, run time: 2.039689064025879
Experiment 1:
Metric: {'Precision': 0.61, 'Recall': 0.29, 'Coverage': 100.03, 'Popularity': 4.384244}
Func worker, run time: 22.058059692382812
Func splitData, run time: 2.129431962966919
Experiment 2:
Metric: {'Precision': 0.64, 'Recall': 0.3, 'Coverage': 100.0, 'Popularity': 4.38938}
Func worker, run time: 18.35742425918579
Func splitData, run time: 2.0330629348754883
Experiment 3:
Metric: {'Precision': 0.62, 'Recall': 0.3, 'Coverage': 100.0, 'Popularity': 4.393025}
Func worker, run time: 22.459643840789795
Func splitData, run time: 2.0501880645751953
Experiment 4:
Metric: {'Precision': 0.61, 'Recall': 0.3, 'Coverage': 100.0, 'Popularity': 4.393217}
Func worker, run time: 23.829069137573242
Func splitData, run time: 1.952528953552246
Experiment 5:
Metric: {'Precision': 0.57, 'Recall': 0.27, 'Coverage': 100.03, 'Popularity': 4.388441}
Func worker, run time: 21.796540021896362
Func splitData, run time: 2.1322124004364014
Experiment 6:
Metric: {'Precision': 0.61, 'Recall': 0.29, 'Coverage': 100.0, 'Popularity': 4.382586}
Func worker, run time: 19.419902801513672
Func splitData, run time: 1.9659440517425537
Experiment 7:
Metric: {'Precision': 0.61, 'Recall': 0.29, 'Coverage': 100.0, 'Popularity': 4.390586}
Func worker, run time: 18.834516286849976
Average Result (M=8, K=0, N=10): {'Precision': 0.61, 'Recall': 0.29125, 'Coverage': 100.0075, 'Popularity': 4.389584875000001}
Func run, run time: 185.54872608184814

2. MostPopular实验
Func loadData, run time: 1.403282880783081
Func splitData, run time: 1.9211320877075195
Experiment 0:
Metric: {'Precision': 12.85, 'Recall': 6.17, 'Coverage': 2.47, 'Popularity': 7.724273}
Func worker, run time: 10.972801923751831
Func splitData, run time: 1.9256069660186768
Experiment 1:
Metric: {'Precision': 13.07, 'Recall': 6.26, 'Coverage': 2.28, 'Popularity': 7.721385}
Func worker, run time: 10.841933012008667
Func splitData, run time: 1.910295009613037
Experiment 2:
Metric: {'Precision': 12.89, 'Recall': 6.16, 'Coverage': 2.44, 'Popularity': 7.722067}
Func worker, run time: 10.727141857147217
Func splitData, run time: 1.882903814315796
Experiment 3:
Metric: {'Precision': 12.81, 'Recall': 6.15, 'Coverage': 2.49, 'Popularity': 7.723152}
Func worker, run time: 10.670467138290405
Func splitData, run time: 1.918154001235962
Experiment 4:
Metric: {'Precision': 12.7, 'Recall': 6.11, 'Coverage': 2.47, 'Popularity': 7.724644}
Func worker, run time: 10.960633993148804
Func splitData, run time: 1.9205529689788818
Experiment 5:
Metric: {'Precision': 12.9, 'Recall': 6.22, 'Coverage': 2.38, 'Popularity': 7.7234}
Func worker, run time: 10.842862129211426
Func splitData, run time: 1.9104499816894531
Experiment 6:
Metric: {'Precision': 12.91, 'Recall': 6.21, 'Coverage': 2.47, 'Popularity': 7.721658}
Func worker, run time: 10.716413974761963
Func splitData, run time: 1.9528350830078125
Experiment 7:
Metric: {'Precision': 12.53, 'Recall': 6.01, 'Coverage': 2.41, 'Popularity': 7.725531}
Func worker, run time: 10.732755184173584
Average Result (M=8, K=0, N=10): {'Precision': 12.832500000000001, 'Recall': 6.16125, 'Coverage': 2.42625, 'Popularity': 7.723263749999999}
Func run, run time: 103.3697898387909

3. UserCF实验
Func loadData, run time: 1.3605561256408691
Func splitData, run time: 1.8727848529815674
Experiment 0:
Metric: {'Precision': 16.9, 'Recall': 8.12, 'Coverage': 52.44, 'Popularity': 6.819093}
Func worker, run time: 201.4078812599182
Func splitData, run time: 2.1514930725097656
Experiment 1:
Metric: {'Precision': 17.04, 'Recall': 8.16, 'Coverage': 52.06, 'Popularity': 6.815413}
Func worker, run time: 183.0848479270935
Func splitData, run time: 1.9143519401550293
Experiment 2:
Metric: {'Precision': 16.91, 'Recall': 8.08, 'Coverage': 51.75, 'Popularity': 6.818886}
Func worker, run time: 177.24900722503662
Func splitData, run time: 1.8360939025878906
Experiment 3:
Metric: {'Precision': 16.94, 'Recall': 8.14, 'Coverage': 52.14, 'Popularity': 6.817815}
Func worker, run time: 182.475821018219
Func splitData, run time: 1.805711030960083
Experiment 4:
Metric: {'Precision': 17.06, 'Recall': 8.2, 'Coverage': 52.12, 'Popularity': 6.82111}
Func worker, run time: 173.00265192985535
Func splitData, run time: 1.801429033279419
Experiment 5:
Metric: {'Precision': 16.75, 'Recall': 8.08, 'Coverage': 51.91, 'Popularity': 6.818678}
Func worker, run time: 174.97946214675903
Func splitData, run time: 1.80289626121521
Experiment 6:
Metric: {'Precision': 16.68, 'Recall': 8.02, 'Coverage': 51.71, 'Popularity': 6.82425}
Func worker, run time: 173.59705901145935
Func splitData, run time: 1.803412914276123
Experiment 7:
Metric: {'Precision': 16.86, 'Recall': 8.09, 'Coverage': 52.56, 'Popularity': 6.819087}
Func worker, run time: 174.63527822494507
Average Result (M=8, K=5, N=10): {'Precision': 16.8925, 'Recall': 8.11125, 'Coverage': 52.08624999999999, 'Popularity': 6.8192915}
Func run, run time: 1456.9617431163788
Func loadData, run time: 1.257431983947754
Func splitData, run time: 1.8042638301849365
Experiment 0:
Metric: {'Precision': 20.52, 'Recall': 9.86, 'Coverage': 41.95, 'Popularity': 6.982226}
Func worker, run time: 173.35024309158325
Func splitData, run time: 1.8343029022216797
Experiment 1:
Metric: {'Precision': 20.46, 'Recall': 9.8, 'Coverage': 42.06, 'Popularity': 6.972529}
Func worker, run time: 173.38346886634827
Func splitData, run time: 1.808082103729248
Experiment 2:
Metric: {'Precision': 20.61, 'Recall': 9.85, 'Coverage': 41.62, 'Popularity': 6.980192}
Func worker, run time: 175.31061029434204
Func splitData, run time: 1.8049170970916748
Experiment 3:
Metric: {'Precision': 20.41, 'Recall': 9.81, 'Coverage': 41.47, 'Popularity': 6.97886}
Func worker, run time: 174.2243037223816
Func splitData, run time: 1.815324068069458
Experiment 4:
Metric: {'Precision': 20.59, 'Recall': 9.9, 'Coverage': 41.5, 'Popularity': 6.980629}
Func worker, run time: 174.46058702468872
Func splitData, run time: 1.7919108867645264
Experiment 5:
Metric: {'Precision': 20.33, 'Recall': 9.81, 'Coverage': 41.26, 'Popularity': 6.981318}
Func worker, run time: 172.53949809074402
Func splitData, run time: 1.8133158683776855
Experiment 6:
Metric: {'Precision': 20.19, 'Recall': 9.71, 'Coverage': 41.49, 'Popularity': 6.976388}
Func worker, run time: 169.70669603347778
Func splitData, run time: 1.7420899868011475
Experiment 7:
Metric: {'Precision': 20.58, 'Recall': 9.87, 'Coverage': 41.8, 'Popularity': 6.980981}
Func worker, run time: 187.25051093101501
Average Result (M=8, K=10, N=10): {'Precision': 20.46125, 'Recall': 9.826250000000002, 'Coverage': 41.64375, 'Popularity': 6.979140375}
Func run, run time: 1416.0529160499573
Func loadData, run time: 1.2509210109710693
Func splitData, run time: 2.0944771766662598
Experiment 0:
Metric: {'Precision': 23.11, 'Recall': 11.1, 'Coverage': 32.6, 'Popularity': 7.104519}
Func worker, run time: 185.00779795646667
Func splitData, run time: 1.8321330547332764
Experiment 1:
Metric: {'Precision': 22.96, 'Recall': 10.99, 'Coverage': 33.0, 'Popularity': 7.094808}
Func worker, run time: 182.49092984199524
Func splitData, run time: 1.7799580097198486
Experiment 2:
Metric: {'Precision': 23.2, 'Recall': 11.09, 'Coverage': 32.1, 'Popularity': 7.101386}
Func worker, run time: 182.88875007629395
Func splitData, run time: 1.7766752243041992
Experiment 3:
Metric: {'Precision': 22.87, 'Recall': 10.99, 'Coverage': 32.77, 'Popularity': 7.101266}
Func worker, run time: 181.71431589126587
Func splitData, run time: 1.8331959247589111
Experiment 4:
Metric: {'Precision': 23.0, 'Recall': 11.06, 'Coverage': 33.25, 'Popularity': 7.10377}
Func worker, run time: 176.38355994224548
Func splitData, run time: 1.7539498805999756
Experiment 5:
Metric: {'Precision': 22.96, 'Recall': 11.07, 'Coverage': 32.48, 'Popularity': 7.10406}
Func worker, run time: 178.63581705093384
Func splitData, run time: 1.8071832656860352
Experiment 6:
Metric: {'Precision': 22.83, 'Recall': 10.97, 'Coverage': 32.79, 'Popularity': 7.100858}
Func worker, run time: 180.997900724411
Func splitData, run time: 1.8272180557250977
Experiment 7:
Metric: {'Precision': 23.0, 'Recall': 11.03, 'Coverage': 33.27, 'Popularity': 7.108237}
Func worker, run time: 179.63274002075195
Average Result (M=8, K=20, N=10): {'Precision': 22.99125, 'Recall': 11.037500000000001, 'Coverage': 32.7825, 'Popularity': 7.102363}
Func run, run time: 1463.8790090084076
Func loadData, run time: 1.2451589107513428
Func splitData, run time: 1.7343308925628662
Experiment 0:
Metric: {'Precision': 24.73, 'Recall': 11.88, 'Coverage': 25.8, 'Popularity': 7.204384}
Func worker, run time: 190.66554594039917
Func splitData, run time: 1.8477561473846436
Experiment 1:
Metric: {'Precision': 24.66, 'Recall': 11.81, 'Coverage': 26.03, 'Popularity': 7.19405}
Func worker, run time: 193.2389531135559
Func splitData, run time: 1.8444321155548096
Experiment 2:
Metric: {'Precision': 24.68, 'Recall': 11.8, 'Coverage': 25.66, 'Popularity': 7.20158}
Func worker, run time: 188.7122507095337
Func splitData, run time: 1.8413538932800293
Experiment 3:
Metric: {'Precision': 24.46, 'Recall': 11.76, 'Coverage': 25.89, 'Popularity': 7.201308}
Func worker, run time: 186.81220722198486
Func splitData, run time: 1.8592839241027832
Experiment 4:
Metric: {'Precision': 24.25, 'Recall': 11.66, 'Coverage': 25.76, 'Popularity': 7.204154}
Func worker, run time: 197.69361400604248
Func splitData, run time: 2.064145803451538
Experiment 5:
Metric: {'Precision': 24.46, 'Recall': 11.8, 'Coverage': 26.04, 'Popularity': 7.205482}
Func worker, run time: 192.9972779750824
Func splitData, run time: 1.797558069229126
Experiment 6:
Metric: {'Precision': 24.49, 'Recall': 11.77, 'Coverage': 26.12, 'Popularity': 7.199023}
Func worker, run time: 185.58164811134338
Func splitData, run time: 1.815227746963501
Experiment 7:
Metric: {'Precision': 24.58, 'Recall': 11.79, 'Coverage': 25.79, 'Popularity': 7.207737}
Func worker, run time: 188.1497700214386
Average Result (M=8, K=40, N=10): {'Precision': 24.53875, 'Recall': 11.783749999999998, 'Coverage': 25.886249999999997, 'Popularity': 7.20221475}
Func run, run time: 1540.0677690505981
Func loadData, run time: 1.1918129920959473
Func splitData, run time: 1.7471270561218262
Experiment 0:
Metric: {'Precision': 25.23, 'Recall': 12.12, 'Coverage': 20.35, 'Popularity': 7.288647}
Func worker, run time: 191.55905103683472
Func splitData, run time: 1.7517518997192383
Experiment 1:
Metric: {'Precision': 25.34, 'Recall': 12.13, 'Coverage': 20.2, 'Popularity': 7.280265}
Func worker, run time: 190.3586311340332
Func splitData, run time: 1.7286112308502197
Experiment 2:
Metric: {'Precision': 25.22, 'Recall': 12.06, 'Coverage': 20.03, 'Popularity': 7.28649}
Func worker, run time: 215.9616241455078
Func splitData, run time: 1.7403991222381592
Experiment 3:
Metric: {'Precision': 24.98, 'Recall': 12.01, 'Coverage': 20.29, 'Popularity': 7.288943}
Func worker, run time: 192.13360381126404
Func splitData, run time: 1.7304770946502686
Experiment 4:
Metric: {'Precision': 24.78, 'Recall': 11.91, 'Coverage': 20.33, 'Popularity': 7.289041}
Func worker, run time: 191.28253412246704
Func splitData, run time: 1.7530970573425293
Experiment 5:
Metric: {'Precision': 25.04, 'Recall': 12.08, 'Coverage': 20.4, 'Popularity': 7.290409}
Func worker, run time: 209.25476503372192
Func splitData, run time: 1.9062669277191162
Experiment 6:
Metric: {'Precision': 25.17, 'Recall': 12.1, 'Coverage': 20.0, 'Popularity': 7.286132}
Func worker, run time: 219.69454503059387
Func splitData, run time: 1.9378957748413086
Experiment 7:
Metric: {'Precision': 25.11, 'Recall': 12.04, 'Coverage': 20.4, 'Popularity': 7.295018}
Func worker, run time: 217.57287120819092
Average Result (M=8, K=80, N=10): {'Precision': 25.10875, 'Recall': 12.056249999999999, 'Coverage': 20.25, 'Popularity': 7.288118125}
Func run, run time: 1643.4831750392914
Func loadData, run time: 1.2924230098724365
Func splitData, run time: 1.8834781646728516
Experiment 0:
Metric: {'Precision': 24.9, 'Recall': 11.96, 'Coverage': 15.34, 'Popularity': 7.369982}
Func worker, run time: 248.86677980422974
Func splitData, run time: 1.9202308654785156
Experiment 1:
Metric: {'Precision': 25.07, 'Recall': 12.0, 'Coverage': 15.43, 'Popularity': 7.359478}
Func worker, run time: 244.85498023033142
Func splitData, run time: 1.9144361019134521
Experiment 2:
Metric: {'Precision': 24.94, 'Recall': 11.92, 'Coverage': 15.51, 'Popularity': 7.365725}
Func worker, run time: 233.78980898857117
Func splitData, run time: 1.735440731048584
Experiment 3:
Metric: {'Precision': 24.7, 'Recall': 11.87, 'Coverage': 15.57, 'Popularity': 7.367826}
Func worker, run time: 218.3170599937439
Func splitData, run time: 1.7129569053649902
Experiment 4:
Metric: {'Precision': 24.54, 'Recall': 11.8, 'Coverage': 15.42, 'Popularity': 7.368641}
Func worker, run time: 218.92201709747314
Func splitData, run time: 1.7369437217712402
Experiment 5:
Metric: {'Precision': 24.77, 'Recall': 11.95, 'Coverage': 15.52, 'Popularity': 7.370501}
Func worker, run time: 217.8976969718933
Func splitData, run time: 1.7374908924102783
Experiment 6:
Metric: {'Precision': 24.9, 'Recall': 11.97, 'Coverage': 15.31, 'Popularity': 7.362657}
Func worker, run time: 241.18968224525452
Func splitData, run time: 1.9872171878814697
Experiment 7:
Metric: {'Precision': 24.69, 'Recall': 11.84, 'Coverage': 15.04, 'Popularity': 7.375662}
Func worker, run time: 251.5464129447937
Average Result (M=8, K=160, N=10): {'Precision': 24.813750000000002, 'Recall': 11.91375, 'Coverage': 15.392499999999998, 'Popularity': 7.367559}
Func run, run time: 1891.5019328594208

4. UserIIF实验
Func loadData, run time: 1.438131332397461
Func splitData, run time: 2.045954942703247
Experiment 0:
Metric: {'Precision': 25.36, 'Recall': 12.18, 'Coverage': 21.33, 'Popularity': 7.26129}
Func worker, run time: 392.8560140132904
Func splitData, run time: 1.8182199001312256
Experiment 1:
Metric: {'Precision': 25.5, 'Recall': 12.21, 'Coverage': 21.39, 'Popularity': 7.248747}
Func worker, run time: 372.19161105155945
Func splitData, run time: 1.7963738441467285
Experiment 2:
Metric: {'Precision': 25.39, 'Recall': 12.14, 'Coverage': 21.33, 'Popularity': 7.255987}
Func worker, run time: 373.7826910018921
Func splitData, run time: 2.0211751461029053
Experiment 3:
Metric: {'Precision': 25.08, 'Recall': 12.05, 'Coverage': 21.4, 'Popularity': 7.259753}
Func worker, run time: 371.92588996887207
Func splitData, run time: 1.8175630569458008
Experiment 4:
Metric: {'Precision': 24.92, 'Recall': 11.98, 'Coverage': 21.25, 'Popularity': 7.261206}
Func worker, run time: 368.02053785324097
Func splitData, run time: 1.8024423122406006
Experiment 5:
Metric: {'Precision': 25.14, 'Recall': 12.12, 'Coverage': 21.4, 'Popularity': 7.26109}
Func worker, run time: 373.1204378604889
Func splitData, run time: 1.8195960521697998
Experiment 6:
Metric: {'Precision': 25.19, 'Recall': 12.11, 'Coverage': 20.87, 'Popularity': 7.257091}
Func worker, run time: 373.04570269584656
Func splitData, run time: 1.8219950199127197
Experiment 7:
Metric: {'Precision': 25.15, 'Recall': 12.06, 'Coverage': 21.57, 'Popularity': 7.265932}
Func worker, run time: 365.2058880329132
Average Result (M=8, K=80, N=10): {'Precision': 25.21625, 'Recall': 12.106250000000001, 'Coverage': 21.3175, 'Popularity': 7.2588870000000005}
Func run, run time: 3006.6924328804016