# 基于用户标签的推荐
Created By Vulcan626 on 2023/10/13

In [11]:
# 导入包
import random
import math
import time
from tqdm import tqdm

## 一. 通用函数定义

In [12]:
# 定义装饰器，监控运行时间
def timmer(func):
    """Decorator that prints the elapsed run time of every call to ``func``.

    :params: func, the callable to be timed
    :return: wrapper, a callable with the same signature and return value
             as ``func`` that additionally prints
             ``Func <name>, run time: <seconds>`` after each successful call
    """
    # Local import keeps the decorator self-contained.
    from functools import wraps

    @wraps(func)  # preserve func.__name__ / __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic high-resolution clock, so the measured
        # duration is not affected by system clock adjustments.
        start_time = time.perf_counter()
        res = func(*args, **kwargs)
        stop_time = time.perf_counter()
        print('Func %s, run time: %s' % (func.__name__, stop_time - start_time))
        return res
    return wrapper

### 1. 数据处理相关
Delicious-2k数据集
1. load data
2. split data

In [13]:
class Dataset():
    """Loads tagging records from a tab-separated file and splits them
    into train/test sets."""

    def __init__(self, fp):
        # fp: data file path
        self.data = self.loadData(fp)

    @timmer
    def loadData(self, fp):
        '''
        Read the file and group tags by (user, item).

        The first line is treated as a header and skipped. Only the first
        three tab-separated fields of each line (user, item, tag) are used.
        Blank lines are ignored.

        :params: fp, data file path
        :return: list of (user, item, tags) tuples, where tags is the sorted
                 list of distinct tags the user gave to the item
        '''
        grouped = {}
        # The context manager guarantees the file handle is closed.
        with open(fp) as f:
            next(f, None)  # skip the header line
            for line in f:
                line = line.strip()
                if not line:
                    continue
                user, item, tag = line.split('\t')[:3]
                grouped.setdefault(user, {}).setdefault(item, set()).add(tag)
        # Sorting the tags makes the output independent of set iteration
        # order, so repeated runs produce identical data.
        return [(user, item, sorted(tags))
                for user, items in grouped.items()
                for item, tags in items.items()]

    @timmer
    def splitData(self, M, k, seed=1):
        '''
        Split self.data into a train part and a test part.

        :params: M, number of folds; results are averaged over the M folds
        :params: k, index of the current fold, k in [0, M)
        :params: seed, seed for random; must be the same for every k
        :return: train, test; each is a dict user -> {item: tags}
        '''
        # Each (user, item) record is assigned to exactly one side.
        train, test = {}, {}
        random.seed(seed)
        for user, item, tags in self.data:
            # Differs from the book: M-1 is used as the upper bound because
            # randint is inclusive and k ranges over [0, M).
            target = test if random.randint(0, M - 1) == k else train
            target.setdefault(user, {})[item] = tags
        return train, test

### 2. 评价指标
1. Precision
2. Recall
3. Coverage
4. Diversity
5. Popularity(Novelty)

In [14]:
class Metric():
    """Evaluates a top-N recommender on a train/test split.

    Recommendations are computed once, in the constructor, for every user
    in the test set. Each metric returns 0.0 instead of raising
    ZeroDivisionError when its denominator is empty.
    """

    def __init__(self, train, test, GetRecommendation):
        '''
        :params: train, training data, dict user -> {item: tags}
        :params: test, test data, dict user -> {item: tags}
        :params: GetRecommendation, callable user -> list of (item, score)
        '''
        self.train = train
        self.test = test
        self.GetRecommendation = GetRecommendation
        self.recs = self.getRec()

    # Compute the recommendation list of every user in test
    def getRec(self):
        return {user: self.GetRecommendation(user) for user in self.test}

    # Number of recommended items that appear in the user's test items
    def _count_hits(self):
        hit = 0
        for user in self.test:
            test_items = set(self.test[user])
            hit += sum(1 for item, _ in self.recs[user] if item in test_items)
        return hit

    # Precision in percent: hits / number of recommended items
    def precision(self):
        total = sum(len(self.recs[user]) for user in self.test)
        if total == 0:
            return 0.0
        return round(self._count_hits() / total * 100, 2)

    # Recall in percent: hits / number of test items
    def recall(self):
        total = sum(len(set(self.test[user])) for user in self.test)
        if total == 0:
            return 0.0
        return round(self._count_hits() / total * 100, 2)

    # Coverage in percent: distinct recommended items / distinct train items
    def coverage(self):
        all_item = {item for user in self.train for item in self.train[user]}
        recom_item = {item for user in self.test for item, _ in self.recs[user]}
        if not all_item:
            return 0.0
        return round(len(recom_item) / len(all_item) * 100, 2)

    # Diversity: mean over test users of (1 - average pairwise similarity)
    def diversity(self):
        # item -> {tag: number of times the tag was applied to the item}
        item_tags = {}
        for user in self.train:
            for item in self.train[user]:
                vec = item_tags.setdefault(item, {})
                for tag in self.train[user][item]:
                    vec[tag] = vec.get(tag, 0) + 1

        # Squared norms are computed once per item instead of once per pair.
        sq_norm = {item: sum(c ** 2 for c in vec.values())
                   for item, vec in item_tags.items()}

        # Cosine similarity of the tag-count vectors of two items.
        # Items without any tag information have similarity 0.
        def CosineSim(u, v):
            denom = sq_norm.get(u, 0) * sq_norm.get(v, 0)
            if denom == 0:
                return 0.0
            vec_u, vec_v = item_tags[u], item_tags[v]
            dot = sum(c * vec_v[tag] for tag, c in vec_u.items() if tag in vec_v)
            return dot / math.sqrt(denom)

        div = []
        for user in self.test:
            rank = self.recs[user]
            sim, cnt = 0, 0
            for u, _ in rank:
                for v, _ in rank:
                    if u == v:
                        continue
                    sim += CosineSim(u, v)
                    cnt += 1
            # A user with fewer than two distinct recommended items has no
            # pair to compare; the similarity is then taken as 0.
            sim = sim / cnt if cnt else 0
            div.append(1 - sim)
        if not div:
            return 0.0
        return sum(div) / len(div)

    # Popularity (novelty): mean log-popularity of the recommended items
    def popularity(self):
        # Item popularity = number of train users that tagged the item
        item_pop = {}
        for user in self.train:
            for item in self.train[user]:
                item_pop[item] = item_pop.get(item, 0) + 1

        num, pop = 0, 0
        for user in self.test:
            for item, _ in self.recs[user]:
                # The logarithm keeps a few very popular items from
                # dominating the average; unknown items count as 0.
                pop += math.log(1 + item_pop.get(item, 0))
                num += 1
        if num == 0:
            return 0.0
        return round(pop / num, 6)

    # Compute, print and return all metrics
    def eval(self):
        metric = {'Precision': self.precision(),
                  'Recall': self.recall(),
                  'Coverage': self.coverage(),
                  'Diversity': self.diversity(),
                  'Popularity': self.popularity()}
        print('Metric:', metric)
        return metric

## 二. 算法实现
1. SimpleTagBased
2. TagBasedTFIDF
3. TagBasedTFIDF_Improved
4. ExpandTagBased（TagExtend，基于标签扩展的推荐）

In [15]:
# 1. 基于热门标签的推荐
def SimpleTagBased(train, N):
    '''
    Build a recommender that scores items through the tags a user has used.

    :params: train, training data, dict user -> {item: tags}
    :params: N, hyper-parameter, number of items in the top-N list
    :return: GetRecommendation, the recommendation function
    '''
    # user_tags: user -> {tag: times the user used the tag}
    # tag_items: tag -> {item: times the tag was applied to the item}
    user_tags = {}
    tag_items = {}
    for user, tagged in train.items():
        own = user_tags[user] = {}
        for item, tags in tagged.items():
            for tag in tags:
                own[tag] = own.get(tag, 0) + 1
                by_item = tag_items.setdefault(tag, {})
                by_item[item] = by_item.get(item, 0) + 1

    def GetRecommendation(user):
        # Return the N best-scored items the user has not seen in train
        own = user_tags.get(user)
        if own is None:
            return []
        seen = set(train[user])
        scores = {}
        for tag, weight in own.items():
            for item, count in tag_items[tag].items():
                if item not in seen:
                    scores[item] = scores.get(item, 0) + weight * count
        ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        return ranked[:N]

    return GetRecommendation

In [16]:
# 2. 改进一：为热门标签加入惩罚项
def TagBasedTFIDF(train, N):
    '''
    Tag-based recommender that divides each tag's contribution by the
    number of distinct users who used that tag.

    :params: train, training data, dict user -> {item: tags}
    :params: N, hyper-parameter, number of items in the top-N list
    :return: GetRecommendation, the recommendation function
    '''
    # user_tags: user -> {tag: times the user used the tag}
    # tag_items: tag -> {item: times the tag was applied to the item}
    # tag_users: tag -> set of users who used the tag
    user_tags = {}
    tag_items = {}
    tag_users = {}
    for user, tagged in train.items():
        own = user_tags[user] = {}
        for item, tags in tagged.items():
            for tag in tags:
                own[tag] = own.get(tag, 0) + 1
                by_item = tag_items.setdefault(tag, {})
                by_item[item] = by_item.get(item, 0) + 1
                tag_users.setdefault(tag, set()).add(user)
    # Tag popularity = number of distinct users of the tag
    tag_pop = {tag: len(users) for tag, users in tag_users.items()}

    def GetRecommendation(user):
        # Return the N best-scored items the user has not seen in train
        own = user_tags.get(user)
        if own is None:
            return []
        seen = set(train[user])
        scores = {}
        for tag, weight in own.items():
            popularity = tag_pop[tag]
            for item, count in tag_items[tag].items():
                if item not in seen:
                    scores[item] = scores.get(item, 0) + weight * count / popularity
        ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        return ranked[:N]

    return GetRecommendation

In [17]:
# 3. 改进二：同时也为热门商品加入惩罚项
def TagBasedTFIDF_Improved(train, N):
    '''
    Tag-based recommender that penalises both popular tags and popular
    items: each contribution is divided by the tag's distinct-user count
    and by the item's user count.

    :params: train, training data, dict user -> {item: tags}
    :params: N, hyper-parameter, number of items in the top-N list
    :return: GetRecommendation, the recommendation function
    '''
    # user_tags: user -> {tag: times the user used the tag}
    # tag_items: tag -> {item: times the tag was applied to the item}
    # tag_users: tag -> set of users who used the tag
    # item_pop:  item -> number of users that have the item in train
    user_tags = {}
    tag_items = {}
    tag_users = {}
    item_pop = {}
    for user, tagged in train.items():
        own = user_tags[user] = {}
        for item, tags in tagged.items():
            item_pop[item] = item_pop.get(item, 0) + 1
            for tag in tags:
                own[tag] = own.get(tag, 0) + 1
                by_item = tag_items.setdefault(tag, {})
                by_item[item] = by_item.get(item, 0) + 1
                tag_users.setdefault(tag, set()).add(user)
    tag_pop = {tag: len(users) for tag, users in tag_users.items()}

    def GetRecommendation(user):
        # Return the N best-scored items the user has not seen in train
        own = user_tags.get(user)
        if own is None:
            return []
        seen = set(train[user])
        scores = {}
        for tag, weight in own.items():
            popularity = tag_pop[tag]
            for item, count in tag_items[tag].items():
                if item not in seen:
                    gain = weight * count / popularity / item_pop[item]
                    scores[item] = scores.get(item, 0) + gain
        ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        return ranked[:N]

    return GetRecommendation

In [18]:
# 4. Recommendation based on tag expansion
def ExpandTagBased(train, N, M=20):
    '''
    Tag-based recommender that first expands the tag profile of users who
    have fewer than M distinct tags, using item co-occurrence similarity
    between tags.

    :params: train, training data, dict user -> {item: tags}
    :params: N, hyper-parameter, number of items in the top-N list
    :params: M, hyper-parameter, users with fewer than M distinct tags get
             an expanded profile truncated to the M highest-weighted tags
    :return: GetRecommendation, the recommendation function
    '''

    # Single pass over train:
    #   item_tag:  item -> set of tags applied to the item
    #   user_tags: user -> {tag: times the user used the tag}
    #   tag_items: tag  -> {item: times the tag was applied to the item}
    item_tag, user_tags, tag_items = {}, {}, {}
    for user, tagged in train.items():
        own = user_tags.setdefault(user, {})
        for item, tags in tagged.items():
            bag = item_tag.setdefault(item, set())
            for tag in tags:
                bag.add(tag)
                own[tag] = own.get(tag, 0) + 1
                by_item = tag_items.setdefault(tag, {})
                by_item[item] = by_item.get(item, 0) + 1

    # 1. Tag-to-tag similarity: co-occurrence on items, normalised by the
    #    geometric mean of the number of items each tag appears on.
    tag_cnt, tag_sim = {}, {}
    for tags in item_tag.values():
        for u in tags:
            tag_cnt[u] = tag_cnt.get(u, 0) + 1
            row = tag_sim.setdefault(u, {})
            for v in tags:
                if v != u:
                    row[v] = row.get(v, 0) + 1
    for u, row in tag_sim.items():
        for v in row:
            row[v] /= math.sqrt(tag_cnt[u] * tag_cnt[v])

    # 2. Expand the tag profile of users with fewer than M distinct tags.
    expand_tags = {}
    for user, own in user_tags.items():
        if len(own) >= M:
            expand_tags[user] = own
            continue
        # Candidate tags are weighted by own-tag count times similarity.
        extra = {}
        for tag, weight in own.items():
            for t, s in tag_sim[tag].items():
                if t not in own:
                    extra[t] = extra.get(t, 0) + weight * s
        extra.update(own)
        ranked_tags = sorted(extra.items(), key=lambda pair: pair[1], reverse=True)
        expand_tags[user] = dict(ranked_tags[:M])

    # 3. SimpleTagBased scoring on the expanded profiles.
    def GetRecommendation(user):
        # Return the N best-scored items the user has not seen in train
        if user not in user_tags:
            return []
        seen = set(train[user])
        scores = {}
        for tag, weight in expand_tags[user].items():
            for item, count in tag_items[tag].items():
                if item not in seen:
                    scores[item] = scores.get(item, 0) + weight * count
        ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        return ranked[:N]

    return GetRecommendation

## 三. 实验
1. SimpleTagBased实验
2. TagBasedTFIDF实验
3. TagBasedTFIDF_Improved实验
4. TagExtend

M=10, N=10

In [19]:
class Experiment():
    """Runs an M-fold evaluation of one of the tag-based recommenders."""

    def __init__(self, M, N, fp='dataset/delicious-2k/user_taggedbookmarks.dat', rt='SimpleTagBased'):
        '''
        :params: M, number of experiments (folds) to run
        :params: N, number of items in the top-N recommendation list
        :params: fp, data file path
        :params: rt, recommender type, one of the keys of self.alg
        '''
        self.M = M
        self.N = N
        self.fp = fp
        self.rt = rt
        # 'ExtendTagBased' is the key used by existing callers;
        # 'ExpandTagBased' is accepted too because it is the function's name.
        self.alg = {'SimpleTagBased': SimpleTagBased,
                    'TagBasedTFIDF': TagBasedTFIDF,
                    'TagBasedTFIDF_Improved': TagBasedTFIDF_Improved,
                    'ExtendTagBased': ExpandTagBased,
                    'ExpandTagBased': ExpandTagBased}

    # A single experiment
    @timmer
    def worker(self, train, test):
        '''
        :params: train, training data set
        :params: test, test data set
        :return: dict with the value of every metric
        '''
        getRecommendation = self.alg[self.rt](train, self.N)
        metric = Metric(train, test, getRecommendation)
        return metric.eval()

    # Run M experiments and average the metrics
    @timmer
    def run(self):
        '''
        :return: dict metric name -> average over the M experiments
                 (the same values are also printed)
        '''
        metrics = {'Precision': 0, 'Recall': 0,
                   'Coverage': 0, 'Diversity': 0,
                   'Popularity': 0}
        dataset = Dataset(self.fp)
        for ii in range(self.M):
            train, test = dataset.splitData(self.M, ii)
            print('Experiment {}:'.format(ii))
            metric = self.worker(train, test)
            metrics = {k: metrics[k] + metric[k] for k in metrics}
        metrics = {k: metrics[k] / self.M for k in metrics}
        print('Average Result (M={}, N={}): {}'.format(
            self.M, self.N, metrics))
        # Returned so callers can use the averages programmatically.
        return metrics

In [20]:
# 1. SimpleTagBased experiment: 10 folds, top-10 recommendations
M = 10
N = 10
exp = Experiment(M=M, N=N, rt='SimpleTagBased')
exp.run()

Func loadData, run time: 1.0500240325927734
Func splitData, run time: 0.09396505355834961
Experiment 0:
Metric: {'Precision': 0.33, 'Recall': 0.54, 'Coverage': 3.33, 'Diversity': 0.7892137593749782, 'Popularity': 2.341815}
Func worker, run time: 21.732192993164062
Func splitData, run time: 0.15036797523498535
Experiment 1:
Metric: {'Precision': 0.36, 'Recall': 0.59, 'Coverage': 3.37, 'Diversity': 0.7892935015451295, 'Popularity': 2.327207}
Func worker, run time: 21.687560081481934
Func splitData, run time: 0.1452350616455078
Experiment 2:
Metric: {'Precision': 0.36, 'Recall': 0.59, 'Coverage': 3.37, 'Diversity': 0.7931484109422255, 'Popularity': 2.327753}
Func worker, run time: 22.0946261882782
Func splitData, run time: 0.16254925727844238
Experiment 3:
Metric: {'Precision': 0.29, 'Recall': 0.48, 'Coverage': 3.34, 'Diversity': 0.7981205582277148, 'Popularity': 2.365442}
Func worker, run time: 21.82034206390381
Func splitData, run time: 0.15349030494689941
Experiment 4:
Metric: {'Precis

In [21]:
# 2. TagBasedTFIDF experiment: 10 folds, top-10 recommendations
M = 10
N = 10
exp = Experiment(M=M, N=N, rt='TagBasedTFIDF')
exp.run()

Func loadData, run time: 0.8751428127288818
Func splitData, run time: 0.09610223770141602
Experiment 0:
Metric: {'Precision': 0.38, 'Recall': 0.62, 'Coverage': 16.84, 'Diversity': 0.8817929272649402, 'Popularity': 1.324145}
Func worker, run time: 24.73927617073059
Func splitData, run time: 0.14814996719360352
Experiment 1:
Metric: {'Precision': 0.39, 'Recall': 0.64, 'Coverage': 16.95, 'Diversity': 0.882694294095967, 'Popularity': 1.316937}
Func worker, run time: 24.537441968917847
Func splitData, run time: 0.14658617973327637
Experiment 2:
Metric: {'Precision': 0.35, 'Recall': 0.58, 'Coverage': 16.95, 'Diversity': 0.881047509861129, 'Popularity': 1.32845}
Func worker, run time: 24.471004962921143
Func splitData, run time: 0.16125011444091797
Experiment 3:
Metric: {'Precision': 0.3, 'Recall': 0.5, 'Coverage': 16.98, 'Diversity': 0.885269321757197, 'Popularity': 1.323965}
Func worker, run time: 25.744343757629395
Func splitData, run time: 0.16150403022766113
Experiment 4:
Metric: {'Preci

In [22]:
# 3. TagBasedTFIDF_Improved experiment: 10 folds, top-10 recommendations
M = 10
N = 10
exp = Experiment(M=M, N=N, rt='TagBasedTFIDF_Improved')
exp.run()

Func loadData, run time: 0.7675571441650391
Func splitData, run time: 0.15659403800964355
Experiment 0:
Metric: {'Precision': 0.14, 'Recall': 0.23, 'Coverage': 19.4, 'Diversity': 0.8599207757934516, 'Popularity': 0.786261}
Func worker, run time: 31.04145574569702
Func splitData, run time: 0.1533341407775879
Experiment 1:
Metric: {'Precision': 0.16, 'Recall': 0.27, 'Coverage': 19.36, 'Diversity': 0.861809807994831, 'Popularity': 0.785819}
Func worker, run time: 32.43993306159973
Func splitData, run time: 0.14620685577392578
Experiment 2:
Metric: {'Precision': 0.18, 'Recall': 0.3, 'Coverage': 19.47, 'Diversity': 0.8612670113303802, 'Popularity': 0.787102}
Func worker, run time: 31.186991930007935
Func splitData, run time: 0.16668391227722168
Experiment 3:
Metric: {'Precision': 0.15, 'Recall': 0.24, 'Coverage': 19.32, 'Diversity': 0.863333883555254, 'Popularity': 0.785932}
Func worker, run time: 31.260300874710083
Func splitData, run time: 0.14367914199829102
Experiment 4:
Metric: {'Preci

In [23]:
# 4. ExpandTagBased (tag expansion) experiment: 10 folds, top-10 recommendations
M = 10
N = 10
exp = Experiment(M=M, N=N, rt='ExtendTagBased')
exp.run()

Func loadData, run time: 0.8415701389312744
Func splitData, run time: 0.09526586532592773
Experiment 0:
Metric: {'Precision': 0.33, 'Recall': 0.54, 'Coverage': 3.37, 'Diversity': 0.7884537745340038, 'Popularity': 2.338333}
Func worker, run time: 24.176973819732666
Func splitData, run time: 0.14624881744384766
Experiment 1:
Metric: {'Precision': 0.37, 'Recall': 0.61, 'Coverage': 3.46, 'Diversity': 0.7885147605538919, 'Popularity': 2.323186}
Func worker, run time: 23.30581784248352
Func splitData, run time: 0.22475910186767578
Experiment 2:
Metric: {'Precision': 0.36, 'Recall': 0.6, 'Coverage': 3.47, 'Diversity': 0.7921426241321676, 'Popularity': 2.323487}
Func worker, run time: 23.418489694595337
Func splitData, run time: 0.1563868522644043
Experiment 3:
Metric: {'Precision': 0.29, 'Recall': 0.47, 'Coverage': 3.39, 'Diversity': 0.7975790078270631, 'Popularity': 2.361656}
Func worker, run time: 24.27443504333496
Func splitData, run time: 0.14275908470153809
Experiment 4:
Metric: {'Precis

## 四. 实验结果

注：Precision、Recall、Coverage 的数值单位为百分比（%），代码中已乘以 100；例如 Precision: 0.339 表示 0.339%。

### 1. SimpleTagBased实验

- 运行时间: 221.57秒

- 平均结果 (M=10, N=10):
    - Precision: 0.339
    - Recall: 0.554
    - Coverage: 3.36
    - Diversity: 0.7915
    - Popularity: 2.3398

### 2. TagBasedTFIDF实验

- 运行时间: 255.82秒

- 平均结果 (M=10, N=10):
    - Precision: 0.352
    - Recall: 0.58
    - Coverage: 16.95
    - Diversity: 0.883
    - Popularity: 1.3245

### 3. TagBasedTFIDF_Improved实验

- 运行时间: 315.38秒

- 平均结果 (M=10, N=10):
    - Precision: 0.163
    - Recall: 0.267
    - Coverage: 19.41
    - Diversity: 0.8612
    - Popularity: 0.7859

### 4. ExtendTagBased实验

- 运行时间: 239.03秒

- 平均结果 (M=10, N=10):
    - Precision: 0.344
    - Recall: 0.566
    - Coverage: 3.419
    - Diversity: 0.7905
    - Popularity: 2.3362

## 附：运行日志（请双击看）

1. SimpleTagBased实验
Func loadData, run time: 1.0500240325927734
Func splitData, run time: 0.09396505355834961
Experiment 0:
Metric: {'Precision': 0.33, 'Recall': 0.54, 'Coverage': 3.33, 'Diversity': 0.7892137593749782, 'Popularity': 2.341815}
Func worker, run time: 21.732192993164062
Func splitData, run time: 0.15036797523498535
Experiment 1:
Metric: {'Precision': 0.36, 'Recall': 0.59, 'Coverage': 3.37, 'Diversity': 0.7892935015451295, 'Popularity': 2.327207}
Func worker, run time: 21.687560081481934
Func splitData, run time: 0.1452350616455078
Experiment 2:
Metric: {'Precision': 0.36, 'Recall': 0.59, 'Coverage': 3.37, 'Diversity': 0.7931484109422255, 'Popularity': 2.327753}
Func worker, run time: 22.0946261882782
Func splitData, run time: 0.16254925727844238
Experiment 3:
Metric: {'Precision': 0.29, 'Recall': 0.48, 'Coverage': 3.34, 'Diversity': 0.7981205582277148, 'Popularity': 2.365442}
Func worker, run time: 21.82034206390381
Func splitData, run time: 0.15349030494689941
Experiment 4:
Metric: {'Precision': 0.34, 'Recall': 0.56, 'Coverage': 3.32, 'Diversity': 0.7914451541924852, 'Popularity': 2.336411}
Func worker, run time: 22.156601190567017
Func splitData, run time: 0.1524033546447754
Experiment 5:
Metric: {'Precision': 0.34, 'Recall': 0.56, 'Coverage': 3.29, 'Diversity': 0.7900444948121325, 'Popularity': 2.346891}
Func worker, run time: 22.368839740753174
Func splitData, run time: 0.15596890449523926
Experiment 6:
Metric: {'Precision': 0.35, 'Recall': 0.56, 'Coverage': 3.49, 'Diversity': 0.7946399441029975, 'Popularity': 2.305554}
Func worker, run time: 21.60142207145691
Func splitData, run time: 0.15061497688293457
Experiment 7:
Metric: {'Precision': 0.33, 'Recall': 0.54, 'Coverage': 3.39, 'Diversity': 0.7910829744602316, 'Popularity': 2.362738}
Func worker, run time: 22.087542057037354
Func splitData, run time: 0.151414155960083
Experiment 8:
Metric: {'Precision': 0.34, 'Recall': 0.55, 'Coverage': 3.37, 'Diversity': 0.7895859258510493, 'Popularity': 2.343725}
Func worker, run time: 21.77973508834839
Func splitData, run time: 0.1469707489013672
Experiment 9:
Metric: {'Precision': 0.35, 'Recall': 0.57, 'Coverage': 3.33, 'Diversity': 0.7882764313597028, 'Popularity': 2.34095}
Func worker, run time: 21.69010829925537
Average Result (M=10, N=10): {'Precision': 0.339, 'Recall': 0.554, 'Coverage': 3.3600000000000003, 'Diversity': 0.7914851154868646, 'Popularity': 2.3398486}
Func run, run time: 221.56687879562378

2. TagBasedTFIDF实验
Func loadData, run time: 0.8751428127288818
Func splitData, run time: 0.09610223770141602
Experiment 0:
Metric: {'Precision': 0.38, 'Recall': 0.62, 'Coverage': 16.84, 'Diversity': 0.8817929272649402, 'Popularity': 1.324145}
Func worker, run time: 24.73927617073059
Func splitData, run time: 0.14814996719360352
Experiment 1:
Metric: {'Precision': 0.39, 'Recall': 0.64, 'Coverage': 16.95, 'Diversity': 0.882694294095967, 'Popularity': 1.316937}
Func worker, run time: 24.537441968917847
Func splitData, run time: 0.14658617973327637
Experiment 2:
Metric: {'Precision': 0.35, 'Recall': 0.58, 'Coverage': 16.95, 'Diversity': 0.881047509861129, 'Popularity': 1.32845}
Func worker, run time: 24.471004962921143
Func splitData, run time: 0.16125011444091797
Experiment 3:
Metric: {'Precision': 0.3, 'Recall': 0.5, 'Coverage': 16.98, 'Diversity': 0.885269321757197, 'Popularity': 1.323965}
Func worker, run time: 25.744343757629395
Func splitData, run time: 0.16150403022766113
Experiment 4:
Metric: {'Precision': 0.39, 'Recall': 0.65, 'Coverage': 16.93, 'Diversity': 0.8839717731654333, 'Popularity': 1.318877}
Func worker, run time: 25.392255067825317
Func splitData, run time: 0.15613579750061035
Experiment 5:
Metric: {'Precision': 0.36, 'Recall': 0.59, 'Coverage': 16.85, 'Diversity': 0.8821070178509299, 'Popularity': 1.33234}
Func worker, run time: 26.35737419128418
Func splitData, run time: 0.1594090461730957
Experiment 6:
Metric: {'Precision': 0.36, 'Recall': 0.58, 'Coverage': 17.06, 'Diversity': 0.8857450351741973, 'Popularity': 1.31708}
Func worker, run time: 24.826799869537354
Func splitData, run time: 0.15881609916687012
Experiment 7:
Metric: {'Precision': 0.35, 'Recall': 0.58, 'Coverage': 17.07, 'Diversity': 0.8821537791500166, 'Popularity': 1.331754}
Func worker, run time: 24.402519702911377
Func splitData, run time: 0.18355584144592285
Experiment 8:
Metric: {'Precision': 0.31, 'Recall': 0.51, 'Coverage': 16.89, 'Diversity': 0.8827980403795257, 'Popularity': 1.327531}
Func worker, run time: 27.333621978759766
Func splitData, run time: 0.17234492301940918
Experiment 9:
Metric: {'Precision': 0.33, 'Recall': 0.55, 'Coverage': 16.98, 'Diversity': 0.8824678045018927, 'Popularity': 1.323427}
Func worker, run time: 25.55917501449585
Average Result (M=10, N=10): {'Precision': 0.352, 'Recall': 0.5799999999999998, 'Coverage': 16.949999999999996, 'Diversity': 0.8830047503201227, 'Popularity': 1.3244506000000003}
Func run, run time: 255.81618213653564
3. TagBasedTFIDF_Improved实验
Func loadData, run time: 0.7675571441650391
Func splitData, run time: 0.15659403800964355
Experiment 0:
Metric: {'Precision': 0.14, 'Recall': 0.23, 'Coverage': 19.4, 'Diversity': 0.8599207757934516, 'Popularity': 0.786261}
Func worker, run time: 31.04145574569702
Func splitData, run time: 0.1533341407775879
Experiment 1:
Metric: {'Precision': 0.16, 'Recall': 0.27, 'Coverage': 19.36, 'Diversity': 0.861809807994831, 'Popularity': 0.785819}
Func worker, run time: 32.43993306159973
Func splitData, run time: 0.14620685577392578
Experiment 2:
Metric: {'Precision': 0.18, 'Recall': 0.3, 'Coverage': 19.47, 'Diversity': 0.8612670113303802, 'Popularity': 0.787102}
Func worker, run time: 31.186991930007935
Func splitData, run time: 0.16668391227722168
Experiment 3:
Metric: {'Precision': 0.15, 'Recall': 0.24, 'Coverage': 19.32, 'Diversity': 0.863333883555254, 'Popularity': 0.785932}
Func worker, run time: 31.260300874710083
Func splitData, run time: 0.14367914199829102
Experiment 4:
Metric: {'Precision': 0.21, 'Recall': 0.34, 'Coverage': 19.38, 'Diversity': 0.8611697157397807, 'Popularity': 0.786397}
Func worker, run time: 31.01710295677185
Func splitData, run time: 0.14696788787841797
Experiment 5:
Metric: {'Precision': 0.16, 'Recall': 0.27, 'Coverage': 19.35, 'Diversity': 0.8606836603756692, 'Popularity': 0.786829}
Func worker, run time: 31.603728771209717
Func splitData, run time: 0.18067502975463867
Experiment 6:
Metric: {'Precision': 0.16, 'Recall': 0.26, 'Coverage': 19.43, 'Diversity': 0.862205282731689, 'Popularity': 0.784275}
Func worker, run time: 31.160314798355103
Func splitData, run time: 0.1483469009399414
Experiment 7:
Metric: {'Precision': 0.16, 'Recall': 0.26, 'Coverage': 19.57, 'Diversity': 0.8625987282675192, 'Popularity': 0.785651}
Func worker, run time: 31.45869517326355
Func splitData, run time: 0.15111565589904785
Experiment 8:
Metric: {'Precision': 0.15, 'Recall': 0.24, 'Coverage': 19.41, 'Diversity': 0.860569307213124, 'Popularity': 0.784442}
Func worker, run time: 30.298084020614624
Func splitData, run time: 0.24603819847106934
Experiment 9:
Metric: {'Precision': 0.16, 'Recall': 0.26, 'Coverage': 19.4, 'Diversity': 0.8585062827071077, 'Popularity': 0.785866}
Func worker, run time: 31.466542959213257
Average Result (M=10, N=10): {'Precision': 0.16299999999999998, 'Recall': 0.267, 'Coverage': 19.409, 'Diversity': 0.8612064455708808, 'Popularity': 0.7858573999999999}
Func run, run time: 315.37908911705017
4. TagExtend实验
Func loadData, run time: 0.8415701389312744
Func splitData, run time: 0.09526586532592773
Experiment 0:
Metric: {'Precision': 0.33, 'Recall': 0.54, 'Coverage': 3.37, 'Diversity': 0.7884537745340038, 'Popularity': 2.338333}
Func worker, run time: 24.176973819732666
Func splitData, run time: 0.14624881744384766
Experiment 1:
Metric: {'Precision': 0.37, 'Recall': 0.61, 'Coverage': 3.46, 'Diversity': 0.7885147605538919, 'Popularity': 2.323186}
Func worker, run time: 23.30581784248352
Func splitData, run time: 0.22475910186767578
Experiment 2:
Metric: {'Precision': 0.36, 'Recall': 0.6, 'Coverage': 3.47, 'Diversity': 0.7921426241321676, 'Popularity': 2.323487}
Func worker, run time: 23.418489694595337
Func splitData, run time: 0.1563868522644043
Experiment 3:
Metric: {'Precision': 0.29, 'Recall': 0.47, 'Coverage': 3.39, 'Diversity': 0.7975790078270631, 'Popularity': 2.361656}
Func worker, run time: 24.27443504333496
Func splitData, run time: 0.14275908470153809
Experiment 4:
Metric: {'Precision': 0.37, 'Recall': 0.62, 'Coverage': 3.41, 'Diversity': 0.7909374110564138, 'Popularity': 2.332825}
Func worker, run time: 24.135936975479126
Func splitData, run time: 0.14889192581176758
Experiment 5:
Metric: {'Precision': 0.34, 'Recall': 0.57, 'Coverage': 3.33, 'Diversity': 0.7885079325892931, 'Popularity': 2.343853}
Func worker, run time: 24.052077770233154
Func splitData, run time: 0.1462390422821045
Experiment 6:
Metric: {'Precision': 0.37, 'Recall': 0.6, 'Coverage': 3.53, 'Diversity': 0.7934229240415933, 'Popularity': 2.302398}
Func worker, run time: 23.26210308074951
Func splitData, run time: 0.14962387084960938
Experiment 7:
Metric: {'Precision': 0.33, 'Recall': 0.54, 'Coverage': 3.45, 'Diversity': 0.7900992604098647, 'Popularity': 2.359129}
Func worker, run time: 23.506361961364746
Func splitData, run time: 0.1459038257598877
Experiment 8:
Metric: {'Precision': 0.33, 'Recall': 0.53, 'Coverage': 3.42, 'Diversity': 0.7876959526201732, 'Popularity': 2.3401}
Func worker, run time: 23.20357084274292
Func splitData, run time: 0.15648221969604492
Experiment 9:
Metric: {'Precision': 0.35, 'Recall': 0.58, 'Coverage': 3.36, 'Diversity': 0.7874073891857009, 'Popularity': 2.336959}
Func worker, run time: 23.30314874649048
Average Result (M=10, N=10): {'Precision': 0.34400000000000003, 'Recall': 0.5660000000000001, 'Coverage': 3.4189999999999996, 'Diversity': 0.7904761036950165, 'Popularity': 2.3361926}
Func run, run time: 239.03292894363403