In [12]:
from collections import Counter
import utils

class PopularityRecommender():
    def __init__(self):
        self.item_counter = Counter()

    def train(self, user_items):
        """
        Train the recommender with a list of user-item interactions.
        user_item_list: a list of tuples, where the first element in the tuple
        is the user and the second element is the item.
        """
        for user, items in user_items.items():
            for item in items:
                self.item_counter[item] += 1

    def predict(self, num_items=10):
        """
        Recommend the top-N most popular items.
        num_items: the number of items to recommend
        """
        most_common_items = self.item_counter.most_common(num_items)
        # Return only the items, not the counts
        return [item for item, count in most_common_items]

In [4]:
# global dataset
dataset = utils.data_partition('processed/ml-1m')
[user_train, user_valid, user_test, usernum, itemnum] = dataset

cc = 0.0
for u in user_train:
    cc += len(user_train[u])
print('average sequence length: %.2f' % (cc / len(user_train)))
print('number of training data: %.2f' % len(user_train))
print('number of items: %.2f' % usernum)

average sequence length: 163.50
number of training data: 6040.00
number of items: 6040.00


In [13]:
PopRec = PopularityRecommender()
PopRec.train(user_train)

In [18]:
PopRec.predict()

[63, 79, 12, 56, 138, 17, 132, 4, 130, 66]

In [21]:
dataset = utils.data_partition('processed/ml-1m')
dataset_window = utils.data_partition_window_P('processed/ml-1m', valid_percent=0.2, test_percent=0.2, train_percent=0.2)

In [22]:
import copy, torch, random, sys

def evaluate_window_valid(model, dataset, dataset_window):
    [train, valid, test, usernum, itemnum] = copy.deepcopy(dataset)
    [_, train, valid, test, _, itemnum] = copy.deepcopy(dataset_window)
    Recall = 0.0
    Recall_U = 0.0
    coverage_list = []
    # P90 coverage means the smallest item sets that appear in the top 10 lists of at least 90% of the users.
    valid_user = 0.0
    sample_nums = 500
    random_items = random.sample(range(1, itemnum + 1), sample_nums)
    sample_idx = random_items
    sample_idx_tensor = torch.tensor(sample_idx)
    users = range(1, usernum + 1)
    predictions = model.predict()
    for u in users:
        if len(train[u]) < 1 or len(valid[u]) < 1: continue
        ground_truth_idx = valid[u]
        valid_num = len(valid[u])
        for item in ground_truth_idx:
            if item in predictions:
                Recall_U += 1
        Recall_U = Recall_U/valid_num
        Recall += Recall_U
        Recall_U=0
        # coverage_list+=predictions
        valid_user += 1
        if valid_user % 100 == 0:
            print('.', end="")
            sys.stdout.flush()
    return Recall/valid_user, 9/sample_nums

In [24]:
evaluate_window_valid(PopRec, dataset, dataset_window)

............................................................

(0.02763014686996881, 0.018)

In [25]:
evaluate_window_valid(PopRec, dataset, dataset_window)

............................................................

(0.02763014686996881, 0.018)