In [1]:
from flurs.datasets import csv_loader
from flurs.recommender import BRISMFRecommender, MFRecommender
from flurs.forgetting import ForgetUnpopularItems, NoForgetting, MappedUserFactorFading, UserFactorFading, SDUserFactorFading
from flurs.evaluator import Evaluator

import logging
import os
import sys

# Input dataset (Windows-style paths); keep exactly one DATA_PATH assignment active.
# DATA_PATH = 'D:\\recsys\\datasets\\MovieLens\\ml-1m\\ml-1m-gte.csv'
DATA_PATH = 'D:\\recsys\\datasets\\netflix\\positive_netflix_0K.csv'
# DATA_PATH = 'D:\\recsys\\datasets\\palco\\music_listen_15K.csv'
# DATA_PATH = 'D:\\recsys\\datasets\\palco\\music_playlist.csv'

# Cut-off N used for the recall@N measurements.
RECALL_AT = 10

# Experiment label: the last backslash-separated component of DATA_PATH
# with every '.csv' occurrence removed.
EXP_NAME = DATA_PATH.rsplit("\\", 1)[-1].replace('.csv', '')

# Root logger: timestamped records, INFO level and above.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')
logging.root.setLevel(level=logging.INFO)

# Dedicated logger named after os.path.basename(DATA_PATH).
program = os.path.basename(DATA_PATH)
logger = logging.getLogger(program)
logger.info('running %s', ' '.join(sys.argv))

# Load the CSV into the object consumed by the experiment cells below.
logging.info('converting data into FluRS input object')
data = csv_loader(DATA_PATH)

2019-04-02 13:57:06,918 : INFO : running D:\Anaconda3\lib\site-packages\ipykernel_launcher.py -f C:\Users\Eduardo Ferreira\AppData\Roaming\jupyter\runtime\kernel-8a6281d8-8bbe-42a2-8149-b59d1878c0d2.json
2019-04-02 13:57:06,920 : INFO : converting data into FluRS input object


In [2]:
class Experimenter:
    """Builds and stores one recommender per (recommender, parameter, forgetting) combination.

    Recommenders are kept in insertion order and are read by position.
    Iterating over an instance works through ``__getitem__``: Python walks the
    indexes 0, 1, 2, ... until the underlying list raises ``IndexError``.
    """

    def __init__(self):
        # Recommender instances created by append(), in creation order.
        self._recommenders = []
        # Class names of the forgetting strategies passed to append(); a name
        # appears once per append() call that included the class.
        self.forgettings = []

    def append(self, recommender_list, forgetting_list, parameters_list, learn_rate=.002, k=60):
        """Instantiate and store a recommender for every combination of the arguments.

        Args:
            recommender_list: recommender classes, each called as
                ``Recommender(forgetting=..., learn_rate=..., k=...)``.
            forgetting_list: forgetting classes, each called as
                ``Forgetting(alpha=p)``; their ``__name__`` is recorded.
            parameters_list: values passed one at a time as ``alpha``.
            learn_rate: forwarded unchanged to every recommender (default .002).
            k: forwarded unchanged to every recommender (default 60).
        """
        self.forgettings.extend(f.__name__ for f in forgetting_list)
        # Nesting order (recommender, then parameter, then forgetting) fixes the
        # order in which the instances are later iterated.
        self._recommenders.extend(
            Recommender(forgetting=Forgetting(alpha=p), learn_rate=learn_rate, k=k)
            for Recommender in recommender_list
            for p in parameters_list
            for Forgetting in forgetting_list
        )

    def __getitem__(self, position):
        """Return the recommender stored at ``position``; raises IndexError past the end."""
        return self._recommenders[position]

    def forgetting_list(self):
        """Return the list of recorded forgetting class names (the internal list, not a copy)."""
        return self.forgettings

In [3]:
import numpy as np

class Recall:
    """Running recall@n over a stream of ranks.

    Every call to ``update`` counts one observation, counts a hit when the
    rank is within the cut-off, and records the cumulative hit ratio reached
    at that point.
    """

    def __init__(self, n):
        self.n = n             # cut-off: ranks <= n count as hits
        self.hits = 0          # number of observations with rank <= n
        self.stream = 0        # total number of observations
        self.r_mean = 0.0      # not read or written by this class; kept for existing users of the attribute
        self.recall_list = []  # cumulative recall after each update, in order

    def update(self, rank):
        """Register one observed rank and return the cumulative recall after it.

        NOTE(review): the comparison is inclusive (``rank <= n``); if the
        evaluator reports 0-based ranks this accepts n + 1 positions — confirm.
        """
        if rank <= self.n:
            self.hits += 1
        self.stream += 1
        recall = self.score()
        self.recall_list.append(recall)
        return recall

    def score(self):
        """Return hits / observations, or 0.0 when nothing has been observed yet."""
        if self.stream == 0:
            # Avoid ZeroDivisionError before the first update.
            return 0.0
        return self.hits / self.stream

    def mean(self):
        """Return the average of the cumulative recall values recorded so far.

        This averages the running values in ``recall_list``; it is not the
        final hit ratio (use ``score`` for that). Returns 0.0 when no update
        has happened, instead of NaN plus a NumPy warning.
        """
        if not self.recall_list:
            return 0.0
        return np.mean(self.recall_list)

In [4]:
# Recommender classes under comparison.
recommender_list = [MFRecommender]

# Earlier parameter notes, kept for reference:
# [1.01, 1.1, 10, 100, 1000, 10000]
# mean_pop = [-0.00040199748722443427, -0.0003777209117588382, -0.0003762619662848127, 
#             -0.0003814282091102922, -0.00038355540091335725, -0.0003867758902406115]
# alpha = np.array([.9999, .99, .95, .9, .8])
# alpha = alpha ** (1./np.mean(mean_pop))
# print("Alphas: {}".format(alpha))

# Parameter grid shared by SDUserFactorFading and MappedUserFactorFading.
alpha = [ 1.1,   2.3,   6,  42, 382]

experimenter = Experimenter()

# One append() call per forgetting strategy, each with its own parameter grid.
for forgetting_cls, grid in (
    (ForgetUnpopularItems, [1.2, 2., 2.5, 4., 10]),
    (NoForgetting, [None]),
    (SDUserFactorFading, alpha),
    (UserFactorFading, [.99, .95, .9, .85, .80]),
    (MappedUserFactorFading, alpha),
):
    experimenter.append(recommender_list, [forgetting_cls], grid)

In [5]:
# Best result found so far for each forgetting strategy, keyed by class name.
experiments = {}
for f in experimenter.forgetting_list():
    experiments[f] = {}
    experiments[f]['mean_recall'] = 0.0

# The split sizes depend only on the dataset, so they are computed once.
n_batch_train = int(data.n_sample * 0.2)  # 20% for pre-training to avoid cold-start
n_batch_test = int(data.n_sample * 0.3)  # 30% for evaluation of pre-training
batch_tail = n_batch_train + n_batch_test  # the remaining 50% of the samples start here

for rec in experimenter:
    forgetting = rec.forgetting.__class__.__name__
    logging.info('initialize recommendation model {} with {}'.format(rec.__class__.__name__, forgetting))
    rec.initialize()
    evaluator = Evaluator(rec, data.can_repeat)

    # Re-slice for every model instead of sharing one slice object across models.
    # NOTE(review): fit() may reorder its inputs in place — confirm.
    # first 20%: batch training
    batch_training = data.samples[:n_batch_train]
    # next 30%: batch evaluation
    batch_test = data.samples[n_batch_train:batch_tail]

    logging.info('batch pre-training before streaming input')
    evaluator.fit(
        batch_training,
        batch_test,
        max_n_epoch=20
    )

    recall = Recall(RECALL_AT)

    recommendations = evaluator.recommend(batch_test)
    logging.info('measuring the recall@{} from the new model'.format(RECALL_AT))
    # (top-1 score, rank, recommend_time)
    for _, rank, _ in recommendations:
        recall.update(rank)
    mean = recall.mean()
    logging.info('mean recall@{}: {}'.format(RECALL_AT,mean))

    best = experiments[forgetting]
    # Always record the first model of a strategy. Otherwise a strategy whose
    # models all score 0.0 never gets its 'evaluator'/'recommender'/'alpha'
    # keys and the summary below fails with KeyError.
    if 'evaluator' not in best or mean > best['mean_recall']:
        best['mean_recall'] = mean
        # The remaining 50% of the samples go to incremental evaluation and updating.
        best['evaluator'] = evaluator.evaluate(data.samples[batch_tail:])
        best['recommender'] = rec.__class__.__name__
        best['alpha'] = rec.forgetting.alpha


# 50% incremental evaluation and updating
logging.info('incrementally predict, evaluate and update the recommender')
for forgetting, best in experiments.items():
    if 'recommender' not in best:
        # No model of this strategy was evaluated (e.g. the loop above was interrupted).
        logging.warning('no model recorded for {}; skipping summary'.format(forgetting))
        continue
    # NOTE(review): '{3:2f}' is a minimum-width-2 spec with default precision,
    # not two decimals ('{3:.2f}') — confirm which one is intended.
    logging.info('Recommender: {0}, Forgetting: {1}, Alpha: {2}, Mean Recall: {3:2f}'.format(
        best['recommender'],
        forgetting,
        best['alpha'],
        best['mean_recall']
    ))

2019-04-02 13:57:08,826 : INFO : initialize recommendation model MFRecommender with ForgetUnpopularItems
2019-04-02 13:57:08,833 : INFO : batch pre-training before streaming input
2019-04-02 14:01:56,523 : INFO : 6068 : Epochs:13 Convergence:0.00019732607649580736
2019-04-02 14:01:56,523 : INFO : Epochs:13 Convergence:0.00019732607649580736


Alpha:1.2 Mean:0.779977


2019-04-02 14:02:09,780 : INFO : measuring the recall@10 from the new model
2019-04-02 21:38:48,774 : INFO : mean recall@10: 0.0
2019-04-02 21:38:48,774 : INFO : initialize recommendation model MFRecommender with ForgetUnpopularItems
2019-04-02 21:38:48,784 : INFO : batch pre-training before streaming input
2019-04-02 21:42:00,748 : INFO : 6068 : Epochs:9 Convergence:0.00011082920806404495
2019-04-02 21:42:00,748 : INFO : Epochs:9 Convergence:0.00011082920806404495


Alpha:2.0 Mean:0.929298


2019-04-02 21:42:13,766 : INFO : measuring the recall@10 from the new model


KeyboardInterrupt: 

In [None]:
# Write the incremental-evaluation results of the best model of each
# forgetting strategy to its own .dat file.
for forgetting in experiments:
    result = experiments[forgetting]
    if 'evaluator' not in result:
        # No model was recorded for this strategy (e.g. the selection loop was
        # interrupted before reaching it); there is nothing to export.
        logging.warning('no evaluator recorded for {}; skipping export'.format(forgetting))
        continue
    evaluator = result['evaluator']
    recommender = result['recommender']
    param = str(result['alpha'])
    filename = "D:\\recsys\\flurs\\results\\{}_{}_{}_{}.dat".format(recommender, forgetting, param, EXP_NAME)
    logging.info("Abrindo arquivo {} ...".format(filename))
    # The file is only written, never read back, so plain 'w' is sufficient.
    with open(filename, 'w') as f:
        logging.info("Começando a gerar resultados ...")
        # NOTE(review): records are written back-to-back with no separator or
        # newline between them — confirm that the consumer of the file expects this.
        for instance in evaluator:
            f.write(str(instance))
    logging.info("Arquivo {} completo.".format(filename))
    