In [1]:
from flurs.datasets import csv_loader
from flurs.recommender import BRISMFRecommender, MFRecommender
from flurs.forgetting import ForgetUnpopularItems, NoForgetting, MappedUserFactorFading, UserFactorFading, SDUserFactorFading
from flurs.evaluator import Evaluator
from flurs.meta_recommender import BUP, FloatLR, NoMeta 
from skmultiflow.drift_detection.eddm import EDDM

import logging
import os
import sys
import datetime

# Absolute
BASE_PATH = "D:\\recsys\\"

# Relatives
LOG_PATH = 'flurs\\log\\'


RECALL_AT = 10

logger = logging.getLogger("experimenter.jupyter")
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')
logging.root.setLevel(level=logging.INFO)
logger.info('running %s' % ' '.join(sys.argv))

2019-09-20 19:05:58,650 : INFO : running D:\Anaconda3\envs\recsys\lib\site-packages\ipykernel_launcher.py -f C:\Users\Eduardo F. J. Heise\AppData\Roaming\jupyter\runtime\kernel-e2c7e3a8-85e9-4d44-902a-9d33e97138d4.json


In [2]:
import numpy as np

class Recall:
    def __init__(self, n):
        self.n = n
        self.hits = 0
        self.stream = 0 
        self.r_mean = 0.0
        self.recall_list = []
    def update(self, rank):
        if rank <= self.n:
            self.hits += 1
        self.stream += 1
        recall = self.score()
        self.recall_list.append(recall)
        return recall
    def score(self):
        return self.hits/self.stream
    def mean(self):
        return np.mean(self.recall_list[20000:])

In [3]:
import time

class Configuration:
    def __init__(self, data_path, Recommender, k=80, l2_reg= .01, learn_rate=.02, forgetting=NoForgetting(alpha=None), meta=NoMeta()):
        self.status = "..."
        self.duration = None
        self.start_time = None
        self.configuration = {
            "recommender" : Recommender.__name__,
            "learn_rate" : learn_rate,
            "k" : k,
            "forgetting": " - ",
            "forgetting_param": " - ",

            "meta" : " - ",
            "meta_param": " - "
        }

        if not forgetting.__class__.__name__ == "NoForgetting":
            self.configuration["forgetting"] = forgetting.__class__.__name__
            self.configuration["forgetting_param"] = " ".join(str(param) for param in forgetting.parameters())


        if not meta.__class__.__name__ == "NoMeta":
            self.configuration["meta"] = meta.__class__.__name__
            self.configuration["meta_param"] = " ".join(str(param) for param in meta.parameters())


        self.data_path = data_path
        self.__recommender = Recommender(k, l2_reg, learn_rate, forgetting)
        self.meta = meta

        self.exp_name = self.data_path.split("\\")[-1].replace('.csv', '') + "_" + self.configuration["recommender"]
        if self.configuration["meta"] != " - ":
            self.exp_name += "_" + self.configuration["meta"] + self.configuration["meta_param"].replace(" ", "_")
        if self.configuration["forgetting"] != " - ":
            self.exp_name += "_" + self.configuration["forgetting"] + self.configuration["forgetting_param"].replace(" ", "_")



    def recommender(self):
        return self.__recommender

    def get_status(self):
        return "\n[{}]\t{}".format(self.status, self.exp_name.replace("_", ""))

    def get_report(self):
        return """> Experiment Report
        - Dataset: {}.
        - Result File: {}.
        - Status: {}.
        - Duration: {} minutes.

        > Experiment Configuration
        Recommender
        - Name: {}
        - Learning Rate: {}
        - K Dimensions: {}

        Meta Recommender
        - Name: {}
        - Paramenters: {}

        Forgetting Techniques
        - Name: {}
        - Paramenters: {}
        """.format(self.data_path.split("\\")[-1], "{}.dat".format(self.exp_name), self.status, self.duration, self.configuration["recommender"], self.configuration["learn_rate"], self.configuration["k"], self.configuration["meta"], self.configuration["meta_param"], self.configuration["forgetting"], self.configuration["forgetting_param"])

    def get_batch_data(self):
        n_batch_train = int(self.data.n_sample * 0.2)  # 20% for pre-training to avoid cold-start
        n_batch_test = int(self.data.n_sample * 0.3)  # 30% for evaluation of pre-training
        batch_tail = n_batch_train + n_batch_test

        # pre-train
        # 20% for batch training
        batch_training = self.data.samples[:n_batch_train]
        # 30% for batch evaluate
        batch_test = self.data.samples[n_batch_train:batch_tail]
        # after the batch training, 30% samples are used for incremental updating

        return batch_training, batch_test

    def get_prequential_data(self):
        n_batch_train = int(self.data.n_sample * 0.2)  # 20% for pre-training to avoid cold-start
        n_batch_test = int(self.data.n_sample * 0.3)  # 30% for evaluation of pre-training
        batch_tail = n_batch_train + n_batch_test

        return self.data.samples[batch_tail:]

    def start(self):
        self.log_file = BASE_PATH + LOG_PATH + self.exp_name + "_" +  datetime.datetime.now().strftime('%Y-%m-%d_%H-%M') + ".log"
        self.logger = logging.getLogger("experimenter.jupyter")
#         self.file_handler = logging.FileHandler(self.log_file, mode='w+')
#         self.file_handler.setLevel(logging.DEBUG)
#         self.logger.handlers = [self.file_handler, console]

        self.start_time = time.process_time()
        self.logger.info('converting data into FluRS input object')
        self.data = csv_loader(self.data_path)


        self.result_file = BASE_PATH + "flurs\\results\\k{}\\{}.dat".format(self.configuration["k"], self.exp_name)

        self.logger.info('initialize recommendation model {}'.format(self.exp_name))
        self.__recommender.initialize()
        self.meta.initialize(self.__recommender)
        return self

    def finish(self):
        if self.start_time != None:
            self.finish_time = time.process_time()
            del self.__recommender
            del self.data
            self.duration =  (self.finish_time - self.start_time)/60
            self.status = "OK!"
#             self.file_handler.close()
        else:
            self.logger.warning("Experiment not finished properly. Please start it before finish")

In [4]:
import smtplib
import traceback

class Experimenter:
    def __init__(self):
        self.configurations = []
        self.gmail_user = 'jupyter.experimenter@gmail.com'
        self.gmail_password = 'experimenter@1212'
        self.dest = 'eduferreiraj@gmail.com'


    def append(self, configuration):
        self.configurations.append(configuration)

    def run(self):
        print(self.configurations)
        for experimenter in self.configurations:
            try:
                experimenter.start()
                rec = experimenter.recommender()
                forgetting = rec.forgetting.__class__.__name__
                logger.info('initialize recommendation model {} with {}'.format(rec.__class__.__name__, forgetting))
                rec.initialize()

                batch_training, batch_test = experimenter.get_batch_data()

                evaluator = Evaluator(rec)
                logging.info('batch pre-training before streaming input')
                evaluator.fit(
                    batch_training,
                    batch_test,
                    max_n_epoch=20
                )

                logging.info('incrementally predict, evaluate and update the recommender')
                logging.info("Abrindo arquivo {} ...".format(experimenter.result_file))
                with open(experimenter.result_file, 'w+') as f:
                    logging.info("Começando a gerar resultados ...")
                    for instance in evaluator.evaluate(experimenter.get_prequential_data()):
                        instance = str(instance)
                        print(instance)
                        f.write(instance)
                experimenter.finish()
                logging.info("Arquivo {} completo.".format(experimenter.result_file))
                subject = "[REPORT] {}".format(experimenter.exp_name.replace("_", " "))
                report = experimenter.get_report()
                report += "> Overview:\n\n"
                for c in self.configurations:
                    report += c.get_status()
            except Exception as e:
                subject = '[EXCEPT] {}'.format(e)
                body = 'Ooops, something happened in the experimentation.\n\nException:{}\n\n{}'.format(traceback.format_exc(), datetime.datetime.now())
                self.send_email(subject, body)
                raise e
            else:
                self.send_email(subject, report)

    def send_email(self, subject, body):
        sent_from = self.gmail_user
        sent_to = self.dest
        message = "From: {}\nTo: {}\nMIME-Version: 1.0\nSubject: {}\n{}".format(sent_from, sent_to, subject, body)
        server = smtplib.SMTP_SSL('smtp.gmail.com', 465)
#         server.ehlo()
#         server.login(self.gmail_user, self.gmail_password)
#         server.sendmail(sent_from, sent_to, message)
#         server.close()
        logger.info('Email sent!')

In [5]:
DATA_PATH = 'datasets\\MovieLens\\ml-100k.csv'
# DATA_PATH = 'datasets\\netflix\\positive_netflix_50.csv'
# DATA_PATH = 'datasets\\palco\\music_listen_15K.csv'
DATA_PATH = 'datasets\\palco\\music_playlist.csv'

data_paths = [DATA_PATH]
# data_paths.append('datasets\\MovieLens\\ml-100k.csv')
# data_paths.append('datasets\\netflix\\netflix_50.csv')
# data_paths.append('datasets\\MovieLens\\ml-1m.csv')


completed = []
alphas = [1.1, 1.35, 1.5]
windows = [(200, 100), (400, 200), (600, 300)]
ks = [60, 80]

alphas += [1.7, 1.9]
windows += [(800, 400), (1000, 500), (2000, 1000)]

experimenter = Experimenter()
for k in ks:
    for path in data_paths:
        for a in alphas:
            for (l_win, s_win) in windows:
                if not [path, k, a, l_win, s_win] in completed:
                    experimenter.append(Configuration(BASE_PATH + path, MFRecommender, k=k, meta=FloatLR(l_win, s_win, a)))

experimenter.run()

2019-09-20 19:05:58,726 : INFO : converting data into FluRS input object


[<__main__.Configuration object at 0x000001D72139ED30>, <__main__.Configuration object at 0x000001D72139EC88>, <__main__.Configuration object at 0x000001D72139E908>, <__main__.Configuration object at 0x000001D72139EB70>, <__main__.Configuration object at 0x000001D72139EA58>, <__main__.Configuration object at 0x000001D726A7F940>, <__main__.Configuration object at 0x000001D726A7F1D0>, <__main__.Configuration object at 0x000001D726A7F278>, <__main__.Configuration object at 0x000001D726A7F2E8>, <__main__.Configuration object at 0x000001D726A7F438>, <__main__.Configuration object at 0x000001D726A7F390>, <__main__.Configuration object at 0x000001D726A7F4E0>, <__main__.Configuration object at 0x000001D726A7F630>, <__main__.Configuration object at 0x000001D726A7F518>, <__main__.Configuration object at 0x000001D726A7F748>, <__main__.Configuration object at 0x000001D726A7F048>, <__main__.Configuration object at 0x000001D726A7F0F0>, <__main__.Configuration object at 0x000001D726A7F898>, <__main__

2019-09-20 19:05:59,054 : INFO : initialize recommendation model music_playlist_MFRecommender_FloatLR200_100_1.1
2019-09-20 19:05:59,054 : INFO : initialize recommendation model MFRecommender with NoForgetting
2019-09-20 19:05:59,056 : INFO : batch pre-training before streaming input
2019-09-20 19:06:16,546 : INFO : Batch training started..
2019-09-20 19:06:34,556 : INFO : Epochs:3 Convergence:-8.926488909155594e-07
2019-09-20 19:06:35,700 : INFO : incrementally predict, evaluate and update the recommender
2019-09-20 19:06:35,700 : INFO : Abrindo arquivo D:\recsys\flurs\results\k60\music_playlist_MFRecommender_FloatLR200_100_1.1.dat ...
2019-09-20 19:06:35,701 : INFO : Começando a gerar resultados ...
2019-09-20 19:06:35,703 : INFO : Prequential evaluation started...


(366, 1, 0.1036286988070635, 5223, 0.0, 0.0)
(206, 1, 0.2607121996476257, 5223, 0.0, 0.0)
(452, 1, 0.005461810803356407, 5223, 0.0, 0.0)
(711, 1, -0.15407222133257237, 5223, 0.0, 0.0)
(423, 1, 0.045007450870131034, 5223, 0.0, 0.0625)
(240, 1, 0.2235431901476722, 5223, 0.0, 0.0)
(80, 1, 0.4637284576167652, 5224, 0.09375, 0.0)
(792, 1, -0.28379070052929645, 5224, 0.0, 0.0)
(551, 1, -0.06491435348541433, 5224, 0.0, 0.0)
(570, 1, -0.06974171653001668, 5115, 0.0, 0.0)
(213, 1, 0.2937920717794143, 5115, 0.0, 0.0)
(30, 1, 0.6020908782361724, 5224, 0.0, 0.0)
(252, 1, 0.22141743708034786, 5224, 0.0, 0.0)
(457, 1, 0.046299956592355446, 5115, 0.046875, 0.0)
(848, 1, -0.36778670177385586, 5224, 0.0, 0.0)
(565, 1, -0.03482761005557389, 5225, 0.0, 0.0)
(724, 1, -0.18527951776548596, 5226, 0.0, 0.0)
(849, 1, -0.3368024779375466, 5226, 0.0, 0.0)
(28, 1, 0.6100001496375368, 5226, 0.046875, 0.0)
(645, 1, -0.11154026562330166, 5226, 0.0, 0.0)
(491, 1, 0.013986976334081781, 5226, 0.0, 0.0)
(889, 1, -0.395

KeyboardInterrupt: 