In [1]:
from flurs.datasets import csv_loader
from flurs.recommender import BRISMFRecommender, MFRecommender
from flurs.forgetting import ForgetUnpopularItems, NoForgetting, MappedUserFactorFading, UserFactorFading, SDUserFactorFading
from flurs.evaluator import Evaluator
from flurs.meta_recommender import BUP, FloatLR, NoMeta 
from skmultiflow.drift_detection.eddm import EDDM

import logging
import os
import sys
import datetime

# Absolute
BASE_PATH = "D:\\recsys\\"

# Relatives
LOG_PATH = 'flurs\\log\\'


RECALL_AT = 10

logger = logging.getLogger("experimenter.jupyter")
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')
logging.root.setLevel(level=logging.INFO)
logger.info('running %s' % ' '.join(sys.argv))

2019-08-02 02:36:46,440 : INFO : running D:\Anaconda3\envs\recsys\lib\site-packages\ipykernel_launcher.py -f C:\Users\Eduardo F. J. Heise\AppData\Roaming\jupyter\runtime\kernel-a207bb53-b661-458a-bac7-0788c6a9c048.json


In [2]:
import numpy as np

class Recall:
    def __init__(self, n):
        self.n = n
        self.hits = 0
        self.stream = 0 
        self.r_mean = 0.0
        self.recall_list = []
    def update(self, rank):
        if rank <= self.n:
            self.hits += 1
        self.stream += 1
        recall = self.score()
        self.recall_list.append(recall)
        return recall
    def score(self):
        return self.hits/self.stream
    def mean(self):
        return np.mean(self.recall_list[20000:])

In [3]:
import time

class Configuration:
    def __init__(self, data_path, Recommender, k=80, l2_reg= .01, learn_rate=.02, forgetting=NoForgetting(alpha=None), meta=NoMeta()):
        self.status = "..."
        self.duration = None
        self.start_time = None
        self.configuration = {
            "recommender" : Recommender.__name__,
            "learn_rate" : learn_rate,
            "k" : k,
            "forgetting": " - ",
            "forgetting_param": " - ",

            "meta" : " - ",
            "meta_param": " - "
        }

        if not forgetting.__class__.__name__ == "NoForgetting":
            self.configuration["forgetting"] = forgetting.__class__.__name__
            self.configuration["forgetting_param"] = " ".join(str(param) for param in forgetting.parameters())


        if not meta.__class__.__name__ == "NoMeta":
            self.configuration["meta"] = meta.__class__.__name__
            self.configuration["meta_param"] = " ".join(str(param) for param in meta.parameters())


        self.data_path = data_path
        self.__recommender = Recommender(k, l2_reg, learn_rate, forgetting)
        self.meta = meta

        self.exp_name = self.data_path.split("\\")[-1].replace('.csv', '') + "_" + self.configuration["recommender"]
        if self.configuration["meta"] != " - ":
            self.exp_name += "_" + self.configuration["meta"] + self.configuration["meta_param"].replace(" ", "_")
        if self.configuration["forgetting"] != " - ":
            self.exp_name += "_" + self.configuration["forgetting"] + self.configuration["forgetting_param"].replace(" ", "_")



    def recommender(self):
        return self.__recommender

    def get_status(self):
        return "\n[{}]\t{}".format(self.status, self.exp_name.replace("_", ""))

    def get_report(self):
        return """> Experiment Report
        - Dataset: {}.
        - Result File: {}.
        - Status: {}.
        - Duration: {} minutes.

        > Experiment Configuration
        Recommender
        - Name: {}
        - Learning Rate: {}
        - K Dimensions: {}

        Meta Recommender
        - Name: {}
        - Paramenters: {}

        Forgetting Techniques
        - Name: {}
        - Paramenters: {}
        """.format(self.data_path.split("\\")[-1], "{}.dat".format(self.exp_name), self.status, self.duration, self.configuration["recommender"], self.configuration["learn_rate"], self.configuration["k"], self.configuration["meta"], self.configuration["meta_param"], self.configuration["forgetting"], self.configuration["forgetting_param"])

    def get_batch_data(self):
        n_batch_train = int(self.data.n_sample * 0.2)  # 20% for pre-training to avoid cold-start
        n_batch_test = int(self.data.n_sample * 0.3)  # 30% for evaluation of pre-training
        batch_tail = n_batch_train + n_batch_test

        # pre-train
        # 20% for batch training
        batch_training = self.data.samples[:n_batch_train]
        # 30% for batch evaluate
        batch_test = self.data.samples[n_batch_train:batch_tail]
        # after the batch training, 30% samples are used for incremental updating

        return batch_training, batch_test

    def get_prequential_data(self):
        n_batch_train = int(self.data.n_sample * 0.2)  # 20% for pre-training to avoid cold-start
        n_batch_test = int(self.data.n_sample * 0.3)  # 30% for evaluation of pre-training
        batch_tail = n_batch_train + n_batch_test

        return self.data.samples[batch_tail:]

    def start(self):
        self.log_file = BASE_PATH + LOG_PATH + self.exp_name + "_" +  datetime.datetime.now().strftime('%Y-%m-%d_%H-%M') + ".log"
        self.logger = logging.getLogger("experimenter.jupyter")
#         self.file_handler = logging.FileHandler(self.log_file, mode='w+')
#         self.file_handler.setLevel(logging.DEBUG)
#         self.logger.handlers = [self.file_handler, console]

        self.start_time = time.process_time()
        self.logger.info('converting data into FluRS input object')
        self.data = csv_loader(self.data_path)


        self.result_file = BASE_PATH + "flurs\\results\\k{}\\{}.dat".format(self.configuration["k"], self.exp_name)

        self.logger.info('initialize recommendation model {}'.format(self.exp_name))
        self.__recommender.initialize()
        self.meta.initialize(self.__recommender)
        return self

    def finish(self):
        if self.start_time != None:
            self.finish_time = time.process_time()
            del self.__recommender
            del self.data
            self.duration =  (self.finish_time - self.start_time)/60
            self.status = "OK!"
#             self.file_handler.close()
        else:
            self.logger.warning("Experiment not finished properly. Please start it before finish")

In [4]:
import smtplib
import traceback

class Experimenter:
    def __init__(self):
        self.configurations = []
        self.gmail_user = 'jupyter.experimenter@gmail.com'
        self.gmail_password = 'experimenter@1212'
        self.dest = 'eduferreiraj@gmail.com'


    def append(self, configuration):
        self.configurations.append(configuration)

    def run(self):
        print(self.configurations)
        for experimenter in self.configurations:
            try:
                experimenter.start()
                rec = experimenter.recommender()
                forgetting = rec.forgetting.__class__.__name__
                logger.info('initialize recommendation model {} with {}'.format(rec.__class__.__name__, forgetting))
                rec.initialize()

                batch_training, batch_test = experimenter.get_batch_data()

                evaluator = Evaluator(rec)
                logging.info('batch pre-training before streaming input')
                evaluator.fit(
                    batch_training,
                    batch_test,
                    max_n_epoch=20
                )

                logging.info('incrementally predict, evaluate and update the recommender')
                logging.info("Abrindo arquivo {} ...".format(experimenter.result_file))
                with open(experimenter.result_file, 'w+') as f:
                    logging.info("Começando a gerar resultados ...")
                    for instance in evaluator.evaluate(experimenter.get_prequential_data()):
                        f.write(str(instance))
                experimenter.finish()
                logging.info("Arquivo {} completo.".format(experimenter.result_file))
                subject = "[REPORT] {}".format(experimenter.exp_name.replace("_", " "))
                report = experimenter.get_report()
                report += "> Overview:\n\n"
                for c in self.configurations:
                    report += c.get_status()
            except Exception as e:
                subject = '[EXCEPT] {}'.format(e)
                body = 'Ooops, something happened in the experimentation.\n\nException:{}\n\n{}'.format(traceback.format_exc(), datetime.datetime.now())
                self.send_email(subject, body)
                raise e
            else:
                self.send_email(subject, report)

    def send_email(self, subject, body):
        sent_from = self.gmail_user
        sent_to = self.dest
        message = "From: {}\nTo: {}\nMIME-Version: 1.0\nSubject: {}\n{}".format(sent_from, sent_to, subject, body)
        server = smtplib.SMTP_SSL('smtp.gmail.com', 465)
#         server.ehlo()
#         server.login(self.gmail_user, self.gmail_password)
#         server.sendmail(sent_from, sent_to, message)
#         server.close()
        logger.info('Email sent!')

In [5]:
DATA_PATH = 'datasets\\MovieLens\\ml-100k.csv'
# DATA_PATH = 'datasets\\netflix\\positive_netflix_50.csv'
# DATA_PATH = 'datasets\\palco\\music_listen_15K.csv'
# DATA_PATH = 'datasets\\palco\\music_playlist.csv'

data_paths = []
data_paths.append('datasets\\MovieLens\\ml-100k.csv')
data_paths.append('datasets\\netflix\\netflix_50.csv')
data_paths.append('datasets\\MovieLens\\ml-1m.csv')


completed = []
alphas = [1.1, 1.35, 1.5]
windows = [(200, 100), (400, 200), (600, 300)]
ks = [60, 80]

alphas += [1.7, 1.9]
windows += [(800, 400), (1000, 500), (2000, 1000)]

experimenter = Experimenter()
for k in ks:
    for path in data_paths:
        for a in alphas:
            for (l_win, s_win) in windows:
                if not [path, k, a, l_win, s_win] in completed:
                    experimenter.append(Configuration(BASE_PATH + path, BRISMFRecommender, k=k, meta=FloatLR(l_win, s_win, a)))

experimenter.run()

2019-08-02 02:36:46,508 : INFO : converting data into FluRS input object


[<__main__.Configuration object at 0x0000026321989518>, <__main__.Configuration object at 0x0000026321989550>, <__main__.Configuration object at 0x00000263219894A8>, <__main__.Configuration object at 0x00000263219899B0>, <__main__.Configuration object at 0x0000026321989400>, <__main__.Configuration object at 0x00000263219C33C8>, <__main__.Configuration object at 0x00000263219C3080>, <__main__.Configuration object at 0x00000263219C3198>, <__main__.Configuration object at 0x00000263219C3208>, <__main__.Configuration object at 0x00000263219C3358>, <__main__.Configuration object at 0x00000263219C3550>, <__main__.Configuration object at 0x00000263219C35F8>, <__main__.Configuration object at 0x00000263219C3048>, <__main__.Configuration object at 0x00000263219EA0F0>, <__main__.Configuration object at 0x00000263219EA198>, <__main__.Configuration object at 0x00000263219EA240>, <__main__.Configuration object at 0x00000263219EA2E8>, <__main__.Configuration object at 0x00000263219EA390>, <__main__

2019-08-02 02:36:46,715 : INFO : initialize recommendation model ml-100k_BRISMFRecommender_FloatLR200_100_1.1
2019-08-02 02:36:46,716 : INFO : initialize recommendation model BRISMFRecommender with NoForgetting
2019-08-02 02:36:46,717 : INFO : batch pre-training before streaming input
2019-08-02 02:36:47,074 : INFO : Batch training started..
2019-08-02 02:38:25,485 : INFO : Epochs:12 Convergence:3.3205636209565625e-06
2019-08-02 02:38:26,497 : INFO : incrementally predict, evaluate and update the recommender
2019-08-02 02:38:26,498 : INFO : Abrindo arquivo D:\recsys\flurs\results\k60\ml-100k_BRISMFRecommender_FloatLR200_100_1.1.dat ...
2019-08-02 02:38:26,499 : INFO : Começando a gerar resultados ...
2019-08-02 02:38:26,502 : INFO : Prequential evaluation started...


[ 923  704 1121 ...  181   91  988]
[ 923 1121  421 ...  992  181  988]
[ 704 1121  872 ...  152  181   91]
[ 923  421 1121 ...  992  181  988]
[923 704 421 ... 992 181  91]
[ 923  421 1121 ...  992  181   91]
[ 421 1121  872 ...  992  181   91]
[923 704 421 ... 451  91 988]
[ 923  704 1121 ...  451   91  988]
[ 923 1121  872 ...  181   91  988]
[923 421 704 ... 181  91 988]
[421 704 872 ... 992 181  91]
[ 704 1121  872 ... 1224  152  451]
[ 923 1121  621 ...  992  181   91]
[ 923  421 1121 ...  451  181   91]
[923 421 872 ... 992 181  91]
[1121  704  421 ...  181   91  988]
[ 923 1121  704 ...  992   91  988]
[ 923 1121  704 ...  181   91  988]
[ 923 1121  421 ...  181   91  988]
[923 704 421 ... 451  91 988]
[923 704 421 ... 451  91 988]
[923 704 421 ... 451  91 988]
[923 704 421 ... 181 152 988]
[ 704 1121  872 ...  152   91  988]
[ 923  421 1121 ...  992   91  988]
[923 421 704 ... 992 152 988]
[923 421 704 ... 992  91 988]
[923 421 704 ... 152 992  91]
[923 421 704 ... 992  91 988

KeyboardInterrupt: 