In [1]:
from flurs.datasets import csv_loader
from flurs.recommender import BRISMFRecommender, MFRecommender
from flurs.forgetting import ForgetUnpopularItems, NoForgetting, MappedUserFactorFading, UserFactorFading, SDUserFactorFading
from flurs.evaluator import Evaluator
from flurs.meta_recommender import BUP, FloatLR, NoMeta 

import logging
import os
import sys
import datetime

# Absolute root of the experiment workspace. The historical default is kept,
# but it can be overridden through the RECSYS_BASE_PATH environment variable so
# the notebook is not tied to one machine. Paths below are built by plain
# string concatenation, so a trailing separator is enforced.
BASE_PATH = os.environ.get("RECSYS_BASE_PATH", "D:\\recsys\\")
if not BASE_PATH.endswith(("\\", "/")):
    BASE_PATH += "\\"

# Relatives (appended to BASE_PATH)
LOG_PATH = 'flurs\\log\\'


RECALL_AT = 10

# Console handler shared by the root logger and the experiment logger.
# It is named so that re-running this cell can find and replace the handler
# created by a previous run instead of stacking duplicates.
console = logging.StreamHandler()
console.set_name("experimenter.console")
console.setLevel(logging.INFO)
formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
console.setFormatter(formatter)

root_logger = logging.getLogger('')
for stale_handler in [h for h in root_logger.handlers if h.get_name() == console.get_name()]:
    root_logger.removeHandler(stale_handler)
root_logger.addHandler(console)
# The root logger defaults to WARNING, which silently drops every INFO record
# before it reaches the handler; lower it (never raise it) to INFO.
if root_logger.level > logging.INFO:
    root_logger.setLevel(logging.INFO)

logger = logging.getLogger("experimenter.jupyter")
for stale_handler in [h for h in logger.handlers if h.get_name() == console.get_name()]:
    logger.removeHandler(stale_handler)
logger.addHandler(console)
# `console` is attached here as well as on the root logger, so propagation must
# be off or every record would be printed twice.
logger.propagate = False
# Let DEBUG records through; each handler applies its own threshold
# (the console stays at INFO).
logger.setLevel(logging.DEBUG)
logger.info('running %s', ' '.join(sys.argv))

In [2]:
import numpy as np

class Recall:
    """Running recall@n over a stream of observed ranks.

    Each call to ``update`` records one observation; the cumulative recall
    after every observation is kept in ``recall_list``.
    """

    def __init__(self, n):
        """
        Args:
            n: cut-off; an observation counts as a hit when ``rank <= n``.
        """
        self.n = n
        self.hits = 0
        self.stream = 0
        self.r_mean = 0.0
        self.recall_list = []

    def update(self, rank):
        """Record one observed rank and return the cumulative recall so far.

        NOTE(review): the hit test is ``rank <= n``; if ranks are 0-based this
        counts the top ``n + 1`` positions -- confirm against the producer.
        """
        if rank <= self.n:
            self.hits += 1
        self.stream += 1
        recall = self.score()
        self.recall_list.append(recall)
        return recall

    def score(self):
        """Return hits / observations, or 0.0 when nothing has been observed yet."""
        if self.stream == 0:
            # Avoid ZeroDivisionError when queried before the first update.
            return 0.0
        return self.hits / self.stream

    def mean(self, skip=20000):
        """Average the cumulative recall values recorded after a warm-up.

        Args:
            skip: number of leading observations to ignore (default 20000,
                the value that was previously hard-coded).

        Returns:
            The mean of ``recall_list[skip:]``, or NaN when that slice is empty.
        """
        tail = self.recall_list[skip:]
        if not tail:
            # np.mean([]) also yields NaN but emits a RuntimeWarning.
            return float("nan")
        return np.mean(tail)

In [3]:
import time
class Configuration:
    """One experiment: a dataset, a recommender and optional forgetting / meta strategies.

    Lifecycle: construct -> ``started()`` -> ``get_batch_data()`` -> ``finished()``
    -> ``get_report()``. ``get_report()`` may be called at any point; fields that
    are not known yet are rendered as ``None``.
    """

    def __init__(self, data_path, Recommender, k=60, l2_reg= .01, learn_rate=.02, forgetting=None, meta=None):
        """
        Args:
            data_path: path of the CSV file handed to ``csv_loader``.
            Recommender: recommender class, instantiated as
                ``Recommender(k, l2_reg, learn_rate, forgetting)``.
            k, l2_reg, learn_rate: forwarded to ``Recommender``.
            forgetting: forgetting strategy; ``None`` means a fresh
                ``NoForgetting(alpha=None)``.
            meta: meta recommender; ``None`` means a fresh ``NoMeta()``.
        """
        # Build the no-op strategies per instance. Instances created in the
        # signature would be shared by every Configuration, and `meta` is
        # bound to a recommender in started().
        if forgetting is None:
            forgetting = NoForgetting(alpha=None)
        if meta is None:
            meta = NoMeta()

        self.status = "..."
        self.duration = None
        self.start = None
        # Defined up front so get_report()/finished() never hit AttributeError.
        self.finish = None
        self.data = None
        self.log_file = None
        self.result_file = None
        self.file_handler = None
        self.logger = logging.getLogger("experimenter.jupyter")
        self.configuration = {
            "recommender" : Recommender.__name__,
            "learn_rate" : learn_rate,
            "k" : k,
            "forgetting": "",
            "forgetting_param": "",
            # Same key that is written below and read by get_report(); the
            # empty default formats to an empty string.
            "forgetting_param_formater": "",

            "meta" : "",
            "meta_param": "",
            "meta_param_formater": ""
        }

        if not forgetting.__class__.__name__ == "NoForgetting":
            self.configuration["forgetting"] = forgetting.__class__.__name__
            self.configuration["forgetting_param"] = forgetting.parameters()
            self.configuration["forgetting_param_formater"] = forgetting.parameters_formater()

        if not meta.__class__.__name__ == "NoMeta":
            self.configuration["meta"] = meta.__class__.__name__
            self.configuration["meta_param"] = meta.parameters()
            self.configuration["meta_param_formater"] = meta.parameters_formater()

        self.data_path = data_path
        self.__recommender = Recommender(k, l2_reg, learn_rate, forgetting)
        self.meta = meta

    def recommender(self):
        """Return the recommender instance (``None`` once ``finished()`` has run)."""
        return self.__recommender

    def get_report(self):
        """Render a plain-text summary of the configuration, timing and status."""
        return """
        Experiment Report
        
        - Dataset: {}
        - Result File: {}
        - Status: {}
        
        Time
        - Started: {}
        - Finished: {}
        - Duration: {}
        
        Experiment Configuration
        
        Recommender:
        - Name: {}
        - Learning Rate: {}
        - K Dimensions: {}
        
        Meta Recommender:
        - Name: {}
        - Paramenters: {}
        
        Forgetting Techniques:
        - Name: {}
        - Paramenters: {}
        """.format(self.data_path, self.result_file, self.status, self.start, self.finish, self.duration ,
                   self.configuration["recommender"], self.configuration["learn_rate"], self.configuration["k"],
                  self.configuration["meta"], self.configuration["meta_param_formater"].format(self.configuration["meta_param"]),
                  self.configuration["forgetting"], self.configuration["forgetting_param_formater"].format(self.configuration["forgetting_param"]))

    def get_batch_data(self):
        """Split off the leading samples used for batch pre-training.

        Returns:
            ``(batch_training, batch_test)``: the first 20% of the samples and
            the following 30%. Requires ``started()`` to have loaded the data.
        """
        n_batch_train = int(self.data.n_sample * 0.2)  # 20% for pre-training to avoid cold-start
        n_batch_test = int(self.data.n_sample * 0.3)  # 30% for evaluation of pre-training
        batch_tail = n_batch_train + n_batch_test

        batch_training = self.data.samples[:n_batch_train]
        batch_test = self.data.samples[n_batch_train:batch_tail]

        return batch_training, batch_test

    def started(self):
        """Open the per-experiment log file, load the dataset and initialize the models."""
        exp_name = self.data_path.split("\\")[-1].replace('.csv', '') + "_" + self.configuration["recommender"]
        if self.configuration["meta"] != "":
            exp_name += "_" + self.configuration["meta"] + "_".join(str(param) for param in self.configuration["meta_param"])
        if self.configuration["forgetting"] != "":
            exp_name += "_" + self.configuration["forgetting"] + "_".join(str(param) for param in self.configuration["forgetting_param"])

        self.log_file = BASE_PATH + LOG_PATH + exp_name + "_" +  datetime.datetime.now().strftime('%Y-%m-%d_%H-%M') + ".log"
        self.logger = logging.getLogger("experimenter.jupyter")
        self.file_handler = logging.FileHandler(self.log_file, mode='w+')
        self.file_handler.setLevel(logging.DEBUG)
        # Replaces whatever handlers the logger had; `console` is the
        # module-level stream handler.
        self.logger.handlers = [self.file_handler, console]

        # process_time() is CPU time of this process, not wall-clock time.
        self.start = time.process_time()
        self.logger.info('converting data into FluRS input object')
        self.data = csv_loader(self.data_path)

        self.result_file = BASE_PATH + "flurs\\results\\{}.dat".format(exp_name)

        self.logger.info('initialize recommendation model {}'.format(exp_name))
        self.__recommender.initialize()
        self.meta.initialize(self.__recommender)

    def finished(self):
        """Record the duration, release the model and data, and close the log file.

        Logs a warning and does nothing else when ``started()`` was never called.
        """
        if self.start is None:
            self.logger.warning("Experiment not finished properly. Please start it before finish")
            return

        self.finish = time.process_time()
        # Drop the references (instead of `del`) so the attributes stay defined
        # and a second call does not raise AttributeError.
        self.__recommender = None
        self.data = None
        self.duration =  self.finish - self.start
        self.status = "OK!"
        if self.file_handler is not None:
            # Detach before closing: a closed FileHandler left on the logger is
            # reopened by the next record, and mode 'w+' truncates the log.
            self.logger.removeHandler(self.file_handler)
            self.file_handler.close()
    

In [4]:
import smtplib
import traceback

class Experimenter:
    """Runs queued ``Configuration`` objects in order and reports each outcome by email.

    Mail settings are read from the environment so that no secret lives in the
    notebook:

    - ``EXPERIMENTER_GMAIL_USER``: sender account (defaults to the historical one)
    - ``EXPERIMENTER_GMAIL_PASSWORD``: sender password; when unset, no mail is sent
    - ``EXPERIMENTER_MAIL_TO``: recipient (defaults to the historical one)
    """

    def __init__(self):
        self.configurations = []
        self.logger = logging.getLogger("experimenter.jupyter")
        self.gmail_user = os.environ.get('EXPERIMENTER_GMAIL_USER', 'jupyter.experimenter@gmail.com')
        # Never hard-code the password; leaving it unset disables sending.
        self.gmail_password = os.environ.get('EXPERIMENTER_GMAIL_PASSWORD')
        self.dest = os.environ.get('EXPERIMENTER_MAIL_TO', 'eduferreiraj@gmail.com')

    def append(self, configuration):
        """Queue a configuration for the next ``run()``."""
        self.configurations.append(configuration)

    def run(self):
        """Execute every queued configuration in order.

        For each one: start it, pre-train on the batch split, stream the
        evaluator output into the result file, and mail the report. On failure
        a notification carrying the traceback is mailed and the original
        exception is re-raised, which stops the remaining configurations.
        """
        for configuration in self.configurations:
            try:
                configuration.started()
                rec = configuration.recommender()
                forgetting = rec.forgetting.__class__.__name__
                self.logger.info('initialize recommendation model {} with {}'.format(rec.__class__.__name__, forgetting))
                # NOTE(review): configuration.started() already initializes the
                # recommender and then binds the meta recommender to it; confirm
                # that initializing again here does not discard that state.
                rec.initialize()

                batch_training, batch_test = configuration.get_batch_data()

                evaluator = Evaluator(rec)
                self.logger.info('batch pre-training before streaming input')
                evaluator.fit(
                    batch_training,
                    batch_test,
                    max_n_epoch=20
                )

                self.logger.info('incrementally predict, evaluate and update the recommender')
                self.logger.info("Abrindo arquivo {} ...".format(configuration.result_file))
                with open(configuration.result_file, 'w+') as f:
                    self.logger.info("Começando a gerar resultados ...")
                    for instance in evaluator:
                        f.write(str(instance))
                self.logger.info("Arquivo {} completo.".format(configuration.result_file))
                configuration.finished()
            except Exception as e:
                subject = '[EXCEPT] {}'.format(e)
                body = 'Ooops, something happened in the experimentation.\n\nLog file: {}\n\nException:\n{}\n\n{}'.format(
                    # started() may have failed before the log file was chosen.
                    getattr(configuration, 'log_file', None),
                    traceback.format_exc(),
                    datetime.datetime.now())
                self.send_email(subject, body)
                # Bare raise keeps the original traceback intact.
                raise
            else:
                subject = "[REPORT] Experimentation Report"
                self.send_email(subject, configuration.get_report())

    def send_email(self, subject, body):
        """Mail ``body`` to ``self.dest``; best effort, never raises on delivery problems.

        Returns:
            True when the message was handed to the SMTP server, False when
            sending is disabled (no password configured) or delivery failed.
        """
        if not self.gmail_password:
            self.logger.info('Email not sent (no SMTP password configured): %s', subject)
            return False

        from email.message import EmailMessage

        message = EmailMessage()
        message['From'] = self.gmail_user
        message['To'] = self.dest
        # Header values must be a single line; exception text may contain newlines.
        message['Subject'] = ' '.join(str(subject).split())
        message.set_content(str(body))

        try:
            with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
                server.login(self.gmail_user, self.gmail_password)
                server.send_message(message)
        except (smtplib.SMTPException, OSError) as err:
            # A mail problem must not hide the experiment result or failure.
            self.logger.warning('Email could not be sent: %s', err)
            return False

        self.logger.info('Email sent!')
        return True


In [5]:
# Dataset under test, relative to BASE_PATH. Exactly one assignment is active;
# the commented ones are alternatives that can be swapped in.
# DATA_PATH = 'datasets\\MovieLens\\ml-1m\\ml-1m-gte.csv'
# DATA_PATH = 'datasets\\palco\\music_listen_15K.csv'
# DATA_PATH = 'datasets\\palco\\music_playlist.csv'
DATA_PATH = 'datasets\\netflix\\positive_netflix_50.csv'

# Queue a single default-parameter MFRecommender experiment and run it.
experimenter = Experimenter()
configuration = Configuration(BASE_PATH + DATA_PATH, MFRecommender)
experimenter.append(configuration)
experimenter.run()

KeyboardInterrupt: 