# Matrix Factorization Recommenders (MF-BPR, AsySVD, FunkSVD)

In [None]:
def init_kaggle(run_on_kaggle, secret_name):
    """Bootstrap the project checkout when the notebook runs on Kaggle.

    When ``run_on_kaggle`` is truthy, the GitHub token stored under
    ``secret_name`` is read from the Kaggle secrets store, the
    ``rec_sys_challenge2022`` repository is cloned (unless a directory with
    that name already exists), the working directory is moved into it, the
    requirements are installed and ``run_compile_all_cython.py`` is executed.

    Args:
        run_on_kaggle: Whether the Kaggle bootstrap should be performed.
        secret_name: Name of the Kaggle secret holding the GitHub token.

    Returns:
        True if the bootstrap was performed, False if it was skipped.

    Raises:
        RuntimeError: If ``git clone`` exits with a non-zero status.
        subprocess.CalledProcessError: If installing the requirements or
            running the Cython compilation script fails.
    """
    if not run_on_kaggle:
        return False

    import os
    import subprocess
    import sys

    from kaggle_secrets import UserSecretsClient

    repo_dir = "rec_sys_challenge2022"

    # Skip the clone when the checkout is already there (e.g. the cell is
    # re-run after a kernel restart): git would refuse to clone into it.
    if not os.path.isdir(repo_dir):
        personal_token = UserSecretsClient().get_secret(secret_name)
        clone = subprocess.run(
            ["git", "clone", "https://" + personal_token + "@github.com/alecontuIT/rec_sys_challenge2022.git"]
        )
        # Deliberately not using check=True here: CalledProcessError embeds
        # the full command line, which would print the token in the traceback.
        if clone.returncode != 0:
            raise RuntimeError(
                "git clone of {} failed with exit code {}".format(repo_dir, clone.returncode)
            )

    os.chdir("./" + repo_dir)

    # Use the interpreter running this kernel so the packages and compiled
    # extensions land in the environment the notebook actually imports from.
    subprocess.run([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"], check=True)
    subprocess.run([sys.executable, "run_compile_all_cython.py"], check=True)
    return True

In [None]:
# Dataset variant identifier; handed to the utils helpers as dataset_version.
dataset_version = "interactions-all-ones"

# Local run: with run_on_kaggle=False init_kaggle does nothing and returns False.
kaggle = init_kaggle(run_on_kaggle=False, secret_name="recsys_git_token")

In [None]:
import utils
from recmodels import MatrixFactorizationBPRRec, AsySVDRec, FunkSVDRec
from scipy.stats import loguniform
from Evaluation.Evaluator import EvaluatorHoldout
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

In [None]:
# Which matrix-factorization model this notebook tunes.
# Supported values: "MF_BPR", "ASY_SVD", "FUNK_SVD".
algorithm_name = "FUNK_SVD"

if algorithm_name == "MF_BPR":
    recommender_class = MatrixFactorizationBPRRec
elif algorithm_name == "ASY_SVD":
    recommender_class = AsySVDRec
elif algorithm_name == "FUNK_SVD":
    recommender_class = FunkSVDRec
else:
    # Fail loudly: merely printing would leave recommender_class undefined
    # (or, in a notebook, stale from a previous run of this cell).
    raise ValueError(
        "Error Algorithm Name! Got {!r}, expected one of 'MF_BPR', 'ASY_SVD', 'FUNK_SVD'.".format(
            algorithm_name
        )
    )

In [None]:
# Load the interaction data for the chosen dataset version.
# NOTE(review): presumably URM_all is the full user-item matrix, URM_train /
# URM_val a 70/30 split of it and ICM the item-content matrix - confirm in utils.
URM_all, URM_train, URM_val, ICM = utils.get_data_global_sample(
    dataset_version=dataset_version,
    train_percentage=0.7,
    setSeed=True,
)

In [None]:
# Evaluator over the validation matrix, configured with a single cutoff of 10.
evaluator_validation = EvaluatorHoldout(
    URM_val,
    cutoff_list=[10],
)

## Hyperparameter Search

In [None]:
# Search space and extra fit() keyword arguments for the selected algorithm.
# Single-value Categorical entries (e.g. "epochs") pin a hyperparameter to a
# fixed value instead of searching over it.
# Key order inside each dictionary is intentionally left as it was, in case
# the search (or a resumed search) depends on it.
if algorithm_name == "FUNK_SVD":
    hyperparameters_range_dictionary = {
        "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
        "epochs": Categorical([500]),
        "use_bias": Categorical([True, False]),
        "batch_size": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
        "num_factors": Integer(1, 200),
        "item_reg": Real(low=1e-5, high=1e-2, prior="log-uniform"),
        "user_reg": Real(low=1e-5, high=1e-2, prior="log-uniform"),
        "learning_rate": Real(low=1e-4, high=1e-1, prior="log-uniform"),
        "negative_interactions_quota": Real(low=0.0, high=0.5, prior="uniform"),
        "dropout_quota": Real(low=0, high=0.7, prior="uniform"),
    }

    fit_keyword_args = {}

elif algorithm_name == "ASY_SVD":
    hyperparameters_range_dictionary = {
        "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
        "epochs": Categorical([500]),
        "use_bias": Categorical([True, False]),
        # Batch size is pinned to 1 for this model.
        "batch_size": Categorical([1]),
        "num_factors": Integer(1, 200),
        "item_reg": Real(low=1e-5, high=1e-2, prior="log-uniform"),
        "user_reg": Real(low=1e-5, high=1e-2, prior="log-uniform"),
        "learning_rate": Real(low=1e-4, high=1e-1, prior="log-uniform"),
        "negative_interactions_quota": Real(low=0.0, high=0.5, prior="uniform"),
    }

    fit_keyword_args = {}

elif algorithm_name == "MF_BPR":
    hyperparameters_range_dictionary = {
        "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
        "epochs": Categorical([1500]),
        "num_factors": Integer(1, 200),
        "batch_size": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
        "positive_reg": Real(low=1e-5, high=1e-2, prior="log-uniform"),
        "negative_reg": Real(low=1e-5, high=1e-2, prior="log-uniform"),
        "learning_rate": Real(low=1e-4, high=1e-1, prior="log-uniform"),
        "dropout_quota": Real(low=0, high=0.7, prior="uniform"),
    }

    fit_keyword_args = {"positive_threshold_BPR": None}

else:
    # Previously any unrecognised name silently fell through to the MF_BPR
    # search space; refuse instead so a typo cannot tune the wrong ranges.
    raise ValueError(
        "No hyperparameter search space defined for algorithm {!r}; "
        "expected one of 'MF_BPR', 'ASY_SVD', 'FUNK_SVD'.".format(algorithm_name)
    )

In [None]:
# Early-stopping settings forwarded to the search via SearchInputRecommenderArgs.
# NOTE(review): presumably validation runs every 5 epochs and training stops
# after 5 validations without improvement in MAP - confirm in the recommender.
earlystopping_keywargs = dict(
    validation_every_n=5,
    stop_on_validation=True,
    evaluator_object=evaluator_validation,
    lower_validations_allowed=5,
    validation_metric="MAP",
)

In [None]:
# Bundle of constructor / fit / early-stopping arguments used by the search:
# the recommender is built on URM_train with no extra constructor keywords,
# and fit() receives the algorithm-specific keywords chosen above.
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS=fit_keyword_args,
    EARLYSTOPPING_KEYWORD_ARGS=earlystopping_keywargs,
)

In [None]:
# Launch the hyperparameter search for the selected recommender class.
# NOTE(review): presumably n_cases is the total number of configurations tried,
# perc_random_starts the fraction of them sampled at random, and
# resume_from_saved continues a previously saved search - confirm in utils.
utils.bayesian_search(
    recommender_class,
    recommender_input_args,
    hyperparameters_range_dictionary,
    evaluator_validation,
    dataset_version=dataset_version,
    n_cases=100,
    perc_random_starts=0.3,
    resume_from_saved=True,
)

## Best Model

In [None]:
# Fit the selected recommender class on URM_all, then hand it to the
# submission helper.
# NOTE(review): presumably fit_best_recommender reloads the best
# hyperparameters found by the search above - confirm in utils.
recommender = utils.fit_best_recommender(
    recommender_class,
    URM_all,
    dataset_version,
)
utils.submission(recommender, dataset_version)

In [None]:
# Save item scores for the users covered by the validation evaluator, using
# the recommender class together with the training matrix.
utils.save_item_scores(
    recommender_class,
    URM_train,
    evaluator_validation.users_to_evaluate,
    dataset_version,
    fast=True,
)