# KNN Item Content-Based Filtering Recommender

In [None]:
def init_kaggle(run_on_kaggle, secret_name):
    """Set up the project checkout when the notebook runs on Kaggle.

    When ``run_on_kaggle`` is falsy nothing is done. Otherwise the GitHub
    token stored in the Kaggle secret ``secret_name`` is read, the project
    repository is cloned, the working directory is moved into the checkout,
    the requirements are installed and ``run_compile_all_cython.py`` is run.

    Args:
        run_on_kaggle: Truthy to perform the Kaggle setup, falsy to skip it.
        secret_name: Name of the Kaggle secret that holds the GitHub token.

    Returns:
        True if the Kaggle setup was performed, False if it was skipped.

    Raises:
        RuntimeError: If ``git clone`` exits with a non-zero status.
        subprocess.CalledProcessError: If installing the requirements or
            running the Cython compilation script fails.
    """
    if not run_on_kaggle:
        return False

    # Imported lazily: kaggle_secrets only exists inside Kaggle kernels.
    import os
    import subprocess

    from kaggle_secrets import UserSecretsClient

    repo_dir = "rec_sys_challenge2022"
    personal_token = UserSecretsClient().get_secret(secret_name)

    # Reuse a checkout that is already present; cloning over it would fail.
    if not os.path.isdir(repo_dir):
        clone = subprocess.run(
            ["git", "clone", "https://" + personal_token + "@github.com/alecontuIT/rec_sys_challenge2022.git"]
        )
        if clone.returncode != 0:
            # The command line embeds the token, so it is deliberately kept
            # out of the error message (check=True would echo it).
            raise RuntimeError(
                "git clone of rec_sys_challenge2022 failed with exit code %d" % clone.returncode
            )

    os.chdir("./" + repo_dir)

    # Fail early if the environment could not be prepared, instead of
    # surfacing later as an unrelated import error.
    subprocess.run(["pip", "install", "-r", "requirements.txt"], check=True)
    subprocess.run(["python", "run_compile_all_cython.py"], check=True)
    return True

In [None]:
# Local run: skip the Kaggle bootstrap (pass run_on_kaggle=True on Kaggle).
kaggle = init_kaggle(run_on_kaggle=False, secret_name="recsys_git_token")

In [None]:
import utils 
from recmodels import ItemKNNCBFRec
from scipy.stats import loguniform
from Evaluation.Evaluator import EvaluatorHoldout
from skopt.space import Real, Integer, Categorical
from HyperparameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

In [None]:
# Recommender under study and the dataset variant name; both are reused by
# the data loading, search, fitting and saving cells of this notebook.
recommender_class = ItemKNNCBFRec
dataset_version = "stacked"

In [None]:
# Load the data for the selected dataset version. Seven values are returned;
# the 4th and 5th are not needed in this notebook and are discarded.
# NOTE(review): presumably train_percentage=0.7 keeps 70% of the interactions
# for training and setSeed=True makes the split reproducible - confirm in
# utils.get_data_global_sample.
URM_all, URM_train, URM_validation, _, _, ICM_stacked, ICM_stacked_train = utils.get_data_global_sample(
    dataset_version=dataset_version, 
    train_percentage=0.7, 
    setSeed=True
)

In [None]:
# Holdout evaluator built on the validation matrix, with 10 as the only cutoff.
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

## Hyperparameter Search

In [None]:
# Arguments handed to the hyperparameter search for building the recommender:
# the constructor receives the training URM and the training ICM positionally;
# no constructor keywords, fit arguments or early-stopping options are given.
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS = [URM_train, ICM_stacked_train],     
    CONSTRUCTOR_KEYWORD_ARGS = {},
    FIT_POSITIONAL_ARGS = [],
    FIT_KEYWORD_ARGS = {},
    EARLYSTOPPING_KEYWORD_ARGS = {},
)

In [None]:
# Search space explored by the hyperparameter search (skopt dimensions).
# The similarity-specific dimensions further down are disabled, so they are
# not part of the search.
# NOTE(review): "asymmetric" and "tversky" are still candidate similarities;
# presumably their alpha/beta then fall back to the recommender's defaults -
# confirm.
hyperparameters_range_dictionary = {
    "topK": Integer(5, 1000),
    "shrink": Integer(0, 1000),
    "similarity": Categorical(["cosine", "jaccard", "asymmetric", "dice", "tversky"]),
    "normalize": Categorical([True, False]),
    "feature_weighting": Categorical(["TF-IDF", "BM25", "none"]),
    # Disabled: extra dimension for the asymmetric similarity.
    #"asymmetric_alpha": Real(low = 0, high = 2, prior = 'uniform'),
    # Disabled: extra dimensions for the tversky similarity.
    #"tversky_alpha": Real(low = 0, high = 2, prior = 'uniform'),
    #"tversky_beta": Real(low = 0, high = 2, prior = 'uniform'),
    # Disabled: extra dimensions for the euclidean similarity ("euclidean" is
    # not among the similarity choices above).
    #"normalize_avg_row": Categorical([True, False]),
    #"similarity_from_distance_mode": Categorical(["lin", "log", "exp"])
}

In [None]:
# Run the Bayesian search for the recommender class over the hyperparameter
# ranges, using the validation evaluator.
# NOTE(review): presumably n_cases is the total number of configurations tried
# and perc_random_starts the fraction of them sampled at random before the
# model-guided phase - confirm in utils.bayesian_search.
hyper_search = utils.bayesian_search(
    recommender_class, 
    recommender_input_args, 
    hyperparameters_range_dictionary, 
    evaluator_validation,
    dataset_version=dataset_version,
    n_cases = 100,
    perc_random_starts = 0.3
)

## Best Model

In [None]:
# Fit the recommender on the full interaction matrix and produce the
# submission from it (presumably with the best hyperparameters saved by the
# search - confirm in utils.fit_best_recommender).
# The name `ICM` was never defined in this notebook and raised a NameError;
# the item-content matrices loaded by the data cell are `ICM_stacked` and
# `ICM_stacked_train`.
# NOTE(review): the full matrix is used here because the fit is on URM_all -
# confirm this is the intended ICM.
recommender = utils.fit_best_recommender(recommender_class, URM_all, dataset_version, ICM_train=ICM_stacked)
utils.submission(recommender, dataset_version)

In [None]:
# Save the item scores of the recommender class for the users covered by the
# validation evaluator.
# NOTE(review): no ICM is passed here although the recommender is built from
# (URM, ICM) in the search cell - confirm that utils.save_item_scores obtains
# the ICM on its own, and what fast=True toggles.
utils.save_item_scores(recommender_class, 
                       URM_train, 
                       evaluator_validation.users_to_evaluate, 
                       dataset_version, 
                       fast=True)