In [None]:
# train generated SVD & RNN4REC reco simulation test initialisation
import tensorflow as tf

from libreco.data import random_split, DatasetPure
from libreco.algorithms import SVD, RNN4Rec
from libreco.evaluation import evaluate

from puzzling_models_utils import get_float_steps_between
from puzzling_models_file_handling import load_char_from_csv_file, load_trainuser_testuser_trainchar_from_csv_file, save_one_metric_result_in_csv_file

# --- Dataset selection / loader arguments -----------------------------------
# `path`, `dataset` and `categories` are the leading positional arguments of
# both CSV loaders used in the training cells; `ids_wanted`, `max_user` and
# `min_rank_nb` are forwarded to them as `ids_set=`, `max_user=` and
# `min_rank_nb=`. Their exact meaning is defined by
# puzzling_models_file_handling (not visible here).
path = "datasets/"
# Alternative dataset; swap with the line below to use it.
#dataset = "movielens_25m"
dataset = "yelp"
categories = False
ids_wanted = '1'
max_user = 1000
min_rank_nb = 20
# NOTE(review): `show_lines` and `timed` are not read by any cell visible in
# this part of the notebook -- confirm they are still needed.
show_lines = False
timed = True

# File handed to save_one_metric_result_in_csv_file for every saved metric.
resultpath = "results/res_test.csv"
# Bounds and increment given to get_float_steps_between; the returned
# `step_array` is what the training cells iterate over as `sim_wanted`.
sims_wanted = [0,1]
sim_step = .1

# Split ratios: eval and test take the same global share of the dataset.
#   test_ratio       -- global share held out for testing
#   train_eval_ratio -- global share left for train + eval
#   eval_ratio       -- eval share *relative to train_eval_ratio*, chosen so that
#                       eval_ratio * train_eval_ratio == test_ratio
#   train_ratio      -- train share relative to train_eval_ratio
#   users_trainset   -- train + eval share expressed in percent (display only)
# train_ratio / eval_ratio are the ratios passed to random_split for the char
# data in the training cells.
test_ratio = 0.1
train_eval_ratio = 1.0 - test_ratio
eval_ratio = test_ratio / train_eval_ratio
train_ratio = 1.0 - eval_ratio
users_trainset = 100*train_eval_ratio

# Shows each relative ratio in brackets followed by its global percentage.
print(f"train [{train_ratio}] : {users_trainset*train_ratio}%, eval [{eval_ratio}] : {users_trainset*eval_ratio}%, test [{test_ratio}] : {100*test_ratio}%")

# Values iterated over by the training cells (one full train/eval run each).
step_array = get_float_steps_between(sims_wanted[0], sims_wanted[1], sim_step)
print(step_array)

def reset_state() -> None:
    """Reset TensorFlow's global default graph (TF1 compatibility API).

    The training cells call this right before constructing each model.
    """
    tf.compat.v1.reset_default_graph()

# Metric names passed as `metrics=` to both `fit(...)` and `evaluate(...)` in
# the training cells; the dict returned by evaluate() is indexed with these
# same strings when the results are saved.
ranking_metrics = ["loss", "balanced_accuracy", "ndcg"]

In [None]:
# train generated SVD reco simulation test with all sims
#
# For each similarity value in step_array two SVD ranking models are trained:
#   - "user -> user": fitted, validated and tested on a random split of
#     user_rankings;
#   - "char -> user": fitted/validated on char_train_data and tested on
#     user_test_data.
# Every metric of every run is handed to save_one_metric_result_in_csv_file.
recommenderAlg = "SVD"

# Keyword arguments shared by both dataset loaders.
loader_kwargs = dict(max_user=max_user, min_rank_nb=min_rank_nb, ids_set=ids_wanted)

# Global train/eval/test shares for the user -> user split: eval and test each
# take `test_ratio` of the data, train gets the remainder. With test_ratio = 0.1
# this evaluates to (0.8, 0.1, 0.1).
user_split_ratios = (1.0 - 2 * test_ratio, test_ratio, test_ratio)

# (label written with the result, key in the dict returned by evaluate()),
# listed in the order the results are saved.
saved_metrics = (
    ("balancedAccuracy", "balanced_accuracy"),
    ("ndcg", "ndcg"),
    ("loss", "loss"),
)

for sim_wanted in step_array:
    # Only the user rankings are taken from this loader; the char training data
    # comes from the train/test-aware loader below.
    user_rankings, _char_data_ignored = load_char_from_csv_file(
        path, dataset, categories, sim_wanted, **loader_kwargs
    )
    # user_train_data is not used in this cell.
    user_train_data, user_test_data, char_train_data = load_trainuser_testuser_trainchar_from_csv_file(
        path, dataset, categories, sim_wanted,
        path_subfile='/chars_test/', path_index='datasets/char_index_test.csv',
        **loader_kwargs
    )

    # A fresh list is passed on every iteration so the shared tuple can never
    # be modified by the callee.
    train_data, eval_data, test_data = random_split(
        user_rankings, multi_ratios=list(user_split_ratios)
    )

    train_user_data, data_user_info = DatasetPure.build_trainset(train_data)
    eval_user_data = DatasetPure.build_evalset(eval_data)
    test_user_data = DatasetPure.build_testset(test_data)

    print("user -> user")
    reset_state()
    svd_rank = SVD('ranking', data_info=data_user_info)
    print("user - train")
    svd_rank.fit(
        train_user_data,
        verbose=0,
        neg_sampling=True,
        eval_data=eval_user_data,
        metrics=ranking_metrics
    )
    print("user - eval")
    ranking_evaluations = evaluate(
        model=svd_rank,
        data=test_user_data,
        neg_sampling=True,
        metrics=ranking_metrics
    )
    print("user - save")
    for csv_label, metric_key in saved_metrics:
        save_one_metric_result_in_csv_file(
            resultpath, dataset, "users", ids_wanted, categories, sim_wanted,
            recommenderAlg, csv_label, ranking_evaluations[metric_key]
        )

    print("char -> user")
    train_char_data, eval_char_data = random_split(char_train_data, multi_ratios=[train_ratio, eval_ratio])
    # The train set is built first; the eval and test sets are built after it,
    # in the same order as for the user data above.
    train_char_data, data_char_info = DatasetPure.build_trainset(train_char_data)
    eval_char_data = DatasetPure.build_evalset(eval_char_data)

    test_user_data = DatasetPure.build_testset(user_test_data)

    reset_state()

    svdchar_rank = SVD('ranking', data_info=data_char_info)
    print("char - train")
    svdchar_rank.fit(
        train_char_data,
        verbose=0,
        neg_sampling=True,
        eval_data=eval_char_data,
        metrics=ranking_metrics
    )
    # Progress label for the evaluation step (mirrors "user - eval" above).
    print("char - eval")
    ranking_evaluationsChar = evaluate(
        model=svdchar_rank,
        data=test_user_data,
        neg_sampling=True,
        metrics=ranking_metrics
    )
    print("char - save")
    for csv_label, metric_key in saved_metrics:
        save_one_metric_result_in_csv_file(
            resultpath, dataset, "chars", ids_wanted, categories, sim_wanted,
            recommenderAlg, csv_label, ranking_evaluationsChar[metric_key]
        )

In [None]:
# train generated RNN4Rec reco simulation with all sims
#
# For each similarity value in step_array two RNN4Rec ranking models are trained
# with identical hyper-parameters:
#   - "user -> user": fitted, validated and tested on a random split of
#     user_rankings;
#   - "char -> user": fitted/validated on char_train_data and tested on
#     user_test_data.
# Every metric of every run is handed to save_one_metric_result_in_csv_file.
recommenderAlg = "RNN4Rec"
print(f"{dataset} - {categories} idset[{ids_wanted}] reco {recommenderAlg}")

# Keyword arguments shared by both dataset loaders.
loader_kwargs = dict(max_user=max_user, min_rank_nb=min_rank_nb, ids_set=ids_wanted)

# Global train/eval/test shares for the user -> user split: eval and test each
# take `test_ratio` of the data, train gets the remainder. With test_ratio = 0.1
# this evaluates to (0.8, 0.1, 0.1).
user_split_ratios = (1.0 - 2 * test_ratio, test_ratio, test_ratio)

# Hyper-parameters used for BOTH models, kept in one place so the two runs
# cannot drift apart.
rnn_params = dict(
    rnn_type="lstm",
    embed_size=16,
    n_epochs=5,
    lr=0.001,
    lr_decay=False,
    hidden_units=(16, 16),
    reg=None,
    batch_size=256,
    num_neg=1,
    dropout_rate=None,
    recent_num=10,
    tf_sess_config=None,
)

# (label written with the result, key in the dict returned by evaluate()),
# listed in the order the results are saved.
saved_metrics = (
    ("balancedAccuracy", "balanced_accuracy"),
    ("ndcg", "ndcg"),
    ("loss", "loss"),
)

for sim_wanted in step_array:
    # Only the user rankings are taken from this loader; the char training data
    # comes from the train/test-aware loader below.
    user_rankings, _char_data_ignored = load_char_from_csv_file(
        path, dataset, categories, sim_wanted, **loader_kwargs
    )
    # user_train_data is not used in this cell.
    user_train_data, user_test_data, char_train_data = load_trainuser_testuser_trainchar_from_csv_file(
        path, dataset, categories, sim_wanted,
        path_subfile='/chars_test/', path_index='datasets/char_index_test.csv',
        **loader_kwargs
    )

    # A fresh list is passed on every iteration so the shared tuple can never
    # be modified by the callee.
    train_data, eval_data, test_data = random_split(
        user_rankings, multi_ratios=list(user_split_ratios)
    )

    train_user_data, data_user_info = DatasetPure.build_trainset(train_data)
    eval_user_data = DatasetPure.build_evalset(eval_data)
    test_user_data = DatasetPure.build_testset(test_data)

    print("user -> user")
    reset_state()
    rnn_rank = RNN4Rec("ranking", data_user_info, **rnn_params)
    rnn_rank.fit(
        train_user_data,
        neg_sampling=True,
        verbose=2,
        shuffle=True,
        eval_data=eval_user_data,
        metrics=ranking_metrics,
    )
    ranking_evaluations = evaluate(
        model=rnn_rank,
        data=test_user_data,
        neg_sampling=True,
        metrics=ranking_metrics
    )

    print("user - save")

    print(ranking_evaluations)
    for csv_label, metric_key in saved_metrics:
        save_one_metric_result_in_csv_file(
            resultpath, dataset, "users", ids_wanted, categories, sim_wanted,
            recommenderAlg, csv_label, ranking_evaluations[metric_key]
        )

    print("char -> user")
    train_char_data, eval_char_data = random_split(char_train_data, multi_ratios=[train_ratio, eval_ratio])
    # The train set is built first; the eval and test sets are built after it,
    # in the same order as for the user data above.
    train_char_data, data_char_info = DatasetPure.build_trainset(train_char_data)
    eval_char_data = DatasetPure.build_evalset(eval_char_data)

    test_user_data = DatasetPure.build_testset(user_test_data)

    reset_state()

    # Rebinds rnn_rank: from here on it refers to the char-trained model.
    rnn_rank = RNN4Rec("ranking", data_char_info, **rnn_params)
    rnn_rank.fit(
        train_char_data,
        neg_sampling=True,
        verbose=2,
        shuffle=True,
        eval_data=eval_char_data,
        metrics=ranking_metrics,
    )
    ranking_evaluationsChar = evaluate(
        model=rnn_rank,
        data=test_user_data,
        neg_sampling=True,
        metrics=ranking_metrics
    )

    print("char - save")
    print(ranking_evaluationsChar)
    for csv_label, metric_key in saved_metrics:
        save_one_metric_result_in_csv_file(
            resultpath, dataset, "chars", ids_wanted, categories, sim_wanted,
            recommenderAlg, csv_label, ranking_evaluationsChar[metric_key]
        )