In [1]:
import cornac
from cornac.eval_methods import RatioSplit, BaseMethod
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer
from cornac.hyperopt import Discrete, Continuous
from cornac.hyperopt import GridSearch, RandomSearch
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

from recommenders.evaluation.python_evaluation import serendipity, distributional_coverage, catalog_coverage
from scipy.stats import hmean

from harmonic_mean import HarmonicMean
from serendipity_wrapper import Serendipity
from combined_eval_method import CombinedBaseMethod
from new_random_search import NewRandomSearch

  from .autonotebook import tqdm as notebook_tqdm


FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [2]:
def df_to_tuplelist(df):

    # transform into tuples
    tuple_list = list(df.itertuples(index=False, name=None))

    # rearrange
    for i in range(len(tuple_list)):
        tuple_list[i] = (str(tuple_list[i][1]), str(tuple_list[i][0]), float(tuple_list[i][2]))

    return tuple_list

def df_to_tuplelist_pair(df):
    # make into tuples
    cats_data = list(df.itertuples(index=False, name=None))

    # unzip the tuple into 2 lists
    cat_ids, texts = zip(*cats_data)

    cat_ids = list(cat_ids)
    texts = list(texts)

    for i in range(len(texts)):
        texts[i] = texts[i].replace('\n', ' ')

    return cat_ids, texts

# weighting modifications
def add_weightings(train_df, val_df, test_df):

    # list of datasets
    dfs = [train_df, test_df, val_df]

    for i in range(len(dfs)):
        
        # for all
        dfs[i]['like'] = dfs[i]['like'].apply(lambda x: 1 if x else 0)
        
        # train
        if i == 0:
            dfs[i]['click'] = dfs[i]['click'].apply(lambda x: 1 if x else 0)
            dfs[i].drop(['Unnamed: 0', 'id','created_at', 'updated_at'], axis=1, inplace=True)
            dfs[i] = dfs[i][['userID', 'catID', 'like', 'dwell_time_ms', 'click']]
        
        elif i == 1 or i == 2:
            dfs[i].drop(['Unnamed: 0', 'id', 'dwell_time_ms', 'click', 'created_at', 'updated_at'], axis=1, inplace=True)
            dfs[i] = dfs[i][['userID', 'catID', 'like']]
            
    train, test, val = dfs[0], dfs[1], dfs[2]

    # checking dwell_time_ms in seconds
    train['dwell_time_ms'].apply(lambda x: x/1000).describe()

    # applying log transformation on dwell time to reduce impact of outliers
    train['dwell_time_ms'].apply(lambda x: np.log(x)).describe()

    train['log_dwell_time'] = train['dwell_time_ms'].apply(lambda x: np.log(x))
    train.drop(['dwell_time_ms'], axis=1, inplace=True)

    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()

    train['norm_log_dwell_time'] = scaler.fit_transform(train[['log_dwell_time']])
    train.drop(['log_dwell_time'], axis=1, inplace=True)
    train.head()

    train_weighted = train.copy()

    # weights L, C, D for like, click, dwell_time
    L = 0.5
    C = 0.25
    D = 0.25

    train_weighted['rating'] = L*train_weighted['like'] + C*train_weighted['click'] + D* train_weighted['norm_log_dwell_time']

    train_weighted.drop(['like','click','norm_log_dwell_time'], axis=1, inplace=True)

    # convert to tuple
    like_data_train = [tuple(train_weighted.iloc[i]) for i in range(len(train_weighted))]
    like_data_test = [tuple(test.iloc[i]) for i in range(len(test))]
    like_data_validation = [tuple(val.iloc[i]) for i in range(len(val))]

    return like_data_train, like_data_validation, like_data_test

In [3]:
# data-loading

# stratified splits
strat_train_df = pd.read_csv('model_data/train_val_test/strat_train.csv', usecols=['catID', 'userID', 'like'])
strat_validation_df = pd.read_csv('model_data/train_val_test/strat_validation.csv', usecols=['catID', 'userID', 'like'])
strat_test_df = pd.read_csv('model_data/train_val_test/strat_test.csv', usecols=['catID', 'userID', 'like'])

strat_like_data_train = df_to_tuplelist(strat_train_df)
strat_like_data_validation = df_to_tuplelist(strat_validation_df)
strat_like_data_test = df_to_tuplelist(strat_test_df)

# lsuo splits
lsuo_train_df = pd.read_csv('model_data/train_val_test/lsuo_train.csv', usecols=['catID', 'userID', 'like'])
lsuo_validation_df = pd.read_csv('model_data/train_val_test/lsuo_validation.csv', usecols=['catID', 'userID', 'like'])
lsuo_test_df = pd.read_csv('model_data/train_val_test/lsuo_test.csv', usecols=['catID', 'userID', 'like'])

lsuo_like_data_train = df_to_tuplelist(lsuo_train_df)
lsuo_like_data_validation = df_to_tuplelist(lsuo_validation_df)
lsuo_like_data_test = df_to_tuplelist(lsuo_test_df)

#description
desc_cats_df = pd.read_csv('model_data/auxiliary/cats.csv', usecols=['id', 'description'])
desc_cat_ids, desc_texts = df_to_tuplelist_pair(desc_cats_df)

#details
details_cats_df = pd.read_csv('model_data/auxiliary/cats.csv', usecols=['id', 'details'])
details_cat_ids, details_texts = df_to_tuplelist_pair(details_cats_df)

In [4]:
### Function to generate output
def make_recommendations(MODEL, TRAINING_SET):
    item_id2idx = MODEL.train_set.iid_map
    item_idx2id = list(MODEL.train_set.item_ids)
    user_idx2id = MODEL.train_set.uid_map
    user_idx2id = list(MODEL.train_set.user_ids)
   
    num_users = len(np.unique(user_idx2id))
    
    # For each user, get the list of items that they have rated in train and probe
    rated = TRAINING_SET.groupby('userID')['catID'].agg(lambda x: list(x))
    rated = rated.to_dict()

    rec_result = {}

    for UIDX in range(0, num_users):
        recommendations, scores = MODEL.rank(UIDX)
        rec_result[user_idx2id[UIDX]] = [item_idx2id[i] for i in recommendations]

    # sort results
    rec_result = {key:value for key, value in sorted(rec_result.items(), key=lambda item: item[0])}

    # remove the rated items from rec_results
    for user in rec_result:
        tmp = [x for x in rec_result[user] if x not in rated[user]]
        rec_result[user] = tmp

    return rated, rec_result

# Function for generating serendipity, distributional coverage and harmonic mean scores
def evaluate(EXPERIMENT, TRAINING_SET):
    
    MODEL = EXPERIMENT.models[0]
    
    # Get disctionary of rated and recommended items
    rated, recommendations = make_recommendations(MODEL, TRAINING_SET)


    ### PREPARING DATAFRAMES ###
    # Make recommended items into a dataframe for MS recommenders
    data = []
    
    # Iterate over the dictionary keys and values
    for key, values in recommendations.items():
        # Iterate over the values and append rows to the data list
        for value in values:
            data.append([key, value])
    
    # Create the DataFrame with the specified columns
    recommendations_df = pd.DataFrame(data, columns=['userID', 'itemID'])

    # Make the testing set into a dataframe for MS recommenders
    train_df = TRAINING_SET[['userID','catID']].reset_index()
    train_df = train_df[['userID','catID']]
    train_df = train_df.rename(columns={'userID':'userID', 'catID':'itemID'})


    ### METRICS ###
    # Calculate serendipity
    serendipity_score = serendipity(train_df, recommendations_df)

    # Calculate coverage
    dist_coverage_score = distributional_coverage(train_df, recommendations_df)

    # Extract 
    for result in EXPERIMENT.result:
        model_name = result.model_name
        fone10 = result.metric_avg_results['F1@10']
        ncrr = result.metric_avg_results['NCRR@-1']
        ndcg = result.metric_avg_results['NDCG@-1']

    # Calculate harmonic mean
    h_mean = hmean([fone10, ncrr, ndcg, dist_coverage_score, serendipity_score])

    print(model_name)
    print(f"F1@10: {fone10:.3f}")
    print(f"NCRR: {ncrr:.3f}")
    print(f"NDCG: {ndcg:.3f}")
    print(f"Distributional coverage: {dist_coverage_score:.3f}")
    print(f"Serendipity: {serendipity_score:.3f}")
    print(f"Harmonic mean: {h_mean:.3f}")
    print()

In [5]:
def train_ctr(model_name, text_column, split_type, weighted_or_unweighted):

    if weighted_or_unweighted == 'unweighted':
        if split_type == 'strat':
            like_data_train = strat_like_data_train
            like_data_validation = strat_like_data_validation
            like_data_test = strat_like_data_test

        else:
            like_data_train = lsuo_like_data_train
            like_data_validation = lsuo_like_data_validation
            like_data_test = lsuo_like_data_test

    else:
        #junyi's modifications
        # train test val filenames
        trainfile = f'{split_type}_train'
        testfile = f'{split_type}_test'
        valfile = f'{split_type}_validation'

        # train test val datasets
        train = pd.read_csv('model_data/train_val_test/'+trainfile+'.csv')
        test = pd.read_csv('model_data/train_val_test/'+testfile+'.csv')
        val = pd.read_csv('model_data/train_val_test/'+valfile+'.csv')

        # main data dataset
        user = pd.read_csv('model_data/auxiliary/users.csv')
        cat = pd.read_csv('model_data/auxiliary/cats.csv')
        # interaction = pd.read_csv('model_data/auxiliary/interaction.csv')

        # list of datasets
        dfs = [train, test, val]
        for i in range(len(dfs)):
            # for all
            dfs[i]['like'] = dfs[i]['like'].apply(lambda x: 1 if x else 0)
            # train
            if i == 0:
                dfs[i]['click'] = dfs[i]['click'].apply(lambda x: 1 if x else 0)
                dfs[i].drop(['Unnamed: 0', 'id','created_at', 'updated_at'], axis=1, inplace=True)
                dfs[i] = dfs[i][['userID', 'catID', 'like', 'dwell_time_ms', 'click']]
            elif i == 1 or i == 2:
                dfs[i].drop(['Unnamed: 0', 'id', 'dwell_time_ms', 'click', 'created_at', 'updated_at'], axis=1, inplace=True)
                dfs[i] = dfs[i][['userID', 'catID', 'like']]
        train, test, val = dfs[0], dfs[1], dfs[2]
        # checking dwell_time_ms in seconds
        train['dwell_time_ms'].apply(lambda x: x/1000).describe()
        # applying log transformation on dwell time to reduce impact of outliers
        train['dwell_time_ms'].apply(lambda x: np.log(x)).describe()
        train['log_dwell_time'] = train['dwell_time_ms'].apply(lambda x: np.log(x))
        train.drop(['dwell_time_ms'], axis=1, inplace=True)
        scaler = MinMaxScaler()
        train['norm_log_dwell_time'] = scaler.fit_transform(train[['log_dwell_time']])
        train.drop(['log_dwell_time'], axis=1, inplace=True)
        train_weighted = train.copy()
        # weights L, C, D for like, click, dwell_time
        L = 0.5
        C = 0.25
        D = 0.25
        train_weighted['rating'] = L*train_weighted['like'] + C*train_weighted['click'] + D* train_weighted['norm_log_dwell_time']
        train_weighted.drop(['like','click','norm_log_dwell_time'], axis=1, inplace=True)
        # convert to tuple
        like_data_train = [tuple(train_weighted.iloc[i]) for i in range(len(train_weighted))]
        like_data_test = [tuple(test.iloc[i]) for i in range(len(test))]
        like_data_validation = [tuple(val.iloc[i]) for i in range(len(val))]

    if text_column == 'desc':
        cat_ids = desc_cat_ids
        texts = desc_texts

    else:
        cat_ids = details_cat_ids
        texts = details_texts

    # Instantiate a TextModality, it makes it convenient to work with text auxiliary information
    # For more details, please refer to the tutorial on how to work with auxiliary data
    item_text_modality = TextModality(
        corpus=texts,
        ids=cat_ids,
        tokenizer=BaseTokenizer(sep=" ", stop_words="english"),
        max_vocab=8000,
        max_doc_freq=0.5,
    )

    # Define an evaluation method to split feedback into train and test sets

    if split_type == 'strat':
        bm = CombinedBaseMethod.from_splits(
        train_data=like_data_train,
        val_data=like_data_validation,
        test_data=like_data_test,
        verbose=True,
        item_text=item_text_modality,
        exclude_unknowns=True,
        )

    else:
        bm = CombinedBaseMethod.from_splits(
            train_data=like_data_train,
            val_data=like_data_validation,
            test_data=like_data_test,
            verbose=True,
            item_text=item_text_modality,
            exclude_unknowns=False,
            )

    # Instantiate CTR model
    ctr = cornac.models.CTR(
        name=f'{model_name}_{text_column}_{split_type}_{weighted_or_unweighted}',
        k=250,
        max_iter=200,
        lambda_v=1
        )

    # Use these for evaluation
    eval_metrics = [
        HarmonicMean(
            10,
            Serendipity(),
            cornac.metrics.FMeasure(k=10),
            cornac.metrics.NCRR(),
            cornac.metrics.NDCG()
        ),
        Serendipity(),
        cornac.metrics.FMeasure(k=10),
        cornac.metrics.NCRR(),
        cornac.metrics.NDCG()
    ]

    # Wrap MF model inside RandomSearch along with the searching space, try 20 times
    rs_ctr = NewRandomSearch(
        model=ctr,
        space=[
            Discrete("k", [50, 75, 100, 150, 200]),
            Continuous("lambda_u", low=1e-4, high=1e1),
            Continuous("lambda_v", low=1e-4, high=1e1),
            Continuous("a", low=0.9, high=1),
            Continuous("b", low=0.0, high=0.1),
            Continuous("eta", low=0.001, high=0.1),
        ],
        metric=HarmonicMean(
            10,
            Serendipity(),
            cornac.metrics.FMeasure(k=10),
            cornac.metrics.NCRR(),
            cornac.metrics.NDCG()
        ),
        eval_method=bm,
        n_trails=20,
    )

    # Put everything together into an experiment and run it
    experiment = cornac.Experiment(eval_method=bm, models=[rs_ctr], metrics=eval_metrics)
    experiment.run()

    # Print best params
    print('Random search best params: ', rs_ctr.best_params)

    EXPERIMENT = experiment

    if split_type == 'strat':
        TRAINING_SET = strat_train_df

    else:
        TRAINING_SET = lsuo_train_df

    # Evaluate and calculate harmonic mean
    evaluate(EXPERIMENT, TRAINING_SET)

def train_convMF(model_name, text_column, split_type, weighted_or_unweighted):

    if weighted_or_unweighted == 'unweighted':
        if split_type == 'strat':
            like_data_train = strat_like_data_train
            like_data_validation = strat_like_data_validation
            like_data_test = strat_like_data_test

        else:
            like_data_train = lsuo_like_data_train
            like_data_validation = lsuo_like_data_validation
            like_data_test = lsuo_like_data_test

    else:
        #junyi's modifications
        # train test val filenames
        trainfile = f'{split_type}_train'
        testfile = f'{split_type}_test'
        valfile = f'{split_type}_validation'

        # train test val datasets
        train = pd.read_csv('model_data/train_val_test/'+trainfile+'.csv')
        test = pd.read_csv('model_data/train_val_test/'+testfile+'.csv')
        val = pd.read_csv('model_data/train_val_test/'+valfile+'.csv')

        # main data dataset
        user = pd.read_csv('model_data/auxiliary/users.csv')
        cat = pd.read_csv('model_data/auxiliary/cats.csv')
        # interaction = pd.read_csv('model_data/auxiliary/interaction.csv')

        # list of datasets
        dfs = [train, test, val]
        for i in range(len(dfs)):
            # for all
            dfs[i]['like'] = dfs[i]['like'].apply(lambda x: 1 if x else 0)
            # train
            if i == 0:
                dfs[i]['click'] = dfs[i]['click'].apply(lambda x: 1 if x else 0)
                dfs[i].drop(['Unnamed: 0', 'id','created_at', 'updated_at'], axis=1, inplace=True)
                dfs[i] = dfs[i][['userID', 'catID', 'like', 'dwell_time_ms', 'click']]
            elif i == 1 or i == 2:
                dfs[i].drop(['Unnamed: 0', 'id', 'dwell_time_ms', 'click', 'created_at', 'updated_at'], axis=1, inplace=True)
                dfs[i] = dfs[i][['userID', 'catID', 'like']]
        train, test, val = dfs[0], dfs[1], dfs[2]
        # checking dwell_time_ms in seconds
        train['dwell_time_ms'].apply(lambda x: x/1000).describe()
        # applying log transformation on dwell time to reduce impact of outliers
        train['dwell_time_ms'].apply(lambda x: np.log(x)).describe()
        train['log_dwell_time'] = train['dwell_time_ms'].apply(lambda x: np.log(x))
        train.drop(['dwell_time_ms'], axis=1, inplace=True)
        scaler = MinMaxScaler()
        train['norm_log_dwell_time'] = scaler.fit_transform(train[['log_dwell_time']])
        train.drop(['log_dwell_time'], axis=1, inplace=True)
        train_weighted = train.copy()
        # weights L, C, D for like, click, dwell_time
        L = 0.5
        C = 0.25
        D = 0.25
        train_weighted['rating'] = L*train_weighted['like'] + C*train_weighted['click'] + D* train_weighted['norm_log_dwell_time']
        train_weighted.drop(['like','click','norm_log_dwell_time'], axis=1, inplace=True)
        # convert to tuple
        like_data_train = [tuple(train_weighted.iloc[i]) for i in range(len(train_weighted))]
        like_data_test = [tuple(test.iloc[i]) for i in range(len(test))]
        like_data_validation = [tuple(val.iloc[i]) for i in range(len(val))]

    if text_column == 'desc':
        cat_ids = desc_cat_ids
        texts = desc_texts

    else:
        cat_ids = details_cat_ids
        texts = details_texts


    # Instantiate a TextModality, it makes it convenient to work with text auxiliary information
    # For more details, please refer to the tutorial on how to work with auxiliary data
    item_text_modality = TextModality(
        corpus=texts,
        ids=cat_ids,
        tokenizer=BaseTokenizer(sep=" ", stop_words="english"),
        max_vocab=8000,
        max_doc_freq=0.5,
    )

    # Define an evaluation method to split feedback into train and test sets
    if split_type == 'strat':
        bm = CombinedBaseMethod.from_splits(
        train_data=like_data_train,
        val_data=like_data_validation,
        test_data=like_data_test,
        verbose=True,
        item_text=item_text_modality,
        exclude_unknowns=True,
        )

    else:
        bm = CombinedBaseMethod.from_splits(
            train_data=like_data_train,
            val_data=like_data_validation,
            test_data=like_data_test,
            verbose=True,
            item_text=item_text_modality,
            exclude_unknowns=False,
            )

    # Instantiate CTR model
    convMF = cornac.models.ConvMF(
        name=f'{model_name}_{text_column}_{split_type}_{weighted_or_unweighted}',
        n_epochs=50,
        cnn_epochs=5,
        verbose=False,
        seed=123
        )

    # Use these for evaluation
    eval_metrics = [
        HarmonicMean(
            10,
            Serendipity(),
            cornac.metrics.FMeasure(k=10),
            cornac.metrics.NCRR(),
            cornac.metrics.NDCG()
        ),
        Serendipity(),
        cornac.metrics.FMeasure(k=10),
        cornac.metrics.NCRR(),
        cornac.metrics.NDCG()
    ]

    # Wrap MF model inside RandomSearch along with the searching space, try 30 times
    rs_convMF = NewRandomSearch(
        model=convMF,
        space=[
            Discrete("k", [50, 75, 100, 150, 200]),
            Discrete("cnn_bs", [16, 32, 64, 128]),
            Continuous("cnn_lr", low=1e-4, high=1e-2),
            Continuous("lambda_u", low=1e-1, high=1e2),
            Continuous("lambda_v", low=1e2, high=1e4),
            Continuous("dropout_rate", low=0.05, high=0.15),
        ],
        metric=HarmonicMean(
            10,
            Serendipity(),
            cornac.metrics.FMeasure(k=10),
            cornac.metrics.NCRR(),
            cornac.metrics.NDCG()
        ),
        eval_method=bm,
        n_trails=20,
    )

    # Put everything together into an experiment and run it
    experiment = cornac.Experiment(eval_method=bm, models=[rs_convMF], metrics=eval_metrics)
    experiment.run()

    # Print best params
    print('Random search best params: ', rs_convMF.best_params)

    EXPERIMENT = experiment

    if split_type == 'strat':
        TRAINING_SET = strat_train_df

    else:
        TRAINING_SET = lsuo_train_df

    # Evaluate and calculate harmonic mean
    evaluate(EXPERIMENT, TRAINING_SET)

In [6]:
# CTR desc stratified split (unweighted)

model_name = 'CTR'
text_column = 'description'
split_type = 'strat'
weighted_or_unweighted = 'unweighted'

train_ctr(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.5
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400

[RandomSearch_CTR_description_strat_unweighted] Training started!
Evaluating: {'a': 0.9006614738389299, 'b': 0.045161204318163665, 'eta': 0.05384915979390073, 'k': 150, 'lambda_u': 7.307317227823263, 'lambda_v': 5.137607158186093}


100%|██████████| 200/200 [08:10<00:00,  2.45s/it, cf_loss=525, lda_likelihood=-2.66e+4]


Learning completed!
Evaluating: {'a': 0.9008246600981573, 'b': 0.04095020222256599, 'eta': 0.055762965973127375, 'k': 100, 'lambda_u': 4.273549540145383, 'lambda_v': 4.731855282599306}


100%|██████████| 200/200 [03:37<00:00,  1.09s/it, cf_loss=438, lda_likelihood=-2.09e+4]


Learning completed!
Evaluating: {'a': 0.9583877670084683, 'b': 0.039235880612214946, 'eta': 0.07145140463021744, 'k': 100, 'lambda_u': 8.40151897862538, 'lambda_v': 6.0537808001396245}


100%|██████████| 200/200 [03:04<00:00,  1.09it/s, cf_loss=569, lda_likelihood=-1.94e+4]


Learning completed!
Evaluating: {'a': 0.9443361839084056, 'b': 0.022536116833811938, 'eta': 0.09343782490102721, 'k': 200, 'lambda_u': 5.096614781716116, 'lambda_v': 7.28707340631069}


100%|██████████| 200/200 [07:18<00:00,  2.19s/it, cf_loss=516, lda_likelihood=-3.07e+4]


Learning completed!
Evaluating: {'a': 0.9475769333314155, 'b': 0.07201816649288585, 'eta': 0.023776798836031773, 'k': 50, 'lambda_u': 9.945316949740262, 'lambda_v': 0.6284726447629613}


100%|██████████| 200/200 [01:06<00:00,  2.99it/s, cf_loss=382, lda_likelihood=-1.25e+4]


Learning completed!
Evaluating: {'a': 0.9335699673214317, 'b': 0.09251950307422116, 'eta': 0.08323260849199735, 'k': 75, 'lambda_u': 3.06923613815597, 'lambda_v': 7.024988986558186}


100%|██████████| 200/200 [01:40<00:00,  1.98it/s, cf_loss=449, lda_likelihood=-1.65e+4]


Learning completed!
Evaluating: {'a': 0.9823904341585422, 'b': 0.04287379690980806, 'eta': 0.08128572471517932, 'k': 150, 'lambda_u': 4.148409619172441, 'lambda_v': 2.971299862241821}


100%|██████████| 200/200 [05:06<00:00,  1.53s/it, cf_loss=414, lda_likelihood=-2.39e+4]


Learning completed!
Evaluating: {'a': 0.9994433019766066, 'b': 0.06280271300191313, 'eta': 0.022382506046857188, 'k': 50, 'lambda_u': 7.940435970693696, 'lambda_v': 6.814968663406777}


100%|██████████| 200/200 [01:07<00:00,  2.95it/s, cf_loss=578, lda_likelihood=-1.3e+4] 


Learning completed!
Evaluating: {'a': 0.9070711200359907, 'b': 0.09816272864025088, 'eta': 0.09781022261796964, 'k': 150, 'lambda_u': 0.05518938545124616, 'lambda_v': 5.397552404449283}


100%|██████████| 200/200 [04:45<00:00,  1.43s/it, cf_loss=90.5, lda_likelihood=-2.35e+4]


Learning completed!
Evaluating: {'a': 0.9206183223026874, 'b': 0.05200170228419737, 'eta': 0.04195151631297843, 'k': 75, 'lambda_u': 6.91345164859378, 'lambda_v': 7.059725059714248}


100%|██████████| 200/200 [01:39<00:00,  2.00it/s, cf_loss=543, lda_likelihood=-1.83e+4]


Learning completed!
Evaluating: {'a': 0.9567166901912196, 'b': 0.061673784396071844, 'eta': 0.06912206703067739, 'k': 100, 'lambda_u': 2.2138179815985346, 'lambda_v': 4.525407005760482}


100%|██████████| 200/200 [02:49<00:00,  1.18it/s, cf_loss=366, lda_likelihood=-1.92e+4]


Learning completed!
Evaluating: {'a': 0.9953379886841786, 'b': 0.060714111570120834, 'eta': 0.02709025166847709, 'k': 50, 'lambda_u': 4.5752303027140755, 'lambda_v': 5.6407577705606835}


100%|██████████| 200/200 [01:07<00:00,  2.97it/s, cf_loss=484, lda_likelihood=-1.2e+4] 


Learning completed!
Evaluating: {'a': 0.9915028060125262, 'b': 0.04803092896761005, 'eta': 0.09151849279406528, 'k': 75, 'lambda_u': 2.4733309163106934, 'lambda_v': 5.422451831218235}


100%|██████████| 200/200 [01:46<00:00,  1.87it/s, cf_loss=391, lda_likelihood=-1.59e+4]


Learning completed!
Evaluating: {'a': 0.9597499017533218, 'b': 0.07194697802337846, 'eta': 0.027880733889994506, 'k': 100, 'lambda_u': 0.9101659236725242, 'lambda_v': 6.245577304275941}


100%|██████████| 200/200 [02:59<00:00,  1.12it/s, cf_loss=289, lda_likelihood=-2.08e+4]


Learning completed!
Evaluating: {'a': 0.993549507651251, 'b': 0.04253363930797621, 'eta': 0.02817518277224307, 'k': 150, 'lambda_u': 9.385595162631098, 'lambda_v': 0.684159558827336}


100%|██████████| 200/200 [05:02<00:00,  1.51s/it, cf_loss=381, lda_likelihood=-2.57e+4]


Learning completed!
Evaluating: {'a': 0.9045737258308373, 'b': 0.022631406729412787, 'eta': 0.05379476962550417, 'k': 100, 'lambda_u': 4.422644783577021, 'lambda_v': 1.0767096666393938}


100%|██████████| 200/200 [02:58<00:00,  1.12it/s, cf_loss=309, lda_likelihood=-2.02e+4]


Learning completed!
Evaluating: {'a': 0.9015794736606453, 'b': 0.027018142044926943, 'eta': 0.05100096844728663, 'k': 50, 'lambda_u': 2.8690381441484663, 'lambda_v': 2.6409219328165734}


100%|██████████| 200/200 [01:06<00:00,  2.99it/s, cf_loss=331, lda_likelihood=-1.09e+4]


Learning completed!
Evaluating: {'a': 0.9313489888625238, 'b': 0.08927492609131596, 'eta': 0.040724772950131244, 'k': 100, 'lambda_u': 6.431325838049841, 'lambda_v': 4.929428887151874}


100%|██████████| 200/200 [03:15<00:00,  1.02it/s, cf_loss=518, lda_likelihood=-1.89e+4]


Learning completed!
Evaluating: {'a': 0.9229228294796985, 'b': 0.08591949432076694, 'eta': 0.09154533895490917, 'k': 100, 'lambda_u': 8.70730652780032, 'lambda_v': 6.3102017841293625}


100%|██████████| 200/200 [02:54<00:00,  1.14it/s, cf_loss=574, lda_likelihood=-1.76e+4]


Learning completed!
Evaluating: {'a': 0.910164085243495, 'b': 0.06480584696985309, 'eta': 0.03917335227873423, 'k': 100, 'lambda_u': 4.18420386658643, 'lambda_v': 7.802802525466104}


100%|██████████| 200/200 [03:01<00:00,  1.10it/s, cf_loss=483, lda_likelihood=-1.8e+4] 


Learning completed!
Best parameter settings: {'a': 0.9015794736606453, 'b': 0.027018142044926943, 'eta': 0.05100096844728663, 'k': 50, 'lambda_u': 2.8690381441484663, 'lambda_v': 2.6409219328165734}
HarmonicMean = 0.0076

[RandomSearch_CTR_description_strat_unweighted] Evaluation started!


Ranking: 100%|██████████| 100/100 [00:03<00:00, 27.98it/s]
Ranking: 100%|██████████| 96/96 [00:03<00:00, 31.48it/s]



VALIDATION:
...
                                              |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
--------------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_CTR_description_strat_unweighted | 0.0243 |       0.0076 |  0.0546 |  0.2440 |      0.0091 |   3.0516

TEST:
...
                                              |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
--------------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_CTR_description_strat_unweighted | 0.0136 |       0.0060 |  0.0439 |  0.2301 |      0.0084 | 3933.5666 |   3.5755

Random search best params:  {'a': 0.9015794736606453, 'b': 0.027018142044926943, 'eta': 0.05100096844728663, 'k': 50, 'lambda_u': 2.8690381441484663, 'lambda_v': 2.6409219328165734}
RandomSearch_CTR_description_strat_unweighted
F1@10: 0.014
NCRR

In [7]:
# CTR details stratified split (unweighted)

model_name = 'CTR'
text_column = 'details'
split_type = 'strat'
weighted_or_unweighted = 'unweighted'

train_ctr(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.5
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400

[RandomSearch_CTR_details_strat_unweighted] Training started!
Evaluating: {'a': 0.9111526519689233, 'b': 0.025581379309709707, 'eta': 0.048363768721040055, 'k': 100, 'lambda_u': 7.963422530706351, 'lambda_v': 1.0456859807378849}


100%|██████████| 200/200 [02:55<00:00,  1.14it/s, cf_loss=378, lda_likelihood=-1.95e+4]


Learning completed!
Evaluating: {'a': 0.9659223825338872, 'b': 0.024668209623546978, 'eta': 0.027572885944642418, 'k': 100, 'lambda_u': 3.3392638327054947, 'lambda_v': 5.6074934646162475}


100%|██████████| 200/200 [03:00<00:00,  1.11it/s, cf_loss=423, lda_likelihood=-1.8e+4] 


Learning completed!
Evaluating: {'a': 0.9481327908756115, 'b': 0.03475198037086957, 'eta': 0.05991552680901714, 'k': 100, 'lambda_u': 6.641308606550052, 'lambda_v': 2.214310247290354}


100%|██████████| 200/200 [02:54<00:00,  1.15it/s, cf_loss=438, lda_likelihood=-2.03e+4]


Learning completed!
Evaluating: {'a': 0.951485833070975, 'b': 0.08472123043662177, 'eta': 0.052234442644290835, 'k': 50, 'lambda_u': 4.547313621378639, 'lambda_v': 5.314205870193269}


100%|██████████| 200/200 [01:06<00:00,  2.99it/s, cf_loss=477, lda_likelihood=-1.04e+4]


Learning completed!
Evaluating: {'a': 0.9774051542451351, 'b': 0.04551242130887351, 'eta': 0.03674141130061239, 'k': 200, 'lambda_u': 7.647992279405489, 'lambda_v': 4.71800822513391}


100%|██████████| 200/200 [07:17<00:00,  2.19s/it, cf_loss=552, lda_likelihood=-3.18e+4]


Learning completed!
Evaluating: {'a': 0.9374563698199192, 'b': 0.011527572433948552, 'eta': 0.03705054388072624, 'k': 100, 'lambda_u': 4.027475597461927, 'lambda_v': 3.186460673022353}


100%|██████████| 200/200 [02:59<00:00,  1.11it/s, cf_loss=388, lda_likelihood=-1.78e+4]


Learning completed!
Evaluating: {'a': 0.9049633753175281, 'b': 0.08878257927868283, 'eta': 0.02028272050999995, 'k': 200, 'lambda_u': 4.1505889261017614, 'lambda_v': 5.552045531377427}


100%|██████████| 200/200 [12:25<00:00,  3.73s/it, cf_loss=470, lda_likelihood=-2.67e+4]


Learning completed!
Evaluating: {'a': 0.9382469885102422, 'b': 0.08658218993717821, 'eta': 0.012457816517550648, 'k': 150, 'lambda_u': 2.337433590382425, 'lambda_v': 3.7272994932666994}


100%|██████████| 200/200 [06:38<00:00,  1.99s/it, cf_loss=364, lda_likelihood=-2.52e+4]


Learning completed!
Evaluating: {'a': 0.905164140441507, 'b': 0.059948220830330516, 'eta': 0.0979736994543397, 'k': 150, 'lambda_u': 9.949084503249239, 'lambda_v': 1.6297711156450034}


100%|██████████| 200/200 [07:34<00:00,  2.27s/it, cf_loss=466, lda_likelihood=-2.57e+4]


Learning completed!
Evaluating: {'a': 0.9475465838727124, 'b': 0.026655775850678777, 'eta': 0.03128204882793546, 'k': 75, 'lambda_u': 4.0188399663320284, 'lambda_v': 0.49973758213624836}


100%|██████████| 200/200 [01:40<00:00,  1.98it/s, cf_loss=239, lda_likelihood=-1.59e+4]


Learning completed!
Evaluating: {'a': 0.9552742638653577, 'b': 0.028222696864479115, 'eta': 0.09642969216949726, 'k': 75, 'lambda_u': 7.841279423153282, 'lambda_v': 4.128781408313084}


100%|██████████| 200/200 [01:49<00:00,  1.83it/s, cf_loss=511, lda_likelihood=-1.54e+4]


Learning completed!
Evaluating: {'a': 0.9734401452135346, 'b': 0.0020241128979698432, 'eta': 0.05902919129983911, 'k': 50, 'lambda_u': 5.106142948899999, 'lambda_v': 6.480866707600999}


100%|██████████| 200/200 [01:16<00:00,  2.60it/s, cf_loss=475, lda_likelihood=-1.12e+4]


Learning completed!
Evaluating: {'a': 0.9260036063574516, 'b': 0.05254991697177294, 'eta': 0.02609467980377657, 'k': 100, 'lambda_u': 9.558853088429272, 'lambda_v': 9.664112179471044}


100%|██████████| 200/200 [04:40<00:00,  1.40s/it, cf_loss=618, lda_likelihood=-1.75e+4]


Learning completed!
Evaluating: {'a': 0.9272191900389395, 'b': 0.03619409410446448, 'eta': 0.07020298479807939, 'k': 100, 'lambda_u': 8.191929254651804, 'lambda_v': 5.934374933608773}


100%|██████████| 200/200 [03:58<00:00,  1.19s/it, cf_loss=552, lda_likelihood=-2.02e+4]


Learning completed!
Evaluating: {'a': 0.9250053839424245, 'b': 0.05972586152247679, 'eta': 0.03378730729426983, 'k': 150, 'lambda_u': 4.713183095897166, 'lambda_v': 3.071974556661505}


100%|██████████| 200/200 [05:44<00:00,  1.72s/it, cf_loss=429, lda_likelihood=-2.24e+4]


Learning completed!
Evaluating: {'a': 0.9403764563631161, 'b': 0.09980629915094598, 'eta': 0.05315223421811184, 'k': 75, 'lambda_u': 0.5981485971103387, 'lambda_v': 5.09436091575207}


100%|██████████| 200/200 [01:41<00:00,  1.97it/s, cf_loss=251, lda_likelihood=-1.61e+4]


Learning completed!
Evaluating: {'a': 0.9362655637630827, 'b': 0.004672241677759071, 'eta': 0.03976320163762106, 'k': 75, 'lambda_u': 8.218078339877275, 'lambda_v': 8.555033428176268}


100%|██████████| 200/200 [01:42<00:00,  1.95it/s, cf_loss=570, lda_likelihood=-1.67e+4]


Learning completed!
Evaluating: {'a': 0.9936367094137994, 'b': 0.03893482621521399, 'eta': 0.026912755155957792, 'k': 50, 'lambda_u': 0.7593599803508405, 'lambda_v': 1.499861245013481}


100%|██████████| 200/200 [01:16<00:00,  2.61it/s, cf_loss=184, lda_likelihood=-1.05e+4]


Learning completed!
Evaluating: {'a': 0.9586245877927981, 'b': 0.04748531938750342, 'eta': 0.005720403258089206, 'k': 50, 'lambda_u': 7.8326369202181825, 'lambda_v': 9.449852916235749}


100%|██████████| 200/200 [01:11<00:00,  2.79it/s, cf_loss=582, lda_likelihood=-1.11e+4]


Learning completed!
Evaluating: {'a': 0.963145189194876, 'b': 0.09670826848023534, 'eta': 0.01680921584451462, 'k': 50, 'lambda_u': 6.0122354495657895, 'lambda_v': 3.8313089127468123}


100%|██████████| 200/200 [01:12<00:00,  2.76it/s, cf_loss=496, lda_likelihood=-1.03e+4]


Learning completed!
Best parameter settings: {'a': 0.9734401452135346, 'b': 0.0020241128979698432, 'eta': 0.05902919129983911, 'k': 50, 'lambda_u': 5.106142948899999, 'lambda_v': 6.480866707600999}
HarmonicMean = 0.0091

[RandomSearch_CTR_details_strat_unweighted] Evaluation started!


Ranking: 100%|██████████| 100/100 [00:03<00:00, 27.46it/s]
Ranking: 100%|██████████| 96/96 [00:03<00:00, 31.61it/s]



VALIDATION:
...
                                          |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
----------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_CTR_details_strat_unweighted | 0.0375 |       0.0091 |  0.0659 |  0.2565 |      0.0091 |   3.0385

TEST:
...
                                          |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
----------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_CTR_details_strat_unweighted | 0.0203 |       0.0073 |  0.0493 |  0.2331 |      0.0084 | 4443.8769 |   3.6446

Random search best params:  {'a': 0.9734401452135346, 'b': 0.0020241128979698432, 'eta': 0.05902919129983911, 'k': 50, 'lambda_u': 5.106142948899999, 'lambda_v': 6.480866707600999}
RandomSearch_CTR_details_strat_unweighted
F1@10: 0.020
NCRR: 0.049
NDCG: 0.233
Distribut

In [8]:
# CTR desc lsuo split (unweighted)

model_name = 'CTR'
text_column = 'desc'
split_type = 'lsuo'
weighted_or_unweighted = 'unweighted'

train_ctr(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = False
---
Training data:
Number of users = 83
Number of items = 397
Number of ratings = 3720
Max rating = 1.0
Min rating = 0.0
Global mean = 0.5
---
Test data:
Number of users = 11
Number of items = 101
Number of ratings = 458
Number of unknown users = 11
Number of unknown items = 0
---
Validation data:
Number of users = 10
Number of items = 308
Number of ratings = 663
---
Total users = 104
Total items = 403

[RandomSearch_CTR_desc_lsuo_unweighted] Training started!
Evaluating: {'a': 0.9581075989483444, 'b': 0.09675009673394318, 'eta': 0.08507355692253224, 'k': 75, 'lambda_u': 6.118130487254779, 'lambda_v': 9.86423412093533}


100%|██████████| 200/200 [01:33<00:00,  2.14it/s, cf_loss=473, lda_likelihood=-928]    


Learning completed!
Evaluating: {'a': 0.919267490328438, 'b': 0.03906913809108864, 'eta': 0.0167098349809198, 'k': 150, 'lambda_u': 7.89015327826145, 'lambda_v': 6.756674585065074}


100%|██████████| 200/200 [06:24<00:00,  1.92s/it, cf_loss=458, lda_likelihood=-1.16e+3]


Learning completed!
Evaluating: {'a': 0.9275445372515654, 'b': 0.057305581215865944, 'eta': 0.06032411835958567, 'k': 150, 'lambda_u': 3.5574680775747836, 'lambda_v': 1.3241900753684985}


100%|██████████| 200/200 [11:10<00:00,  3.35s/it, cf_loss=283, lda_likelihood=-1.34e+3]


Learning completed!
Evaluating: {'a': 0.900394559072894, 'b': 0.01147159614312613, 'eta': 0.048845459653738, 'k': 50, 'lambda_u': 3.9973780388982676, 'lambda_v': 4.242057980431193}


100%|██████████| 200/200 [01:18<00:00,  2.56it/s, cf_loss=338, lda_likelihood=-742]   


Learning completed!
Evaluating: {'a': 0.9406651235812362, 'b': 0.06830746112378028, 'eta': 0.06696689584943522, 'k': 100, 'lambda_u': 7.701350388241904, 'lambda_v': 4.726022814057432}


100%|██████████| 200/200 [03:19<00:00,  1.00it/s, cf_loss=451, lda_likelihood=-1.04e+3]


Learning completed!
Evaluating: {'a': 0.9286143006776588, 'b': 0.09394837086824548, 'eta': 0.004505300356484202, 'k': 100, 'lambda_u': 8.186573588921958, 'lambda_v': 5.124213661603968}


100%|██████████| 200/200 [02:41<00:00,  1.24it/s, cf_loss=462, lda_likelihood=-1.01e+3]


Learning completed!
Evaluating: {'a': 0.9110299605088135, 'b': 0.08997086119097718, 'eta': 0.016609311845240525, 'k': 75, 'lambda_u': 1.0132722783723362, 'lambda_v': 6.42935012429303}


100%|██████████| 200/200 [01:37<00:00,  2.05it/s, cf_loss=245, lda_likelihood=-950]    


Learning completed!
Evaluating: {'a': 0.9475438644110575, 'b': 0.09783218326678372, 'eta': 0.00564112052274415, 'k': 150, 'lambda_u': 6.476734373659575, 'lambda_v': 3.3593793394287785}


100%|██████████| 200/200 [04:21<00:00,  1.31s/it, cf_loss=429, lda_likelihood=-1.25e+3]


Learning completed!
Evaluating: {'a': 0.9320722932023136, 'b': 0.05800154304036718, 'eta': 0.09398160354361358, 'k': 75, 'lambda_u': 6.503838933567951, 'lambda_v': 4.293886381704593}


100%|██████████| 200/200 [01:32<00:00,  2.15it/s, cf_loss=417, lda_likelihood=-912]    


Learning completed!
Evaluating: {'a': 0.91077984377614, 'b': 0.06443011083682032, 'eta': 0.09905434853617072, 'k': 50, 'lambda_u': 8.263419657253264, 'lambda_v': 4.773949194316895}


100%|██████████| 200/200 [01:05<00:00,  3.06it/s, cf_loss=442, lda_likelihood=-774]   


Learning completed!
Evaluating: {'a': 0.939033080545376, 'b': 0.048766592354398286, 'eta': 0.022472884608518227, 'k': 150, 'lambda_u': 1.4101069774467019, 'lambda_v': 2.771450051401114}


100%|██████████| 200/200 [13:26<00:00,  4.03s/it, cf_loss=232, lda_likelihood=-1.31e+3] 


Learning completed!
Evaluating: {'a': 0.9429656962103597, 'b': 0.08203766031900653, 'eta': 0.0943982630318973, 'k': 200, 'lambda_u': 7.616322511762509, 'lambda_v': 8.80632391380888}


100%|██████████| 200/200 [16:40<00:00,  5.00s/it, cf_loss=481, lda_likelihood=-1.31e+3]


Learning completed!
Evaluating: {'a': 0.9584806640419685, 'b': 0.07039982464683918, 'eta': 0.026258592838987717, 'k': 50, 'lambda_u': 2.9402889555332994, 'lambda_v': 6.218753979695753}


100%|██████████| 200/200 [01:04<00:00,  3.11it/s, cf_loss=356, lda_likelihood=-773]   


Learning completed!
Evaluating: {'a': 0.9673166689888897, 'b': 0.05130686520902403, 'eta': 0.057329752086118485, 'k': 50, 'lambda_u': 4.129110091255082, 'lambda_v': 7.783492607403563}


100%|██████████| 200/200 [01:04<00:00,  3.11it/s, cf_loss=403, lda_likelihood=-759]   


Learning completed!
Evaluating: {'a': 0.9605520386463722, 'b': 0.08909048490131083, 'eta': 0.035807299684913425, 'k': 75, 'lambda_u': 7.997224705875125, 'lambda_v': 0.03709360616974174}


100%|██████████| 200/200 [01:47<00:00,  1.86it/s, cf_loss=124, lda_likelihood=-1.36e+3]


Learning completed!
Evaluating: {'a': 0.9405827792442178, 'b': 0.06897974998526714, 'eta': 0.007204158527293436, 'k': 50, 'lambda_u': 6.952508319444918, 'lambda_v': 4.116726550846783}


100%|██████████| 200/200 [01:02<00:00,  3.18it/s, cf_loss=426, lda_likelihood=-787]   


Learning completed!
Evaluating: {'a': 0.9649804193416687, 'b': 0.018722265239506486, 'eta': 0.017570941748366653, 'k': 100, 'lambda_u': 5.116908652567143, 'lambda_v': 6.979465061091118}


100%|██████████| 200/200 [03:01<00:00,  1.10it/s, cf_loss=411, lda_likelihood=-969]    


Learning completed!
Evaluating: {'a': 0.9324446745350652, 'b': 0.004667116005644046, 'eta': 0.09722070395912526, 'k': 100, 'lambda_u': 3.8240996396968807, 'lambda_v': 4.95289521311263}


100%|██████████| 200/200 [02:56<00:00,  1.13it/s, cf_loss=345, lda_likelihood=-965]    


Learning completed!
Evaluating: {'a': 0.9612833556338167, 'b': 0.0018077643818106505, 'eta': 0.03126066783685817, 'k': 50, 'lambda_u': 1.9976660704881337, 'lambda_v': 5.779420972168283}


100%|██████████| 200/200 [01:04<00:00,  3.11it/s, cf_loss=278, lda_likelihood=-785]   


Learning completed!
Evaluating: {'a': 0.9474531081040253, 'b': 0.019666692177224256, 'eta': 0.01836838242858228, 'k': 50, 'lambda_u': 6.583725504901151, 'lambda_v': 8.737429271035834}


100%|██████████| 200/200 [01:04<00:00,  3.11it/s, cf_loss=440, lda_likelihood=-735]   


Learning completed!
Best parameter settings: {'a': 0.9581075989483444, 'b': 0.09675009673394318, 'eta': 0.08507355692253224, 'k': 75, 'lambda_u': 6.118130487254779, 'lambda_v': 9.86423412093533}
HarmonicMean = 0.0000

[RandomSearch_CTR_desc_lsuo_unweighted] Evaluation started!


Ranking: 100%|██████████| 11/11 [00:00<00:00, 49.40it/s]
Ranking: 100%|██████████| 10/10 [00:00<00:00, 40.45it/s]



VALIDATION:
...
                                      |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_CTR_desc_lsuo_unweighted | 0.0428 |       0.0000 |  0.1674 |  0.4395 |      0.0000 |   0.2487

TEST:
...
                                      |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_CTR_desc_lsuo_unweighted | 0.0246 |       0.0000 |  0.0997 |  0.3810 |      0.0000 | 4704.3239 |   0.2257

Random search best params:  {'a': 0.9581075989483444, 'b': 0.09675009673394318, 'eta': 0.08507355692253224, 'k': 75, 'lambda_u': 6.118130487254779, 'lambda_v': 9.86423412093533}
RandomSearch_CTR_desc_lsuo_unweighted
F1@10: 0.025
NCRR: 0.100
NDCG: 0.381
Distributional coverage: 8.616
Serendipi

In [9]:
# CTR details lsuo split (unweighted)

model_name = 'CTR'
text_column = 'details'
split_type = 'lsuo'
weighted_or_unweighted = 'unweighted'

train_ctr(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = False
---
Training data:
Number of users = 83
Number of items = 397
Number of ratings = 3720
Max rating = 1.0
Min rating = 0.0
Global mean = 0.5
---
Test data:
Number of users = 11
Number of items = 101
Number of ratings = 458
Number of unknown users = 11
Number of unknown items = 0
---
Validation data:
Number of users = 10
Number of items = 308
Number of ratings = 663
---
Total users = 104
Total items = 403

[RandomSearch_CTR_details_lsuo_unweighted] Training started!
Evaluating: {'a': 0.9471075762082883, 'b': 0.010975604292272602, 'eta': 0.045020164335017876, 'k': 150, 'lambda_u': 6.469055224521553, 'lambda_v': 9.75329612659715}


100%|██████████| 200/200 [05:13<00:00,  1.57s/it, cf_loss=540, lda_likelihood=-2.44e+4]


Learning completed!
Evaluating: {'a': 0.9476330636655335, 'b': 0.06011623586550307, 'eta': 0.010958758766982782, 'k': 50, 'lambda_u': 1.3923527755156808, 'lambda_v': 0.6995978332179038}


100%|██████████| 200/200 [01:08<00:00,  2.93it/s, cf_loss=174, lda_likelihood=-1.29e+4]


Learning completed!
Evaluating: {'a': 0.980870067176476, 'b': 0.05363042459501513, 'eta': 0.0027396295230787925, 'k': 50, 'lambda_u': 1.0071517779647114, 'lambda_v': 5.376511703899376}


100%|██████████| 200/200 [01:09<00:00,  2.90it/s, cf_loss=273, lda_likelihood=-8.99e+3]


Learning completed!
Evaluating: {'a': 0.9987326342238145, 'b': 0.01877973385921592, 'eta': 0.014257812161395024, 'k': 75, 'lambda_u': 5.462435528269623, 'lambda_v': 0.41964732870524724}


100%|██████████| 200/200 [01:49<00:00,  1.82it/s, cf_loss=235, lda_likelihood=-1.45e+4]


Learning completed!
Evaluating: {'a': 0.9640976534403921, 'b': 0.02454760916748161, 'eta': 0.03239561837516783, 'k': 50, 'lambda_u': 8.470029139354924, 'lambda_v': 1.6726968054905866}


100%|██████████| 200/200 [01:08<00:00,  2.92it/s, cf_loss=408, lda_likelihood=-1.44e+4]


Learning completed!
Evaluating: {'a': 0.9591109573640362, 'b': 0.06346813457501559, 'eta': 0.037997874685367326, 'k': 200, 'lambda_u': 8.847887235402146, 'lambda_v': 2.9539596422578738}


100%|██████████| 200/200 [08:08<00:00,  2.44s/it, cf_loss=493, lda_likelihood=-2.9e+4] 


Learning completed!
Evaluating: {'a': 0.9142066149827981, 'b': 0.07531265500294526, 'eta': 0.09899634879248775, 'k': 200, 'lambda_u': 9.624327719459263, 'lambda_v': 8.529420964962252}


100%|██████████| 200/200 [08:00<00:00,  2.40s/it, cf_loss=589, lda_likelihood=-2.89e+4]


Learning completed!
Evaluating: {'a': 0.9276314053470762, 'b': 0.06449947955146494, 'eta': 0.0684611225272151, 'k': 75, 'lambda_u': 4.145084042631062, 'lambda_v': 6.3690909136783596}


100%|██████████| 200/200 [01:48<00:00,  1.85it/s, cf_loss=442, lda_likelihood=-1.72e+4]


Learning completed!
Evaluating: {'a': 0.9342225424456873, 'b': 0.09130667238981756, 'eta': 0.05964373392324037, 'k': 50, 'lambda_u': 9.194948524763898, 'lambda_v': 3.0708481781047827}


100%|██████████| 200/200 [01:08<00:00,  2.90it/s, cf_loss=488, lda_likelihood=-1.04e+4]


Learning completed!
Evaluating: {'a': 0.9033705488633077, 'b': 0.08235642402384696, 'eta': 0.08019591969296017, 'k': 50, 'lambda_u': 5.372314844388223, 'lambda_v': 2.0864994064736613}


100%|██████████| 200/200 [01:09<00:00,  2.86it/s, cf_loss=385, lda_likelihood=-1.14e+4]


Learning completed!
Evaluating: {'a': 0.9010469223790234, 'b': 0.09474565569095938, 'eta': 0.08942972289012315, 'k': 100, 'lambda_u': 3.2012168236576315, 'lambda_v': 3.839737434126578}


100%|██████████| 200/200 [03:00<00:00,  1.11it/s, cf_loss=375, lda_likelihood=-1.84e+4]


Learning completed!
Evaluating: {'a': 0.9870956107608276, 'b': 0.052621926700011125, 'eta': 0.07969793515399688, 'k': 75, 'lambda_u': 4.015862999520723, 'lambda_v': 3.594997347614673}


100%|██████████| 200/200 [01:48<00:00,  1.84it/s, cf_loss=397, lda_likelihood=-1.51e+4]


Learning completed!
Evaluating: {'a': 0.950187343104821, 'b': 0.06308383592490167, 'eta': 0.05991124776362247, 'k': 50, 'lambda_u': 9.992144033616995, 'lambda_v': 5.893533975535696}


100%|██████████| 200/200 [01:08<00:00,  2.91it/s, cf_loss=549, lda_likelihood=-1.11e+4]


Learning completed!
Evaluating: {'a': 0.9786082241656195, 'b': 0.015227619062853094, 'eta': 0.03747413368348517, 'k': 150, 'lambda_u': 6.265185224529959, 'lambda_v': 6.920650922078412}


100%|██████████| 200/200 [05:05<00:00,  1.53s/it, cf_loss=516, lda_likelihood=-2.58e+4]


Learning completed!
Evaluating: {'a': 0.9616928383328096, 'b': 0.07600332187601311, 'eta': 0.05215795262690606, 'k': 75, 'lambda_u': 9.734870097293426, 'lambda_v': 9.665802216341353}


100%|██████████| 200/200 [01:49<00:00,  1.83it/s, cf_loss=602, lda_likelihood=-1.56e+4]


Learning completed!
Evaluating: {'a': 0.9197384064013538, 'b': 0.08914302233810532, 'eta': 0.08887945972098918, 'k': 50, 'lambda_u': 7.383177207951082, 'lambda_v': 8.332230749281239}


100%|██████████| 200/200 [01:08<00:00,  2.91it/s, cf_loss=535, lda_likelihood=-1.13e+4]


Learning completed!
Evaluating: {'a': 0.9355892503258627, 'b': 0.05372374467394617, 'eta': 0.00434073115038286, 'k': 200, 'lambda_u': 6.258195144944393, 'lambda_v': 3.12994983553582}


100%|██████████| 200/200 [08:06<00:00,  2.43s/it, cf_loss=442, lda_likelihood=-2.53e+4]


Learning completed!
Evaluating: {'a': 0.9498120556818886, 'b': 0.07195574253428448, 'eta': 0.05274152499352893, 'k': 50, 'lambda_u': 5.906795513111166, 'lambda_v': 0.026267548290910225}


100%|██████████| 200/200 [01:08<00:00,  2.92it/s, cf_loss=92.3, lda_likelihood=-1.29e+4]


Learning completed!
Evaluating: {'a': 0.9073645384393559, 'b': 0.04399906325091738, 'eta': 0.05756100811707103, 'k': 150, 'lambda_u': 1.13830855436733, 'lambda_v': 4.598949958436397}


100%|██████████| 200/200 [05:12<00:00,  1.56s/it, cf_loss=255, lda_likelihood=-2.54e+4]


Learning completed!
Evaluating: {'a': 0.9867583934552846, 'b': 0.009019676529770005, 'eta': 0.09008241102168979, 'k': 200, 'lambda_u': 6.153938830748407, 'lambda_v': 0.4603713211316615}


100%|██████████| 200/200 [07:45<00:00,  2.33s/it, cf_loss=253, lda_likelihood=-2.91e+4]


Learning completed!
Best parameter settings: {'a': 0.9471075762082883, 'b': 0.010975604292272602, 'eta': 0.045020164335017876, 'k': 150, 'lambda_u': 6.469055224521553, 'lambda_v': 9.75329612659715}
HarmonicMean = 0.0000

[RandomSearch_CTR_details_lsuo_unweighted] Evaluation started!


Ranking: 100%|██████████| 11/11 [00:00<00:00, 49.29it/s]
Ranking: 100%|██████████| 10/10 [00:00<00:00, 39.73it/s]



VALIDATION:
...
                                         |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
---------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_CTR_details_lsuo_unweighted | 0.0428 |       0.0000 |  0.1674 |  0.4395 |      0.0000 |   0.2537

TEST:
...
                                         |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
---------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_CTR_details_lsuo_unweighted | 0.0246 |       0.0000 |  0.0997 |  0.3810 |      0.0000 | 4023.5976 |   0.2252

Random search best params:  {'a': 0.9471075762082883, 'b': 0.010975604292272602, 'eta': 0.045020164335017876, 'k': 150, 'lambda_u': 6.469055224521553, 'lambda_v': 9.75329612659715}
RandomSearch_CTR_details_lsuo_unweighted
F1@10: 0.025
NCRR: 0.100
NDCG: 0.381
Distributional c

In [10]:
# CTR desc stratified split (weighted)

model_name = 'CTR'
text_column = 'desc'
split_type = 'strat'
weighted_or_unweighted = 'weighted'

train_ctr(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400

[RandomSearch_CTR_desc_strat_weighted] Training started!
Evaluating: {'a': 0.9493301691872464, 'b': 0.002178838925403326, 'eta': 0.013127260146536174, 'k': 200, 'lambda_u': 8.048052341229711, 'lambda_v': 8.525799636611234}


100%|██████████| 200/200 [07:36<00:00,  2.28s/it, cf_loss=246, lda_likelihood=-1.26e+3]


Learning completed!
Evaluating: {'a': 0.9468610308061826, 'b': 0.06882895102129218, 'eta': 0.03406018916009659, 'k': 50, 'lambda_u': 9.250010222576968, 'lambda_v': 7.030257724217479}


100%|██████████| 200/200 [01:05<00:00,  3.04it/s, cf_loss=260, lda_likelihood=-782]    


Learning completed!
Evaluating: {'a': 0.9223183237085391, 'b': 0.09358304891681923, 'eta': 0.08053974527992168, 'k': 200, 'lambda_u': 1.9584977087301252, 'lambda_v': 7.185875017627006}


100%|██████████| 200/200 [07:54<00:00,  2.37s/it, cf_loss=187, lda_likelihood=-1.37e+3]


Learning completed!
Evaluating: {'a': 0.9474700473136126, 'b': 0.024816507703897372, 'eta': 0.014776869626104167, 'k': 200, 'lambda_u': 3.9096635517916565, 'lambda_v': 0.4040063332463124}


100%|██████████| 200/200 [07:44<00:00,  2.32s/it, cf_loss=128, lda_likelihood=-1.64e+3]


Learning completed!
Evaluating: {'a': 0.9505139890009986, 'b': 0.014056273274131038, 'eta': 0.0025007860338301015, 'k': 100, 'lambda_u': 0.24647689225108876, 'lambda_v': 7.1335613960660496}


100%|██████████| 200/200 [03:01<00:00,  1.10it/s, cf_loss=66.3, lda_likelihood=-1.18e+3]


Learning completed!
Evaluating: {'a': 0.9656337158627808, 'b': 0.0653063216154106, 'eta': 0.04773211373191266, 'k': 100, 'lambda_u': 9.778669130733553, 'lambda_v': 2.5003478790889364}


100%|██████████| 200/200 [02:58<00:00,  1.12it/s, cf_loss=245, lda_likelihood=-1.15e+3]


Learning completed!
Evaluating: {'a': 0.9288641112407447, 'b': 0.02694060460581912, 'eta': 0.07691884386471894, 'k': 75, 'lambda_u': 3.063248145053909, 'lambda_v': 9.528998743759496}


100%|██████████| 200/200 [01:44<00:00,  1.92it/s, cf_loss=184, lda_likelihood=-845]    


Learning completed!
Evaluating: {'a': 0.9112414192082166, 'b': 0.007864801867669203, 'eta': 0.0030636287022013766, 'k': 100, 'lambda_u': 6.17170339007224, 'lambda_v': 7.649787469118457}


100%|██████████| 200/200 [02:59<00:00,  1.12it/s, cf_loss=216, lda_likelihood=-962]    


Learning completed!
Evaluating: {'a': 0.9474839303315423, 'b': 0.010868237307220408, 'eta': 0.050586545837301, 'k': 150, 'lambda_u': 7.8537492459038925, 'lambda_v': 0.9675152541920272}


100%|██████████| 200/200 [05:21<00:00,  1.61s/it, cf_loss=186, lda_likelihood=-1.42e+3]


Learning completed!
Evaluating: {'a': 0.9198401490116883, 'b': 0.010096977542049947, 'eta': 0.03584190602884217, 'k': 150, 'lambda_u': 3.3993292349322277, 'lambda_v': 6.153824748975314}


100%|██████████| 200/200 [07:30<00:00,  2.25s/it, cf_loss=188, lda_likelihood=-1.21e+3]


Learning completed!
Evaluating: {'a': 0.9915679930634612, 'b': 0.03507612728346994, 'eta': 0.04236835495639308, 'k': 75, 'lambda_u': 2.9209449027261187, 'lambda_v': 0.8549843402026652}


100%|██████████| 200/200 [01:44<00:00,  1.92it/s, cf_loss=136, lda_likelihood=-1.14e+3]


Learning completed!
Evaluating: {'a': 0.9520323977399208, 'b': 0.04110301190398308, 'eta': 0.015692838405198764, 'k': 75, 'lambda_u': 1.393954173767231, 'lambda_v': 6.2162586792184715}


100%|██████████| 200/200 [01:44<00:00,  1.91it/s, cf_loss=145, lda_likelihood=-944]    


Learning completed!
Evaluating: {'a': 0.9157706105986164, 'b': 0.07941821783849658, 'eta': 0.01692003204142777, 'k': 200, 'lambda_u': 0.8871818767070456, 'lambda_v': 6.080017103881088}


100%|██████████| 200/200 [07:48<00:00,  2.34s/it, cf_loss=140, lda_likelihood=-1.43e+3]


Learning completed!
Evaluating: {'a': 0.9435207501651631, 'b': 0.004019909745933814, 'eta': 0.013498501573874704, 'k': 75, 'lambda_u': 8.185481527202874, 'lambda_v': 3.4340528344056183}


100%|██████████| 200/200 [01:44<00:00,  1.92it/s, cf_loss=214, lda_likelihood=-903]    


Learning completed!
Evaluating: {'a': 0.9142377545189558, 'b': 0.05722533932992185, 'eta': 0.04754029812491496, 'k': 100, 'lambda_u': 1.220230453086556, 'lambda_v': 5.622563493469907}


100%|██████████| 200/200 [02:57<00:00,  1.13it/s, cf_loss=140, lda_likelihood=-1.06e+3]


Learning completed!
Evaluating: {'a': 0.9424193717726477, 'b': 0.06313194643635756, 'eta': 0.011496221679571862, 'k': 150, 'lambda_u': 0.5880214611356427, 'lambda_v': 4.678003052727321}


100%|██████████| 200/200 [05:27<00:00,  1.64s/it, cf_loss=110, lda_likelihood=-1.33e+3]


Learning completed!
Evaluating: {'a': 0.9694937170813578, 'b': 0.01841529791675346, 'eta': 0.09999397244287721, 'k': 50, 'lambda_u': 3.700575531927882, 'lambda_v': 1.2102441795142997}


100%|██████████| 200/200 [01:05<00:00,  3.05it/s, cf_loss=148, lda_likelihood=-891]    


Learning completed!
Evaluating: {'a': 0.9555363020114542, 'b': 0.057502774053980146, 'eta': 0.004268160977914553, 'k': 200, 'lambda_u': 1.7389729131474227, 'lambda_v': 8.001757876216738}


100%|██████████| 200/200 [07:42<00:00,  2.31s/it, cf_loss=174, lda_likelihood=-1.29e+3]


Learning completed!
Evaluating: {'a': 0.9743246962339014, 'b': 0.07795881538798559, 'eta': 0.016080920824673006, 'k': 200, 'lambda_u': 8.61022786076107, 'lambda_v': 7.576191633959362}


100%|██████████| 200/200 [07:39<00:00,  2.30s/it, cf_loss=278, lda_likelihood=-1.36e+3]


Learning completed!
Evaluating: {'a': 0.9471921815995236, 'b': 0.08528572665252962, 'eta': 0.053292750893809616, 'k': 200, 'lambda_u': 1.738481593820751, 'lambda_v': 2.749726930009925}


100%|██████████| 200/200 [08:40<00:00,  2.60s/it, cf_loss=162, lda_likelihood=-1.49e+3]


Learning completed!
Best parameter settings: {'a': 0.9198401490116883, 'b': 0.010096977542049947, 'eta': 0.03584190602884217, 'k': 150, 'lambda_u': 3.3993292349322277, 'lambda_v': 6.153824748975314}
HarmonicMean = 0.0117

[RandomSearch_CTR_desc_strat_weighted] Evaluation started!


Ranking: 100%|██████████| 100/100 [00:03<00:00, 26.97it/s]
Ranking: 100%|██████████| 96/96 [00:03<00:00, 30.87it/s]



VALIDATION:
...
                                     |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
------------------------------------ + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_CTR_desc_strat_weighted | 0.0439 |       0.0117 |  0.0774 |  0.2689 |      0.0091 |   3.1121

TEST:
...
                                     |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
------------------------------------ + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_CTR_desc_strat_weighted | 0.0280 |       0.0089 |  0.0636 |  0.2529 |      0.0084 | 5726.0769 |   3.7105

Random search best params:  {'a': 0.9198401490116883, 'b': 0.010096977542049947, 'eta': 0.03584190602884217, 'k': 150, 'lambda_u': 3.3993292349322277, 'lambda_v': 6.153824748975314}
RandomSearch_CTR_desc_strat_weighted
F1@10: 0.028
NCRR: 0.064
NDCG: 0.253
Distributional coverage: 8.633
Serendipity:

In [11]:
# CTR details stratified split (weighted)

model_name = 'CTR'
text_column = 'details'
split_type = 'strat'
weighted_or_unweighted = 'weighted'

train_ctr(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400

[RandomSearch_CTR_details_strat_weighted] Training started!
Evaluating: {'a': 0.956334100776738, 'b': 0.027335056966866823, 'eta': 0.05509559897436499, 'k': 150, 'lambda_u': 8.97018797082085, 'lambda_v': 1.9184472979942864}


100%|██████████| 200/200 [05:36<00:00,  1.68s/it, cf_loss=251, lda_likelihood=-2.56e+4]


Learning completed!
Evaluating: {'a': 0.9677922571346613, 'b': 0.041403498974021326, 'eta': 0.09553992728879279, 'k': 150, 'lambda_u': 1.2169543598726134, 'lambda_v': 1.1761464848223422}


100%|██████████| 200/200 [05:36<00:00,  1.68s/it, cf_loss=112, lda_likelihood=-2.72e+4]


Learning completed!
Evaluating: {'a': 0.9257127228486499, 'b': 0.0031297162420985906, 'eta': 0.09114589350112817, 'k': 75, 'lambda_u': 5.052270617165201, 'lambda_v': 9.966811669180712}


100%|██████████| 200/200 [01:50<00:00,  1.80it/s, cf_loss=269, lda_likelihood=-1.84e+4]


Learning completed!
Evaluating: {'a': 0.9950611128858565, 'b': 0.04662737769583036, 'eta': 0.0023846855603643817, 'k': 75, 'lambda_u': 6.241999295694586, 'lambda_v': 7.886977653698045}


100%|██████████| 200/200 [01:53<00:00,  1.76it/s, cf_loss=294, lda_likelihood=-1.53e+4]


Learning completed!
Evaluating: {'a': 0.9904290567694898, 'b': 0.05236043676088167, 'eta': 0.04402249230753725, 'k': 50, 'lambda_u': 6.755846301453692, 'lambda_v': 4.921741065045453}


100%|██████████| 200/200 [01:10<00:00,  2.85it/s, cf_loss=272, lda_likelihood=-1.1e+4] 


Learning completed!
Evaluating: {'a': 0.9857105546650473, 'b': 0.053219600646112954, 'eta': 0.05369007860895898, 'k': 100, 'lambda_u': 8.747083512363464, 'lambda_v': 7.8823372665397144}


100%|██████████| 200/200 [03:11<00:00,  1.04it/s, cf_loss=328, lda_likelihood=-1.91e+4]


Learning completed!
Evaluating: {'a': 0.9190157092151906, 'b': 0.06460024607495858, 'eta': 0.017256545853228215, 'k': 100, 'lambda_u': 0.9211200839497521, 'lambda_v': 4.63819405212884}


100%|██████████| 200/200 [03:11<00:00,  1.05it/s, cf_loss=140, lda_likelihood=-1.75e+4]


Learning completed!
Evaluating: {'a': 0.9464368957465432, 'b': 0.03008749056703344, 'eta': 0.058179236292013076, 'k': 100, 'lambda_u': 0.2940432896229293, 'lambda_v': 4.21895937478965}


100%|██████████| 200/200 [03:05<00:00,  1.08it/s, cf_loss=79.4, lda_likelihood=-2.12e+4]


Learning completed!
Evaluating: {'a': 0.9197463398105273, 'b': 0.06485877102257212, 'eta': 0.07429890792747981, 'k': 200, 'lambda_u': 7.138833769149692, 'lambda_v': 5.764677237186234}


100%|██████████| 200/200 [08:02<00:00,  2.41s/it, cf_loss=295, lda_likelihood=-3.05e+4]


Learning completed!
Evaluating: {'a': 0.9934631622318415, 'b': 0.04877229726475146, 'eta': 0.01971804195668113, 'k': 50, 'lambda_u': 0.6253079504392799, 'lambda_v': 0.5226939390458625}


100%|██████████| 200/200 [01:09<00:00,  2.87it/s, cf_loss=72.7, lda_likelihood=-1.18e+4]


Learning completed!
Evaluating: {'a': 0.9137216885482509, 'b': 0.012238510255896218, 'eta': 0.05779831490844461, 'k': 150, 'lambda_u': 7.522798783904339, 'lambda_v': 4.611141311729331}


100%|██████████| 200/200 [05:26<00:00,  1.63s/it, cf_loss=275, lda_likelihood=-2.51e+4]


Learning completed!
Evaluating: {'a': 0.9755366437195878, 'b': 0.038252460780308986, 'eta': 0.036940164132763416, 'k': 200, 'lambda_u': 2.592771568521272, 'lambda_v': 5.163875460740692}


100%|██████████| 200/200 [07:56<00:00,  2.38s/it, cf_loss=213, lda_likelihood=-2.92e+4]


Learning completed!
Evaluating: {'a': 0.9628649258584336, 'b': 0.018422439583254013, 'eta': 0.08600807513203819, 'k': 75, 'lambda_u': 5.416926859202597, 'lambda_v': 7.891509416186367}


100%|██████████| 200/200 [01:51<00:00,  1.79it/s, cf_loss=271, lda_likelihood=-1.71e+4]


Learning completed!
Evaluating: {'a': 0.9152027722188597, 'b': 0.03755352025863134, 'eta': 0.02914158582242839, 'k': 200, 'lambda_u': 2.9603750000785016, 'lambda_v': 0.5624531846607338}


100%|██████████| 200/200 [07:47<00:00,  2.34s/it, cf_loss=128, lda_likelihood=-2.81e+4]


Learning completed!
Evaluating: {'a': 0.9553352552286521, 'b': 0.06890453465052523, 'eta': 0.07844963705237913, 'k': 150, 'lambda_u': 4.006928333462816, 'lambda_v': 2.994416762625449}


100%|██████████| 200/200 [05:35<00:00,  1.68s/it, cf_loss=225, lda_likelihood=-2.52e+4]


Learning completed!
Evaluating: {'a': 0.9049403059932646, 'b': 0.04065682154876298, 'eta': 0.0077319825682040395, 'k': 200, 'lambda_u': 8.159290594755179, 'lambda_v': 9.241936114257708}


100%|██████████| 200/200 [08:07<00:00,  2.44s/it, cf_loss=323, lda_likelihood=-2.83e+4]


Learning completed!
Evaluating: {'a': 0.9133097115009955, 'b': 0.043868917878277416, 'eta': 0.020598979182085673, 'k': 50, 'lambda_u': 2.059328056027449, 'lambda_v': 9.593205218397271}


100%|██████████| 200/200 [01:09<00:00,  2.86it/s, cf_loss=198, lda_likelihood=-9.17e+3]


Learning completed!
Evaluating: {'a': 0.9629634776807345, 'b': 0.09930670505873879, 'eta': 0.09200766530234106, 'k': 75, 'lambda_u': 5.203289957157916, 'lambda_v': 6.7880517983606}


100%|██████████| 200/200 [01:49<00:00,  1.83it/s, cf_loss=278, lda_likelihood=-1.54e+4]


Learning completed!
Evaluating: {'a': 0.9436459734692539, 'b': 0.028923596690876477, 'eta': 0.05378594884451235, 'k': 100, 'lambda_u': 1.5766448202300682, 'lambda_v': 5.598979629081888}


100%|██████████| 200/200 [03:13<00:00,  1.03it/s, cf_loss=169, lda_likelihood=-1.87e+4]


Learning completed!
Evaluating: {'a': 0.9620149754295239, 'b': 0.05321407541286688, 'eta': 0.05337687743403453, 'k': 50, 'lambda_u': 1.8304352628346727, 'lambda_v': 4.950123579912475}


100%|██████████| 200/200 [01:11<00:00,  2.82it/s, cf_loss=181, lda_likelihood=-1.03e+4]


Learning completed!
Best parameter settings: {'a': 0.9950611128858565, 'b': 0.04662737769583036, 'eta': 0.0023846855603643817, 'k': 75, 'lambda_u': 6.241999295694586, 'lambda_v': 7.886977653698045}
HarmonicMean = 0.0074

[RandomSearch_CTR_details_strat_weighted] Evaluation started!


Ranking: 100%|██████████| 100/100 [00:03<00:00, 27.82it/s]
Ranking: 100%|██████████| 96/96 [00:02<00:00, 32.28it/s]



VALIDATION:
...
                                        |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
--------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_CTR_details_strat_weighted | 0.0240 |       0.0074 |  0.0556 |  0.2462 |      0.0091 |   2.9764

TEST:
...
                                        |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
--------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_CTR_details_strat_weighted | 0.0112 |       0.0055 |  0.0373 |  0.2261 |      0.0084 | 4792.3726 |   3.5967

Random search best params:  {'a': 0.9950611128858565, 'b': 0.04662737769583036, 'eta': 0.0023846855603643817, 'k': 75, 'lambda_u': 6.241999295694586, 'lambda_v': 7.886977653698045}
RandomSearch_CTR_details_strat_weighted
F1@10: 0.011
NCRR: 0.037
NDCG: 0.226
Distributional coverage

In [12]:
# CTR desc lsuo split (weighted)

model_name = 'CTR'
text_column = 'desc'
split_type = 'lsuo'
weighted_or_unweighted = 'weighted'

train_ctr(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = False
---
Training data:
Number of users = 83
Number of items = 397
Number of ratings = 3720
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 11
Number of items = 101
Number of ratings = 458
Number of unknown users = 11
Number of unknown items = 0
---
Validation data:
Number of users = 10
Number of items = 308
Number of ratings = 663
---
Total users = 104
Total items = 403

[RandomSearch_CTR_desc_lsuo_weighted] Training started!
Evaluating: {'a': 0.9950540186794792, 'b': 0.00380599591230657, 'eta': 0.021637294692872348, 'k': 50, 'lambda_u': 0.30039278798666413, 'lambda_v': 9.387831746890065}


100%|██████████| 200/200 [01:02<00:00,  3.20it/s, cf_loss=65.8, lda_likelihood=-882]    


Learning completed!
Evaluating: {'a': 0.9762874106406632, 'b': 0.014535003465421116, 'eta': 0.007970275176039987, 'k': 100, 'lambda_u': 8.430143088936461, 'lambda_v': 6.700356925479086}


100%|██████████| 200/200 [02:52<00:00,  1.16it/s, cf_loss=236, lda_likelihood=-1.04e+3]


Learning completed!
Evaluating: {'a': 0.9074181461296489, 'b': 0.0962016458980852, 'eta': 0.05145803818589798, 'k': 50, 'lambda_u': 7.126780711649708, 'lambda_v': 9.901898545497177}


100%|██████████| 200/200 [01:02<00:00,  3.18it/s, cf_loss=233, lda_likelihood=-806]    


Learning completed!
Evaluating: {'a': 0.9758898381404254, 'b': 0.040811618400448106, 'eta': 0.010600184803105015, 'k': 150, 'lambda_u': 3.2161668781826434, 'lambda_v': 2.502927849198468}


100%|██████████| 200/200 [04:48<00:00,  1.44s/it, cf_loss=165, lda_likelihood=-1.27e+3]


Learning completed!
Evaluating: {'a': 0.9084164621709243, 'b': 0.004720100799010563, 'eta': 0.09860427161858257, 'k': 75, 'lambda_u': 4.088064839253202, 'lambda_v': 3.590946161665813}


100%|██████████| 200/200 [01:38<00:00,  2.03it/s, cf_loss=159, lda_likelihood=-895]    


Learning completed!
Evaluating: {'a': 0.9791669917133519, 'b': 0.06211187386841374, 'eta': 0.08331473441621526, 'k': 200, 'lambda_u': 2.742673494357065, 'lambda_v': 8.222990825581793}


100%|██████████| 200/200 [07:44<00:00,  2.32s/it, cf_loss=189, lda_likelihood=-1.32e+3]


Learning completed!
Evaluating: {'a': 0.9904966720538437, 'b': 0.004028254428709732, 'eta': 0.07128672011518168, 'k': 200, 'lambda_u': 2.968897363771427, 'lambda_v': 2.9436478228134133}


100%|██████████| 200/200 [07:23<00:00,  2.22s/it, cf_loss=155, lda_likelihood=-1.37e+3]


Learning completed!
Evaluating: {'a': 0.9315942509952722, 'b': 0.04020694777131796, 'eta': 0.0765566966011135, 'k': 200, 'lambda_u': 7.914540039267889, 'lambda_v': 5.71732083238163}


100%|██████████| 200/200 [07:35<00:00,  2.28s/it, cf_loss=237, lda_likelihood=-1.4e+3] 


Learning completed!
Evaluating: {'a': 0.9622704231034322, 'b': 0.09407309310257023, 'eta': 0.06846219485570408, 'k': 100, 'lambda_u': 9.094194084426913, 'lambda_v': 3.586103517309142}


100%|██████████| 200/200 [02:46<00:00,  1.20it/s, cf_loss=240, lda_likelihood=-1.1e+3] 


Learning completed!
Evaluating: {'a': 0.967268171958444, 'b': 0.09595811664760709, 'eta': 0.0739990812409941, 'k': 100, 'lambda_u': 9.731886717651758, 'lambda_v': 3.9936419323163066}


100%|██████████| 200/200 [02:51<00:00,  1.16it/s, cf_loss=246, lda_likelihood=-1.08e+3]


Learning completed!
Evaluating: {'a': 0.9025235004654989, 'b': 0.046895574281205527, 'eta': 0.04437402414401265, 'k': 75, 'lambda_u': 7.469231422172033, 'lambda_v': 7.832299223975962}


100%|██████████| 200/200 [01:40<00:00,  1.99it/s, cf_loss=226, lda_likelihood=-914]    


Learning completed!
Evaluating: {'a': 0.9175636755091199, 'b': 0.04131776748767785, 'eta': 0.03513293634228104, 'k': 150, 'lambda_u': 9.689684039034436, 'lambda_v': 1.8540332920505052}


100%|██████████| 200/200 [05:12<00:00,  1.56s/it, cf_loss=213, lda_likelihood=-1.38e+3]


Learning completed!
Evaluating: {'a': 0.9997250325611959, 'b': 0.0514650932893079, 'eta': 0.0034727767478689433, 'k': 150, 'lambda_u': 8.866666797614496, 'lambda_v': 2.2873407972147843}


100%|██████████| 200/200 [04:51<00:00,  1.46s/it, cf_loss=224, lda_likelihood=-1.27e+3]


Learning completed!
Evaluating: {'a': 0.9826451789950195, 'b': 0.062242167105908755, 'eta': 0.05711860080654306, 'k': 200, 'lambda_u': 9.300220536546588, 'lambda_v': 1.136323088764172}


100%|██████████| 200/200 [07:23<00:00,  2.22s/it, cf_loss=209, lda_likelihood=-1.5e+3] 


Learning completed!
Evaluating: {'a': 0.9611667028232256, 'b': 0.07852911691992753, 'eta': 0.04338340515959312, 'k': 50, 'lambda_u': 2.309889503290952, 'lambda_v': 7.869041465493319}


100%|██████████| 200/200 [01:05<00:00,  3.05it/s, cf_loss=169, lda_likelihood=-817]   


Learning completed!
Evaluating: {'a': 0.9782817220087127, 'b': 0.0065965798220734674, 'eta': 0.033682777405515354, 'k': 75, 'lambda_u': 4.795066325140068, 'lambda_v': 9.035971551488647}


100%|██████████| 200/200 [01:40<00:00,  1.99it/s, cf_loss=195, lda_likelihood=-824]    


Learning completed!
Evaluating: {'a': 0.9557743092810485, 'b': 0.07864766036053061, 'eta': 0.09191410093963953, 'k': 75, 'lambda_u': 3.2334779457309692, 'lambda_v': 6.89920849800746}


100%|██████████| 200/200 [01:38<00:00,  2.03it/s, cf_loss=186, lda_likelihood=-935]    


Learning completed!
Evaluating: {'a': 0.9252463404194762, 'b': 0.07919696704002278, 'eta': 0.06289431197027871, 'k': 100, 'lambda_u': 3.978233269446386, 'lambda_v': 6.509838296493077}


100%|██████████| 200/200 [02:56<00:00,  1.14it/s, cf_loss=196, lda_likelihood=-1.04e+3]


Learning completed!
Evaluating: {'a': 0.9697866952889337, 'b': 0.020931199103703738, 'eta': 0.0030024630028815925, 'k': 50, 'lambda_u': 4.959971799655054, 'lambda_v': 3.7765823472110003}


100%|██████████| 200/200 [01:01<00:00,  3.23it/s, cf_loss=181, lda_likelihood=-798]    


Learning completed!
Evaluating: {'a': 0.900439263770219, 'b': 0.0846994223105857, 'eta': 0.09543854071824463, 'k': 200, 'lambda_u': 4.794623438514491, 'lambda_v': 7.58691044767803}


100%|██████████| 200/200 [07:37<00:00,  2.29s/it, cf_loss=217, lda_likelihood=-1.37e+3]


Learning completed!
Best parameter settings: {'a': 0.9950540186794792, 'b': 0.00380599591230657, 'eta': 0.021637294692872348, 'k': 50, 'lambda_u': 0.30039278798666413, 'lambda_v': 9.387831746890065}
HarmonicMean = 0.0000

[RandomSearch_CTR_desc_lsuo_weighted] Evaluation started!


Ranking: 100%|██████████| 11/11 [00:00<00:00, 49.40it/s]
Ranking: 100%|██████████| 10/10 [00:00<00:00, 34.39it/s]



VALIDATION:
...
                                    |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
----------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_CTR_desc_lsuo_weighted | 0.0428 |       0.0000 |  0.1674 |  0.4395 |      0.0000 |   0.2927

TEST:
...
                                    |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
----------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_CTR_desc_lsuo_weighted | 0.0246 |       0.0000 |  0.0997 |  0.3810 |      0.0000 | 4499.1473 |   0.2247

Random search best params:  {'a': 0.9950540186794792, 'b': 0.00380599591230657, 'eta': 0.021637294692872348, 'k': 50, 'lambda_u': 0.30039278798666413, 'lambda_v': 9.387831746890065}
RandomSearch_CTR_desc_lsuo_weighted
F1@10: 0.025
NCRR: 0.100
NDCG: 0.381
Distributional coverage: 8.616
Serendipity: 0.790


In [13]:
# CTR details lsuo split (weighted)

model_name = 'CTR'
text_column = 'details'
split_type = 'lsuo'
weighted_or_unweighted = 'weighted'

train_ctr(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = False
---
Training data:
Number of users = 83
Number of items = 397
Number of ratings = 3720
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 11
Number of items = 101
Number of ratings = 458
Number of unknown users = 11
Number of unknown items = 0
---
Validation data:
Number of users = 10
Number of items = 308
Number of ratings = 663
---
Total users = 104
Total items = 403

[RandomSearch_CTR_details_lsuo_weighted] Training started!
Evaluating: {'a': 0.9292452087874831, 'b': 0.05999875607312708, 'eta': 0.07239179647674573, 'k': 75, 'lambda_u': 8.520380603047805, 'lambda_v': 3.573772634107786}


100%|██████████| 200/200 [01:48<00:00,  1.84it/s, cf_loss=258, lda_likelihood=-1.69e+4]


Learning completed!
Evaluating: {'a': 0.9223029151049579, 'b': 0.030973916181506612, 'eta': 0.07062065165383528, 'k': 50, 'lambda_u': 7.748395039217611, 'lambda_v': 8.345119029676349}


100%|██████████| 200/200 [01:08<00:00,  2.94it/s, cf_loss=275, lda_likelihood=-1.08e+4]


Learning completed!
Evaluating: {'a': 0.9053041652602726, 'b': 0.042926977990113926, 'eta': 0.09338557319665378, 'k': 50, 'lambda_u': 9.9079555810639, 'lambda_v': 4.260272088653481}


100%|██████████| 200/200 [01:07<00:00,  2.96it/s, cf_loss=265, lda_likelihood=-1.03e+4]


Learning completed!
Evaluating: {'a': 0.9910467960162956, 'b': 0.01818944283650369, 'eta': 0.019251689314023292, 'k': 50, 'lambda_u': 2.147247249741835, 'lambda_v': 1.691509692114146}


100%|██████████| 200/200 [01:07<00:00,  2.97it/s, cf_loss=135, lda_likelihood=-1.42e+4]


Learning completed!
Evaluating: {'a': 0.9143851424163745, 'b': 0.042280829184959914, 'eta': 0.0536519970438872, 'k': 75, 'lambda_u': 8.298427014342352, 'lambda_v': 4.579374234466621}


100%|██████████| 200/200 [01:48<00:00,  1.84it/s, cf_loss=263, lda_likelihood=-1.78e+4]


Learning completed!
Evaluating: {'a': 0.9613020889627572, 'b': 0.08714055344300975, 'eta': 0.0905797715200274, 'k': 75, 'lambda_u': 4.36800696661236, 'lambda_v': 7.075328859990154}


100%|██████████| 200/200 [01:47<00:00,  1.86it/s, cf_loss=247, lda_likelihood=-1.56e+4]


Learning completed!
Evaluating: {'a': 0.9074259734380632, 'b': 0.005628034760430722, 'eta': 0.05450412495431645, 'k': 50, 'lambda_u': 0.8989336392212727, 'lambda_v': 2.087594668532611}


100%|██████████| 200/200 [01:07<00:00,  2.96it/s, cf_loss=89.3, lda_likelihood=-1.18e+4]


Learning completed!
Evaluating: {'a': 0.9567925747972248, 'b': 0.04969693769488827, 'eta': 0.04475603797524866, 'k': 150, 'lambda_u': 8.41232461647498, 'lambda_v': 9.682488028934312}


100%|██████████| 200/200 [05:20<00:00,  1.60s/it, cf_loss=318, lda_likelihood=-2.5e+4] 


Learning completed!
Evaluating: {'a': 0.9901594088928767, 'b': 0.026543199884770455, 'eta': 0.040232453236977085, 'k': 50, 'lambda_u': 4.47985537246668, 'lambda_v': 8.157179500358193}


100%|██████████| 200/200 [01:07<00:00,  2.95it/s, cf_loss=239, lda_likelihood=-1.07e+4]


Learning completed!
Evaluating: {'a': 0.9467247855674273, 'b': 0.016903709936871937, 'eta': 0.045740858005794055, 'k': 150, 'lambda_u': 3.1811446882709453, 'lambda_v': 0.6803587191787531}


100%|██████████| 200/200 [05:20<00:00,  1.60s/it, cf_loss=123, lda_likelihood=-2.36e+4]


Learning completed!
Evaluating: {'a': 0.9950131039681278, 'b': 0.0990268573164011, 'eta': 0.05664995446831549, 'k': 150, 'lambda_u': 4.340371652889544, 'lambda_v': 0.21819821924498586}


100%|██████████| 200/200 [05:10<00:00,  1.55s/it, cf_loss=113, lda_likelihood=-2.13e+4]


Learning completed!
Evaluating: {'a': 0.9124869614348384, 'b': 0.08317719824183434, 'eta': 0.01250915811757472, 'k': 50, 'lambda_u': 7.1651051094905815, 'lambda_v': 9.227920680003795}


100%|██████████| 200/200 [01:08<00:00,  2.94it/s, cf_loss=280, lda_likelihood=-1.02e+4]


Learning completed!
Evaluating: {'a': 0.9983504249098151, 'b': 0.021303114973126383, 'eta': 0.06540104036339903, 'k': 200, 'lambda_u': 7.61476296819029, 'lambda_v': 0.8045092778949898}


100%|██████████| 200/200 [08:05<00:00,  2.43s/it, cf_loss=185, lda_likelihood=-3.11e+4]


Learning completed!
Evaluating: {'a': 0.9122846752170857, 'b': 0.054182667496730674, 'eta': 0.05807708937431634, 'k': 200, 'lambda_u': 1.1377169307744495, 'lambda_v': 3.25483432605773}


100%|██████████| 200/200 [07:46<00:00,  2.33s/it, cf_loss=131, lda_likelihood=-2.84e+4]


Learning completed!
Evaluating: {'a': 0.9440083575364063, 'b': 0.02277707537383398, 'eta': 0.02650998692015201, 'k': 150, 'lambda_u': 6.189924365843009, 'lambda_v': 4.010558769577962}


100%|██████████| 200/200 [05:12<00:00,  1.56s/it, cf_loss=243, lda_likelihood=-2.38e+4]


Learning completed!
Evaluating: {'a': 0.9797657024133812, 'b': 0.06780733336336964, 'eta': 0.05283396501507697, 'k': 75, 'lambda_u': 7.208761017934897, 'lambda_v': 0.19454153517616898}


100%|██████████| 200/200 [01:46<00:00,  1.88it/s, cf_loss=128, lda_likelihood=-1.7e+4] 


Learning completed!
Evaluating: {'a': 0.9988277856053172, 'b': 0.007214369963684231, 'eta': 0.003303324500970285, 'k': 150, 'lambda_u': 4.025185718243385, 'lambda_v': 5.038398298031218}


100%|██████████| 200/200 [05:15<00:00,  1.58s/it, cf_loss=224, lda_likelihood=-2.62e+4]


Learning completed!
Evaluating: {'a': 0.9633816685768721, 'b': 0.08241869582166916, 'eta': 0.020644882475819692, 'k': 100, 'lambda_u': 8.901512333392786, 'lambda_v': 0.8938645204621296}


100%|██████████| 200/200 [03:14<00:00,  1.03it/s, cf_loss=207, lda_likelihood=-1.79e+4]


Learning completed!
Evaluating: {'a': 0.9516881007977305, 'b': 0.017742036953076468, 'eta': 0.01957615358776812, 'k': 50, 'lambda_u': 6.063742077604598, 'lambda_v': 9.098599980464066}


100%|██████████| 200/200 [01:09<00:00,  2.89it/s, cf_loss=263, lda_likelihood=-9.68e+3]


Learning completed!
Evaluating: {'a': 0.9456871499864848, 'b': 0.06796645312036831, 'eta': 0.03221113155657776, 'k': 200, 'lambda_u': 8.55620036633677, 'lambda_v': 0.09699298918391054}


100%|██████████| 200/200 [07:43<00:00,  2.32s/it, cf_loss=109, lda_likelihood=-2.73e+4]


Learning completed!
Best parameter settings: {'a': 0.9292452087874831, 'b': 0.05999875607312708, 'eta': 0.07239179647674573, 'k': 75, 'lambda_u': 8.520380603047805, 'lambda_v': 3.573772634107786}
HarmonicMean = 0.0000

[RandomSearch_CTR_details_lsuo_weighted] Evaluation started!


Ranking: 100%|██████████| 11/11 [00:00<00:00, 48.96it/s]
Ranking: 100%|██████████| 10/10 [00:00<00:00, 40.37it/s]



VALIDATION:
...
                                       |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
-------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_CTR_details_lsuo_weighted | 0.0428 |       0.0000 |  0.1674 |  0.4395 |      0.0000 |   0.2497

TEST:
...
                                       |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
-------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_CTR_details_lsuo_weighted | 0.0246 |       0.0000 |  0.0997 |  0.3810 |      0.0000 | 4102.3977 |   0.2267

Random search best params:  {'a': 0.9292452087874831, 'b': 0.05999875607312708, 'eta': 0.07239179647674573, 'k': 75, 'lambda_u': 8.520380603047805, 'lambda_v': 3.573772634107786}
RandomSearch_CTR_details_lsuo_weighted
F1@10: 0.025
NCRR: 0.100
NDCG: 0.381
Distributional coverage: 8.616
S

In [14]:
# convMF desc stratified split (unweighted) NOTE

model_name = 'convMF'
text_column = 'desc'
split_type = 'strat'
weighted_or_unweighted = 'unweighted'

train_convMF(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.5
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400

[RandomSearch_convMF_desc_strat_unweighted] Training started!

[RandomSearch_convMF_desc_strat_unweighted] Evaluation started!


Ranking: 100%|██████████| 100/100 [00:03<00:00, 27.17it/s]
Ranking: 100%|██████████| 96/96 [00:03<00:00, 31.95it/s]



VALIDATION:
...
                                          |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
----------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_convMF_desc_strat_unweighted | 0.0349 |       0.0095 |  0.0794 |  0.2565 |      0.0091 |   3.0061

TEST:
...
                                          |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
----------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_convMF_desc_strat_unweighted | 0.0387 |       0.0092 |  0.0673 |  0.2486 |      0.0084 | 8584.9833 |   3.6936

Random search best params:  {'cnn_bs': 64, 'cnn_lr': 0.003800634125320658, 'dropout_rate': 0.07345128751822862, 'k': 200, 'lambda_u': 63.13451477306333, 'lambda_v': 1011.8389054562443}
RandomSearch_convMF_desc_strat_unweighted
F1@10: 0.039
NCRR: 0.067
NDCG: 0.249
Distr

In [15]:
# convMF details stratified split (unweighted)

model_name = 'convMF'
text_column = 'details'
split_type = 'strat'
weighted_or_unweighted = 'unweighted'

train_convMF(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.5
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400

[RandomSearch_convMF_details_strat_unweighted] Training started!

[RandomSearch_convMF_details_strat_unweighted] Evaluation started!


Ranking: 100%|██████████| 100/100 [00:03<00:00, 26.13it/s]
Ranking: 100%|██████████| 96/96 [00:03<00:00, 30.57it/s]



VALIDATION:
...
                                             |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
-------------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_convMF_details_strat_unweighted | 0.0320 |       0.0085 |  0.0588 |  0.2412 |      0.0091 |   3.1432

TEST:
...
                                             |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
-------------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_convMF_details_strat_unweighted | 0.0131 |       0.0041 |  0.0478 |  0.2302 |      0.0084 | 8934.8317 |   3.8533

Random search best params:  {'cnn_bs': 16, 'cnn_lr': 0.00037702135492519493, 'dropout_rate': 0.06739065162367008, 'k': 100, 'lambda_u': 31.294896174949283, 'lambda_v': 4320.8779389318015}
RandomSearch_convMF_details_strat_unweighted
F1@10: 0.013
NCRR:

In [16]:
# convMF desc lsuo split (unweighted)

model_name = 'convMF'
text_column = 'desc'
split_type = 'lsuo'
weighted_or_unweighted = 'unweighted'

train_convMF(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = False
---
Training data:
Number of users = 83
Number of items = 397
Number of ratings = 3720
Max rating = 1.0
Min rating = 0.0
Global mean = 0.5
---
Test data:
Number of users = 11
Number of items = 101
Number of ratings = 458
Number of unknown users = 11
Number of unknown items = 0
---
Validation data:
Number of users = 10
Number of items = 308
Number of ratings = 663
---
Total users = 104
Total items = 403

[RandomSearch_convMF_desc_lsuo_unweighted] Training started!

[RandomSearch_convMF_desc_lsuo_unweighted] Evaluation started!


Ranking: 100%|██████████| 11/11 [00:00<00:00, 47.07it/s]
Ranking: 100%|██████████| 10/10 [00:00<00:00, 40.95it/s]



VALIDATION:
...
                                         |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
---------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_convMF_desc_lsuo_unweighted | 0.0428 |       0.0000 |  0.1674 |  0.4395 |      0.0000 |   0.2457

TEST:
...
                                         |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
---------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_convMF_desc_lsuo_unweighted | 0.0246 |       0.0000 |  0.0997 |  0.3810 |      0.0000 | 8817.4722 |   0.2372

Random search best params:  {'cnn_bs': 64, 'cnn_lr': 0.00715825767317995, 'dropout_rate': 0.09284709261868257, 'k': 100, 'lambda_u': 55.17634543138084, 'lambda_v': 7222.742800877075}
RandomSearch_convMF_desc_lsuo_unweighted
F1@10: 0.025
NCRR: 0.100
NDCG: 0.381
Distributional

In [17]:
# convMF details lsuo split (unweighted)

model_name = 'convMF'
text_column = 'details'
split_type = 'lsuo'
weighted_or_unweighted = 'unweighted'

train_convMF(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = False
---
Training data:
Number of users = 83
Number of items = 397
Number of ratings = 3720
Max rating = 1.0
Min rating = 0.0
Global mean = 0.5
---
Test data:
Number of users = 11
Number of items = 101
Number of ratings = 458
Number of unknown users = 11
Number of unknown items = 0
---
Validation data:
Number of users = 10
Number of items = 308
Number of ratings = 663
---
Total users = 104
Total items = 403

[RandomSearch_convMF_details_lsuo_unweighted] Training started!

[RandomSearch_convMF_details_lsuo_unweighted] Evaluation started!


Ranking: 100%|██████████| 11/11 [00:00<00:00, 44.41it/s]
Ranking: 100%|██████████| 10/10 [00:00<00:00, 36.60it/s]



VALIDATION:
...
                                            |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
------------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_convMF_details_lsuo_unweighted | 0.0428 |       0.0000 |  0.1674 |  0.4395 |      0.0000 |   0.2752

TEST:
...
                                            |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
------------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_convMF_details_lsuo_unweighted | 0.0246 |       0.0000 |  0.0997 |  0.3810 |      0.0000 | 9694.8496 |   0.2682

Random search best params:  {'cnn_bs': 64, 'cnn_lr': 0.00715825767317995, 'dropout_rate': 0.09284709261868257, 'k': 100, 'lambda_u': 55.17634543138084, 'lambda_v': 7222.742800877075}
RandomSearch_convMF_details_lsuo_unweighted
F1@10: 0.025
NCRR: 0.100
NDCG:

In [18]:
# convMF desc stratified split (weighted)

model_name = 'convMF'
text_column = 'desc'
split_type = 'strat'
weighted_or_unweighted = 'weighted'

train_convMF(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400

[RandomSearch_convMF_desc_strat_weighted] Training started!

[RandomSearch_convMF_desc_strat_weighted] Evaluation started!


Ranking: 100%|██████████| 100/100 [00:03<00:00, 27.23it/s]
Ranking: 100%|██████████| 96/96 [00:03<00:00, 31.73it/s]



VALIDATION:
...
                                        |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
--------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_convMF_desc_strat_weighted | 0.0334 |       0.0083 |  0.0780 |  0.2532 |      0.0091 |   3.0266

TEST:
...
                                        |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
--------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_convMF_desc_strat_weighted | 0.0200 |       0.0063 |  0.0398 |  0.2140 |      0.0084 | 8545.9508 |   3.6761

Random search best params:  {'cnn_bs': 16, 'cnn_lr': 0.00014093296321896555, 'dropout_rate': 0.11380225178789184, 'k': 200, 'lambda_u': 4.398388002820375, 'lambda_v': 8763.006231160176}
RandomSearch_convMF_desc_strat_weighted
F1@10: 0.020
NCRR: 0.040
NDCG: 0.214
Distributional cov

In [19]:
# convMF details stratified split (weighted)

model_name = 'convMF'
text_column = 'details'
split_type = 'strat'
weighted_or_unweighted = 'weighted'

train_convMF(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400

[RandomSearch_convMF_details_strat_weighted] Training started!

[RandomSearch_convMF_details_strat_weighted] Evaluation started!


Ranking: 100%|██████████| 100/100 [00:03<00:00, 27.31it/s]
Ranking: 100%|██████████| 96/96 [00:03<00:00, 31.99it/s]



VALIDATION:
...
                                           |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
------------------------------------------ + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_convMF_details_strat_weighted | 0.0239 |       0.0076 |  0.0696 |  0.2370 |      0.0091 |   3.0023

TEST:
...
                                           |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
------------------------------------------ + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_convMF_details_strat_weighted | 0.0195 |       0.0065 |  0.0510 |  0.2169 |      0.0084 | 7680.6526 |   3.6646

Random search best params:  {'cnn_bs': 64, 'cnn_lr': 0.003800634125320658, 'dropout_rate': 0.07345128751822862, 'k': 200, 'lambda_u': 63.13451477306333, 'lambda_v': 1011.8389054562443}
RandomSearch_convMF_details_strat_weighted
F1@10: 0.020
NCRR: 0.051
NDCG: 0.21

In [20]:
# convMF desc lsuo split (weighted)

model_name = 'convMF'
text_column = 'desc'
split_type = 'lsuo'
weighted_or_unweighted = 'weighted'

train_convMF(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = False
---
Training data:
Number of users = 83
Number of items = 397
Number of ratings = 3720
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 11
Number of items = 101
Number of ratings = 458
Number of unknown users = 11
Number of unknown items = 0
---
Validation data:
Number of users = 10
Number of items = 308
Number of ratings = 663
---
Total users = 104
Total items = 403

[RandomSearch_convMF_desc_lsuo_weighted] Training started!

[RandomSearch_convMF_desc_lsuo_weighted] Evaluation started!


Ranking: 100%|██████████| 11/11 [00:00<00:00, 48.96it/s]
Ranking: 100%|██████████| 10/10 [00:00<00:00, 39.97it/s]



VALIDATION:
...
                                       |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
-------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_convMF_desc_lsuo_weighted | 0.0428 |       0.0000 |  0.1674 |  0.4395 |      0.0000 |   0.2522

TEST:
...
                                       |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
-------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_convMF_desc_lsuo_weighted | 0.0246 |       0.0000 |  0.0997 |  0.3810 |      0.0000 | 7457.9751 |   0.2277

Random search best params:  {'cnn_bs': 64, 'cnn_lr': 0.00715825767317995, 'dropout_rate': 0.09284709261868257, 'k': 100, 'lambda_u': 55.17634543138084, 'lambda_v': 7222.742800877075}
RandomSearch_convMF_desc_lsuo_weighted
F1@10: 0.025
NCRR: 0.100
NDCG: 0.381
Distributional coverage: 8.6

In [21]:
# convMF details lsuo split (weighted)

model_name = 'convMF'
text_column = 'details'
split_type = 'lsuo'
weighted_or_unweighted = 'weighted'

train_convMF(model_name=model_name,
          text_column=text_column,
          split_type=split_type,
          weighted_or_unweighted=weighted_or_unweighted
          )

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = False
---
Training data:
Number of users = 83
Number of items = 397
Number of ratings = 3720
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 11
Number of items = 101
Number of ratings = 458
Number of unknown users = 11
Number of unknown items = 0
---
Validation data:
Number of users = 10
Number of items = 308
Number of ratings = 663
---
Total users = 104
Total items = 403

[RandomSearch_convMF_details_lsuo_weighted] Training started!

[RandomSearch_convMF_details_lsuo_weighted] Evaluation started!


Ranking: 100%|██████████| 11/11 [00:00<00:00, 48.63it/s]
Ranking: 100%|██████████| 10/10 [00:00<00:00, 39.97it/s]



VALIDATION:
...
                                          |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
----------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_convMF_details_lsuo_weighted | 0.0428 |       0.0000 |  0.1674 |  0.4395 |      0.0000 |   0.2522

TEST:
...
                                          |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
----------------------------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_convMF_details_lsuo_weighted | 0.0246 |       0.0000 |  0.0997 |  0.3810 |      0.0000 | 7883.7402 |   0.2362

Random search best params:  {'cnn_bs': 64, 'cnn_lr': 0.00715825767317995, 'dropout_rate': 0.09284709261868257, 'k': 100, 'lambda_u': 55.17634543138084, 'lambda_v': 7222.742800877075}
RandomSearch_convMF_details_lsuo_weighted
F1@10: 0.025
NCRR: 0.100
NDCG: 0.381
Distrib