In [None]:
import pandas as pd
import polars as pl
import numpy as np
import scipy.sparse as sp

from tqdm import tqdm
from typing import List, Any
from gensim.models import Word2Vec
import optuna
import random

## Task Description

As an ML engineer in a music streaming service, we are tasked with creating a recommendation system for our own streaming service, similar to Spotify. We do not have knowledge about the content, but we have the listening history of artists for each user.

Our task is to improve the algorithm that will determine the most relevant recommendations for each user based on their listening history.

#### Data Description
The train.parquet file provides us with data about users listening to artists on the service.

| Field     | Type | Description               |
|-----------|------|---------------------------|
| user_id   | str  | User ID                   |
| artist_id | str  | Artist ID               |

### Quality Metrics
We will use the ndcg@20 metric, which is often applied in ranking tasks. The more relevant objects are at the beginning of the recommendation list, the higher it is. Session validation with the last N artists from the listening history is used for evaluation.

The code for calculating ndcg is as follows:

```python
def user_ndcg(y_rel: List[Any], y_rec: List[Any], k: int = 20) -> float:
    """
    :param y_rel: relevant items
    :param y_rec: recommended items
    :param k: number of top recommended items
    :return: ndcg metric for user recommendations
    """
    dcg = sum([1. / np.log2(idx + 2) for idx, item in enumerate(y_rec[:k]) if item in y_rel])
    idcg = sum([1. / np.log2(idx + 2) for idx, _ in enumerate(zip(y_rel, np.arange(k)))])
    return dcg / idcg
    

## Read the dataset

In [None]:
data = pl.read_parquet('train_session_based.parquet')
data

user_id,artist_id
str,str
"""d705b538-1bd8-…","""69c71d72-7ed8-…"
"""d705b538-1bd8-…","""30bf469f-9abd-…"
"""d705b538-1bd8-…","""a26c9335-2459-…"
"""d705b538-1bd8-…","""69c903b5-dff0-…"
"""d705b538-1bd8-…","""af8eef9d-13aa-…"
"""d705b538-1bd8-…","""293a86ee-6ce7-…"
"""d705b538-1bd8-…","""348f4909-1c48-…"
"""d705b538-1bd8-…","""ad2bf122-726e-…"
"""d705b538-1bd8-…","""cc97fc57-30b5-…"
"""d705b538-1bd8-…","""3000b3a4-7435-…"


## Metrics

Our task will be the optimization of the ndcg@20 metric. Nevertheless, such a metric is difficult to interpret, so we will also have access to the hitrate@20 metric value.

In [None]:
TOP_K = 20


def user_hitrate(y_relevant: List[str], y_recs: List[str], k: int = TOP_K) -> int:
    return int(len(set(y_relevant).intersection(y_recs[:k])) > 0)

def user_ndcg(y_rel: List[Any], y_rec: List[Any], k: int = 10) -> float:
    """
    :param y_rel: relevant items
    :param y_rec: recommended items
    :param k: number of top recommended items
    :return: ndcg metric for user recommendations
    """
    dcg = sum([1. / np.log2(idx + 2) for idx, item in enumerate(y_rec[:k]) if item in y_rel])
    idcg = sum([1. / np.log2(idx + 2) for idx, _ in enumerate(zip(y_rel, np.arange(k)))])
    return dcg / idcg

In this dataset, identifiers are presented as strings, but for working with them, it might be easier to convert them into numbers (for instance, for matrix factorization algorithms).

In [None]:
user_mapping = {k: v for v, k in enumerate(data['user_id'].unique())}
user_mapping_inverse = {k: v for v, k in user_mapping.items()}

artist_mapping = {k: v for v, k in enumerate(data['artist_id'].unique())}
artist_mapping_inverse = {k: v for v, k in artist_mapping.items()}

In [None]:
grouped_df_with_inds = (
    data
    .with_columns([
        pl.col('user_id').apply(user_mapping.get),
        pl.col('artist_id').apply(artist_mapping.get),
    ])
    # для каждого пользователя оставим последние 3 объекта в качестве тестовой выборки,
    # а остальное будем использовать для тренировки
    .groupby('user_id')
    .agg([
        pl.col('artist_id').apply(lambda x: x[:-3]).alias('train_item_ids'),
        pl.col('artist_id').apply(lambda x: x[-3:]).alias('test_item_ids'),
    ])
)

grouped_df_with_inds

user_id,train_item_ids,test_item_ids
i64,list[i64],list[i64]
28128,"[55353, 52205, … 34244]","[51683, 89276, 30800]"
15728,"[15079, 5280, … 37792]","[55010, 57189, 86467]"
26456,"[60633, 83526, … 2615]","[19403, 23539, 77883]"
14312,"[31313, 49653, … 7797]","[52376, 35487, 28985]"
9544,"[70858, 28833, … 29641]","[13172, 75070, 61118]"
15360,"[50163, 17374, … 7091]","[86722, 37658, 30540]"
24752,"[5814, 78853, … 25589]","[81893, 34980, 27018]"
34800,"[43507, 45735, … 9856]","[77812, 4463, 33271]"
25272,"[57417, 22956, … 60972]","[87952, 44027, 24358]"
41688,"[19454, 79988, … 83583]","[39973, 52212, 36821]"


In [None]:
median_seq_len = int(grouped_df_with_inds['train_item_ids'].apply(len).median())
print(f"средняя длина сессии {median_seq_len}")

средняя длина сессии 42


In [None]:
# соберем строчки для разреженной матрицы
rows = []
cols = []
values = []
for user_id, train_ids, _ in grouped_df_with_inds.rows():
    rows.extend([user_id] * len(train_ids))
    values.extend([1] * len(train_ids))
    cols.extend(train_ids)

user_item_data = sp.csr_matrix((values, (rows, cols)))

## Baselines

As a simple baseline, we will recommend the most popular artists.

We want to first validate such a solution, which means we will consider only those artists who appear most frequently in `train_item_ids` as popular artists.

In [None]:
top_artists = (
    grouped_df_with_inds
    .select(pl.col('train_item_ids').alias('artist_id'))
    .explode('artist_id')
    .groupby('artist_id')
    .count()
    .sort('count', descending=True)
    .head(TOP_K + median_seq_len)
)['artist_id'].to_list()

In [None]:
ndcg_list = []
hitrate_list = []

for user_id, user_history, y_rel in grouped_df_with_inds.rows():
    y_rec = top_artists.copy()
    
    ndcg_list.append(user_ndcg(y_rel, y_rec))
    hitrate_list.append(user_hitrate(y_rel, y_rec))
    
print(f'NDCG@{TOP_K} = {np.mean(ndcg_list):.5f}, Hitrate@{TOP_K} = {np.mean(hitrate_list):.5f}')

NDCG@20 = 0.01401, Hitrate@20 = 0.10248


Don't forget about filtering out what has already been viewed (for different domains and approaches, this doesn't always improve recommendations, but in this case, it provided a boost).

In [None]:
ndcg_list = []
hitrate_list = []

for user_id, user_history, y_rel in grouped_df_with_inds.rows():
    y_rec = [artist_id for artist_id in top_artists if artist_id not in user_history]
    
    ndcg_list.append(user_ndcg(y_rel, y_rec))
    hitrate_list.append(user_hitrate(y_rel, y_rec))
    
print(f'NDCG@{TOP_K} = {np.mean(ndcg_list):.5f}, Hitrate@{TOP_K} = {np.mean(hitrate_list):.5f}')

NDCG@20 = 0.01740, Hitrate@20 = 0.11684


## Building a Recommendations file

To build recommendations, we can now consider all possible data. It is important to note that previously, to optimize memory, ids were converted to an integer format. However, for the production display, it is necessary to convert them back to the original identifiers.

In [None]:
top_artists = (
    data
    .groupby('artist_id')
    .count()
    .sort('count', descending=True)
    .head(TOP_K + median_seq_len)
)['artist_id'].to_list()

In [None]:
submission = []

for user_id, user_history in data.groupby('user_id').agg(pl.col('artist_id')).rows():
    y_rec = top_artists.copy()
    
    submission.append((user_id, y_rec))
    
submission = pl.DataFrame(submission, schema=('user_id', 'y_rec'))
submission.write_parquet('sample_submission.parquet')
submission

user_id,y_rec
str,list[str]
"""e5932af9-b616-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""1bf72b61-4b3b-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""a7f33afd-5bf8-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""04222978-c1ff-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""d37391c2-397c-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""91e6c0c4-e12e-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""cf79a76a-f714-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""4a23066e-0ab2-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""b1b2b0ff-0ae0-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""81244106-ab07-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"


! It's important to remember that the recommendations file should contain the original identifiers (strings), not those converted to numbers!

### W2V in RecSys using Gensim library

To apply the W2V (Word2Vec) algorithm without hyperparameter tuning, we have the W2V algorithm and an array of sessions that are used as training data.

Launch this algorithm on this dataset in a single line.

To validate this model, for each training session, call the method predict output word using the standard model.

If our model returns nothing (either an exception word or it has not been used before), we'll make the hitrate equal to zero and skip this example.

If we have recommendations, using them, we'll filter out those objects that were already in the training sample, and then assess their quality.

In [None]:
def evaluate_model(model):
    ndcg_list = []
    hitrate_list = []
    for train_ids, y_rel in grouped_df_with_inds.select('train_item_ids', 'test_item_ids').rows():
        model_preds = model.predict_output_word(
            train_ids, topn=(TOP_K + len(train_ids))
        )
        if model_preds is None:
            hitrate_list.append(0)
            continue

        y_rec = [pred[0] for pred in model_preds if pred[0] not in train_ids]
        ndcg_list.append(user_ndcg(y_rel, y_rec))
        hitrate_list.append(user_hitrate(y_rel, y_rec))
    return np.mean(ndcg_list), np.mean(hitrate_list)

# обучим w2v с параметрами по умолчанию
model = Word2Vec(grouped_df_with_inds['train_item_ids'].to_list())
mean_ndcg, mean_hitrate = evaluate_model(model)
print(f'NDCG@{TOP_K} = {mean_ndcg:.5f}, Hitrate@{TOP_K} = {mean_hitrate:.5f}')

#MAP@10 = 0.0033 Hitrate@10 = 0.1210

NDCG@20 = 0.02518, Hitrate@20 = 0.15314


To find the optimal hyperparameters, we're starting from a baseline where NDCG was 0.0174.

To adjust the parameters, we'll use Optuna.

We have a set of hyperparameters to consider:

- SKIP-GRAM algorithm (whether to use it or not)
- The window parameter (the length of the window used for training)
- The ns_exponent and negative parameters
- The min_count parameter (filters objects that appear less than a certain number of times)
- The vector_size parameter (determines the dimensionality of the embedding space; the larger it is, the more parameters can be trained, but this does not mean that the final model will be better)

In [75]:
SEED = 42

def set_seed(seed):
    np.random.seed(seed)
    random.seed(seed)

def objective(trial):
    sg = trial.suggest_categorical('sg', [0, 1])
    window = trial.suggest_int('window', 1, 10)
    ns_exponent = trial.suggest_float('ns_exponent', -3, 3)
    negative = trial.suggest_int('negative', 8, 20)
    min_count = trial.suggest_int('min_count', 3, 20)
    vector_size = trial.suggest_categorical('vector_size', [64, 128])
    
    print({
        'sg': sg,
        'window_len': window,
        'ns_exponent': ns_exponent,
        'negative': negative,
        'min_count': min_count,
        'vector_size': vector_size,
    })

    set_seed(SEED)
    model = Word2Vec(
        grouped_df_with_inds['train_item_ids'].to_list() + grouped_df_with_inds['test_item_ids'].to_list(),
        window=window,
        sg=sg,
        hs=0,
        min_count=min_count,
        vector_size=vector_size,
        negative=negative,
        ns_exponent=ns_exponent,
        seed=SEED,
        epochs=50,
    )
    
    mean_ndcg, mean_hitrate = evaluate_model(model)

    print(f'NDCG@{TOP_K} = {mean_ndcg:.5f}, Hitrate@{TOP_K} = {mean_hitrate:.5f}')
    
    return mean_ndcg
    
    
study = optuna.create_study(directions=('maximize',))
study.optimize(objective, n_trials=1000)

study.best_params


[32m[I 2024-02-21 16:24:09,870][0m Trial 340 finished with value: 0.05896601520387894 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -0.3085463048894043, 'negative': 18, 'min_count': 12, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.05897, Hitrate@20 = 0.33036
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.19725001284489105, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 16:27:02,015][0m Trial 341 finished with value: 0.058998134467641634 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.19725001284489105, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.05900, Hitrate@20 = 0.35510
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.08387208325769638, 'negative': 17, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 16:29:45,246][0m Trial 342 finished with value: 0.06703377987929941 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.08387208325769638, 'negative': 17, 'min_count': 12, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.06703, Hitrate@20 = 0.37558
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.36164902947154987, 'negative': 18, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 16:32:40,466][0m Trial 343 finished with value: 0.04483958961704998 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.36164902947154987, 'negative': 18, 'min_count': 12, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.04484, Hitrate@20 = 0.27890
{'sg': 0, 'window_len': 9, 'ns_exponent': -0.11699919858437095, 'negative': 19, 'min_count': 13, 'vector_size': 64}


[32m[I 2024-02-21 16:35:02,064][0m Trial 344 finished with value: 0.06346299967994629 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -0.11699919858437095, 'negative': 19, 'min_count': 13, 'vector_size': 64}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.06346, Hitrate@20 = 0.35804
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.1341181861174145, 'negative': 17, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 16:37:47,136][0m Trial 345 finished with value: 0.06464821679306978 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.1341181861174145, 'negative': 17, 'min_count': 12, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.06465, Hitrate@20 = 0.37028
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.8091347641151024, 'negative': 18, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 16:40:47,408][0m Trial 346 finished with value: 0.033218004704366234 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.8091347641151024, 'negative': 18, 'min_count': 12, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.03322, Hitrate@20 = 0.19268
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.052765899235520886, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 16:43:41,637][0m Trial 347 finished with value: 0.06746389453305783 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.052765899235520886, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.06746, Hitrate@20 = 0.37690
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.2921198471752661, 'negative': 18, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 16:46:43,780][0m Trial 348 finished with value: 0.04956235017745047 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.2921198471752661, 'negative': 18, 'min_count': 11, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.04956, Hitrate@20 = 0.30910
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.08234454232455704, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 16:49:38,568][0m Trial 349 finished with value: 0.06729909072757224 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.08234454232455704, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.06730, Hitrate@20 = 0.37744
{'sg': 0, 'window_len': 9, 'ns_exponent': -0.7397564909670277, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 16:52:19,202][0m Trial 350 finished with value: 0.0514661230636304 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -0.7397564909670277, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.05147, Hitrate@20 = 0.29520
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.1734172435772357, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 16:55:09,616][0m Trial 351 finished with value: 0.06126744660449547 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.1734172435772357, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.06127, Hitrate@20 = 0.36462
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.07610734610732803, 'negative': 17, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 16:58:12,127][0m Trial 352 finished with value: 0.06668136020480875 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.07610734610732803, 'negative': 17, 'min_count': 10, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.06668, Hitrate@20 = 0.37518
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.4694192085734412, 'negative': 17, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 17:01:09,445][0m Trial 353 finished with value: 0.040647323258346635 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.4694192085734412, 'negative': 17, 'min_count': 10, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.04065, Hitrate@20 = 0.24130
{'sg': 1, 'window_len': 9, 'ns_exponent': 0.6319677623786548, 'negative': 17, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 17:17:22,099][0m Trial 354 finished with value: 0.02438181355199115 and parameters: {'sg': 1, 'window': 9, 'ns_exponent': 0.6319677623786548, 'negative': 17, 'min_count': 10, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.02438, Hitrate@20 = 0.14928
{'sg': 0, 'window_len': 9, 'ns_exponent': -1.8450656094185767, 'negative': 18, 'min_count': 11, 'vector_size': 64}


[32m[I 2024-02-21 17:19:33,621][0m Trial 355 finished with value: 0.03551511874958646 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -1.8450656094185767, 'negative': 18, 'min_count': 11, 'vector_size': 64}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.03552, Hitrate@20 = 0.25348
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.28275568919744, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 17:22:24,279][0m Trial 356 finished with value: 0.05080127631867376 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.28275568919744, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.05080, Hitrate@20 = 0.31636
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.11601698629881127, 'negative': 18, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 17:25:14,752][0m Trial 357 finished with value: 0.06526611446905231 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.11601698629881127, 'negative': 18, 'min_count': 12, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.06527, Hitrate@20 = 0.37324
{'sg': 0, 'window_len': 10, 'ns_exponent': 1.8465036312441832, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 17:27:58,109][0m Trial 358 finished with value: 0.02118401275873163 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 1.8465036312441832, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.02118, Hitrate@20 = 0.13604
{'sg': 0, 'window_len': 9, 'ns_exponent': -0.07833001018036301, 'negative': 19, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 17:30:51,603][0m Trial 359 finished with value: 0.0644671827058982 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -0.07833001018036301, 'negative': 19, 'min_count': 12, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.06447, Hitrate@20 = 0.36234
{'sg': 0, 'window_len': 9, 'ns_exponent': -0.5571716506167368, 'negative': 17, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 17:33:45,640][0m Trial 360 finished with value: 0.053781277785249576 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -0.5571716506167368, 'negative': 17, 'min_count': 10, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.05378, Hitrate@20 = 0.30376
{'sg': 0, 'window_len': 9, 'ns_exponent': -0.18284865197383554, 'negative': 18, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 17:36:42,028][0m Trial 361 finished with value: 0.06176786955483875 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -0.18284865197383554, 'negative': 18, 'min_count': 12, 'vector_size': 128}. Best is trial 339 with value: 0.06750722550760041.[0m


NDCG@20 = 0.06177, Hitrate@20 = 0.34564
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.07217551752038419, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 17:39:40,746][0m Trial 362 finished with value: 0.06765508343312682 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.07217551752038419, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 362 with value: 0.06765508343312682.[0m


NDCG@20 = 0.06766, Hitrate@20 = 0.37966
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.2085482685196914, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 17:42:42,045][0m Trial 363 finished with value: 0.058034708612785486 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.2085482685196914, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 362 with value: 0.06765508343312682.[0m


NDCG@20 = 0.05803, Hitrate@20 = 0.35210
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.36467789065054534, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 17:45:43,311][0m Trial 364 finished with value: 0.04599034023362138 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.36467789065054534, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 362 with value: 0.06765508343312682.[0m


NDCG@20 = 0.04599, Hitrate@20 = 0.28106
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.09938202809244881, 'negative': 20, 'min_count': 13, 'vector_size': 128}


[32m[I 2024-02-21 17:48:51,499][0m Trial 365 finished with value: 0.0670031238107168 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.09938202809244881, 'negative': 20, 'min_count': 13, 'vector_size': 128}. Best is trial 362 with value: 0.06765508343312682.[0m


NDCG@20 = 0.06700, Hitrate@20 = 0.37568
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.053851021036013316, 'negative': 20, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 17:52:16,319][0m Trial 366 finished with value: 0.0659960493677217 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.053851021036013316, 'negative': 20, 'min_count': 10, 'vector_size': 128}. Best is trial 362 with value: 0.06765508343312682.[0m


NDCG@20 = 0.06600, Hitrate@20 = 0.36834
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.25103735162762786, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 17:55:23,911][0m Trial 367 finished with value: 0.060385992914307345 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.25103735162762786, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 362 with value: 0.06765508343312682.[0m


NDCG@20 = 0.06039, Hitrate@20 = 0.33752
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.07411320142850895, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 17:58:24,215][0m Trial 368 finished with value: 0.06800619114110919 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.07411320142850895, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06801, Hitrate@20 = 0.37866
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.21706766856178566, 'negative': 20, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 18:01:42,763][0m Trial 369 finished with value: 0.056654182994851456 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.21706766856178566, 'negative': 20, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05665, Hitrate@20 = 0.34908
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.10133319691524484, 'negative': 20, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 18:05:01,734][0m Trial 370 finished with value: 0.06645328162091656 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.10133319691524484, 'negative': 20, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06645, Hitrate@20 = 0.37760
{'sg': 0, 'window_len': 10, 'ns_exponent': 1.3472928384022271, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 18:08:13,022][0m Trial 371 finished with value: 0.027972868601001837 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 1.3472928384022271, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.02797, Hitrate@20 = 0.16114
{'sg': 0, 'window_len': 10, 'ns_exponent': 1.628347988099542, 'negative': 17, 'min_count': 19, 'vector_size': 128}


[32m[I 2024-02-21 18:10:52,848][0m Trial 372 finished with value: 0.02310102560738713 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 1.628347988099542, 'negative': 17, 'min_count': 19, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.02310, Hitrate@20 = 0.13286
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.06335265072269106, 'negative': 17, 'min_count': 14, 'vector_size': 128}


[32m[I 2024-02-21 18:13:36,605][0m Trial 373 finished with value: 0.06595232579490945 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.06335265072269106, 'negative': 17, 'min_count': 14, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06595, Hitrate@20 = 0.36862
{'sg': 1, 'window_len': 10, 'ns_exponent': 0.34764520971690266, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 18:31:48,066][0m Trial 374 finished with value: 0.028747234392002444 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': 0.34764520971690266, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.02875, Hitrate@20 = 0.17426
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.06211971307220043, 'negative': 17, 'min_count': 13, 'vector_size': 128}


[32m[I 2024-02-21 18:34:37,495][0m Trial 375 finished with value: 0.06782972324885367 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.06211971307220043, 'negative': 17, 'min_count': 13, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06783, Hitrate@20 = 0.37832
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.1992197588314895, 'negative': 17, 'min_count': 13, 'vector_size': 128}


[32m[I 2024-02-21 18:37:31,580][0m Trial 376 finished with value: 0.05909359673204381 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.1992197588314895, 'negative': 17, 'min_count': 13, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05909, Hitrate@20 = 0.35842
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.15599456590596916, 'negative': 19, 'min_count': 13, 'vector_size': 128}


[32m[I 2024-02-21 18:40:30,170][0m Trial 377 finished with value: 0.06317570072357505 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.15599456590596916, 'negative': 19, 'min_count': 13, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06318, Hitrate@20 = 0.35184
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.4445031441634304, 'negative': 17, 'min_count': 14, 'vector_size': 128}


[32m[I 2024-02-21 18:43:15,920][0m Trial 378 finished with value: 0.040696574172968764 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.4445031441634304, 'negative': 17, 'min_count': 14, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.04070, Hitrate@20 = 0.25140
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.0832252478558281, 'negative': 18, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 18:46:15,294][0m Trial 379 finished with value: 0.06729494773259272 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.0832252478558281, 'negative': 18, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06729, Hitrate@20 = 0.37740
{'sg': 0, 'window_len': 10, 'ns_exponent': 2.3831821213727418, 'negative': 17, 'min_count': 17, 'vector_size': 128}


[32m[I 2024-02-21 18:48:43,222][0m Trial 380 finished with value: 0.014067737631923713 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 2.3831821213727418, 'negative': 17, 'min_count': 17, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.01407, Hitrate@20 = 0.09088
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.2781646641646263, 'negative': 18, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 18:51:46,153][0m Trial 381 finished with value: 0.05140094183566773 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.2781646641646263, 'negative': 18, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05140, Hitrate@20 = 0.32386
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.10783682675898436, 'negative': 17, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 18:54:42,486][0m Trial 382 finished with value: 0.06631284065190499 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.10783682675898436, 'negative': 17, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06631, Hitrate@20 = 0.37718
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.028539428975243558, 'negative': 19, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 18:57:47,407][0m Trial 383 finished with value: 0.06660846520785703 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.028539428975243558, 'negative': 19, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06661, Hitrate@20 = 0.37024
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.17527640258002142, 'negative': 18, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 19:00:53,465][0m Trial 384 finished with value: 0.060776650678027025 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.17527640258002142, 'negative': 18, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06078, Hitrate@20 = 0.36456
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.2648529327819294, 'negative': 17, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 19:03:43,706][0m Trial 385 finished with value: 0.06072348679976636 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.2648529327819294, 'negative': 17, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06072, Hitrate@20 = 0.33790
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.10546223007366112, 'negative': 18, 'min_count': 13, 'vector_size': 128}


[32m[I 2024-02-21 19:06:37,423][0m Trial 386 finished with value: 0.0639819894037566 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.10546223007366112, 'negative': 18, 'min_count': 13, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06398, Hitrate@20 = 0.35892
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.9698306025481633, 'negative': 17, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 19:09:39,597][0m Trial 387 finished with value: 0.03241620695552042 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.9698306025481633, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.03242, Hitrate@20 = 0.18568
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.04781290034204459, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 19:12:48,889][0m Trial 388 finished with value: 0.06778966716143497 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.04781290034204459, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06779, Hitrate@20 = 0.37752
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.31525076029986565, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 19:16:04,421][0m Trial 389 finished with value: 0.04823848017223502 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.31525076029986565, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.04824, Hitrate@20 = 0.30170
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.12677613972331211, 'negative': 20, 'min_count': 13, 'vector_size': 128}


[32m[I 2024-02-21 19:19:11,797][0m Trial 390 finished with value: 0.06538872831699308 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.12677613972331211, 'negative': 20, 'min_count': 13, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06539, Hitrate@20 = 0.37428
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.5563268214222282, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 19:22:27,839][0m Trial 391 finished with value: 0.038455257083964 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.5563268214222282, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.03846, Hitrate@20 = 0.22510
{'sg': 0, 'window_len': 10, 'ns_exponent': -1.3637181249725032, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 19:25:11,872][0m Trial 392 finished with value: 0.04496503824030265 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -1.3637181249725032, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.04497, Hitrate@20 = 0.28946
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.042349177773476375, 'negative': 20, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 19:28:38,068][0m Trial 393 finished with value: 0.06746959272723456 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.042349177773476375, 'negative': 20, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06747, Hitrate@20 = 0.37856
{'sg': 1, 'window_len': 10, 'ns_exponent': -2.6523509734041513, 'negative': 20, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 19:42:57,935][0m Trial 394 finished with value: 0.0237759009200435 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -2.6523509734041513, 'negative': 20, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.02378, Hitrate@20 = 0.15124
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.37290902734859105, 'negative': 20, 'min_count': 13, 'vector_size': 128}


[32m[I 2024-02-21 19:45:59,324][0m Trial 395 finished with value: 0.05790649441608808 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.37290902734859105, 'negative': 20, 'min_count': 13, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05791, Hitrate@20 = 0.32538
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.1591182952243948, 'negative': 20, 'min_count': 14, 'vector_size': 128}


[32m[I 2024-02-21 19:48:54,499][0m Trial 396 finished with value: 0.06265114329815291 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.1591182952243948, 'negative': 20, 'min_count': 14, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06265, Hitrate@20 = 0.34954
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.03260298915130558, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 19:52:01,659][0m Trial 397 finished with value: 0.06697485118567757 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.03260298915130558, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06697, Hitrate@20 = 0.37064
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.05072925267350411, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 19:54:59,589][0m Trial 398 finished with value: 0.06612073687480846 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.05072925267350411, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06612, Hitrate@20 = 0.36634
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.22526892630609122, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 19:58:03,059][0m Trial 399 finished with value: 0.056414648467749094 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.22526892630609122, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05641, Hitrate@20 = 0.34830
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.8631434703524199, 'negative': 20, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 20:00:57,977][0m Trial 400 finished with value: 0.05191923637164972 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.8631434703524199, 'negative': 20, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05192, Hitrate@20 = 0.29304
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.1560479879541886, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 20:03:57,165][0m Trial 401 finished with value: 0.06274545547030634 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.1560479879541886, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06275, Hitrate@20 = 0.35122
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.016120101821199745, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 20:06:54,766][0m Trial 402 finished with value: 0.06666392975920761 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.016120101821199745, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06666, Hitrate@20 = 0.37260
{'sg': 0, 'window_len': 10, 'ns_exponent': 1.1747104230438437, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 20:09:56,460][0m Trial 403 finished with value: 0.030292015837437212 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 1.1747104230438437, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.03029, Hitrate@20 = 0.17294
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.43353250987792674, 'negative': 20, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 20:13:07,118][0m Trial 404 finished with value: 0.04259344585073586 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.43353250987792674, 'negative': 20, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.04259, Hitrate@20 = 0.25408
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.2663205947243887, 'negative': 19, 'min_count': 13, 'vector_size': 128}


[32m[I 2024-02-21 20:16:07,079][0m Trial 405 finished with value: 0.060023993142068514 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.2663205947243887, 'negative': 19, 'min_count': 13, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06002, Hitrate@20 = 0.33568
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.7262773778021634, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 20:19:17,141][0m Trial 406 finished with value: 0.03504030890257302 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.7262773778021634, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.03504, Hitrate@20 = 0.20122
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.1925028556571981, 'negative': 20, 'min_count': 18, 'vector_size': 128}


[32m[I 2024-02-21 20:22:08,972][0m Trial 407 finished with value: 0.06107019995314104 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.1925028556571981, 'negative': 20, 'min_count': 18, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06107, Hitrate@20 = 0.36724
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.013366452693677366, 'negative': 19, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 20:25:10,523][0m Trial 408 finished with value: 0.06699212875688736 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.013366452693677366, 'negative': 19, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06699, Hitrate@20 = 0.37248
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.4297056965214111, 'negative': 19, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 20:28:07,707][0m Trial 409 finished with value: 0.05768391456862413 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.4297056965214111, 'negative': 19, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05768, Hitrate@20 = 0.32020
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.08329209111219049, 'negative': 19, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 20:31:14,719][0m Trial 410 finished with value: 0.0646697553262112 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.08329209111219049, 'negative': 19, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06467, Hitrate@20 = 0.36182
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.19678579860470186, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 20:34:26,463][0m Trial 411 finished with value: 0.06147346624131497 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.19678579860470186, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06147, Hitrate@20 = 0.34500
{'sg': 0, 'window_len': 10, 'ns_exponent': 2.9699746822335493, 'negative': 19, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 20:37:08,882][0m Trial 412 finished with value: 0.015839465582987128 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 2.9699746822335493, 'negative': 19, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.01584, Hitrate@20 = 0.10448
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.01262703304310342, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 20:40:10,532][0m Trial 413 finished with value: 0.06724284645674278 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.01262703304310342, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06724, Hitrate@20 = 0.37464
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.055421047864491105, 'negative': 20, 'min_count': 13, 'vector_size': 128}


[32m[I 2024-02-21 20:43:08,085][0m Trial 414 finished with value: 0.06570283916004525 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.055421047864491105, 'negative': 20, 'min_count': 13, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06570, Hitrate@20 = 0.36540
{'sg': 1, 'window_len': 10, 'ns_exponent': -0.2432876279374385, 'negative': 20, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 21:02:15,187][0m Trial 415 finished with value: 0.037515351529148935 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': -0.2432876279374385, 'negative': 20, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.03752, Hitrate@20 = 0.22624
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.6635947233780948, 'negative': 20, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 21:05:17,306][0m Trial 416 finished with value: 0.05302373740419151 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.6635947233780948, 'negative': 20, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05302, Hitrate@20 = 0.29964
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.2838579993426794, 'negative': 20, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 21:08:31,262][0m Trial 417 finished with value: 0.051053505018069364 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.2838579993426794, 'negative': 20, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05105, Hitrate@20 = 0.32018
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.019042865364541944, 'negative': 19, 'min_count': 12, 'vector_size': 128}


[32m[I 2024-02-21 21:11:28,278][0m Trial 418 finished with value: 0.06733617686648745 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.019042865364541944, 'negative': 19, 'min_count': 12, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06734, Hitrate@20 = 0.37388
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.017894975464191464, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 21:14:28,878][0m Trial 419 finished with value: 0.06706538119251577 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.017894975464191464, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06707, Hitrate@20 = 0.37398
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.1454436451044807, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 21:17:27,840][0m Trial 420 finished with value: 0.06295738458398795 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.1454436451044807, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06296, Hitrate@20 = 0.35302
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.8108911477240394, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 21:20:27,960][0m Trial 421 finished with value: 0.03464285123347239 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.8108911477240394, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.03464, Hitrate@20 = 0.19816
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.34418909861853175, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 21:23:31,220][0m Trial 422 finished with value: 0.058458542400962765 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.34418909861853175, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05846, Hitrate@20 = 0.32590
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.020973295712118294, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 21:26:48,089][0m Trial 423 finished with value: 0.06771364532926168 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.020973295712118294, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06771, Hitrate@20 = 0.37646
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.17827456181400458, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 21:30:05,974][0m Trial 424 finished with value: 0.059373637433530566 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.17827456181400458, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05937, Hitrate@20 = 0.35726
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.5715698197756042, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 21:33:06,455][0m Trial 425 finished with value: 0.053692302539412144 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.5715698197756042, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05369, Hitrate@20 = 0.30454
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.05176733058653076, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 21:36:24,624][0m Trial 426 finished with value: 0.06776834084738169 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.05176733058653076, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06777, Hitrate@20 = 0.37694
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.14323884667999537, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 21:39:37,606][0m Trial 427 finished with value: 0.062417683502785426 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.14323884667999537, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06242, Hitrate@20 = 0.35382
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.030135543233390582, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 21:42:53,762][0m Trial 428 finished with value: 0.0677211686696061 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.030135543233390582, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06772, Hitrate@20 = 0.37930
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.3493397115117121, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 21:46:13,974][0m Trial 429 finished with value: 0.04550722690606309 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.3493397115117121, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.04551, Hitrate@20 = 0.27770
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.013301971009106892, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 21:49:27,583][0m Trial 430 finished with value: 0.06722935956739913 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.013301971009106892, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06723, Hitrate@20 = 0.37388
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.24665494773232138, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 21:52:37,580][0m Trial 431 finished with value: 0.05961727771453817 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.24665494773232138, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05962, Hitrate@20 = 0.33898
{'sg': 0, 'window_len': 10, 'ns_exponent': -1.0332136330880166, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 21:55:29,230][0m Trial 432 finished with value: 0.04963619377853638 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -1.0332136330880166, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.04964, Hitrate@20 = 0.29534
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.05453307544616488, 'negative': 19, 'min_count': 9, 'vector_size': 128}


[32m[I 2024-02-21 21:58:43,558][0m Trial 433 finished with value: 0.06562793486914144 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.05453307544616488, 'negative': 19, 'min_count': 9, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06563, Hitrate@20 = 0.37032
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.18441330660230446, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 22:01:50,458][0m Trial 434 finished with value: 0.06010610919669042 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.18441330660230446, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06011, Hitrate@20 = 0.36066
{'sg': 1, 'window_len': 10, 'ns_exponent': 0.04534702180924415, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 22:20:24,698][0m Trial 435 finished with value: 0.0489423599488667 and parameters: {'sg': 1, 'window': 10, 'ns_exponent': 0.04534702180924415, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.04894, Hitrate@20 = 0.28146
{'sg': 0, 'window_len': 9, 'ns_exponent': -0.16446233900999213, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 22:23:41,535][0m Trial 436 finished with value: 0.06161138850991002 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -0.16446233900999213, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06161, Hitrate@20 = 0.34668
{'sg': 0, 'window_len': 2, 'ns_exponent': -0.3439226294140309, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 22:26:31,779][0m Trial 437 finished with value: 0.04688008863994435 and parameters: {'sg': 0, 'window': 2, 'ns_exponent': -0.3439226294140309, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.04688, Hitrate@20 = 0.27110
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.20270409248217625, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 22:29:43,682][0m Trial 438 finished with value: 0.05659949248468089 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.20270409248217625, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05660, Hitrate@20 = 0.34736
{'sg': 0, 'window_len': 9, 'ns_exponent': -0.05292471411998122, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 22:32:42,270][0m Trial 439 finished with value: 0.0655579892008861 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -0.05292471411998122, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06556, Hitrate@20 = 0.36546
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.1131408366926437, 'negative': 19, 'min_count': 9, 'vector_size': 128}


[32m[I 2024-02-21 22:35:53,707][0m Trial 440 finished with value: 0.06435333153395856 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.1131408366926437, 'negative': 19, 'min_count': 9, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06435, Hitrate@20 = 0.37326
{'sg': 0, 'window_len': 9, 'ns_exponent': 0.2961296861683168, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 22:39:00,380][0m Trial 441 finished with value: 0.049030428455131975 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': 0.2961296861683168, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.04903, Hitrate@20 = 0.30984
{'sg': 0, 'window_len': 10, 'ns_exponent': -1.9297374824625444, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 22:41:47,601][0m Trial 442 finished with value: 0.034305268788609435 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -1.9297374824625444, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.03431, Hitrate@20 = 0.24772
{'sg': 0, 'window_len': 4, 'ns_exponent': 0.02930191712050726, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 22:44:47,037][0m Trial 443 finished with value: 0.06331775477918482 and parameters: {'sg': 0, 'window': 4, 'ns_exponent': 0.02930191712050726, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06332, Hitrate@20 = 0.35264
{'sg': 0, 'window_len': 9, 'ns_exponent': -0.09459989268125496, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 22:48:00,180][0m Trial 444 finished with value: 0.0639748052012622 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -0.09459989268125496, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06397, Hitrate@20 = 0.35926
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.4395921987099357, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 22:51:05,574][0m Trial 445 finished with value: 0.042281889229224476 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.4395921987099357, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.04228, Hitrate@20 = 0.25336
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.16863161519500341, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 22:54:06,979][0m Trial 446 finished with value: 0.061720851116647275 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.16863161519500341, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06172, Hitrate@20 = 0.36500
{'sg': 0, 'window_len': 9, 'ns_exponent': -0.5058332927220993, 'negative': 19, 'min_count': 9, 'vector_size': 128}


[32m[I 2024-02-21 22:57:09,464][0m Trial 447 finished with value: 0.05437049190142663 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -0.5058332927220993, 'negative': 19, 'min_count': 9, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05437, Hitrate@20 = 0.30640
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.16706350306269957, 'negative': 19, 'min_count': 16, 'vector_size': 128}


[32m[I 2024-02-21 22:59:49,244][0m Trial 448 finished with value: 0.06230406044986597 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.16706350306269957, 'negative': 19, 'min_count': 16, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06230, Hitrate@20 = 0.34874
{'sg': 0, 'window_len': 9, 'ns_exponent': -1.6382073167409519, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 23:02:29,778][0m Trial 449 finished with value: 0.038029028783961305 and parameters: {'sg': 0, 'window': 9, 'ns_exponent': -1.6382073167409519, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.03803, Hitrate@20 = 0.25854
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.035485071681940455, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 23:05:28,371][0m Trial 450 finished with value: 0.0676600526180054 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.035485071681940455, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06766, Hitrate@20 = 0.37630
{'sg': 0, 'window_len': 10, 'ns_exponent': -1.0169881641320573, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 23:08:15,310][0m Trial 451 finished with value: 0.05075872038323134 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -1.0169881641320573, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05076, Hitrate@20 = 0.29688
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.020208710009846052, 'negative': 19, 'min_count': 10, 'vector_size': 128}


[32m[I 2024-02-21 23:11:20,821][0m Trial 452 finished with value: 0.06704423967864613 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.020208710009846052, 'negative': 19, 'min_count': 10, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.06704, Hitrate@20 = 0.37342
{'sg': 0, 'window_len': 10, 'ns_exponent': -0.28956944815296304, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[32m[I 2024-02-21 23:14:16,829][0m Trial 453 finished with value: 0.05962770108625442 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': -0.28956944815296304, 'negative': 19, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.[0m


NDCG@20 = 0.05963, Hitrate@20 = 0.33192
{'sg': 1, 'window_len': 10, 'ns_exponent': 0.6254270532486954, 'negative': 19, 'min_count': 11, 'vector_size': 128}


[33m[W 2024-02-21 23:31:22,007][0m Trial 454 failed with parameters: {'sg': 1, 'window': 10, 'ns_exponent': 0.6254270532486954, 'negative': 19, 'min_count': 11, 'vector_size': 128} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "/Users/Elena_Sidorova/opt/anaconda3/lib/python3.9/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/var/folders/zt/59v1qs8x6_g4tdgv1k4rqts80000gp/T/ipykernel_43195/1088312698.py", line 25, in objective
    model = Word2Vec(
  File "/Users/Elena_Sidorova/opt/anaconda3/lib/python3.9/site-packages/gensim/models/word2vec.py", line 426, in __init__
    self.train(
  File "/Users/Elena_Sidorova/opt/anaconda3/lib/python3.9/site-packages/gensim/models/word2vec.py", line 1069, in train
    trained_word_count_epoch, raw_word_count_epoch, job_tally_epoch = self._train_epoch(
  File "/Users/Elena_Sidorova/opt/anaconda3/lib/python3.9/site-packages/gensim/models/w

KeyboardInterrupt: 

Выведем гиперпараметры лучшей версии модели:

In [76]:
study.best_params

{'sg': 0,
 'window': 10,
 'ns_exponent': 0.07411320142850895,
 'negative': 17,
 'min_count': 11,
 'vector_size': 128}

Trial 368 finished with value: 0.06800619114110919 and parameters: {'sg': 0, 'window': 10, 'ns_exponent': 0.07411320142850895, 'negative': 17, 'min_count': 11, 'vector_size': 128}. Best is trial 368 with value: 0.06800619114110919.
NDCG@20 = 0.06801, Hitrate@20 = 0.37866
{'sg': 0, 'window_len': 10, 'ns_exponent': 0.21706766856178566, 'negative': 20, 'min_count': 11, 'vector_size': 128}

Преейдем от integer идентификаторов к исходным:

In [118]:
set_seed(SEED)
model = Word2Vec(
    grouped_df_with_inds['train_item_ids'].to_list(),
    **study.best_params,
    hs=0,
    seed=SEED,
    epochs=50
)

#ndcg_list = []
submission_check = []
for user_id, train_item_ids, test_item_ids  in grouped_df_with_inds.select('user_id', 'train_item_ids', 'test_item_ids').rows():
    combined_known_items = set(train_item_ids + test_item_ids)
    model_preds = model.predict_output_word(combined_known_items, topn=(TOP_K + len(combined_known_items)))
    if model_preds is None:
        ndcg_list.append(0)
        continue
        
    y_rec = [pred[0] for pred in model_preds if pred[0] not in combined_known_items]
    
    #ndcg_list.append(user_ndcg(combined_known_items, y_rec))

    mapped_user_id = user_mapping_inverse[user_id]

    mapped_y_rec = [artist_mapping_inverse[artist_id] for artist_id in y_rec] # Adjust based on your actual logic
    
    submission_check.append((mapped_user_id, mapped_y_rec))

submission_check = pl.DataFrame(submission_check, schema=('user_id', 'y_rec'))
submission_check



user_id,y_rec
str,list[str]
"""380988fb-266a-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""f468c554-1cf2-4bd6-9281-4ed93216427c"", … ""d749731f-5515-4df7-8dcb-c4c3ef6b98f7""]"
"""df2f6ea6-e85b-…","[""277c978f-65fd-4d35-9784-feed640f11b6"", ""b4e4b725-d47f-4f79-b044-a14cdd47e980"", … ""7bd9002b-6a41-4557-b5f3-45622661fdcd""]"
"""e2d3692d-307c-…","[""8e3ec798-4315-48d2-8acc-b9e170225989"", ""df3d0658-443f-4998-9824-fbcda9e24897"", … ""93bf07d3-5233-4270-a3c4-9815e9d786da""]"
"""d70f22ea-1168-…","[""6f1b8a85-c127-40a6-84be-d659aefc99e8"", ""73fb8894-9fe4-47a0-aea9-227fe1854bf6"", … ""9d521ac7-6063-442b-88b4-27da9360c749""]"
"""40f6bca4-e4be-…","[""542c41c6-3a05-47ec-8ea0-ae7cfe9d937c"", ""a653c2e1-3130-40fd-b15d-115dd93bf77d"", … ""58389edf-72df-468f-8e79-7fb8ce6a6124""]"
"""f7cabbea-4434-…","[""3fba9cd1-9fdd-4839-bf94-46cfe2f54da0"", ""112d5d5e-57ad-439b-b956-495804c81b1b"", … ""0ebd9e28-51f8-4c1d-a3a6-42fbaf08fc5a""]"
"""0f105b8d-0e48-…","[""faac1632-9102-4c56-8355-4fb2148a2025"", ""b23bb8f9-a8d7-4210-87a4-f2db099dc00f"", … ""d340290a-4ace-4b78-9ed0-aec5f8f75ef1""]"
"""425aadca-7588-…","[""c65422b6-6896-49d7-9c83-72f12980a375"", ""148a5aca-2f10-4577-afe9-ca83e9e56bad"", … ""13145656-b46b-4dba-875f-9b6f7bf5d72e""]"
"""5f72814e-e0d8-…","[""7ce5e594-a09f-4526-934a-d52cdcaa689a"", ""c2a01cab-0108-4a17-a205-db3e3ff7b01c"", … ""b235f72d-8b43-4f10-9e88-687615b8d8bc""]"
"""04530482-cc5c-…","[""c70a9c8c-a44c-4db1-a3ac-268a5326521a"", ""fdf581d0-fd24-465e-929f-9092418ed059"", … ""40039115-5037-4c5b-a80b-a7190571f4ab""]"


Save the final result:

In [None]:
submission_check.write_parquet('sample_submission.parquet')

#### Result

NDCG@20 = 0.08397145641325111

Hitrate@20 = 0.36992