In [4]:
import random
import numpy as np
import pandas as pd
# import cudf, itertools
import scipy.sparse as ssp
from functools import lru_cache, partial
from tqdm import tqdm, trange
from collections import Counter, defaultdict
import torch
import pickle

In [5]:
def cast_dtype(df : pd.DataFrame, columns=None):
    """Downcast selected columns of ``df`` to 32-bit types, in place.

    The kind of each column is decided from its first element:

    * float          -> column cast to ``float32``
    * int            -> column cast to ``int32`` (only if every value fits)
    * list of floats -> each row becomes a ``float32`` ``np.ndarray``
    * list of ints   -> each row becomes an ``int32`` ``np.ndarray``

    Columns of any other kind (strings, booleans, ...) are left untouched.

    Args:
        df: frame to modify in place.
        columns: iterable of column names to process; ``None`` means all
            columns of ``df``.

    Returns:
        None. ``df`` is mutated.
    """
    if columns is None:
        columns = df.columns
    if len(df) == 0:
        # No first element to inspect; nothing to cast.
        return

    int32_range = np.iinfo(np.int32)
    for k in columns:
        col = df[k]
        first = col.iloc[0]

        # bool is a subclass of int in Python; never treat flags as integers.
        if isinstance(first, (bool, np.bool_)):
            continue

        if isinstance(first, (float, np.floating)):
            df[k] = col.astype('float32')
        elif isinstance(first, (int, np.integer)):
            # Downcasting values outside the int32 range would corrupt them,
            # so such columns keep their original dtype.
            if int32_range.min <= col.min() and col.max() <= int32_range.max:
                df[k] = col.astype('int32')
        elif isinstance(first, list):
            if len(first) == 0:
                # Element type cannot be inferred from an empty list.
                continue
            inner = first[0]
            if isinstance(inner, (bool, np.bool_)):
                continue
            if isinstance(inner, (float, np.floating)):
                df[k] = col.apply(lambda x : np.array(x, dtype=np.float32))
            elif isinstance(inner, (int, np.integer)):
                df[k] = col.apply(lambda x : np.array(x, dtype=np.int32))

In [6]:
def normalize_scores(score_df, score_name, normalized_score_name):
    """Add a per-session softmax of one score column to ``score_df`` in place.

    Rows are grouped by the ``sess_id`` column and, within each group,
    ``exp(score) / sum(exp(score))`` is written to ``normalized_score_name``.

    The softmax is computed after subtracting the per-session maximum, which
    leaves the result unchanged mathematically but keeps ``exp`` from
    overflowing on large scores. All intermediate results keep the row index
    of ``score_df``, so the function is correct for any index (not only a
    default 0..n-1 range).

    Args:
        score_df: DataFrame with a ``sess_id`` column and the ``score_name``
            column. Modified in place.
        score_name: name of the raw score column.
        normalized_score_name: name of the column that receives the result.

    Returns:
        None. Besides ``normalized_score_name`` the helper columns
        ``exp_score`` (``exp`` of the raw score) and ``score_sum`` (its
        per-session sum) are written to ``score_df``, as existing callers
        expect to find and drop them.
    """
    # Group by a plain array so no index alignment is involved in the grouping.
    session_keys = score_df['sess_id'].to_numpy()
    raw_scores = score_df[score_name]

    # Numerically stable softmax: shift by the per-session maximum first.
    session_max = raw_scores.groupby(session_keys).transform('max')
    shifted_exp = np.exp(raw_scores - session_max)
    shifted_sum = shifted_exp.groupby(session_keys).transform('sum')
    score_df[normalized_score_name] = shifted_exp / shifted_sum

    # Helper columns kept for backward compatibility with callers that drop
    # them. Overflow to inf here no longer affects the normalized result.
    with np.errstate(over='ignore'):
        score_df['exp_score'] = np.exp(raw_scores.to_numpy())
    score_df['score_sum'] = score_df['exp_score'].groupby(session_keys).transform('sum')

In [7]:
# tau_dict = {'lyx_lknn_i2i_score' : 0.32119619437800245, 'lyx_lknn_u2i_score' : 10.24309068497735, 'lyx_gru4rec_i2i_score' : 0.05434574325976763, 'lyx_gru4rec_u2i_score' : 0.05434574325976763}

# Valid feature

In [8]:
# Parquet file with the extra score features (read by read_other_feature).
other_feature_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/XGBoost/candidates_phase2/lyx_merged_scores_150_0608.parquet'
# Parquet file with the merged candidate features. It is read by
# read_merged_candidates_feature and later overwritten by to_parquet.
merged_candidates_feature_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/XGBoost/candidates_phase2/merged_candidates_150_feature.parquet'

In [9]:
@lru_cache(maxsize=1)
def read_other_feature():
    """Load the parquet file at ``other_feature_path`` as a DataFrame.

    The result is memoized by ``lru_cache``, so every call returns the same
    DataFrame object; in-place edits made by a caller are visible to later
    calls.
    """
    other_feature_frame = pd.read_parquet(path=other_feature_path, engine='pyarrow')
    return other_feature_frame

@lru_cache(maxsize=1)
def read_merged_candidates_feature():
    """Load the parquet file at ``merged_candidates_feature_path`` as a DataFrame.

    The result is memoized by ``lru_cache``, so every call returns the same
    DataFrame object; in-place edits made by a caller are visible to later
    calls.
    """
    candidates_frame = pd.read_parquet(path=merged_candidates_feature_path, engine='pyarrow')
    return candidates_frame

In [10]:
# Load the extra score features. The reader is wrapped in lru_cache, so this
# is the same DataFrame object on every call.
other_feature = read_other_feature()
# other_feature = other_feature.sort_values(by=['sess_id', 'product']).reset_index(drop=True)

In [11]:
# Preview the loaded score frame (pandas truncates the rendering).
other_feature

Unnamed: 0,sess_id,sess_locale,product,target,lyx_sasrec_u2i_nextitem_score,lyx_sasrec_u2i_score_len12,lyx_sasrec_u2i_score_len13,lyx_avghist_u2i_score,lyx_avghist_i2i_score,lyx_w2v_cos_l1_score,lyx_w2v_cos_l2_score,lyx_w2v_cos_l3_score
0,0,DE,355165591X,0.0,9.029700,8.616446,6.175021,7.284950,10.004648,0.328371,0.333689,0.315483
1,0,DE,3833237058,0.0,13.801620,14.088508,12.585397,14.786277,14.609027,0.446276,0.365882,0.374754
2,0,DE,B00CIXSI6U,0.0,8.066158,7.619434,3.903853,8.245017,6.915462,0.418034,0.462675,0.440308
3,0,DE,B00NVDOWUW,0.0,13.816841,14.379192,12.975473,12.711834,10.658710,0.371478,0.351897,0.313646
4,0,DE,B00NVDP3ZU,0.0,11.830596,13.147077,12.251559,11.918351,10.092535,0.456581,0.369536,0.304733
...,...,...,...,...,...,...,...,...,...,...,...,...
78842194,261815,UK,B0BCX524Y6,0.0,12.423108,11.929237,11.749612,14.848063,12.652530,0.643565,0.643565,0.660659
78842195,261815,UK,B0BCX6QB4L,0.0,14.302721,13.967371,14.283616,17.382141,16.312590,0.550828,0.550828,0.559930
78842196,261815,UK,B0BFPJYXQL,0.0,7.790404,7.401981,6.740705,10.297573,9.075425,0.431231,0.431231,0.415055
78842197,261815,UK,B0BH3X67S3,0.0,9.700985,9.301737,9.710249,12.515525,10.423323,0.460977,0.460977,0.437468


In [12]:
# Spot-check session 0: all of its candidates, highest
# `lyx_sasrec_u2i_nextitem_score` first.
(
    other_feature.loc[other_feature['sess_id'] == 0]
    .sort_values(by='lyx_sasrec_u2i_nextitem_score', ascending=False)
)

Unnamed: 0,sess_id,sess_locale,product,target,lyx_sasrec_u2i_nextitem_score,lyx_sasrec_u2i_score_len12,lyx_sasrec_u2i_score_len13,lyx_avghist_u2i_score,lyx_avghist_i2i_score,lyx_w2v_cos_l1_score,lyx_w2v_cos_l2_score,lyx_w2v_cos_l3_score
149,0,DE,B09BNV7PBF,1.0,22.843765,24.441113,23.203220,19.433693,19.263979,0.898390,0.388144,0.362882
85,0,DE,B08G56GFCV,0.0,18.849321,19.985910,18.605576,14.409595,15.504601,0.494281,0.392397,0.413245
164,0,DE,B09BNW36Z2,0.0,18.844004,19.750332,18.304008,13.845119,13.113123,0.530713,0.328527,0.317395
34,0,DE,B07W6Q951M,0.0,18.590300,19.309258,16.556446,16.084581,14.689657,0.540188,0.405316,0.359677
80,0,DE,B08G4THL9P,0.0,18.169724,19.465364,17.412766,16.745104,16.056080,0.509918,0.483871,0.473354
...,...,...,...,...,...,...,...,...,...,...,...,...
53,0,DE,B085QZ727T,0.0,7.071573,7.090680,6.076988,6.745470,5.370977,0.343867,0.255888,0.226441
47,0,DE,B0813RY13Q,0.0,5.929756,6.710072,7.320052,9.243649,5.100277,0.427405,0.489814,0.441820
194,0,DE,B09CQF4CGF,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
258,0,DE,B09XJBF26D,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [13]:
# Raw score columns that receive a per-session normalized counterpart.
normalize_feat_list = [
    'lyx_avghist_u2i_score',
    'lyx_avghist_i2i_score',
]
# Alternative selection (disabled):
# normalize_feat_list = ['lyx_sasrec_u2i_nextitem_score', 'lyx_sasrec_u2i_score_len12', 'lyx_sasrec_u2i_score_len13']

In [14]:
# Add a `normalized_<name>` column for every listed score, and discard the
# helper columns that normalize_scores leaves on the frame after each pass.
for feat in normalize_feat_list:
    normalize_scores(other_feature, feat, f'normalized_{feat}')
    other_feature.drop(['exp_score', 'score_sum'], axis=1, inplace=True)

In [15]:
# Load the candidate feature frame that receives the new columns below.
merged_candidates_feature = read_merged_candidates_feature()

In [16]:
# Columns of `other_feature` to copy into the candidate feature frame.
combine_feat_list = [
    'normalized_lyx_avghist_u2i_score',
    'normalized_lyx_avghist_i2i_score',
]
# Alternative selection (disabled):
# combine_feat_list = ['lyx_sasrec_u2i_nextitem_score', 'lyx_sasrec_u2i_score_len12', 'lyx_sasrec_u2i_score_len13', 
#                      'normalized_lyx_sasrec_u2i_nextitem_score', 'normalized_lyx_sasrec_u2i_score_len12', 'normalized_lyx_sasrec_u2i_score_len13', 
#                      'lyx_avghist_u2i_score', 'lyx_avghist_i2i_score', 
#                      'lyx_w2v_cos_l1_score', 'lyx_w2v_cos_l2_score', 'lyx_w2v_cos_l3_score']

In [17]:
# Copy the selected score columns into the candidate feature frame.
# Column assignment matches rows by index label only, so first verify that
# both frames list the same (sess_id, product) pairs in the same order;
# otherwise scores would silently be attached to the wrong candidates.
assert merged_candidates_feature.index.equals(other_feature.index), \
    'merged_candidates_feature and other_feature have different row indexes'
assert all(
    (merged_candidates_feature[key].to_numpy() == other_feature[key].to_numpy()).all()
    for key in ('sess_id', 'product')
), 'merged_candidates_feature and other_feature rows are not in the same (sess_id, product) order'

for feat in combine_feat_list:
    merged_candidates_feature[feat] = other_feature[feat]

In [18]:
# Preview the score frame again; it now also carries the normalized_* columns.
other_feature

Unnamed: 0,sess_id,sess_locale,product,target,lyx_sasrec_u2i_nextitem_score,lyx_sasrec_u2i_score_len12,lyx_sasrec_u2i_score_len13,lyx_avghist_u2i_score,lyx_avghist_i2i_score,lyx_w2v_cos_l1_score,lyx_w2v_cos_l2_score,lyx_w2v_cos_l3_score,normalized_lyx_avghist_u2i_score,normalized_lyx_avghist_i2i_score
0,0,DE,355165591X,0.0,9.029700,8.616446,6.175021,7.284950,10.004648,0.328371,0.333689,0.315483,9.865672e-07,2.208272e-05
1,0,DE,3833237058,0.0,13.801620,14.088508,12.585397,14.786277,14.609027,0.446276,0.365882,0.374754,1.786123e-03,2.206525e-03
2,0,DE,B00CIXSI6U,0.0,8.066158,7.619434,3.903853,8.245017,6.915462,0.418034,0.462675,0.440308,2.576786e-06,1.005625e-06
3,0,DE,B00NVDOWUW,0.0,13.816841,14.379192,12.975473,12.711834,10.658710,0.371478,0.351897,0.313646,2.243842e-04,4.247250e-05
4,0,DE,B00NVDP3ZU,0.0,11.830596,13.147077,12.251559,11.918351,10.092535,0.456581,0.369536,0.304733,1.014816e-04,2.411134e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78842194,261815,UK,B0BCX524Y6,0.0,12.423108,11.929237,11.749612,14.848063,12.652530,0.643565,0.643565,0.660659,1.258794e-03,4.346779e-04
78842195,261815,UK,B0BCX6QB4L,0.0,14.302721,13.967371,14.283616,17.382141,16.312590,0.550828,0.550828,0.559930,1.586686e-02,1.689318e-02
78842196,261815,UK,B0BFPJYXQL,0.0,7.790404,7.401981,6.740705,10.297573,9.075425,0.431231,0.431231,0.415055,1.329542e-05,1.215209e-05
78842197,261815,UK,B0BH3X67S3,0.0,9.700985,9.301737,9.710249,12.515525,10.423323,0.460977,0.460977,0.437468,1.221649e-04,4.677731e-05


In [21]:
# Cast the newly added columns with cast_dtype, then persist the frame.
# NOTE(review): the target is the same path the frame was read from, so this
# overwrites the input file.
cast_dtype(merged_candidates_feature, combine_feat_list)
merged_candidates_feature.to_parquet(merged_candidates_feature_path, engine='pyarrow')

In [22]:
# Spot-check session 51001: the 15 candidates with the highest
# `sasrec_scores_3`, shown next to the raw and normalized next-item scores.
(
    merged_candidates_feature.loc[merged_candidates_feature['sess_id'] == 51001]
    .sort_values(by='sasrec_scores_3', ascending=False)
    .loc[:, [
        'sess_id',
        'sess_locale',
        'sasrec_scores_3',
        'normalized_sasrec_scores_3',
        'normalized_lyx_sasrec_u2i_nextitem_score',
        'lyx_sasrec_u2i_nextitem_score',
    ]]
    .head(15)
)

Unnamed: 0,sess_id,sess_locale,sasrec_scores_3,normalized_sasrec_scores_3,normalized_lyx_sasrec_u2i_nextitem_score,lyx_sasrec_u2i_nextitem_score
15222770,51001,DE,14.986555,0.120063,0.068128,17.619888
15222976,51001,DE,14.363394,0.064383,0.040411,17.097603
15222797,51001,DE,14.259733,0.058043,0.053337,17.375139
15222883,51001,DE,13.727954,0.034104,0.057548,17.451128
15222842,51001,DE,13.720732,0.033858,0.04013,17.090626
15222745,51001,DE,13.625393,0.03078,0.008879,15.58215
15222875,51001,DE,13.617064,0.030524,0.016907,16.226227
15222896,51001,DE,13.553229,0.028637,0.028576,16.75106
15222881,51001,DE,13.420553,0.025078,0.029488,16.78248
15222939,51001,DE,13.323638,0.022762,0.027813,16.724026


In [29]:
# Spot-check session 52202: the 15 candidates with the highest
# `sasrec_scores_3`, shown next to the raw and normalized len13 scores.
(
    merged_candidates_feature.loc[merged_candidates_feature['sess_id'] == 52202]
    .sort_values(by='sasrec_scores_3', ascending=False)
    .loc[:, [
        'sess_id',
        'sess_locale',
        'sasrec_scores_3',
        'normalized_sasrec_scores_3',
        'normalized_lyx_sasrec_u2i_score_len13',
        'lyx_sasrec_u2i_score_len13',
    ]]
    .head(15)
)

Unnamed: 0,sess_id,sess_locale,sasrec_scores_3,normalized_sasrec_scores_3,normalized_lyx_sasrec_u2i_score_len13,lyx_sasrec_u2i_score_len13
15580754,52202,DE,22.269001,0.311014,0.321377,21.1926
15580760,52202,DE,22.203421,0.291272,0.262246,20.989267
15580758,52202,DE,22.114962,0.266613,0.188954,20.661488
15580759,52202,DE,20.901043,0.079192,0.068908,19.652752
15580618,52202,DE,19.817327,0.026794,0.067253,19.62845
15580652,52202,DE,19.218456,0.014721,0.010598,17.78067
15580653,52202,DE,18.181253,0.005218,0.009246,17.644127
15580606,52202,DE,16.719313,0.001209,0.00253,16.34808
15580720,52202,DE,16.14082,0.000678,0.000928,15.344753
15580570,52202,DE,15.901094,0.000534,0.001984,16.105335


In [20]:
# Spot-check session 52202: the 15 candidates with the highest
# `sasrec_scores_3`, shown next to the raw and normalized avghist i2i scores.
(
    merged_candidates_feature.loc[merged_candidates_feature['sess_id'] == 52202]
    .sort_values(by='sasrec_scores_3', ascending=False)
    .loc[:, [
        'sess_id',
        'sess_locale',
        'sasrec_scores_3',
        'normalized_sasrec_scores_3',
        'normalized_lyx_avghist_i2i_score',
        'lyx_avghist_i2i_score',
    ]]
    .head(15)
)

Unnamed: 0,sess_id,sess_locale,sasrec_scores_3,normalized_sasrec_scores_3,normalized_lyx_avghist_i2i_score,lyx_avghist_i2i_score
15580754,52202,DE,22.269001,0.311014,0.27563,23.986166
15580760,52202,DE,22.203421,0.291272,0.072419,22.64957
15580758,52202,DE,22.114962,0.266613,0.131044,23.242641
15580759,52202,DE,20.901043,0.079192,0.024224,21.554451
15580618,52202,DE,19.817327,0.026794,0.0719,22.642385
15580652,52202,DE,19.218456,0.014721,0.00074,18.065777
15580653,52202,DE,18.181253,0.005218,0.001785,18.946566
15580606,52202,DE,16.719313,0.001209,0.000681,17.983629
15580720,52202,DE,16.14082,0.000678,0.000316,17.214352
15580570,52202,DE,15.901094,0.000534,0.001518,18.78441


In [22]:
# Preview the candidate feature frame with the copied columns.
merged_candidates_feature

Unnamed: 0,sess_id,sess_locale,product,target,sess_avg_price,product_price,sasrec_scores_3,normalized_sasrec_scores_3,sasrec_scores_2,normalized_sasrec_scores_2,...,normalized_lyx_sasrec_u2i_nextitem_score,normalized_lyx_sasrec_u2i_score_len12,normalized_lyx_sasrec_u2i_score_len13,lyx_avghist_u2i_score,lyx_avghist_i2i_score,lyx_w2v_cos_l1_score,lyx_w2v_cos_l2_score,lyx_w2v_cos_l3_score,normalized_lyx_avghist_u2i_score,normalized_lyx_avghist_i2i_score
0,0,DE,355165591X,0.0,43.256542,8.990000,2.230508,7.658405e-09,0.512931,1.377575e-09,...,8.283594e-07,1.193023e-07,3.667045e-08,7.284950,10.004648,0.328371,0.333689,0.315483,9.865672e-07,2.208272e-05
1,0,DE,3833237058,0.0,43.256542,22.000000,9.605231,1.221631e-05,9.325538,9.255110e-06,...,9.786717e-05,2.838802e-05,2.230012e-05,14.786277,14.609027,0.446276,0.365882,0.374754,1.786123e-03,2.206525e-03
2,0,DE,B00CIXSI6U,0.0,43.256542,6.470000,0.714114,1.681035e-09,-0.115904,7.345399e-10,...,3.160515e-07,4.402023e-08,3.784082e-09,8.245017,6.915462,0.418034,0.462675,0.440308,2.576786e-06,1.005625e-06
3,0,DE,B00NVDOWUW,0.0,43.256542,11.990000,8.750996,5.199363e-06,8.507557,4.084482e-06,...,9.936826e-05,3.796451e-05,3.293937e-05,12.711834,10.658710,0.371478,0.351897,0.313646,2.243842e-04,4.247250e-05
4,0,DE,B00NVDP3ZU,0.0,43.256542,22.990000,8.056712,2.596729e-06,5.898870,3.007453e-07,...,1.363429e-05,1.107329e-05,1.597068e-05,11.918351,10.092535,0.456581,0.369536,0.304733,1.014816e-04,2.411134e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78842194,261815,UK,B0BCX524Y6,0.0,9.383333,16.990000,6.813615,1.076201e-03,7.203015,4.597607e-04,...,1.175802e-03,1.000117e-03,1.228062e-03,14.848063,12.652530,0.643565,0.643565,0.660659,1.258794e-03,4.346779e-04
78842195,261815,UK,B0BCX6QB4L,0.0,9.383333,10.990000,9.030836,9.881445e-03,10.123234,8.526421e-03,...,7.702641e-03,7.677165e-03,1.547834e-02,17.382141,16.312590,0.550828,0.550828,0.559930,1.586686e-02,1.689318e-02
78842196,261815,UK,B0BFPJYXQL,0.0,9.383333,10.560000,0.796892,2.623396e-06,1.711608,1.895152e-06,...,1.143869e-05,1.081156e-05,8.201243e-06,10.297573,9.075425,0.431231,0.431231,0.415055,1.329542e-05,1.215209e-05
78842197,261815,UK,B0BH3X67S3,0.0,9.383333,6.830000,4.250781,8.296004e-05,6.447586,2.159998e-04,...,7.729138e-05,7.226728e-05,1.597851e-04,12.515525,10.423323,0.460977,0.460977,0.437468,1.221649e-04,4.677731e-05


# Test feature

In [23]:
# Parquet file with the extra score features for the test candidates
# (read by read_other_feature_test).
other_feature_test_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/XGBoost/candidates_phase2/lyx_merged_scores_150_test_0608.parquet'
# Parquet file with the merged test candidate features. It is read by
# read_merged_candidates_feature_test and later overwritten by to_parquet.
merged_candidates_feature_test_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/XGBoost/candidates_phase2/merged_candidates_150_test_feature.parquet'

In [24]:
@lru_cache(maxsize=1)
def read_other_feature_test():
    """Load the parquet file at ``other_feature_test_path`` as a DataFrame.

    The result is memoized by ``lru_cache``, so every call returns the same
    DataFrame object; in-place edits made by a caller are visible to later
    calls.
    """
    other_feature_test_frame = pd.read_parquet(path=other_feature_test_path, engine='pyarrow')
    return other_feature_test_frame

@lru_cache(maxsize=1)
def read_merged_candidates_feature_test():
    """Load the parquet file at ``merged_candidates_feature_test_path`` as a DataFrame.

    The result is memoized by ``lru_cache``, so every call returns the same
    DataFrame object; in-place edits made by a caller are visible to later
    calls.
    """
    candidates_test_frame = pd.read_parquet(path=merged_candidates_feature_test_path, engine='pyarrow')
    return candidates_test_frame

In [25]:
# Load the test-side score features and put them in (sess_id, product) order
# with a fresh 0..n-1 index.
other_feature_test = (
    read_other_feature_test()
    .sort_values(by=['sess_id', 'product'])
    .reset_index(drop=True)
)

In [26]:
# Spot-check session 250000: the 15 candidates with the highest
# `lyx_sasrec_u2i_nextitem_score`.
(
    other_feature_test.loc[other_feature_test['sess_id'] == 250000]
    .sort_values(by='lyx_sasrec_u2i_nextitem_score', ascending=False)
    .head(15)
)

Unnamed: 0,sess_id,sess_locale,product,lyx_sasrec_u2i_nextitem_score,lyx_sasrec_u2i_score_len12,lyx_sasrec_u2i_score_len13,lyx_avghist_u2i_score,lyx_avghist_i2i_score,lyx_w2v_cos_l1_score,lyx_w2v_cos_l2_score,lyx_w2v_cos_l3_score
76095575,250000,UK,B00VWMGLVM,21.481352,22.313637,21.173912,21.453463,19.699205,0.798777,0.830068,0.589649
76095687,250000,UK,B07QLJTZMG,19.872719,21.0413,19.609724,21.434074,19.98731,0.91469,0.960508,0.665013
76095797,250000,UK,B08M96LY9W,18.641748,18.820536,17.258677,19.870064,17.328249,0.890173,0.898877,0.700402
76095699,250000,UK,B07QQPYRBG,18.333351,19.88707,18.963737,20.6868,19.155796,0.865646,0.910739,0.66053
76095574,250000,UK,B00VWMGJLE,17.916487,17.943132,17.244564,20.617037,18.297737,0.80115,0.842982,0.728382
76095577,250000,UK,B00VWMHFZ8,17.73254,18.25338,17.398199,19.601786,18.027885,0.709729,0.716947,0.52545
76095578,250000,UK,B00VWMHKD0,17.38648,17.851889,17.349138,20.059046,19.170214,0.614776,0.665044,0.626233
76095737,250000,UK,B07WCLNGZ5,16.923275,17.661381,15.07454,19.886446,18.40624,0.743658,0.734452,0.528565
76095660,250000,UK,B07D6MTLX8,16.565149,17.677929,16.366167,19.364487,17.273363,0.627176,0.673913,0.588728
76095650,250000,UK,B077P4CHY4,16.497303,17.017025,16.138411,19.904816,18.344673,0.605906,0.636681,0.580668


In [27]:
# Raw score columns that receive a per-session normalized counterpart.
normalize_feat_list = [
    'lyx_avghist_u2i_score',
    'lyx_avghist_i2i_score',
]
# Alternative selection (disabled):
#  normalize_feat_list = ['lyx_sasrec_u2i_nextitem_score', 'lyx_sasrec_u2i_score_len12', 'lyx_sasrec_u2i_score_len13']

In [28]:
# Add a `normalized_<name>` column for every listed score, and discard the
# helper columns that normalize_scores leaves on the frame after each pass.
for feat in normalize_feat_list:
    normalize_scores(other_feature_test, feat, f'normalized_{feat}')
    other_feature_test.drop(['exp_score', 'score_sum'], axis=1, inplace=True)

In [29]:
# Load the test candidate feature frame that receives the new columns below.
merged_candidates_feature_test = read_merged_candidates_feature_test()

In [30]:
# Columns of `other_feature_test` to copy into the test candidate feature frame.
combine_feat_list = [
    'normalized_lyx_avghist_u2i_score',
    'normalized_lyx_avghist_i2i_score',
]
# Alternative selection (disabled):
# combine_feat_list = ['lyx_sasrec_u2i_nextitem_score', 'lyx_sasrec_u2i_score_len12', 'lyx_sasrec_u2i_score_len13', 
#                      'normalized_lyx_sasrec_u2i_nextitem_score', 'normalized_lyx_sasrec_u2i_score_len12', 'normalized_lyx_sasrec_u2i_score_len13', 
#                      'lyx_avghist_u2i_score', 'lyx_avghist_i2i_score', 
#                      'lyx_w2v_cos_l1_score', 'lyx_w2v_cos_l2_score', 'lyx_w2v_cos_l3_score']

In [31]:
# Copy the selected score columns into the test candidate feature frame.
# Column assignment matches rows by index label only, and other_feature_test
# was re-sorted after loading, so first verify that both frames list the same
# (sess_id, product) pairs in the same order; otherwise scores would silently
# be attached to the wrong candidates.
assert merged_candidates_feature_test.index.equals(other_feature_test.index), \
    'merged_candidates_feature_test and other_feature_test have different row indexes'
assert all(
    (merged_candidates_feature_test[key].to_numpy() == other_feature_test[key].to_numpy()).all()
    for key in ('sess_id', 'product')
), 'merged_candidates_feature_test and other_feature_test rows are not in the same (sess_id, product) order'

for feat in combine_feat_list:
    merged_candidates_feature_test[feat] = other_feature_test[feat]

In [46]:
# Cast the newly added columns with cast_dtype, then persist the frame.
# NOTE(review): the target is the same path the frame was read from, so this
# overwrites the input file.
cast_dtype(merged_candidates_feature_test, combine_feat_list)
merged_candidates_feature_test.to_parquet(merged_candidates_feature_test_path, engine='pyarrow')

In [32]:
# Preview the test candidate feature frame with the copied columns.
merged_candidates_feature_test

Unnamed: 0,sess_id,sess_locale,product,sasrec_scores_2,normalized_sasrec_scores_2,sasrec_scores_3,normalized_sasrec_scores_3,sess_avg_price,product_price,seqmlp_scores,...,normalized_lyx_sasrec_u2i_score_len13,lyx_avghist_u2i_score,lyx_avghist_i2i_score,lyx_w2v_cos_l1_score,lyx_w2v_cos_l2_score,lyx_w2v_cos_l3_score,last2item_freq,last2item_freq_test,normalized_lyx_avghist_u2i_score,normalized_lyx_avghist_i2i_score
0,0,DE,B000Q87D0Q,0.000000,3.282997e-10,0.000000,6.689660e-10,67.527199,36.761604,0.000000,...,2.829800e-09,0.000000,0.000000,0.000000,0.000000,0.000000,6,6,3.449579e-10,3.373327e-09
1,0,DE,B000QB30DW,0.501346,5.420036e-10,-0.588501,3.713825e-10,67.527199,9.990000,7.260942,...,2.737634e-07,6.182528,7.679409,0.474859,0.460259,0.298111,69,71,1.670336e-07,7.297655e-06
2,0,DE,B004BIG55Q,6.917523,3.315223e-07,5.737720,2.076175e-07,67.527199,8.990000,2.454817,...,8.470311e-07,8.774444,9.594070,0.273506,0.299569,0.283845,338,346,2.230789e-06,4.951197e-05
3,0,DE,B0053FTNQY,-0.100895,2.967921e-10,1.507319,3.020121e-09,67.527199,36.761604,3.837643,...,2.079746e-08,8.111156,7.462204,0.567309,0.553076,0.533694,11,11,1.149201e-06,5.872894e-06
4,0,DE,B007QWII1S,3.768980,1.422714e-08,4.594047,6.615662e-08,67.527199,54.950001,4.923371,...,8.494584e-06,12.209817,8.292838,0.438139,0.472592,0.322974,26,27,6.925039e-05,1.347697e-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96556030,316971,UK,B0B82N3CQQ,-1.076433,6.007382e-08,-0.457645,1.105378e-07,19.459999,13.990000,6.433315,...,4.428857e-07,4.772191,2.119687,0.574181,0.565705,0.554617,3,3,1.989415e-08,1.149218e-08
96556031,316971,UK,B0BB9NW3F3,0.000000,1.762683e-07,0.000000,1.746882e-07,19.459999,22.097065,0.000000,...,1.658097e-08,0.000000,0.000000,0.000000,0.000000,0.000000,2,2,1.683407e-10,1.379856e-09
96556032,316971,UK,B0BDMVKTQ3,-1.079334,5.989980e-08,-1.901198,2.609658e-08,19.459999,41.990002,-1.094359,...,6.533648e-06,10.099529,7.850157,0.395675,0.431507,0.440364,23,24,4.095992e-06,3.540899e-06
96556033,316971,UK,B0BHW1D5VP,6.722834,1.465088e-04,6.111193,7.876277e-05,19.459999,26.990000,8.700006,...,1.381068e-03,13.935356,12.456689,0.561685,0.599908,0.558696,10,10,1.897743e-04,3.545725e-04


In [None]:
# Preview the test-side score frame.
# NOTE(review): the output saved with this cell lists columns (`target`,
# `sasrec_duorec_score`, `w2v_l*_score`) that the test score frame queried
# earlier in this notebook does not show, so it looks stale - re-run before
# relying on it.
other_feature_test

Unnamed: 0,sess_id,sess_locale,product,target,sasrec_duorec_score,w2v_l1_score,w2v_l2_score,w2v_l3_score,normalized_sasrec_duorec_score
0,0,DE,355165591X,0.0,6.290242,25.177464,23.846624,22.635153,9.662357e-07
1,0,DE,3833237058,0.0,11.603280,32.207531,24.611195,25.308212,1.961129e-04
2,0,DE,B00CIXSI6U,0.0,4.110237,19.747381,20.370945,19.463253,1.092242e-07
3,0,DE,B00NVDOWUW,0.0,11.578343,25.640152,22.638163,20.257607,1.912829e-04
4,0,DE,B00NVDP3ZU,0.0,10.304344,33.229935,25.067163,20.753508,5.350390e-05
...,...,...,...,...,...,...,...,...,...
78842194,261815,UK,B0BCX524Y6,0.0,8.870004,25.790541,25.790541,25.963753,9.947462e-04
78842195,261815,UK,B0BCX6QB4L,0.0,11.360728,37.195541,37.195541,37.079201,1.200661e-02
78842196,261815,UK,B0BFPJYXQL,0.0,6.270709,18.659113,18.659113,17.611990,7.393550e-05
78842197,261815,UK,B0BH3X67S3,0.0,7.936618,28.308519,28.308519,26.345455,3.911544e-04


In [27]:
# Preview the test candidate feature frame.
# NOTE(review): the output saved with this cell ends with different columns
# than the preview of the same frame a few cells earlier, so it appears to
# come from a different run - re-run before relying on it.
merged_candidates_feature_test

Unnamed: 0,sess_id,sess_locale,product,sasrec_scores_2,normalized_sasrec_scores_2,sasrec_scores_3,normalized_sasrec_scores_3,sess_avg_price,product_price,seqmlp_scores,...,gru4rec_feat_scores_2,normalized_gru4rec_feat_scores_2,sasrec_duorec_score,normalized_sasrec_duorec_score,w2v_l1_score,w2v_l2_score,w2v_l3_score,normalized_w2v_l1_score,normalized_w2v_l2_score,normalized_w2v_l3_score
0,0,DE,B000Q87D0Q,0.000000,3.282997e-10,0.000000,6.689660e-10,67.527199,36.761604,0.000000,...,0.000000,4.436458e-10,0.000000,4.888735e-09,0.000000,0.000000,0.000000,6.725470e-18,1.902335e-18,3.300965e-19
1,0,DE,B000QB30DW,0.501346,5.420036e-10,-0.588501,3.713825e-10,67.527199,9.990000,7.260942,...,5.122046,7.438971e-08,5.116123,8.148930e-07,31.385134,31.193865,23.269724,2.871528e-04,6.708266e-05,4.212632e-09
2,0,DE,B004BIG55Q,6.917523,3.315223e-07,5.737720,2.076175e-07,67.527199,8.990000,2.454817,...,8.643940,2.517977e-06,6.817140,4.465219e-06,12.718339,14.284587,15.588253,2.245055e-12,3.040915e-12,1.943270e-12
3,0,DE,B0053FTNQY,-0.100895,2.967921e-10,1.507319,3.020121e-09,67.527199,36.761604,3.837643,...,-2.595809,3.308954e-11,2.309990,4.925070e-08,27.256227,27.248238,30.282482,4.623298e-06,1.297317e-06,4.679029e-06
4,0,DE,B007QWII1S,3.768980,1.422714e-08,4.594047,6.615662e-08,67.527199,54.950001,4.923371,...,7.384109,7.143555e-07,7.491323,8.762675e-06,23.500416,25.993063,20.459011,1.080995e-07,3.697691e-07,2.534407e-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96556030,316971,UK,B0B82N3CQQ,-1.076433,6.007382e-08,-0.457645,1.105378e-07,19.459999,13.990000,6.433315,...,4.623051,4.508018e-07,3.866317,1.910537e-06,14.294576,13.684182,18.628992,7.488371e-10,8.704190e-10,1.966086e-12
96556031,316971,UK,B0BB9NW3F3,0.000000,1.762683e-07,0.000000,1.746882e-07,19.459999,22.097065,0.000000,...,0.000000,4.428127e-09,0.000000,3.999773e-08,0.000000,0.000000,0.000000,4.638013e-16,9.925754e-16,1.596372e-20
96556032,316971,UK,B0BDMVKTQ3,-1.079334,5.989980e-08,-1.901198,2.609658e-08,19.459999,41.990002,-1.094359,...,1.618663,2.234583e-08,1.900935,2.676707e-07,22.474419,23.814653,33.746994,2.672071e-06,2.184420e-05,7.232148e-06
96556033,316971,UK,B0BHW1D5VP,6.722834,1.465088e-04,6.111193,7.876277e-05,19.459999,26.990000,8.700006,...,11.669815,5.180317e-04,9.440860,5.036731e-04,21.862946,22.688568,29.340395,1.449735e-06,7.084080e-06,8.820754e-08


In [38]:
# Spot-check session 263202: the 15 candidates with the highest
# `sasrec_scores_3`, shown next to the raw and normalized avghist u2i scores.
(
    merged_candidates_feature_test.loc[merged_candidates_feature_test['sess_id'] == 263202]
    .sort_values(by='sasrec_scores_3', ascending=False)
    .loc[:, [
        'sess_id',
        'sess_locale',
        'sasrec_scores_3',
        'normalized_sasrec_scores_3',
        'normalized_lyx_avghist_u2i_score',
        'lyx_avghist_u2i_score',
    ]]
    .head(15)
)

Unnamed: 0,sess_id,sess_locale,sasrec_scores_3,normalized_sasrec_scores_3,normalized_lyx_avghist_u2i_score,lyx_avghist_u2i_score
80122565,263202,UK,20.474483,0.718274,0.796844,28.467735
80122433,263202,UK,18.312805,0.082696,0.021057,24.834328
80122488,263202,UK,17.554052,0.038722,0.054052,25.777025
80122542,263202,UK,17.327715,0.030879,0.024594,24.98959
80122605,263202,UK,17.229515,0.027991,0.004449,23.279737
80122581,263202,UK,16.674685,0.016072,0.005835,23.550871
80122477,263202,UK,16.66309,0.015886,0.025571,25.028528
80122589,263202,UK,16.370598,0.011858,0.013626,24.399048
80122499,263202,UK,16.184855,0.009848,0.017566,24.653027
80122503,263202,UK,16.166992,0.009673,0.000111,19.592592


In [42]:
# Spot-check session 51301: the 15 candidates with the highest
# `sasrec_scores_3`, shown next to the raw and normalized next-item scores.
(
    merged_candidates_feature_test.loc[merged_candidates_feature_test['sess_id'] == 51301]
    .sort_values(by='sasrec_scores_3', ascending=False)
    .loc[:, [
        'sess_id',
        'sess_locale',
        'sasrec_scores_3',
        'normalized_sasrec_scores_3',
        'normalized_lyx_sasrec_u2i_nextitem_score',
        'lyx_sasrec_u2i_nextitem_score',
    ]]
    .head(15)
)

Unnamed: 0,sess_id,sess_locale,sasrec_scores_3,normalized_sasrec_scores_3,normalized_lyx_sasrec_u2i_nextitem_score,lyx_sasrec_u2i_nextitem_score
15405217,51301,DE,16.641602,0.230038,0.245207,20.388124
15405144,51301,DE,15.245219,0.056932,0.055316,18.899086
15405206,51301,DE,15.235601,0.056387,0.01332,17.475256
15405092,51301,DE,15.223587,0.055714,0.074586,19.197975
15404980,51301,DE,15.221009,0.05557,0.070739,19.145018
15405140,51301,DE,14.987396,0.043993,0.034658,18.431562
15405130,51301,DE,14.780273,0.035763,0.050337,18.804762
15405158,51301,DE,14.709152,0.033308,0.012759,17.432243
15405137,51301,DE,14.621141,0.030502,0.027831,18.212191
15405143,51301,DE,14.463743,0.02606,0.034988,18.441027


In [45]:
# Spot-check session 55202: the 15 candidates with the highest
# `sasrec_scores_3`, shown next to the raw and normalized len12 scores.
(
    merged_candidates_feature_test.loc[merged_candidates_feature_test['sess_id'] == 55202]
    .sort_values(by='sasrec_scores_3', ascending=False)
    .loc[:, [
        'sess_id',
        'sess_locale',
        'sasrec_scores_3',
        'normalized_sasrec_scores_3',
        'normalized_lyx_sasrec_u2i_score_len12',
        'lyx_sasrec_u2i_score_len12',
    ]]
    .head(15)
)

Unnamed: 0,sess_id,sess_locale,sasrec_scores_3,normalized_sasrec_scores_3,normalized_lyx_sasrec_u2i_score_len12,lyx_sasrec_u2i_score_len12
16577402,55202,DE,14.64962,0.115881,0.235981,21.169987
16577377,55202,DE,14.296375,0.081396,0.003965,17.083818
16577381,55202,DE,14.208056,0.074515,0.038625,19.360125
16577203,55202,DE,14.1425,0.069787,0.006379,17.559172
16577265,55202,DE,13.79389,0.049246,0.008609,17.859039
16577430,55202,DE,13.733116,0.046343,0.022442,18.817158
16577358,55202,DE,13.542681,0.038307,0.018293,18.612736
16577335,55202,DE,13.510315,0.037087,0.039218,19.375376
16577324,55202,DE,13.319477,0.030644,0.019925,18.698233
16577248,55202,DE,13.230216,0.028027,0.00414,17.127045
