In [1]:
import sys
sys.path = ['/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/RecStudio/'] + sys.path
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import random
import numpy as np
import pandas as pd
import cudf, itertools
import scipy.sparse as ssp
from functools import lru_cache, partial
from tqdm import tqdm, trange
from collections import Counter, defaultdict
import torch
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def cast_dtype(df : pd.DataFrame):
    for k in df.columns:
        dt = type(df[k].iloc[0])
        if 'float' in str(dt):
            df[k] = df[k].astype('float32')
        elif 'int' in str(dt):
            df[k] = df[k].astype('int32')
        elif dt == list:
            dt_ = type(df.iloc[0][k][0])
            if 'float' in str(dt_):
                df[k] = df[k].apply(lambda x : np.array(x, dtype=np.float32))
            elif 'int' in str(dt_):
                df[k] = df[k].apply(lambda x : np.array(x, dtype=np.int32))

In [3]:
def get_scores(merged_candidates_df, product_id_name, query_embeddings, product_embeddings):
    batch_size = 4096
    num_iter = (len(merged_candidates_df) - 1) // batch_size + 1
    score_list = []
    with torch.no_grad():
        for i in tqdm(range(num_iter)):
            st, ed = i * batch_size, (i + 1) * batch_size 
            batch_sess = merged_candidates_df.iloc[st : ed]
            batch_sess_id = torch.tensor(batch_sess['sess_id'].tolist(), device=query_embeddings.device)
            batch_product_id = torch.tensor(batch_sess[product_id_name].tolist(), device=product_embeddings.device)
            query_emb = query_embeddings[batch_sess_id]
            product_emb = product_embeddings[batch_product_id]
            batch_score = (query_emb * product_emb).sum(dim=-1) 
            score_list.append(batch_score.cpu())
        score_list = torch.cat(score_list, dim=0).cpu().tolist()
        return score_list 

In [4]:
def normalize_scores(score_df, score_name, normalized_score_name):
    score_df_g = cudf.from_pandas(score_df)
    score_df_g['exp_score'] = np.exp(score_df_g[score_name].to_numpy() - score_df_g[score_name].max())
    scores_sum_g = score_df_g[['sess_id', 'exp_score']].groupby('sess_id').sum()
    scores_sum_g.reset_index(inplace=True)
    scores_sum_g = scores_sum_g.sort_values(by=['sess_id'], ascending=True)
    scores_sum_g.reset_index(drop=True, inplace=True)
    scores_sum_g.rename(columns={'exp_score' : 'score_sum'}, inplace=True)

    merged_score_df_g = score_df_g.merge(scores_sum_g, how='left', left_on=['sess_id'], right_on=['sess_id'])
    merged_score_df_g = merged_score_df_g.sort_values(by=['sess_id', 'product'])
    merged_score_df_g.reset_index(drop=True, inplace=True)
    
    merged_score_df = merged_score_df_g.to_pandas(merged_score_df_g)
    score_df[normalized_score_name] = merged_score_df['exp_score'] / merged_score_df['score_sum']
    score_df['exp_score'] = merged_score_df['exp_score']
    score_df['score_sum'] = merged_score_df['score_sum']

    del scores_sum_g
    del merged_score_df_g 

# Merge valid score

In [47]:
merged_candidates_feature_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/XGBoost/candidates/merged_candidates_no_hist_feature.parquet'
valid_sessions_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/data_for_recstudio/task1_data/task13_4_task1_valid_sessions.csv'
product_data_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/raw_data/products_train.csv'

In [48]:
@lru_cache(maxsize=1)
def read_merged_candidates_feature():
    return pd.read_parquet(merged_candidates_feature_path, engine='pyarrow')

@lru_cache(maxsize=1)
def read_valid_sessions():
    return pd.read_csv(valid_sessions_path)

@lru_cache(maxsize=1)
def read_product_data():
    return pd.read_csv(product_data_path)

In [8]:
merged_candidates = read_merged_candidates_feature()
valid_sessions = read_valid_sessions()
product_data = read_product_data()

In [10]:
merged_candidates_product = merged_candidates[['sess_id', 'sess_locale', 'product']]
merged_candidates_product

Unnamed: 0,sess_id,sess_locale,product
0,0,UK,B000OPPVCS
1,0,UK,B000V599Y2
2,0,UK,B0018HH444
3,0,UK,B0079JI4DU
4,0,UK,B0079JI4EY
...,...,...,...
84407334,361580,DE,B0BB7XV97M
84407335,361580,DE,B0BB7YSRBX
84407336,361580,DE,B0BB7ZMGY8
84407337,361580,DE,B0BD4CP7N3


In [11]:
product_index = product_data[['id', 'locale']]
product_index['product_index'] = product_index.index + 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  product_index['product_index'] = product_index.index + 1


In [12]:
merged_candidates_product_g = cudf.from_pandas(merged_candidates_product)
product_index_g = cudf.from_pandas(product_index)

In [13]:
merged_candidates_product_index_g = merged_candidates_product_g.merge(product_index_g, how='left', left_on=['sess_locale', 'product'], right_on=['locale', 'id'])
merged_candidates_product_index_g = merged_candidates_product_index_g.sort_values(by=['sess_id', 'product'])
merged_candidates_product_index_g.reset_index(drop=True, inplace=True)
assert len(merged_candidates_product_index_g) == len(merged_candidates_product_g)
merged_candidates_product_index_g.drop(columns=['id', 'locale'], inplace=True)
merged_candidates_product_index_g['product_index'] = merged_candidates_product_index_g['product_index'].fillna(0)
merged_candidates_product_index = merged_candidates_product_index_g.to_pandas()

In [34]:
del merged_candidates_product_g
del product_index_g
del merged_candidates_product_index_g

In [19]:
roberta_product_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/text_method/roberta_results/results_epoch_4/item_reps/item.npy'
roberta_valid_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/text_method/roberta_results/valid_results_epoch_4/valid_query_reps/query.npy'

In [20]:
roberta_product_embeddings = np.load(roberta_product_embeddings_path)
roberta_valid_embeddings = np.load(roberta_valid_embeddings_path)

In [21]:
roberta_product_embeddings = torch.from_numpy(roberta_product_embeddings)
roberta_valid_embeddings = torch.from_numpy(roberta_valid_embeddings)
roberta_product_embeddings = torch.cat([torch.tensor([[0.0] * roberta_product_embeddings.shape[1]]), roberta_product_embeddings], dim=0)

In [27]:
roberta_product_embeddings = roberta_product_embeddings.to('cuda:0')
roberta_valid_embeddings = roberta_valid_embeddings.to('cuda:0')

In [32]:
merged_candidates_product_index['roberta_scores'] = get_scores(merged_candidates_product_index, 'product_index', roberta_valid_embeddings, roberta_product_embeddings)

100%|██████████| 20608/20608 [02:30<00:00, 137.32it/s]


In [35]:
roberta_product_embeddings = roberta_product_embeddings.to('cpu')
roberta_valid_embeddings = roberta_valid_embeddings.to('cpu')

In [43]:
normalize_scores(merged_candidates_product_index, 'roberta_scores', 'roberta_normalized_scores')

In [45]:
assert len(merged_candidates) == len(merged_candidates_product_index)
merged_candidates['roberta_scores'] = merged_candidates_product_index['roberta_scores']
merged_candidates['roberta_normalized_scores'] = merged_candidates_product_index['roberta_normalized_scores']

In [57]:
cast_dtype(merged_candidates)
merged_candidates.to_parquet(merged_candidates_feature_path, engine='pyarrow')

In [26]:
roberta_product_embeddings

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0310,  0.2281, -0.1089,  ..., -0.5854,  0.1648, -0.1245],
        [ 0.0048,  0.3028, -0.1198,  ...,  0.1212,  0.1891,  0.1643],
        ...,
        [ 0.2472, -0.0760,  0.0638,  ..., -0.5656, -0.1147, -0.2556],
        [ 0.2095,  0.0716, -0.0547,  ..., -0.3876, -0.0677, -0.1837],
        [ 0.0653, -0.2431, -0.2078,  ..., -0.2018, -0.0851, -0.1529]])

In [44]:
merged_candidates_product_index

Unnamed: 0,sess_id,sess_locale,product,product_index,roberta_scores,roberta_normalized_scores,exp_score,score_sum
0,0,UK,B000OPPVCS,1375599,265.826630,0.001087,0.000022,0.020035
1,0,UK,B000V599Y2,1324417,259.157867,0.000001,0.0,0.020035
2,0,UK,B0018HH444,1413111,257.331421,0.0,0.0,0.020035
3,0,UK,B0079JI4DU,0,0.000000,0.0,0.0,0.020035
4,0,UK,B0079JI4EY,0,0.000000,0.0,0.0,0.020035
...,...,...,...,...,...,...,...,...
84407334,361580,DE,B0BB7XV97M,446969,263.574158,0.001378,0.000002,0.001661
84407335,361580,DE,B0BB7YSRBX,275922,263.523743,0.001311,0.000002,0.001661
84407336,361580,DE,B0BB7ZMGY8,429872,263.567017,0.001369,0.000002,0.001661
84407337,361580,DE,B0BD4CP7N3,276547,265.401611,0.008571,0.000014,0.001661


In [18]:
product_data.iloc[276546]

id                                                 B0BD4CP7N3
locale                                                     DE
title       i Cafilas Wiederbefüllbare Kaffeekapsel für Ta...
price                                                   24.99
brand                                               i Cafilas
color                              220ml Kaffeekapsel +Tamper
size                                                      NaN
model                                                     NaN
material                                                  NaN
author                                                    NaN
desc        PERFEKTE ALTERNATIVE: Die beste Wahl für kosts...
Name: 276546, dtype: object

In [20]:
product_index

Unnamed: 0,id,locale,index
0,B005ZSSN10,DE,1
1,B08PRYN6LD,DE,2
2,B09MBZJ48V,DE,3
3,B08ZN6F26S,DE,4
4,B094DGRV7D,DE,5
...,...,...,...
1551052,B09BW5CDRR,IT,1551053
1551053,B0050IILBM,IT,1551054
1551054,B07W4C5W9D,IT,1551055
1551055,B012D0HJXA,IT,1551056


In [46]:
merged_candidates

Unnamed: 0,sess_id,sess_locale,product,target,sasrec_scores_2,sasrec_normalized_scores_2,product_freq,gru4rec_scores,gru4rec_normalized_scores,sess_avg_price,...,gru4rec_scores_2,gru4rec_normalized_scores_2,co_graph_counts_0,normalized_co_graph_counts_0,co_graph_counts_1,normalized_co_graph_counts_1,co_graph_counts_2,normalized_co_graph_counts_2,roberta_scores,roberta_normalized_scores
0,0,UK,B000OPPVCS,0.0,11.972421,2.286162e-04,104,6.484859,3.816029e-05,7.388571,...,12.291418,5.528012e-05,1,0.002188,1.0,0.004819,2,0.004525,265.826630,0.001087
1,0,UK,B000V599Y2,0.0,13.152878,7.443427e-04,37,4.342063,4.477209e-06,7.388571,...,12.142086,4.761183e-05,0,0.000000,0.0,0.000000,2,0.004525,259.157867,0.000001
2,0,UK,B0018HH444,0.0,5.606023,3.928400e-07,7,3.220763,1.458925e-06,7.388571,...,8.919555,1.897524e-06,1,0.002188,1.0,0.004819,1,0.002262,257.331421,0.0
3,0,UK,B0079JI4DU,0.0,0.000000,1.443945e-09,67,0.000000,5.824698e-08,7.388571,...,0.000000,2.537897e-10,1,0.002188,0.5,0.002410,2,0.004525,0.000000,0.0
4,0,UK,B0079JI4EY,0.0,0.000000,1.443945e-09,77,0.000000,5.824698e-08,7.388571,...,0.000000,2.537897e-10,1,0.002188,1.0,0.004819,2,0.004525,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84407334,361580,DE,B0BB7XV97M,0.0,9.117821,6.077226e-05,56,9.268379,1.396883e-05,32.424000,...,14.038595,8.992638e-05,0,0.000000,0.0,0.000000,0,0.000000,263.574158,0.001378
84407335,361580,DE,B0BB7YSRBX,0.0,9.163816,6.363281e-05,58,7.047796,1.516259e-06,32.424000,...,13.342258,4.482001e-05,0,0.000000,0.0,0.000000,0,0.000000,263.523743,0.001311
84407336,361580,DE,B0BB7ZMGY8,0.0,11.256460,5.158278e-04,452,9.359167,1.529639e-05,32.424000,...,12.778135,2.549625e-05,0,0.000000,0.0,0.000000,0,0.000000,263.567017,0.001369
84407337,361580,DE,B0BD4CP7N3,0.0,-3.778687,1.523433e-10,1,-0.593306,7.282568e-10,32.424000,...,-3.986487,1.335653e-12,0,0.000000,0.0,0.000000,0,0.000000,265.401611,0.008571


In [56]:
merged_candidates.query('sess_id==60').sort_values(by=['sasrec_normalized_scores_2'], ascending=False)[['product', 'sasrec_normalized_scores_2', 'roberta_scores', 'roberta_normalized_scores']][:15]

Unnamed: 0,product,sasrec_normalized_scores_2,roberta_scores,roberta_normalized_scores
14724,B0BFPJ2GYZ,0.820259,267.482422,0.026355
14728,B0BFPL2QTY,0.097062,267.883789,0.03937
14694,B0B1J924QR,0.024243,269.148499,0.139452
14559,B083VL9TWR,0.01367,265.113983,0.002467
14705,B0B5P2V47K,0.01113,262.555878,0.000191
14715,B0B9QZ585L,0.010924,265.838074,0.00509
14725,B0BFPJLPWX,0.003905,260.694,3e-05
14579,B08HSBN2DF,0.003063,263.648682,0.00057
14722,B0BFPHSNC7,0.002091,262.94754,0.000283
14690,B0B18FPXX4,0.001939,265.157318,0.002577


# Merge test score

In [6]:
merged_candidates_feature_test_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/XGBoost/candidates/merged_candidates_test_no_hist_feature.parquet'
test_sessions_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/raw_data/sessions_test_task1.csv'
product_data_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/raw_data/products_train.csv'

In [8]:
@lru_cache(maxsize=1)
def read_merged_candidates_feature_test():
    return pd.read_parquet(merged_candidates_feature_test_path, engine='pyarrow')

@lru_cache(maxsize=1)
def read_test_sessions():
    return pd.read_csv(test_sessions_path)

@lru_cache(maxsize=1)
def read_product_data():
    return pd.read_csv(product_data_path)

In [9]:
merged_candidates_test = read_merged_candidates_feature_test()
test_sessions = read_test_sessions()
product_data = read_product_data()

In [10]:
merged_candidates_product = merged_candidates_test[['sess_id', 'sess_locale', 'product']]
merged_candidates_product

Unnamed: 0,sess_id,sess_locale,product
0,0,DE,4088833651
1,0,DE,B000H6W2GW
2,0,DE,B000JG2RAG
3,0,DE,B000RYSOUW
4,0,DE,B000UGZVQM
...,...,...,...
69428426,316970,UK,B0BJCTH4NH
69428427,316970,UK,B0BJTQQWLG
69428428,316970,UK,B0BJV3RL4H
69428429,316970,UK,B0BK7SPC84


In [11]:
product_index = product_data[['id', 'locale']]
product_index['product_index'] = product_index.index + 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  product_index['product_index'] = product_index.index + 1


In [13]:
merged_candidates_product_g = cudf.from_pandas(merged_candidates_product)
product_index_g = cudf.from_pandas(product_index)

In [14]:
merged_candidates_product_index_g = merged_candidates_product_g.merge(product_index_g, how='left', left_on=['sess_locale', 'product'], right_on=['locale', 'id'])
merged_candidates_product_index_g = merged_candidates_product_index_g.sort_values(by=['sess_id', 'product'])
merged_candidates_product_index_g.reset_index(drop=True, inplace=True)
assert len(merged_candidates_product_index_g) == len(merged_candidates_product_g)
merged_candidates_product_index_g.drop(columns=['id', 'locale'], inplace=True)
merged_candidates_product_index_g['product_index'] = merged_candidates_product_index_g['product_index'].fillna(0)
merged_candidates_product_index = merged_candidates_product_index_g.to_pandas()

In [15]:
del merged_candidates_product_g
del product_index_g
del merged_candidates_product_index_g

In [16]:
roberta_product_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/text_method/roberta_results/results_epoch_4/item_reps/item.npy'
roberta_test_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/text_method/roberta_results/test_results_epoch_4/test_query_reps/query.npy'

In [18]:
roberta_product_embeddings = np.load(roberta_product_embeddings_path)
roberta_test_embeddings = np.load(roberta_test_embeddings_path)

In [45]:
roberta_test_embeddings.shape, roberta_product_embeddings.shape

(torch.Size([316971, 768]), torch.Size([1551058, 768]))

In [19]:
roberta_product_embeddings = torch.from_numpy(roberta_product_embeddings)
roberta_test_embeddings = torch.from_numpy(roberta_test_embeddings)
roberta_product_embeddings = torch.cat([torch.tensor([[0.0] * roberta_product_embeddings.shape[1]]), roberta_product_embeddings], dim=0)

In [20]:
roberta_product_embeddings = roberta_product_embeddings.to('cuda:0')
roberta_test_embeddings = roberta_test_embeddings.to('cuda:0')

In [21]:
merged_candidates_product_index['roberta_scores'] = get_scores(merged_candidates_product_index, 'product_index', roberta_test_embeddings, roberta_product_embeddings)

100%|██████████| 16951/16951 [01:54<00:00, 147.60it/s]


In [23]:
roberta_product_embeddings = roberta_product_embeddings.to('cpu')
roberta_test_embeddings = roberta_test_embeddings.to('cpu')

In [24]:
normalize_scores(merged_candidates_product_index, 'roberta_scores', 'roberta_normalized_scores')

In [27]:
assert len(merged_candidates_test) == len(merged_candidates_product_index)
merged_candidates_test['roberta_scores'] = merged_candidates_product_index['roberta_scores']
merged_candidates_test['roberta_normalized_scores'] = merged_candidates_product_index['roberta_normalized_scores']

In [None]:
cast_dtype(merged_candidates_test)
merged_candidates_test.to_parquet(merged_candidates_feature_test_path, engine='pyarrow')

In [35]:
product_index

Unnamed: 0,id,locale,product_index
0,B005ZSSN10,DE,1
1,B08PRYN6LD,DE,2
2,B09MBZJ48V,DE,3
3,B08ZN6F26S,DE,4
4,B094DGRV7D,DE,5
...,...,...,...
1551052,B09BW5CDRR,IT,1551053
1551053,B0050IILBM,IT,1551054
1551054,B07W4C5W9D,IT,1551055
1551055,B012D0HJXA,IT,1551056


In [38]:
product_data.iloc[194815]

id                                                 B000JG2RAG
locale                                                     DE
title       TFA Dostmann Comfort Control digitales Thermo-...
price                                                   23.19
brand                                            TFA Dostmann
color                                                 schwarz
size                         L 95 x B 25 (65) x H 95 (106) mm
model                                                 30.5011
material                                           Kunststoff
author                                                    NaN
desc        Alarm: Alarmfunktion bei Schimmelgefahr, Schim...
Name: 194815, dtype: object

In [34]:
merged_candidates_product_index

Unnamed: 0,sess_id,sess_locale,product,product_index,roberta_scores,roberta_normalized_scores,exp_score,score_sum
0,0,DE,4088833651,0,0.000000,0.0,0.0,0.068428
1,0,DE,B000H6W2GW,0,0.000000,0.0,0.0,0.068428
2,0,DE,B000JG2RAG,194816,267.192719,0.004943,0.000338,0.068428
3,0,DE,B000RYSOUW,216255,267.322815,0.005629,0.000385,0.068428
4,0,DE,B000UGZVQM,272114,267.242462,0.005195,0.000355,0.068428
...,...,...,...,...,...,...,...,...
69428426,316970,UK,B0BJCTH4NH,1168773,270.043762,0.014921,0.005853,0.39227
69428427,316970,UK,B0BJTQQWLG,1140922,269.350769,0.007462,0.002927,0.39227
69428428,316970,UK,B0BJV3RL4H,979890,269.313751,0.007191,0.002821,0.39227
69428429,316970,UK,B0BK7SPC84,1004430,270.200653,0.017456,0.006847,0.39227


In [28]:
merged_candidates_test

Unnamed: 0,sess_id,sess_locale,product,sasrec_scores_2,sasrec_normalized_scores_2,gru4rec_scores,gru4rec_normalized_scores,product_freq,sess_avg_price,product_price,gru4rec_scores_2,gru4rec_normalized_scores_2,co_graph_counts_0,normalized_co_graph_counts_0,co_graph_counts_1,normalized_co_graph_counts_1,co_graph_counts_2,normalized_co_graph_counts_2,roberta_scores,roberta_normalized_scores
0,0,DE,4088833651,0.000000,2.975813e-09,0.000000,1.580065e-09,828,25.195269,36.761604,0.000000,1.326730e-09,0,0.0,0.0,0.0,0,0.0,0.000000,0.0
1,0,DE,B000H6W2GW,0.000000,2.975813e-09,0.000000,1.580065e-09,875,25.195269,36.761604,0.000000,1.326730e-09,0,0.0,0.0,0.0,0,0.0,0.000000,0.0
2,0,DE,B000JG2RAG,7.665308,6.347557e-06,8.104032,5.226502e-06,24,25.195269,23.190001,11.372551,1.152972e-04,0,0.0,0.0,0.0,0,0.0,267.192719,0.004943
3,0,DE,B000RYSOUW,-2.951060,1.555882e-10,-2.857798,9.068785e-11,5,25.195269,6.900000,-2.205641,1.461790e-10,0,0.0,0.0,0.0,0,0.0,267.322815,0.005629
4,0,DE,B000UGZVQM,3.977920,1.589257e-07,4.688567,1.717488e-07,4,25.195269,21.990000,8.559400,6.919625e-06,0,0.0,0.0,0.0,0,0.0,267.242462,0.005195
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69428426,316970,UK,B0BJCTH4NH,11.327528,1.041200e-04,10.629994,3.818184e-04,74,16.950001,5.800000,11.301320,2.638649e-04,0,0.0,0.0,0.0,0,0.0,270.043762,0.014921
69428427,316970,UK,B0BJTQQWLG,5.604142,3.403292e-07,6.052083,3.923694e-06,6,16.950001,9.880000,8.246040,1.243056e-05,0,0.0,0.0,0.0,0,0.0,269.350769,0.007462
69428428,316970,UK,B0BJV3RL4H,9.146974,1.176336e-05,7.667603,1.973815e-05,7,16.950001,22.097065,9.860847,6.248733e-05,0,0.0,0.0,0.0,0,0.0,269.313751,0.007191
69428429,316970,UK,B0BK7SPC84,-10.383047,3.879279e-14,-6.356799,1.601719e-11,0,16.950001,5.960000,-7.227418,2.368389e-12,0,0.0,0.0,0.0,0,0.0,270.200653,0.017456


In [88]:
merged_candidates_test.query('sess_id==300110').sort_values(by=['roberta_scores'], ascending=False)[['sess_locale', 'product', 'sasrec_normalized_scores_2', 'roberta_scores', 'roberta_normalized_scores']][:30]

Unnamed: 0,sess_locale,product,sasrec_normalized_scores_2,roberta_scores,roberta_normalized_scores
65700003,UK,B076HQTX5S,0.02021244,266.844208,0.138655
65700012,UK,B076HS7LDQ,0.02786299,266.776978,0.12964
65700010,UK,B076HRHP8R,2.011223e-07,266.75351,0.126633
65700005,UK,B076HQXB8X,0.925384,266.709839,0.121221
65700006,UK,B076HQZQ8L,0.006359186,266.664795,0.115882
65699999,UK,B076HPV3FC,0.01194097,266.515411,0.099802
65699998,UK,B076HMSJ74,0.003603664,266.383545,0.087473
65700007,UK,B076HR26F7,2.702519e-11,264.602417,0.014735
65699968,UK,B001O506EK,4.276145e-11,264.305908,0.010954
65699970,UK,B001O5259Y,1.448184e-12,263.800964,0.006611


In [87]:
merged_candidates_test.query('sess_id==300110').sort_values(by=['sasrec_normalized_scores_2'], ascending=False)[['sess_locale', 'product', 'sasrec_normalized_scores_2', 'roberta_scores', 'roberta_normalized_scores']][:30]

Unnamed: 0,sess_locale,product,sasrec_normalized_scores_2,roberta_scores,roberta_normalized_scores
65700005,UK,B076HQXB8X,0.925384,266.709839,0.121221
65700012,UK,B076HS7LDQ,0.027863,266.776978,0.12964
65700003,UK,B076HQTX5S,0.020212,266.844208,0.138655
65699999,UK,B076HPV3FC,0.011941,266.515411,0.099802
65700006,UK,B076HQZQ8L,0.006359,266.664795,0.115882
65700008,UK,B076HR2HGY,0.004024,262.891296,0.002662
65699998,UK,B076HMSJ74,0.003604,266.383545,0.087473
65700155,UK,B09HXR1Q8K,0.000172,255.445511,2e-06
65700111,UK,B08SR9XG7Y,7e-05,261.819061,0.000911
65700089,UK,B08F2NVLDN,4.1e-05,258.083649,2.2e-05


In [85]:
product_data.query('id=="B076HQTX5S"')

Unnamed: 0,id,locale,title,price,brand,color,size,model,material,author,desc
1040541,B076HQTX5S,UK,"adidas Unisex Kids Core 18 Sweat Trousers, Dar...",22.96,adidas,Dark Grey Heather/Black,Size 152,CV3957,100% Polyester,,Climate technology wicks sweat away


In [86]:
product_data.query('id=="B076HQXB8X"')

Unnamed: 0,id,locale,title,price,brand,color,size,model,material,author,desc
1027505,B076HQXB8X,UK,"Adidas Kid's Core 18 Sweat Trousers, Dark Grey...",21.42,adidas,Dark Grey Heather/Black,9-10 Years,CV3957,100% Polyester,,Climate technology wicks sweat away


In [80]:
product_data.query('id=="B096MK5BBH"')

Unnamed: 0,id,locale,title,price,brand,color,size,model,material,author,desc
856590,B096MK5BBH,JP,[ザノースフェイス] キッズリュック K HOMESLICE ホームスライス NMJ7220...,6490.0,THE NORTH FACE(ザノースフェイス),ブラック,Free Size,NMJ72205,210Dリサイクルマイルドナイロン(K、GB、BF、MP、MU)、450Dリサイクルポリエス...,,ショルダーハーネスにフックつきDリング/サイドメッシュポケット


In [79]:
test_sessions.iloc[200110]['prev_items']

"['B096MJCZ65' 'B096MK5BBH']"