# Necessary Common Functions

Those functions should be ran before each part.

In [1]:
import os
import random
import numpy as np
import pandas as pd
import cudf, itertools
import scipy.sparse as ssp
from functools import lru_cache
from tqdm import tqdm, trange
from collections import Counter, defaultdict

In [2]:
def get_sessions(df: pd.DataFrame, test=False, list_item=False) -> list:
    
    all_item = []
    if 'next_item' in df and not test:
        if list_item:
            for i in trange(len(df)):
                all_item.append(np.concatenate([np.array(df.loc[i, 'prev_items']), np.array(df.loc[i, 'next_item'])], axis=0))
        else:
            for i in trange(len(df)):
                all_item.append(eval((df.loc[i, 'prev_items'][:-1]+f" '{df.loc[i, 'next_item']}']").replace(" ", ",")))
    else:
        if list_item:
            all_item = df['prev_items']
        else:
            for i in trange(len(df)):
                all_item.append(eval((df.loc[i, 'prev_items']).replace(" ", ",")))
    return all_item

In [3]:
def get_co_occurence_dict(sessions: list, bidirection: bool=True, weighted: bool=False, max_dis=None) -> dict:
    res = {}
    for sess in tqdm(sessions):
        for i, id in enumerate(sess):
            if id not in res:
                res[id] = Counter()
            
            if max_dis == None:
                e = len(sess)
            else:
                e = min(i + max_dis + 1, len(sess))

            for j in range(i+1, e):
                if not weighted:
                    res[id][sess[j]] += 1
                else:
                    res[id][sess[j]] += 1 / (j-i)
                if bidirection:
                    if sess[j] not in res:
                        res[sess[j]] = Counter()
                    if not weighted:
                        res[sess[j]][id] += 1
                    else:
                        res[sess[j]][id] += 1 / (j-i)
    return res

In [4]:
def sort_co_occurence_dict(co_occurence_dict: dict) -> dict:
    res = {}
    for k,v in co_occurence_dict.items():
        res[k] = dict(sorted(v.items(), key=lambda item: -item[1]))
    return res

In [5]:
def cast_dtype(df : pd.DataFrame, columns=None):
    if columns is None:
        columns = df.columns
    for k in columns:
        dt = type(df[k].iloc[0])
        if 'float' in str(dt):
            df[k] = df[k].astype('float32')
        elif 'int' in str(dt):
            df[k] = df[k].astype('int32')
        elif dt == list:
            dt_ = type(df.iloc[0][k][0])
            if 'float' in str(dt_):
                df[k] = df[k].apply(lambda x : np.array(x, dtype=np.float32))
            elif 'int' in str(dt_):
                df[k] = df[k].apply(lambda x : np.array(x, dtype=np.int32))

In [6]:
def get_session_last_item(session_df):
    last_items = []
    num_sessions = len(session_df)
    for i in tqdm(range(num_sessions)):
        sess = session_df.iloc[i]
        sess_prev_items = sess['prev_items']
        
        product_list = sess_prev_items.strip('[]').split(' ')
        last_item = product_list[-1].strip("'\n")

        last_items.append(last_item)
    return last_items 

In [7]:
def get_co_graph_counts(session_last_items, merged_candidates_df, co_graph_dict):
    co_graph_count_list = []
    for idx, row in tqdm(merged_candidates_df.iterrows(), total=merged_candidates_df.shape[0]):
        sess_id = row['sess_id']
        product = row['product']
        last_item = session_last_items[sess_id]
        co_graph_count = co_graph_dict[last_item][product]
        co_graph_count_list.append(co_graph_count)
    return co_graph_count_list

In [8]:
def flatten_co_graph_dict(co_graph_dict):
    product_list = []
    neighbor_list = []
    counts_list = []
    for product in tqdm(co_graph_dict.keys(), total=len(co_graph_dict)):
        for neigh in co_graph_dict[product].keys():
            product_list.append(product)
            neighbor_list.append(neigh)
            counts_list.append(co_graph_dict[product][neigh])
    return pd.DataFrame({'product_' : product_list, 'neighbor' : neighbor_list, 'counts' : counts_list})

In [9]:
def normalize_co_graph_counts(merged_candidates_counts):
    # normalize co graph counts 
    # merged_candidates_counts_g = cudf.from_pandas(merged_candidates_counts)
    sessions_count_sum = merged_candidates_counts[['sess_id', 'counts']].groupby('sess_id').sum()
    sessions_count_sum.sort_index(inplace=True)

    # sessions_count_sum = sessions_count_sum.to_pandas()

    candidates_count_sum = sessions_count_sum.loc[merged_candidates_counts['sess_id']].reset_index(drop=True)
    merged_candidates_counts['counts_sum'] = candidates_count_sum['counts']
    merged_candidates_counts['normalized_counts'] = merged_candidates_counts['counts'] / merged_candidates_counts['counts_sum']

    # del merged_candidates_counts_g
    # del sessions_count_sum_g
    

# Merge Test co-graph counts 

In [10]:
merged_candidates_feature_test_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/XGBoost/candidates_phase2/merged_candidates_150_test_feature.parquet'
train_sessions_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/data_for_recstudio/task1_data/task13_4_task1_train_sessions_phase2.csv'
valid_sessions_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/data_for_recstudio/task1_data/task13_4_task1_valid_sessions_phase2.csv'
test_sessions_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/raw_data/sessions_test_task1_phase2.csv'
product_data_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/raw_data/products_train.csv'

In [11]:
@lru_cache(maxsize=1)
def read_merged_candidates_feature_test():
    return pd.read_parquet(merged_candidates_feature_test_path)

@lru_cache(maxsize=1)
def read_product_data():
    return pd.read_csv(product_data_path)

@lru_cache(maxsize=1)
def read_train_data():
    return pd.read_csv(train_sessions_path)

@lru_cache(maxsize=1)
def read_valid_data():
    return pd.read_csv(valid_sessions_path)

@lru_cache(maxsize=1)
def read_test_data():
    return pd.read_csv(test_sessions_path)

In [12]:
merged_candidates_feature_test = read_merged_candidates_feature_test()

In [13]:
merged_candidates = merged_candidates_feature_test[['sess_id', 'sess_locale', 'product']]

In [14]:
train_sess_data = read_train_data()
valid_sess_data = read_valid_data()
test_sess_data = read_test_data()
product = read_product_data()

In [15]:
train_sess_item = get_sessions(train_sess_data, list_item=False)
valid_sess_item = get_sessions(valid_sess_data, test=False, list_item=False)
test_sess_item = get_sessions(test_sess_data, test=True, list_item=False)

100%|██████████| 3010900/3010900 [02:00<00:00, 24921.84it/s]
100%|██████████| 261816/261816 [00:11<00:00, 22977.95it/s]
100%|██████████| 316972/316972 [00:07<00:00, 40479.18it/s]


In [16]:
test_session_last_items = get_session_last_item(test_sess_data)

100%|██████████| 316972/316972 [00:13<00:00, 23679.16it/s]


In [17]:
test_session_last_items = np.array(test_session_last_items)
merged_candidates['last_item'] = test_session_last_items[merged_candidates['sess_id']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_candidates['last_item'] = test_session_last_items[merged_candidates['sess_id']]


## bidirection

In [18]:
# valid and test are included in train 
co_occurence_dict_bi = get_co_occurence_dict(train_sess_item, bidirection=True, weighted=False)

100%|██████████| 3010900/3010900 [01:17<00:00, 38994.82it/s]


In [19]:
merged_candidates_feature_test = read_merged_candidates_feature_test()
merged_candidates = merged_candidates_feature_test[['sess_id', 'sess_locale', 'product']]
test_session_last_items = get_session_last_item(test_sess_data)
test_session_last_items = np.array(test_session_last_items)
merged_candidates['last_item'] = test_session_last_items[merged_candidates['sess_id']]

100%|██████████| 316972/316972 [00:13<00:00, 22814.95it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_candidates['last_item'] = test_session_last_items[merged_candidates['sess_id']]


In [20]:
co_occurence_dict_bi_df = flatten_co_graph_dict(co_occurence_dict_bi)

100%|██████████| 1314531/1314531 [00:21<00:00, 59930.11it/s] 


In [None]:
# merged_candidates_g = cudf.from_pandas(merged_candidates)
# co_occurence_dict_bi_df_g = cudf.from_pandas(co_occurence_dict_bi_df)

In [None]:
# merged_candidates_bi_g = merged_candidates_g.merge(co_occurence_dict_bi_df_g, how='left', left_on=['last_item', 'product'], right_on=['product_', 'neighbor'])
# merged_candidates_bi_g.drop(columns=['product_', 'neighbor'], inplace=True)
# merged_candidates_bi_g = merged_candidates_bi_g.sort_values(by=['sess_id', 'product'])
# merged_candidates_bi_g.reset_index(drop=True, inplace=True)
# merged_candidates_bi_g['counts'] = merged_candidates_bi_g['counts'].fillna(0)
# assert len(merged_candidates_bi_g['counts']) == len(merged_candidates)
# merged_candidates_bi = merged_candidates_bi_g.to_pandas()

In [21]:
merged_candidates_bi = merged_candidates.merge(co_occurence_dict_bi_df, how='left', left_on=['last_item', 'product'], right_on=['product_', 'neighbor'])
merged_candidates_bi.drop(columns=['product_', 'neighbor'], inplace=True)
merged_candidates_bi = merged_candidates_bi.sort_values(by=['sess_id', 'product'])
merged_candidates_bi.reset_index(drop=True, inplace=True)
merged_candidates_bi['counts'] = merged_candidates_bi['counts'].fillna(0)
assert len(merged_candidates_bi['counts']) == len(merged_candidates)
# merged_candidates_bi = merged_candidates_bi_g.to_pandas()

In [22]:
# del merged_candidates_g
# del co_occurence_dict_bi_df_g
# del merged_candidates_bi_g

In [23]:
normalize_co_graph_counts(merged_candidates_bi)

In [30]:
merged_candidates_feature_test[['sess_id', 'co_graph_counts_0', 'normalized_co_graph_counts_0']].query('sess_id==100')

Unnamed: 0,sess_id,co_graph_counts_0,normalized_co_graph_counts_0
29483,100,0.0,0.000000
29484,100,0.0,0.000000
29485,100,0.0,0.000000
29486,100,0.0,0.000000
29487,100,1.0,0.002165
...,...,...,...
29848,100,12.0,0.025974
29849,100,0.0,0.000000
29850,100,4.0,0.008658
29851,100,0.0,0.000000


In [29]:
merged_candidates_bi[['sess_id', 'counts', 'normalized_counts']].query('sess_id==100')

Unnamed: 0,sess_id,counts,normalized_counts
29483,100,0.0,0.000000
29484,100,0.0,0.000000
29485,100,0.0,0.000000
29486,100,0.0,0.000000
29487,100,1.0,0.002165
...,...,...,...
29848,100,12.0,0.025974
29849,100,0.0,0.000000
29850,100,4.0,0.008658
29851,100,0.0,0.000000


In [31]:
merged_candidates_feature_test['co_graph_counts_0'] = merged_candidates_bi['counts']
merged_candidates_feature_test['normalized_co_graph_counts_0'] = merged_candidates_bi['normalized_counts']

In [34]:
merged_candidates_feature_test.query('sess_id==25000').sort_values(by=['sasrec_scores_2'], ascending=False)[['product', 'normalized_sasrec_scores_2', 'sasrec_scores_2', 'co_graph_counts_0', 'normalized_co_graph_counts_0']][:25]

Unnamed: 0,product,normalized_sasrec_scores_2,sasrec_scores_2,co_graph_counts_0,normalized_co_graph_counts_0
7498610,B09LVFM791,0.724845,19.189095,15.0,0.180723
7498591,B09D3S1F39,0.038137,16.244316,2.0,0.024096
7498507,B08D66NC11,0.017202,15.448151,1.0,0.012048
7498612,B09M9VRL7M,0.011368,15.033895,0.0,0.0
7498537,B08XTV2JMX,0.010418,14.946674,1.0,0.012048
7498598,B09J8D3WZV,0.009074,14.808554,3.0,0.036145
7498627,B09NX48NLJ,0.008147,14.700846,1.0,0.012048
7498520,B08Q7VX9FY,0.008094,14.694235,5.0,0.060241
7498552,B0912XZCV1,0.006531,14.479713,2.0,0.024096
7498663,B09YH4Q7D6,0.006311,14.445447,1.0,0.012048


In [35]:
cast_dtype(merged_candidates_feature_test, ['co_graph_counts_0', 'normalized_co_graph_counts_0'])
merged_candidates_feature_test.to_parquet(merged_candidates_feature_test_path, engine='pyarrow')

## uni and weight

In [36]:
co_occurence_dict_uni_weight = get_co_occurence_dict(train_sess_item, bidirection=False, weighted=True)

100%|██████████| 3010900/3010900 [00:57<00:00, 51974.07it/s]


In [37]:
co_graph_uni_weight_df = flatten_co_graph_dict(co_occurence_dict_uni_weight)

100%|██████████| 1314531/1314531 [00:15<00:00, 83094.80it/s] 


In [22]:
# merged_candidates_g = cudf.from_pandas(merged_candidates)
# co_graph_uni_weight_df_g = cudf.from_pandas(co_graph_uni_weight_df)

In [38]:
merged_candidates_uni_weight = merged_candidates.merge(co_graph_uni_weight_df, how='left', left_on=['last_item', 'product'], right_on=['product_', 'neighbor'])
merged_candidates_uni_weight.drop(columns=['product_', 'neighbor'], inplace=True)
merged_candidates_uni_weight = merged_candidates_uni_weight.sort_values(by=['sess_id', 'product'])
merged_candidates_uni_weight.reset_index(drop=True, inplace=True)
merged_candidates_uni_weight['counts'] = merged_candidates_uni_weight['counts'].fillna(0)
assert len(merged_candidates_uni_weight['counts']) == len(merged_candidates)
# merged_candidates_uni_weight = merged_candidates_uni_weight_g.to_pandas()

In [25]:
# del merged_candidates_g
# del co_graph_uni_weight_df_g
# del merged_candidates_uni_weight_g

In [39]:
normalize_co_graph_counts(merged_candidates_uni_weight)

In [43]:
merged_candidates_uni_weight[['sess_id', 'counts', 'normalized_counts']].query('sess_id==150')

Unnamed: 0,sess_id,counts,normalized_counts
44505,150,0.0,
44506,150,0.0,
44507,150,0.0,
44508,150,0.0,
44509,150,0.0,
...,...,...,...
44730,150,0.0,
44731,150,0.0,
44732,150,0.0,
44733,150,0.0,


In [104]:
merged_candidates_feature_test['co_graph_counts_1'] = merged_candidates_uni_weight['counts']
merged_candidates_feature_test['normalized_co_graph_counts_1'] = merged_candidates_uni_weight['normalized_counts']

In [105]:
cast_dtype(merged_candidates_feature_test)
merged_candidates_feature_test.to_parquet(merged_candidates_feature_test_path, engine='pyarrow')

In [106]:
merged_candidates_uni_weight

Unnamed: 0,sess_id,sess_locale,product,last_item,counts,counts_sum,normalized_counts
0,0,DE,B000Q87D0Q,B099KW4ZLV,0.000000,470.232339,0.000000
1,0,DE,B000QB30DW,B099KW4ZLV,0.600000,470.232339,0.001276
2,0,DE,B004BIG55Q,B099KW4ZLV,0.311111,470.232339,0.000662
3,0,DE,B0053FTNQY,B099KW4ZLV,0.090909,470.232339,0.000193
4,0,DE,B007QWII1S,B099KW4ZLV,0.000000,470.232339,0.000000
...,...,...,...,...,...,...,...
96556030,316971,UK,B0B82N3CQQ,B07H42L4TJ,0.000000,13.266241,0.000000
96556031,316971,UK,B0BB9NW3F3,B07H42L4TJ,0.000000,13.266241,0.000000
96556032,316971,UK,B0BDMVKTQ3,B07H42L4TJ,0.000000,13.266241,0.000000
96556033,316971,UK,B0BHW1D5VP,B07H42L4TJ,0.000000,13.266241,0.000000


In [None]:
merged_candidates_uni_weight.query("sess_id==0")['counts'].sum()

457

In [None]:
merged_candidates_uni_weight.query("sess_id==0")['normalized_counts'].max()

0.08971553610503283

In [23]:
co_graph_uni_weight_df

Unnamed: 0,product_,neighbor,counts
0,B005ZJTUXE,B005ZJTUXE,7
1,B005ZJTUXE,B00P8VIBBG,11
2,B005ZJTUXE,B07TVSL9TW,7
3,B005ZJTUXE,B014J7P4KU,4
4,B005ZJTUXE,B07HJCRPTB,9
...,...,...,...
29994681,B09KXXFJS7,B09KXXGQX8,1
29994682,B09KXVZ7YQ,B09KXVDK5F,1
29994683,B09KXVZ7YQ,B09KXXGQX8,1
29994684,B09KXVDK5F,B09KXXGQX8,1


In [107]:
merged_candidates_feature_test.query('sess_id==20000').sort_values(by=['sasrec_scores_2'], ascending=False)[['product', 'sasrec_scores_2', 'co_graph_counts_1', 'normalized_co_graph_counts_1']][:25]

Unnamed: 0,product,sasrec_scores_2,co_graph_counts_1,normalized_co_graph_counts_1
5994892,B0B3MC3GR4,30.890354,13.0,0.438233
5994829,B09LTHD9M4,25.270374,2.0,0.067421
5994831,B09LTJZV2T,25.042706,3.833333,0.129223
5994913,B0BGM3BJHK,23.67469,2.2,0.074163
5994830,B09LTHZTRP,21.975412,0.333333,0.011237
5994732,B088QR2RSF,17.871006,0.192857,0.006501
5994891,B0B3LPHGCY,17.493834,0.2,0.006742
5994870,B09WDV1T8Q,17.446304,0.0,0.0
5994883,B0B1PZZ9H8,17.438635,0.25,0.008428
5994634,B01N3QR25Q,17.343325,0.0,0.0


## uni and dis=1

In [108]:
co_occurence_dict_uni_dis1 = get_co_occurence_dict(train_sess_item, bidirection=False, weighted=False, max_dis=1)

100%|██████████| 3010900/3010900 [00:34<00:00, 88376.83it/s] 


In [109]:
merged_candidates_feature_test = read_merged_candidates_feature_test()
merged_candidates = merged_candidates_feature_test[['sess_id', 'sess_locale', 'product']]
test_session_last_items = get_session_last_item(test_sess_data)
test_session_last_items = np.array(test_session_last_items)
merged_candidates['last_item'] = test_session_last_items[merged_candidates['sess_id']]

100%|██████████| 316972/316972 [00:13<00:00, 23030.15it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_candidates['last_item'] = test_session_last_items[merged_candidates['sess_id']]


In [110]:
merged_candidates

Unnamed: 0,sess_id,sess_locale,product,last_item
0,0,DE,B000Q87D0Q,B099KW4ZLV
1,0,DE,B000QB30DW,B099KW4ZLV
2,0,DE,B004BIG55Q,B099KW4ZLV
3,0,DE,B0053FTNQY,B099KW4ZLV
4,0,DE,B007QWII1S,B099KW4ZLV
...,...,...,...,...
96556030,316971,UK,B0B82N3CQQ,B07H42L4TJ
96556031,316971,UK,B0BB9NW3F3,B07H42L4TJ
96556032,316971,UK,B0BDMVKTQ3,B07H42L4TJ
96556033,316971,UK,B0BHW1D5VP,B07H42L4TJ


In [111]:
co_occurence_dict_uni_dis1_df = flatten_co_graph_dict(co_occurence_dict_uni_dis1)

100%|██████████| 1314531/1314531 [00:05<00:00, 262031.09it/s]


In [112]:
# merged_candidates_g = cudf.from_pandas(merged_candidates)
# co_occurence_dict_uni_dis1_df_g = cudf.from_pandas(co_occurence_dict_uni_dis1_df)

In [113]:
# merged_candidates_uni_dis1_g = merged_candidates_g.merge(co_occurence_dict_uni_dis1_df_g, how='left', left_on=['last_item', 'product'], right_on=['product_', 'neighbor'])
# merged_candidates_uni_dis1_g.drop(columns=['product_', 'neighbor'], inplace=True)
# merged_candidates_uni_dis1_g = merged_candidates_uni_dis1_g.sort_values(by=['sess_id', 'product'])
# merged_candidates_uni_dis1_g.reset_index(drop=True, inplace=True)
# merged_candidates_uni_dis1_g['counts'] = merged_candidates_uni_dis1_g['counts'].fillna(0)
# assert len(merged_candidates_uni_dis1_g['counts']) == len(merged_candidates)
# merged_candidates_uni_dis1 = merged_candidates_uni_dis1_g.to_pandas()

In [114]:
merged_candidates_uni_dis1 = merged_candidates.merge(co_occurence_dict_uni_dis1_df, how='left', left_on=['last_item', 'product'], right_on=['product_', 'neighbor'])
merged_candidates_uni_dis1.drop(columns=['product_', 'neighbor'], inplace=True)
merged_candidates_uni_dis1 = merged_candidates_uni_dis1.sort_values(by=['sess_id', 'product'])
merged_candidates_uni_dis1.reset_index(drop=True, inplace=True)
merged_candidates_uni_dis1['counts'] = merged_candidates_uni_dis1['counts'].fillna(0)
assert len(merged_candidates_uni_dis1['counts']) == len(merged_candidates)
# merged_candidates_uni_dis1 = merged_candidates_uni_dis1_g.to_pandas()

In [115]:
# del merged_candidates_g
# del co_occurence_dict_uni_dis1_df_g
# del merged_candidates_uni_dis1_g

In [116]:
normalize_co_graph_counts(merged_candidates_uni_dis1)

In [117]:
merged_candidates_feature_test['co_graph_counts_2'] = merged_candidates_uni_dis1['counts']
merged_candidates_feature_test['normalized_co_graph_counts_2'] = merged_candidates_uni_dis1['normalized_counts']

In [121]:
cast_dtype(merged_candidates_feature_test)
merged_candidates_feature_test.to_parquet(merged_candidates_feature_test_path, engine='pyarrow')

In [48]:
merged_candidates_uni_dis1.query("sess_id==99")

Unnamed: 0,sess_id,sess_locale,product,last_item,counts,counts_cum,normalized_counts
23692,99,UK,B0045XDSZM,B0BC6DL1FW,0.0,13.666667,0.0
23693,99,UK,B004605SE8,B0BC6DL1FW,0.0,13.666667,0.0
23694,99,UK,B0053Y8M1W,B0BC6DL1FW,0.0,13.666667,0.0
23695,99,UK,B007PS6O28,B0BC6DL1FW,0.0,13.666667,0.0
23696,99,UK,B0081Q3YN0,B0BC6DL1FW,0.0,13.666667,0.0
...,...,...,...,...,...,...,...
23906,99,UK,B0BF15FRZT,B0BC6DL1FW,0.0,13.666667,0.0
23907,99,UK,B0BF15JRSV,B0BC6DL1FW,0.0,13.666667,0.0
23908,99,UK,B0BF62M27B,B0BC6DL1FW,0.0,13.666667,0.0
23909,99,UK,B0BF75P7MZ,B0BC6DL1FW,0.0,13.666667,0.0


In [49]:
merged_candidates_uni_dis1.query("sess_id==99")['counts'].sum()

13.666666666666666

In [52]:
merged_candidates_uni_dis1.query("sess_id==99")['counts'].max(), merged_candidates_uni_dis1.query("sess_id==99")['normalized_counts'].max()

(1.8333333333333333, 0.13414634146341464)

In [None]:
merged_candidates_feature_test.query('sess_id==20000').sort_values(by=['sasrec_scores_2'], ascending=False)[['product', 'sasrec_scores_2', 'co_graph_counts_2', 'normalized_co_graph_counts_2']][:25]