use loc[i, field] instead of iloc[i, field], iloc is very slow.

In [1]:
import sys
sys.path = ['/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/RecStudio/'] + sys.path
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
import random
import numpy as np
import pandas as pd
import cudf, itertools
import scipy.sparse as ssp
from functools import lru_cache, partial
from tqdm import tqdm, trange
from collections import Counter, defaultdict
import torch
import pickle


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
merged_candidates_feature_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/XGBoost/candidates/merged_candidates_no_hist_feature.parquet'
valid_sessions_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/data_for_recstudio/task1_data/task13_4_task1_valid_sessions.csv'
test_sessions_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/raw_data/sessions_test_task1.csv'

In [3]:
@lru_cache(maxsize=1)
def read_merged_candidates_feature():
    return pd.read_parquet(merged_candidates_feature_path, engine='pyarrow')

@lru_cache(maxsize=1)
def read_valid_sessions():
    return pd.read_csv(valid_sessions_path)

@lru_cache(maxsize=1)
def read_test_sessions():
    return pd.read_csv(test_sessions_path)

In [4]:
def cast_dtype(df : pd.DataFrame, columns=None):
    if columns is None:
        columns = df.columns
    for k in columns:
        dt = type(df[k].iloc[0])
        if 'float' in str(dt):
            df[k] = df[k].astype('float32')
        elif 'int' in str(dt):
            df[k] = df[k].astype('int32')
        elif dt == list:
            dt_ = type(df.iloc[0][k][0])
            if 'float' in str(dt_):
                df[k] = df[k].apply(lambda x : np.array(x, dtype=np.float32))
            elif 'int' in str(dt_):
                df[k] = df[k].apply(lambda x : np.array(x, dtype=np.int32))

In [5]:
def _load_cache(path):
    with open(path, 'rb') as f:
        download_obj = pickle.load(f)
    return download_obj

In [6]:
def get_scores(merged_candidates_df, query_embeddings, product_embeddings):
    batch_size = 2048
    num_iter = (len(merged_candidates_df) - 1) // batch_size + 1
    score_list = []
    with torch.no_grad():
        for i in tqdm(range(num_iter)):
            st, ed = i * batch_size, (i + 1) * batch_size 
            batch_sess = merged_candidates_df.iloc[st : ed]
            batch_sess_id = torch.tensor(batch_sess['sess_id'].tolist(), device=query_embeddings.device)
            batch_product_id = torch.tensor(batch_sess['dataset_id'].tolist(), device=product_embeddings.device)
            query_emb = query_embeddings[batch_sess_id]
            product_emb = product_embeddings[batch_product_id]
            batch_score = (query_emb * product_emb).sum(dim=-1) 
            score_list.append(batch_score.cpu())
        score_list = torch.cat(score_list, dim=0).cpu().tolist()
        return score_list 

In [7]:
def normalize_scores(score_df, score_name, normalized_score_name):
    score_df_g = cudf.from_pandas(score_df)
    score_df_g['exp_score'] = np.exp(score_df_g[score_name].to_numpy())
    scores_sum_g = score_df_g[['sess_id', 'exp_score']].groupby('sess_id').sum()
    scores_sum_g.reset_index(inplace=True)
    scores_sum_g = scores_sum_g.sort_values(by=['sess_id'], ascending=True)
    scores_sum_g.reset_index(drop=True, inplace=True)
    scores_sum_g.rename(columns={'exp_score' : 'score_sum'}, inplace=True)

    merged_score_df_g = score_df_g.merge(scores_sum_g, how='left', left_on=['sess_id'], right_on=['sess_id'])
    merged_score_df_g = merged_score_df_g.sort_values(by=['sess_id', 'product'])
    merged_score_df_g.reset_index(drop=True, inplace=True)
    
    merged_score_df = merged_score_df_g.to_pandas(merged_score_df_g)
    score_df[normalized_score_name] = merged_score_df['exp_score'] / merged_score_df['score_sum']
    score_df['exp_score'] = merged_score_df['exp_score']
    score_df['score_sum'] = merged_score_df['score_sum']

    del scores_sum_g
    del merged_score_df_g 

In [8]:
FIELD_NAME = 'sasrec_feat_scores'

# Merge valid score

In [9]:
DE_product_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/SASRec_Next_Feat/kdd_cup_2023_DE/product_embeddings_2023-05-22-14-49-09.pt'
DE_valid_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/SASRec_Next_Feat/kdd_cup_2023_DE/valid_embeddings_2023-05-22-14-50-10.pt'
JP_product_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/SASRec_Next_Feat/kdd_cup_2023_JP/product_embeddings_2023-05-22-14-56-18.pt'
JP_valid_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/SASRec_Next_Feat/kdd_cup_2023_JP/valid_embeddings_2023-05-22-14-56-53.pt'
UK_product_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/SASRec_Next_Feat/kdd_cup_2023_UK/product_embeddings_2023-05-22-15-00-18.pt'
UK_valid_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/SASRec_Next_Feat/kdd_cup_2023_UK/valid_embeddings_2023-05-22-15-01-04.pt'

In [10]:
DE_dataset_cache = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/.recstudio/cache/c76eddf0a07106ffcce7ce8010856a3b'
JP_dataset_cache = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/.recstudio/cache/81a71d0a18766af84b3beab69bf53e69'
UK_dataset_cache = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/.recstudio/cache/250dbc09c30162452e00486051e47756'

In [11]:
DE_train_dataset, DE_valid_dataset = _load_cache(DE_dataset_cache)
JP_train_dataset, JP_valid_dataset = _load_cache(JP_dataset_cache)
UK_train_dataset, UK_valid_dataset = _load_cache(UK_dataset_cache)
locale_map = {
    'DE' : DE_train_dataset.field2token2idx['product_id'], 
    'JP' : JP_train_dataset.field2token2idx['product_id'], 
    'UK' : UK_train_dataset.field2token2idx['product_id']
    }

In [12]:
merged_candidates = read_merged_candidates_feature()
valid_sessions = read_valid_sessions()
EMBED_DIM = 128
merged_candidates.sort_values(by=['sess_id', 'product'], inplace=True)
merged_candidates.reset_index(drop=True, inplace=True)

In [13]:
# sess embeddings 
valid_DE_query_emb = torch.load(DE_valid_embeddings_path, map_location='cpu')
valid_JP_query_emb = torch.load(JP_valid_embeddings_path, map_location='cpu')
valid_UK_query_emb = torch.load(UK_valid_embeddings_path, map_location='cpu')
valid_query_embeddings = torch.empty(len(valid_sessions), EMBED_DIM)
valid_query_embeddings[(valid_sessions[valid_sessions['locale'] == 'DE'].index).tolist()] = valid_DE_query_emb
valid_query_embeddings[(valid_sessions[valid_sessions['locale'] == 'JP'].index).tolist()] = valid_JP_query_emb
valid_query_embeddings[(valid_sessions[valid_sessions['locale'] == 'UK'].index).tolist()] = valid_UK_query_emb

In [14]:
# product_embeddings 
DE_product_emb = torch.load(DE_product_embeddings_path, map_location='cpu')
JP_product_emb = torch.load(JP_product_embeddings_path, map_location='cpu')
UK_product_emb = torch.load(UK_product_embeddings_path, map_location='cpu')
product_embeddings = torch.cat([DE_product_emb, JP_product_emb, UK_product_emb], dim=0)

In [15]:
merged_candidates_ = merged_candidates[['sess_id', 'sess_locale', 'product']].copy()

In [16]:
DE_product_list, DE_id_list = list(zip(*locale_map['DE'].items()))
JP_product_list, JP_id_list = list(zip(*locale_map['JP'].items()))
UK_product_list, UK_id_list = list(zip(*locale_map['UK'].items()))
product_list = list(DE_product_list) + list(JP_product_list) + list(UK_product_list)
id_list = list(DE_id_list) + list(JP_id_list) + list(UK_id_list)
locale_list = ['DE'] * len(DE_id_list) + ['JP'] * len(JP_id_list) + ['UK'] * len(UK_id_list)
product_id_df = pd.DataFrame({'locale' : locale_list, 'product' : product_list, 'dataset_id' : id_list})

In [17]:
merged_candidates_g = cudf.from_pandas(merged_candidates_)
product_id_df_g = cudf.from_pandas(product_id_df)

In [18]:
merged_candidates_score_g = merged_candidates_g.merge(product_id_df_g, how='left', left_on=['sess_locale', 'product'], right_on=['locale', 'product'])
merged_candidates_score_g['dataset_id'] = merged_candidates_score_g['dataset_id'].fillna(0)
merged_candidates_score_g.drop(columns=['locale'], inplace=True)
merged_candidates_score_g = merged_candidates_score_g.sort_values(by=['sess_id', 'product'])
merged_candidates_score_g.reset_index(drop=True, inplace=True)
merged_candidates_score = merged_candidates_score_g.to_pandas()

MemoryError: std::bad_alloc: out_of_memory: CUDA error at: /project/include/rmm/mr/device/cuda_memory_resource.hpp:70: cudaErrorMemoryAllocation out of memory

In [None]:
del merged_candidates_g
del product_id_df_g
del merged_candidates_score_g

In [None]:
locale_offset = {'DE' : 0, 'JP' : len(DE_product_list), 'UK' : len(DE_product_list) + len(JP_product_list)}
for locale in ['DE', 'JP', 'UK']:
    merged_candidates_score['dataset_id'][merged_candidates_score['sess_locale'] == locale] = \
        merged_candidates_score['dataset_id'][merged_candidates_score['sess_locale'] == locale] + locale_offset[locale]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_candidates_score['dataset_id'][merged_candidates_score['sess_locale'] == locale] = \


In [None]:
valid_query_embeddings = valid_query_embeddings.to('cuda:0')
product_embeddings = product_embeddings.to('cuda:0')

In [None]:
merged_candidates_score[FIELD_NAME] = get_scores(merged_candidates_score, valid_query_embeddings, product_embeddings)

100%|██████████| 41215/41215 [00:57<00:00, 716.19it/s] 


In [None]:
normalize_scores(merged_candidates_score, FIELD_NAME, 'normalized_'+FIELD_NAME)

In [None]:
merged_candidates[FIELD_NAME] = merged_candidates_score[FIELD_NAME]
merged_candidates['normalized_'+FIELD_NAME] = merged_candidates_score['normalized_'+FIELD_NAME]

In [27]:
cast_dtype(merged_candidates)
merged_candidates.to_parquet(merged_candidates_feature_path, engine='pyarrow')

In [33]:
merged_candidates

Unnamed: 0,sess_id,sess_locale,product,target,sasrec_scores_2,sasrec_normalized_scores_2,product_freq,gru4rec_scores,gru4rec_normalized_scores,sess_avg_price,product_price,gru4rec_scores_2,gru4rec_normalized_scores_2
0,0,UK,B000OPPVCS,0.0,11.972421,2.286162e-04,104,12.291418,5.528012e-05,7.388571,7.280000,12.291418,5.528012e-05
1,0,UK,B000V599Y2,0.0,13.152878,7.443427e-04,37,12.142086,4.761183e-05,7.388571,5.200000,12.142086,4.761183e-05
2,0,UK,B0018HH444,0.0,5.606023,3.928400e-07,7,8.919555,1.897524e-06,7.388571,15.800000,8.919555,1.897524e-06
3,0,UK,B0079JI4DU,0.0,0.000000,1.443945e-09,67,0.000000,2.537897e-10,7.388571,22.097065,0.000000,2.537897e-10
4,0,UK,B0079JI4EY,0.0,0.000000,1.443945e-09,77,0.000000,2.537897e-10,7.388571,22.097065,0.000000,2.537897e-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...
84407334,361580,DE,B0BB7XV97M,0.0,9.117821,6.077226e-05,56,14.038595,8.992638e-05,32.424000,47.990002,14.038595,8.992638e-05
84407335,361580,DE,B0BB7YSRBX,0.0,9.163816,6.363281e-05,58,13.342258,4.482001e-05,32.424000,43.990002,13.342258,4.482001e-05
84407336,361580,DE,B0BB7ZMGY8,0.0,11.256460,5.158278e-04,452,12.778135,2.549625e-05,32.424000,41.990002,12.778135,2.549625e-05
84407337,361580,DE,B0BD4CP7N3,0.0,-3.778687,1.523433e-10,1,-3.986487,1.335653e-12,32.424000,24.990000,-3.986487,1.335653e-12


In [26]:
# verify gru4rec scores
merged_candidates[merged_candidates['sess_id'] == 150001].sort_values(by=['narm_scores'], ascending=False)[['sess_locale', 'product', 'gru4rec_normalized_scores_2', 'gru4rec_scores_2', 'normalized_narm_scores', 'narm_scores']].iloc[:15]

Unnamed: 0,sess_locale,product,gru4rec_normalized_scores_2,gru4rec_scores_2,normalized_narm_scores,narm_scores
35011902,DE,B08VN4VTMC,0.477046,19.960262,0.473138,16.958035
35011977,DE,B0BBRKH55W,0.129742,18.658195,0.142673,15.7592
35011978,DE,B0BBRPFP73,0.054098,17.783445,0.065759,14.984645
35011790,DE,B01MSZ7WK7,0.011652,16.248108,0.061134,14.911717
35011843,DE,B07TP1HY5B,0.013633,16.40517,0.034068,14.327007
35011888,DE,B08HHXDHRJ,0.020194,16.798056,0.032991,14.294895
35011979,DE,B0BBRR84KK,0.137245,18.714417,0.029283,14.175643
35011786,DE,B011KJ6WLU,0.05854,17.862368,0.028296,14.141378
35011803,DE,B077YL91SG,0.000176,12.057325,0.015924,13.566456
35011968,DE,B0B7NCTHWX,0.036256,17.383266,0.011662,13.254972


# Merge test score

In [9]:
FIELD_NAME = 'sasrec_feat_scores'

In [10]:
merged_candidates_feature_test_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/XGBoost/candidates/merged_candidates_test_no_hist_feature.parquet'
test_sessions_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/raw_data/sessions_test_task1.csv'

In [11]:
DE_product_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/NARM/kdd_cup_2023_DE/product_embeddings_2023-05-20-09-38-19.pt'
DE_test_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/NARM/kdd_cup_2023_DE/predict_embeddings_2023-05-20-09-39-47.pt'
JP_product_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/NARM/kdd_cup_2023_JP/product_embeddings_2023-05-20-09-40-17.pt'
JP_test_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/NARM/kdd_cup_2023_JP/predict_embeddings_2023-05-20-09-41-36.pt'
UK_product_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/NARM/kdd_cup_2023_UK/product_embeddings_2023-05-20-09-46-20.pt'
UK_test_embeddings_path = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/candidates/query_embeddings/NARM/kdd_cup_2023_UK/predict_embeddings_2023-05-20-09-47-41.pt'

In [12]:
@lru_cache(maxsize=1)
def read_merged_candidates_feature_test():
    return pd.read_parquet(merged_candidates_feature_test_path, engine='pyarrow')

@lru_cache(maxsize=1)
def read_test_sessions():
    return pd.read_csv(test_sessions_path)

In [13]:
merged_candidates = read_merged_candidates_feature_test()
test_sessions = read_test_sessions()
EMBED_DIM = 128
merged_candidates.sort_values(by=['sess_id', 'product'], inplace=True)
merged_candidates.reset_index(drop=True, inplace=True)

In [14]:
# sess embeddings 
test_DE_query_emb = torch.load(DE_test_embeddings_path, map_location='cpu')
test_JP_query_emb = torch.load(JP_test_embeddings_path, map_location='cpu')
test_UK_query_emb = torch.load(UK_test_embeddings_path, map_location='cpu')
test_query_embeddings = torch.cat(
    [test_DE_query_emb[test_sessions['locale'] == 'DE'], test_JP_query_emb[test_sessions['locale'] == 'JP'], test_UK_query_emb[test_sessions['locale'] == 'UK']],
    dim=0)

In [15]:
# product embeddings 
DE_product_emb = torch.load(DE_product_embeddings_path, map_location='cpu')
JP_product_emb = torch.load(JP_product_embeddings_path, map_location='cpu')
UK_product_emb = torch.load(UK_product_embeddings_path, map_location='cpu')
product_embeddings = torch.cat([DE_product_emb, JP_product_emb, UK_product_emb], dim=0)

In [16]:
DE_dataset_cache = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/.recstudio/cache/c76eddf0a07106ffcce7ce8010856a3b'
JP_dataset_cache = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/.recstudio/cache/81a71d0a18766af84b3beab69bf53e69'
UK_dataset_cache = '/root/autodl-tmp/xiaolong/WorkSpace/Amazon-KDDCUP-23/.recstudio/cache/250dbc09c30162452e00486051e47756'
DE_train_dataset, DE_valid_dataset = _load_cache(DE_dataset_cache)
JP_train_dataset, JP_valid_dataset = _load_cache(JP_dataset_cache)
UK_train_dataset, UK_valid_dataset = _load_cache(UK_dataset_cache)
locale_map = {
    'DE' : DE_train_dataset.field2token2idx['product_id'], 
    'JP' : JP_train_dataset.field2token2idx['product_id'], 
    'UK' : UK_train_dataset.field2token2idx['product_id']
    }

In [17]:
DE_product_list, DE_id_list = list(zip(*locale_map['DE'].items()))
JP_product_list, JP_id_list = list(zip(*locale_map['JP'].items()))
UK_product_list, UK_id_list = list(zip(*locale_map['UK'].items()))
product_list = list(DE_product_list) + list(JP_product_list) + list(UK_product_list)
id_list = list(DE_id_list) + list(JP_id_list) + list(UK_id_list)
locale_list = ['DE'] * len(DE_id_list) + ['JP'] * len(JP_id_list) + ['UK'] * len(UK_id_list)
product_id_df = pd.DataFrame({'locale' : locale_list, 'product' : product_list, 'dataset_id' : id_list})

In [18]:
merged_candidates_ = merged_candidates[['sess_id', 'sess_locale', 'product']].copy()

In [19]:
merged_candidates_g = cudf.from_pandas(merged_candidates_)
product_id_df_g = cudf.from_pandas(product_id_df)

In [20]:
merged_candidates_score_g = merged_candidates_g.merge(product_id_df_g, how='left', left_on=['sess_locale', 'product'], right_on=['locale', 'product'])
merged_candidates_score_g['dataset_id'] = merged_candidates_score_g['dataset_id'].fillna(0)
merged_candidates_score_g.drop(columns=['locale'], inplace=True)
merged_candidates_score_g = merged_candidates_score_g.sort_values(by=['sess_id', 'product'])
merged_candidates_score_g.reset_index(drop=True, inplace=True)
merged_candidates_score = merged_candidates_score_g.to_pandas()

In [21]:
del merged_candidates_g
del product_id_df_g
del merged_candidates_score_g

In [22]:
locale_offset = {'DE' : 0, 'JP' : len(DE_product_list), 'UK' : len(DE_product_list) + len(JP_product_list)}
for locale in ['DE', 'JP', 'UK']:
    merged_candidates_score['dataset_id'][merged_candidates_score['sess_locale'] == locale] = \
        merged_candidates_score['dataset_id'][merged_candidates_score['sess_locale'] == locale] + locale_offset[locale]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_candidates_score['dataset_id'][merged_candidates_score['sess_locale'] == locale] = \


In [23]:
merged_candidates_score[FIELD_NAME] = get_scores(merged_candidates_score, test_query_embeddings, product_embeddings)

100%|██████████| 33901/33901 [02:46<00:00, 203.27it/s]


In [24]:
normalize_scores(merged_candidates_score, FIELD_NAME, 'normalized_'+FIELD_NAME)

In [25]:
merged_candidates[FIELD_NAME] = merged_candidates_score[FIELD_NAME]
merged_candidates['normalized_'+FIELD_NAME] = merged_candidates_score['normalized_'+FIELD_NAME]

In [52]:
cast_dtype(merged_candidates, [FIELD_NAME, 'normalized_'+FIELD_NAME])
merged_candidates.to_parquet(merged_candidates_feature_test_path, engine='pyarrow')

In [29]:
merged_candidates[(merged_candidates['sess_id'] == 100005)].sort_values(by='gru4rec_normalized_scores_2', ascending=False)[
    ['sess_locale', 'product', 'gru4rec_normalized_scores_2', 'gru4rec_scores_2', 'normalized_narm_scores', 'narm_scores']
][:15]

Unnamed: 0,sess_locale,product,gru4rec_normalized_scores_2,gru4rec_scores_2,normalized_narm_scores,narm_scores
21586793,DE,B07TRQH45S,0.129736,12.857471,0.106933,12.473371
21586937,DE,B0B6WNV91T,0.124358,12.815129,0.105796,12.462686
21586762,DE,B01N4ND1T2,0.121831,12.794599,0.027377,11.11087
21586866,DE,B09F2J37V4,0.08504,12.435093,0.038476,11.451196
21586840,DE,B092CMLDHW,0.034603,11.535893,0.042144,11.54226
21586934,DE,B0B62K5H9P,0.030203,11.399909,0.044146,11.588679
21586943,DE,B0BD8911Y1,0.029017,11.359835,0.01576,10.558624
21586901,DE,B09SMK3R8H,0.024517,11.191334,0.035262,11.363962
21586761,DE,B01N4ND0F9,0.021267,11.049112,0.03013,11.206704
21586870,DE,B09F66MWVX,0.018534,10.911561,0.026572,11.081043


In [30]:
merged_candidates

Unnamed: 0,sess_id,sess_locale,product,sasrec_scores_2,sasrec_normalized_scores_2,gru4rec_scores,gru4rec_normalized_scores,product_freq,sess_avg_price,product_price,...,all_items_co_graph_count_0,seqmlp_scores,seqmlp_normalized_scores,desc_BM25_scores,normalized_all_items_co_graph_count_1,all_items_co_graph_count_1,normalized_all_items_co_graph_count_2,all_items_co_graph_count_2,narm_scores,normalized_narm_scores
0,0,DE,4088833651,0.000000,2.975813e-09,0.000000,1.580065e-09,828,25.195269,36.761604,...,0,0.000000,2.554478e-10,0.000000,0.000000,0.000000,0.000000,0,0.000000,0.0
1,0,DE,B000H6W2GW,0.000000,2.975813e-09,0.000000,1.580065e-09,875,25.195269,36.761604,...,0,0.000000,2.554478e-10,0.000000,0.000000,0.000000,0.000000,0,0.000000,0.0
2,0,DE,B000JG2RAG,7.665308,6.347557e-06,8.104032,5.226502e-06,24,25.195269,23.190001,...,0,8.786958,1.672744e-06,67.792648,0.000000,0.000000,0.000000,0,9.265436,0.000058
3,0,DE,B000RYSOUW,-2.951060,1.555882e-10,-2.857798,9.068785e-11,5,25.195269,6.900000,...,0,-3.325048,9.188664e-12,170.360580,0.000000,0.000000,0.000000,0,-3.215176,0.0
4,0,DE,B000UGZVQM,3.977920,1.589257e-07,4.688567,1.717488e-07,4,25.195269,21.990000,...,0,5.540127,6.506522e-08,71.169296,0.000000,0.000000,0.000000,0,5.534946,0.000001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69428426,316970,UK,B0BJCTH4NH,11.327528,1.041200e-04,10.629994,3.818184e-04,74,16.950001,5.800000,...,16,11.838901,9.762144e-04,164.803131,0.002540,1.285714,0.003086,1,8.999118,0.000072
69428427,316970,UK,B0BJTQQWLG,5.604142,3.403292e-07,6.052083,3.923694e-06,6,16.950001,9.880000,...,1,4.890683,9.375031e-07,303.665985,0.000494,0.250000,0.000000,0,5.973598,0.000004
69428428,316970,UK,B0BJV3RL4H,9.146974,1.176336e-05,7.667603,1.973815e-05,7,16.950001,22.097065,...,1,10.187823,1.872800e-04,226.131516,0.001976,1.000000,0.003086,1,7.151886,0.000011
69428429,316970,UK,B0BK7SPC84,-10.383047,3.879279e-14,-6.356799,1.601719e-11,0,16.950001,5.960000,...,0,-4.160688,1.099036e-10,312.603607,0.000000,0.000000,0.000000,0,-2.646061,0.0
