# Hybrid Approach with Gradinet Boosting 

* Goal: build a recommender by combining user and item features
* Engineer user and product features
* Split Data with a global temporal split
* Encode product text features
* Train a gradinet boosting model (CatboostRanker) with a PairLogit loss so that it is able to rank items based on their relevance to users

In [1]:
from collections import defaultdict

import numpy as np
import pandas as pd
from catboost import CatBoostRanker, Pool
from datasets import load_dataset
from sklearn.metrics import ndcg_score
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

from feature_engineering import calculate_rolling_stats
from text_pre_processing import combine_text_features, pre_process_text
from train_test_split import (
    global_temporal_split,
    temporal_split_users_in_both_sets,
    temporal_split_users_with_cold_start,
)

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
# pd.set_option('display.max_colwidth', None)

2025-01-12 22:04:40.090697: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-12 22:04:40.100084: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736719480.110748  171897 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736719480.114171  171897 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-12 22:04:40.125918: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

# Load Data

In [2]:
dataset_reviews = load_dataset("McAuley-Lab/Amazon-Reviews-2023", "raw_review_All_Beauty", trust_remote_code=True)
df_reviews = dataset_reviews["full"].to_pandas()

dataset_items = load_dataset("McAuley-Lab/Amazon-Reviews-2023", "raw_meta_All_Beauty", split="full", trust_remote_code=True)
df_items = dataset_items.to_pandas()

# filter out users with low ammount of reviews for now - look at the cold start problem later
min_ammount_reviews = 5
user_review_counts = df_reviews.groupby('user_id').size()
users_with_min_reviews = user_review_counts[user_review_counts >= min_ammount_reviews].index
df_reviews_filtered = df_reviews[df_reviews['user_id'].isin(users_with_min_reviews)]

df = pd.merge(df_reviews_filtered, df_items, on='parent_asin', how='left', suffixes=('_review', '_item'))

In [3]:
df.shape

(14984, 25)

In [4]:
df.columns

Index(['rating', 'title_review', 'text', 'images_review', 'asin',
       'parent_asin', 'user_id', 'timestamp', 'helpful_vote',
       'verified_purchase', 'main_category', 'title_item', 'average_rating',
       'rating_number', 'features', 'description', 'price', 'images_item',
       'videos', 'store', 'categories', 'details', 'bought_together',
       'subtitle', 'author'],
      dtype='object')

# Pre-processing

In [5]:
# item features
df['title_description_features'] = df.apply(
    lambda row: combine_text_features(row, col1='title_item', col2='features', col3='description'),
    axis=1
)
df = pre_process_text(df, input_col='title_description_features', output_col='product_title_description_feature_pre_processed')


In [6]:
df.head()

Unnamed: 0,rating,title_review,text,images_review,asin,parent_asin,user_id,timestamp,helpful_vote,verified_purchase,main_category,title_item,average_rating,rating_number,features,description,price,images_item,videos,store,categories,details,bought_together,subtitle,author,title_description_features,product_title_description_feature_pre_processed
0,5.0,Great for at home use and so easy to use!,This is perfect for my between salon visits. I...,[],B08P2DZB4X,B08P2DZB4X,AFSKPY37N3C43SOI5IEXEK5JSIYA,1627391044559,0,False,All Beauty,NIRA Skincare Laser & Serum Bundle - Includes ...,3.8,109,[POWERFUL ANTI-AGING DUO - This powerful anti-...,[],,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Nira,[],"{""Skin Type"": ""Dry"", ""Product Benefits"": ""Hydr...",,,,NIRA Skincare Laser & Serum Bundle - Includes ...,nira skincare laser serum bundle includes anti...
1,5.0,Nice shampoo for the money,I get Keratin treatments at the salon at least...,[],B086QY6T7N,B086QY6T7N,AFSKPY37N3C43SOI5IEXEK5JSIYA,1626614511145,0,False,All Beauty,Caroline Keller Keratin Shampoo for dry and da...,3.5,12,[],[],,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Caroline Keller,[],"{""Brand"": ""Caroline Keller"", ""Item Form"": ""Liq...",,,,Caroline Keller Keratin Shampoo for dry and da...,caroline keller keratin shampoo dry damaged ha...
2,3.0,Not what I thought I would be getting,I was very disappointed when I got this facial...,[],B08DHTJ25J,B08DHTJ25J,AFSKPY37N3C43SOI5IEXEK5JSIYA,1626211245370,0,False,All Beauty,Orange Peel Nature's Cleanse Facial Scrub - by...,3.1,12,[],[],,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Visage Pure,[],"{""Brand"": ""Visage Pure"", ""Skin Type"": ""Acne Pr...",,,,Orange Peel Nature's Cleanse Facial Scrub - by...,orange peel nature cleanse facial scrub visage...
3,5.0,A little goes a long way!,This is a really nice moisturizing lotion. It ...,[],B07RBSLNFR,B07RBSLNFR,AFSKPY37N3C43SOI5IEXEK5JSIYA,1621184430697,0,False,All Beauty,OGANA CELL Peptide Concentrating Amazing Lotio...,4.5,26,[],[],,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",OGANA CELL,[],"{""Brand"": ""OGANA CELL"", ""Item Form"": ""Lotion"",...",,,,OGANA CELL Peptide Concentrating Amazing Lotio...,ogana cell peptide concentrating amazing lotio...
4,3.0,Just ok,I try to get Keratin treatments every 3 months...,[],B07SLFWZKN,B07SLFWZKN,AFSKPY37N3C43SOI5IEXEK5JSIYA,1619737501209,0,False,All Beauty,Keratin Secrets Do It Yourself Home Keratin Sy...,3.3,25,[],[],49.95,{'hi_res': ['https://m.media-amazon.com/images...,{'title': ['Keratin Secrets DIY Treatment Syst...,Keratin Secrets,[],"{""Package Dimensions"": ""8.27 x 4.21 x 3.9 inch...",,,,Keratin Secrets Do It Yourself Home Keratin Sy...,keratin secret home keratin system keratin sec...


In [7]:
def analyze_nulls(df, column):
    total_rows = len(df)
    null_count = df[column].isnull().sum()
    null_percentage = (null_count / total_rows) * 100
    
    return {
        'column': column,
        'total_rows': total_rows,
        'null_count': null_count,
        'null_percentage': round(null_percentage, 2)
    }


In [8]:
analyze_nulls(df, 'price')

{'column': 'price',
 'total_rows': 14984,
 'null_count': 0,
 'null_percentage': 0.0}

In [9]:
# too many missing values, we won't use it for now
df[df.price == 'None'].shape[0] / df.shape[0]

0.8312867058195409

In [10]:
analyze_nulls(df, 'verified_purchase')

{'column': 'verified_purchase',
 'total_rows': 14984,
 'null_count': 0,
 'null_percentage': 0.0}

In [11]:
df['verified_purchase'] = df.verified_purchase.astype('int')

In [12]:
analyze_nulls(df, 'store')

{'column': 'store',
 'total_rows': 14984,
 'null_count': 772,
 'null_percentage': 5.15}

In [13]:
df['store'] = df['store'].fillna('UNKNOWN')

In [14]:
def pre_process_categories(row, col):
    if isinstance(row[col], str) and row[col].strip():
        categories = row[col]
    elif isinstance(row[col], (list, np.ndarray)):
        categories = ' '.join(str(element) for element in row[col])
    else:
        categories = ''
    return categories

In [15]:
df['categories_processed'] = df.apply(
    lambda row: pre_process_categories(row, col='categories'),
    axis=1
)

In [16]:
df[df.categories_processed == ''].shape

(14984, 28)

In [17]:
# maybe prefereences change over the years 
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
df['year'] = df['timestamp'].dt.year

# Feature Engineering

In [18]:
df = calculate_rolling_stats(df, 'user_id')  
df = calculate_rolling_stats(df, 'parent_asin')  
df = df.sort_values(by='timestamp')

In [19]:
df[df.user_id == 'AHV6QCNBJNSGLATP56JAWJ3C4G2A'][['timestamp', 'rating', 'rolling_avg_rating_user', 'rolling_review_count_user']].head()

Unnamed: 0,timestamp,rating,rolling_avg_rating_user,rolling_review_count_user
65,2014-11-17 18:55:42,5.0,5.0,1
64,2015-02-17 19:11:32,5.0,5.0,2
63,2015-04-24 21:43:07,4.0,4.666667,3
62,2015-09-12 00:04:28,5.0,4.75,4
61,2016-02-04 22:48:30,4.0,4.6,5


In [20]:
df[df.parent_asin == 'B0B5XFVSXY'][['timestamp', 'rating', 'rolling_avg_rating_product', 'rolling_review_count_product']].head()

Unnamed: 0,timestamp,rating,rolling_avg_rating_product,rolling_review_count_product
7768,2022-08-02 19:09:49.390,5.0,5.0,1
3876,2022-08-12 16:44:39.817,4.0,4.5,2
6210,2022-08-17 12:17:13.185,2.0,3.666667,3
14166,2022-08-20 17:40:53.209,5.0,4.0,4
10252,2022-08-25 21:04:21.033,5.0,4.2,5


# Select Features

In [21]:
# numerical features
user_review_features = ['rolling_avg_rating_user', 'rolling_review_count_user', 'helpful_vote', 'verified_purchase', 'year']
product_features = ['average_rating', 'rolling_avg_rating_product', 'rolling_review_count_product', 'rating_number']
categoricals = ['main_category', 'store']
text_features = ['product_title_description_feature_pre_processed']
target = 'rating'

train_test_split_features =  ['timestamp', 'parent_asin']
group_features = ['user_id']

In [22]:
columns = train_test_split_features + group_features + user_review_features + product_features + categoricals + text_features + [target]

In [23]:
df = df[columns]

In [24]:
df.head()

Unnamed: 0,timestamp,parent_asin,user_id,rolling_avg_rating_user,rolling_review_count_user,helpful_vote,verified_purchase,year,average_rating,rolling_avg_rating_product,rolling_review_count_product,rating_number,main_category,store,product_title_description_feature_pre_processed,rating
778,2004-08-15 20:42:27,B00014DMLO,AGWDYYVVWM3DC3CASUZKXK67G6IA,5.0,1,20,0,2004,4.0,5.0,1,15,All Beauty,Burt's Bees,burt bee marshmallow cream oz burt bee marshma...,5.0
1498,2004-08-23 17:22:10,B0001ZI0BU,AFIJLAW3HIOMRUFSWNH54IJ3XQAA,4.0,1,30,0,2004,3.0,4.0,1,29,All Beauty,Crest,crest whitestrips premium dental whitening sys...,4.0
777,2004-12-11 08:04:29,B00064A816,AGWDYYVVWM3DC3CASUZKXK67G6IA,5.0,2,1,0,2004,5.0,5.0,1,7,All Beauty,Zia,zia natural skincare ultimate body butter oz z...,5.0
776,2005-06-28 16:51:01,B00094DTSW,AGWDYYVVWM3DC3CASUZKXK67G6IA,5.0,3,2,0,2005,4.0,5.0,1,15,All Beauty,Burt's Bees,burt bee marshmallow cream oz burt bee marshma...,5.0
460,2005-08-09 12:12:58,B000068PBJ,AGZZXSMMS4WRHHJRBUJZI4FZDHKQ,5.0,1,5,1,2005,4.0,5.0,1,154,All Beauty,Norelco,norelco xl norelco xl hightech cordcordless sh...,5.0


# Train Test Split

In [25]:
train_df, test_df = global_temporal_split(df, split_ratio=0.8, exclude_cold_start_users=False)

In [26]:
train_df.shape

(11987, 16)

In [27]:
test_df.shape

(2997, 16)

In [28]:
train_df.user_id.unique().shape

(1479,)

In [29]:
test_df.user_id.unique().shape

(749,)

In [30]:
# 141 users only have interactions in the test set
cold_start_users = [i for i in test_df.user_id.unique() if i not in train_df.user_id.unique()]
len(cold_start_users)

141

In [31]:
# 83% of products in the test set not in the train set
# we will try to work wit this by adding content-based features
test_only_products = [i for i in test_df.parent_asin.unique() if i not in train_df.parent_asin.unique()]
len(test_only_products) / len(test_df.parent_asin.unique())

0.8361153262518968

In [32]:
len(train_df.parent_asin.unique())

5880

In [33]:
train_df = train_df.drop(train_test_split_features, axis=1)
test_df = test_df.drop(train_test_split_features, axis=1)

# Generate Embeddings For Text Features

In [34]:
model = SentenceTransformer('all-MiniLM-L6-v2')

In [35]:
train_embeddings = model.encode(train_df['product_title_description_feature_pre_processed'].tolist(), batch_size=64, show_progress_bar=True)
test_embeddings = model.encode(test_df['product_title_description_feature_pre_processed'].tolist(), batch_size=64, show_progress_bar=True)

Batches:   0%|          | 0/188 [00:00<?, ?it/s]

Batches:   0%|          | 0/47 [00:00<?, ?it/s]

In [36]:
np.save('train_embeddings.npy', train_embeddings)
np.save('test_embeddings.npy', test_embeddings)

In [37]:
train_embeddings = np.load('train_embeddings.npy')
test_embeddings = np.load('test_embeddings.npy')

In [38]:
embeding_columns = [f'product_embeddings_{i}' for i in range(train_embeddings.shape[1])]

In [39]:
train_embeddings_df = pd.DataFrame(train_embeddings, columns=embeding_columns)
test_embeddings_df = pd.DataFrame(test_embeddings, columns=embeding_columns)

In [40]:
train_df = pd.concat([train_df, train_embeddings_df], axis=1)
test_df = pd.concat([test_df, test_embeddings_df], axis=1)

In [41]:
train_df.head()

Unnamed: 0,user_id,rolling_avg_rating_user,rolling_review_count_user,helpful_vote,verified_purchase,year,average_rating,rolling_avg_rating_product,rolling_review_count_product,rating_number,main_category,store,product_title_description_feature_pre_processed,rating,product_embeddings_0,product_embeddings_1,product_embeddings_2,product_embeddings_3,product_embeddings_4,product_embeddings_5,product_embeddings_6,product_embeddings_7,product_embeddings_8,product_embeddings_9,product_embeddings_10,product_embeddings_11,product_embeddings_12,product_embeddings_13,product_embeddings_14,product_embeddings_15,product_embeddings_16,product_embeddings_17,product_embeddings_18,product_embeddings_19,product_embeddings_20,product_embeddings_21,product_embeddings_22,product_embeddings_23,product_embeddings_24,product_embeddings_25,product_embeddings_26,product_embeddings_27,product_embeddings_28,product_embeddings_29,product_embeddings_30,product_embeddings_31,product_embeddings_32,product_embeddings_33,product_embeddings_34,product_embeddings_35,product_embeddings_36,product_embeddings_37,product_embeddings_38,product_embeddings_39,product_embeddings_40,product_embeddings_41,product_embeddings_42,product_embeddings_43,product_embeddings_44,product_embeddings_45,product_embeddings_46,product_embeddings_47,product_embeddings_48,product_embeddings_49,product_embeddings_50,product_embeddings_51,product_embeddings_52,product_embeddings_53,product_embeddings_54,product_embeddings_55,product_embeddings_56,product_embeddings_57,product_embeddings_58,product_embeddings_59,product_embeddings_60,product_embeddings_61,product_embeddings_62,product_embeddings_63,product_embeddings_64,product_embeddings_65,product_embeddings_66,product_embeddings_67,product_embeddings_68,product_embeddings_69,product_embeddings_70,product_embeddings_71,product_embeddings_72,product_embeddings_73,product_embeddings_74,product_embeddings_75,product_embeddings_76,product_embeddings_77,product_embeddings_78,product_embeddings_79,product_embeddings_80,product_embeddings_81,product_embeddings_82,product_embeddings_83,product_embeddings_84,product_embeddings_85,product_embeddings_86,product_embeddings_87,product_embeddings_88,product_embeddings_89,product_embeddings_90,product_embeddings_91,product_embeddings_92,product_embeddings_93,product_embeddings_94,product_embeddings_95,product_embeddings_96,product_embeddings_97,product_embeddings_98,product_embeddings_99,product_embeddings_100,product_embeddings_101,product_embeddings_102,product_embeddings_103,product_embeddings_104,product_embeddings_105,product_embeddings_106,product_embeddings_107,product_embeddings_108,product_embeddings_109,product_embeddings_110,product_embeddings_111,product_embeddings_112,product_embeddings_113,product_embeddings_114,product_embeddings_115,product_embeddings_116,product_embeddings_117,product_embeddings_118,product_embeddings_119,product_embeddings_120,product_embeddings_121,product_embeddings_122,product_embeddings_123,product_embeddings_124,product_embeddings_125,product_embeddings_126,product_embeddings_127,product_embeddings_128,product_embeddings_129,product_embeddings_130,product_embeddings_131,product_embeddings_132,product_embeddings_133,product_embeddings_134,product_embeddings_135,product_embeddings_136,product_embeddings_137,product_embeddings_138,product_embeddings_139,product_embeddings_140,product_embeddings_141,product_embeddings_142,product_embeddings_143,product_embeddings_144,product_embeddings_145,product_embeddings_146,product_embeddings_147,product_embeddings_148,product_embeddings_149,product_embeddings_150,product_embeddings_151,product_embeddings_152,product_embeddings_153,product_embeddings_154,product_embeddings_155,product_embeddings_156,product_embeddings_157,product_embeddings_158,product_embeddings_159,product_embeddings_160,product_embeddings_161,product_embeddings_162,product_embeddings_163,product_embeddings_164,product_embeddings_165,product_embeddings_166,product_embeddings_167,product_embeddings_168,product_embeddings_169,product_embeddings_170,product_embeddings_171,product_embeddings_172,product_embeddings_173,product_embeddings_174,product_embeddings_175,product_embeddings_176,product_embeddings_177,product_embeddings_178,product_embeddings_179,product_embeddings_180,product_embeddings_181,product_embeddings_182,product_embeddings_183,product_embeddings_184,product_embeddings_185,product_embeddings_186,product_embeddings_187,product_embeddings_188,product_embeddings_189,product_embeddings_190,product_embeddings_191,product_embeddings_192,product_embeddings_193,product_embeddings_194,product_embeddings_195,product_embeddings_196,product_embeddings_197,product_embeddings_198,product_embeddings_199,product_embeddings_200,product_embeddings_201,product_embeddings_202,product_embeddings_203,product_embeddings_204,product_embeddings_205,product_embeddings_206,product_embeddings_207,product_embeddings_208,product_embeddings_209,product_embeddings_210,product_embeddings_211,product_embeddings_212,product_embeddings_213,product_embeddings_214,product_embeddings_215,product_embeddings_216,product_embeddings_217,product_embeddings_218,product_embeddings_219,product_embeddings_220,product_embeddings_221,product_embeddings_222,product_embeddings_223,product_embeddings_224,product_embeddings_225,product_embeddings_226,product_embeddings_227,product_embeddings_228,product_embeddings_229,product_embeddings_230,product_embeddings_231,product_embeddings_232,product_embeddings_233,product_embeddings_234,product_embeddings_235,product_embeddings_236,product_embeddings_237,product_embeddings_238,product_embeddings_239,product_embeddings_240,product_embeddings_241,product_embeddings_242,product_embeddings_243,product_embeddings_244,product_embeddings_245,product_embeddings_246,product_embeddings_247,product_embeddings_248,product_embeddings_249,product_embeddings_250,product_embeddings_251,product_embeddings_252,product_embeddings_253,product_embeddings_254,product_embeddings_255,product_embeddings_256,product_embeddings_257,product_embeddings_258,product_embeddings_259,product_embeddings_260,product_embeddings_261,product_embeddings_262,product_embeddings_263,product_embeddings_264,product_embeddings_265,product_embeddings_266,product_embeddings_267,product_embeddings_268,product_embeddings_269,product_embeddings_270,product_embeddings_271,product_embeddings_272,product_embeddings_273,product_embeddings_274,product_embeddings_275,product_embeddings_276,product_embeddings_277,product_embeddings_278,product_embeddings_279,product_embeddings_280,product_embeddings_281,product_embeddings_282,product_embeddings_283,product_embeddings_284,product_embeddings_285,product_embeddings_286,product_embeddings_287,product_embeddings_288,product_embeddings_289,product_embeddings_290,product_embeddings_291,product_embeddings_292,product_embeddings_293,product_embeddings_294,product_embeddings_295,product_embeddings_296,product_embeddings_297,product_embeddings_298,product_embeddings_299,product_embeddings_300,product_embeddings_301,product_embeddings_302,product_embeddings_303,product_embeddings_304,product_embeddings_305,product_embeddings_306,product_embeddings_307,product_embeddings_308,product_embeddings_309,product_embeddings_310,product_embeddings_311,product_embeddings_312,product_embeddings_313,product_embeddings_314,product_embeddings_315,product_embeddings_316,product_embeddings_317,product_embeddings_318,product_embeddings_319,product_embeddings_320,product_embeddings_321,product_embeddings_322,product_embeddings_323,product_embeddings_324,product_embeddings_325,product_embeddings_326,product_embeddings_327,product_embeddings_328,product_embeddings_329,product_embeddings_330,product_embeddings_331,product_embeddings_332,product_embeddings_333,product_embeddings_334,product_embeddings_335,product_embeddings_336,product_embeddings_337,product_embeddings_338,product_embeddings_339,product_embeddings_340,product_embeddings_341,product_embeddings_342,product_embeddings_343,product_embeddings_344,product_embeddings_345,product_embeddings_346,product_embeddings_347,product_embeddings_348,product_embeddings_349,product_embeddings_350,product_embeddings_351,product_embeddings_352,product_embeddings_353,product_embeddings_354,product_embeddings_355,product_embeddings_356,product_embeddings_357,product_embeddings_358,product_embeddings_359,product_embeddings_360,product_embeddings_361,product_embeddings_362,product_embeddings_363,product_embeddings_364,product_embeddings_365,product_embeddings_366,product_embeddings_367,product_embeddings_368,product_embeddings_369,product_embeddings_370,product_embeddings_371,product_embeddings_372,product_embeddings_373,product_embeddings_374,product_embeddings_375,product_embeddings_376,product_embeddings_377,product_embeddings_378,product_embeddings_379,product_embeddings_380,product_embeddings_381,product_embeddings_382,product_embeddings_383
0,AGWDYYVVWM3DC3CASUZKXK67G6IA,5.0,1,20,0,2004,4.0,5.0,1,15,All Beauty,Burt's Bees,burt bee marshmallow cream oz burt bee marshma...,5.0,-0.041619,0.033422,0.034966,0.053175,0.006273,0.036283,0.168141,-0.004048,0.040195,-0.06819,-0.014379,-0.103234,-0.024612,-0.003218,0.025046,0.074487,0.085506,0.007298,0.041811,-0.030919,0.094727,0.061391,0.053366,0.037182,-0.057703,0.109932,-0.044511,-0.046651,-0.035115,-0.026908,-0.004776,0.015368,0.049015,-0.087193,0.009593,-0.143599,0.018595,-0.022479,0.097711,0.069048,-0.014929,0.018838,-0.032903,-0.004032,-0.060656,-0.012385,0.039073,0.106798,0.015753,0.038851,-0.027762,-0.025948,-0.055998,-0.059938,0.038153,-0.051628,-0.029998,-0.003397,-0.051159,0.02579,-0.005016,-0.015099,0.027314,0.032414,0.001654,-0.018614,-0.056339,-0.030323,-0.002982,-0.033138,-0.086203,0.003989,0.041736,0.046425,0.033684,0.025491,0.081455,-0.01091,0.017895,0.04594,-0.04192,-0.048615,-0.01685,0.021514,0.034046,0.014899,0.017156,-0.044151,-0.062426,-0.044939,-0.042344,-0.05249,0.011528,0.000544,0.05334,-0.03033,-0.001575,-0.056311,-0.021006,0.128567,-0.003023,-0.046426,-0.011452,-0.066237,0.028228,0.010707,-0.076267,-0.009675,0.07238,0.071126,0.012749,-0.08062,-0.007349,0.032005,-0.028531,-0.045922,-0.026966,-0.009628,-0.03171,-0.01992,-0.041311,-0.03038,0.029098,0.027126,-0.16461,0.03161,0.056631,8.976863e-33,-0.047724,-0.023956,0.050375,-0.070748,0.087149,0.036448,-0.051588,0.047857,0.03707,0.01905,-0.027196,-0.031978,-0.033518,0.049851,-0.02093,-0.016566,-0.034514,-0.001197,-0.041537,-0.064412,-0.072898,0.093509,-0.053511,0.080723,-0.089367,0.048925,-0.002401,-0.058343,0.043327,0.04247,0.061766,-0.058367,0.02733,-0.055684,-0.085032,-0.09198,-0.011726,-0.051535,0.005222,-0.079566,0.03798,0.034069,0.012883,0.104098,-0.057803,0.032699,0.003326,0.073764,0.070654,-0.016908,-0.021269,0.011574,-0.011587,0.014755,-0.007818,-0.07347,-0.012244,0.037911,-0.018179,0.017845,-0.014338,0.022809,0.053757,-0.027907,-0.051064,-0.024855,0.005752,-0.034545,0.002562,-0.06211,-0.009049,0.033479,0.10181,-0.026792,-0.022813,-0.024219,0.151065,0.012344,-0.044449,-0.009495,-0.040062,0.032116,0.099566,0.018618,-0.064685,-0.053971,-0.072051,-0.052067,0.065208,0.032252,-0.140162,0.011928,-0.021896,0.043633,-0.086418,-8.028153e-33,0.025501,-0.037626,0.073033,0.023572,0.053012,-0.009518,-0.0009,0.040408,0.037349,-0.036234,-0.044152,-0.081057,0.099022,-0.004804,-0.017411,0.075297,-0.003287,0.062895,0.039312,0.052961,-0.049559,0.015631,-0.012066,0.016737,-0.055283,0.077352,0.072184,-0.076966,-0.023706,0.008358,0.082743,-0.056234,0.029324,-0.02509,-0.032607,0.041181,0.086479,-0.028731,-0.126566,0.069871,-0.016143,-0.059003,-0.030831,-0.008014,0.0518,-0.003757,-0.051667,-0.040285,-0.036589,0.042554,-0.014773,0.010094,-0.018745,0.019267,-0.052855,0.009376,0.054065,-0.017442,-0.007405,0.041753,0.004203,-0.036168,0.041089,-0.025134,0.027202,-0.00332,-0.004161,-0.002382,-0.040808,-0.021164,0.045569,0.012434,-0.028367,0.066224,0.09003,0.030052,-0.013001,-0.123732,-0.053929,-0.001722,-0.07783,0.030218,-0.017626,0.094826,-0.050817,0.05914,0.040747,-0.016159,-0.038461,0.091919,-0.022169,0.049063,-0.003204,-0.026379,0.065538,-2.717868e-08,0.041985,0.02751,0.022105,0.020417,0.054395,0.038776,-0.0229,-0.0055,-0.049066,-0.036443,-0.039568,-0.011637,-0.037958,0.017315,0.043659,-0.065498,-0.029373,-0.019755,0.015231,0.0124,-0.08296,0.070852,0.149861,-0.099472,-0.012311,-0.076425,0.073925,0.035608,-0.017283,0.056347,0.003962,0.096618,-0.079368,0.024491,-0.064881,-0.080211,-0.021465,-0.028824,-0.100636,0.056855,-0.004805,-0.011747,0.044322,-0.019214,0.044475,-0.082627,0.055245,0.057259,0.006468,-0.016708,-0.041179,0.054393,-0.022286,-0.004523,-0.017271,-0.046717,-0.062927,-0.016951,0.010836,0.028967,0.028294,-0.065174,0.088752,-0.063074
1,AFIJLAW3HIOMRUFSWNH54IJ3XQAA,4.0,1,30,0,2004,3.0,4.0,1,29,All Beauty,Crest,crest whitestrips premium dental whitening sys...,4.0,-0.030708,0.027747,0.046509,-0.019991,0.007082,0.032286,-0.072054,0.080919,-0.063494,0.002166,-0.037535,-0.050737,0.003453,0.033969,0.003383,0.026433,0.068586,0.084047,0.0384,-0.00871,0.064494,-0.038287,-0.055255,0.05265,-0.038804,0.120022,-0.031453,0.033218,0.091077,0.052532,-0.053514,0.043674,0.120593,-0.123029,-0.032934,-0.141558,-0.039998,0.020325,-0.006067,-0.021336,0.004484,-0.073492,-0.053725,-0.043789,0.001442,0.008,0.028644,-0.01748,-0.060857,0.050982,0.049225,-0.065179,-0.089612,0.028899,-0.042939,-0.028652,-0.119724,-0.01703,0.008589,-0.010816,-0.040938,-0.059876,-0.051042,0.075377,-0.060686,0.125007,-0.003194,0.032624,0.040531,0.015749,-0.114379,-0.03983,0.037843,-0.013465,0.018059,0.073293,0.059944,-0.023725,-0.037286,-0.066728,-0.077948,-0.011812,0.001059,0.053629,0.031909,0.054984,-0.146411,-0.051811,-0.009947,-0.02508,0.065323,0.023768,0.078078,-0.026269,-0.013456,0.005063,-0.038757,0.050577,0.047145,0.022747,-0.020042,-0.040909,0.018589,0.022322,0.000313,-0.009204,-0.038081,8.6e-05,-0.005316,0.021764,0.074628,0.042132,0.030689,-0.022205,-0.02082,-0.059669,0.039834,0.041733,0.060604,0.087824,-0.02898,-0.021367,0.105411,-0.080212,0.033396,-0.031758,0.127369,1.535003e-32,-0.023241,0.032883,-0.0081,-0.031231,0.052755,0.01912,0.007904,0.048622,0.010773,0.014576,0.043586,0.030216,-0.060594,0.06802,-0.056663,0.067345,0.111244,-0.046373,-0.065275,0.011845,0.020541,-0.055843,0.045026,0.076487,-0.033737,-0.073755,-0.007403,-0.045445,-0.057573,0.023634,0.006618,0.041185,0.06083,-0.108264,-0.076939,0.151681,-0.010271,-0.055524,-0.00054,-0.034532,0.014363,0.032371,0.015605,0.008423,0.06677,-0.016269,0.032001,0.035811,0.045658,0.045437,-0.046218,-0.017389,-0.086436,0.024758,-0.039475,0.076164,-0.077037,0.014772,-0.063061,0.025372,-0.081562,-0.043191,-0.047545,0.018002,-0.064889,-0.014036,-0.042375,-0.052407,-0.090333,-0.03693,0.01457,0.028942,-0.044683,0.053025,0.007416,-0.02783,0.11629,0.08467,0.039003,-0.005073,0.0315,0.001418,-0.056595,-0.00581,0.050327,0.049962,0.019718,-0.022384,-0.02147,0.024706,0.008617,-0.074529,-0.045938,0.079991,-0.016824,-1.419095e-32,-0.001567,-0.024325,0.036211,0.102179,-0.051268,0.028951,-0.058955,-0.016958,0.041773,-0.065999,0.082993,0.120656,-0.002676,-0.048369,-0.041768,0.003521,-0.000402,0.029607,-0.063519,0.051081,-0.014801,0.044541,-0.01352,-0.032089,-0.013201,-0.005025,-0.025485,0.011912,0.026146,0.066788,-0.005264,0.056995,0.026072,-0.043445,-0.036018,0.049189,0.025482,-0.03082,-0.059673,-0.056045,0.037428,0.036169,-0.002125,-0.040936,-0.033912,0.020107,-0.038169,-0.002393,-0.011202,0.028618,0.009957,-0.021315,-0.090447,0.076912,-0.015403,-0.032589,0.013007,-0.02817,-0.033335,-0.011392,-0.03088,0.038563,-0.104915,-0.057438,0.053417,-0.069148,0.024864,-0.001573,0.013865,-0.057565,-0.03708,-0.06721,-0.031115,-0.042793,-0.048816,0.006295,-0.01251,-0.073963,-0.099701,-0.031009,-0.107749,-0.036176,-0.019104,0.093715,-0.011888,-0.025634,-0.023508,-0.042281,-0.040874,0.006733,-0.0492,0.057585,-0.077815,0.043381,0.02702,-4.33437e-08,0.055148,0.005384,0.023709,0.042552,-0.025468,-0.012619,-0.011863,-0.048087,-0.06576,-0.028708,0.100247,0.057327,-0.065601,-0.050604,-0.039937,0.038126,-0.010572,-0.013624,-0.050271,0.001453,-0.095071,-0.029773,0.131659,0.020123,-0.060696,-0.021392,0.028449,0.09099,0.06032,0.090134,0.016662,0.008798,0.084438,-0.039962,0.055862,-0.055454,0.008206,0.011334,-0.017918,0.079733,0.024273,-0.08378,0.00086,0.052754,-0.057247,-0.061447,0.006607,0.052871,-0.015605,-0.049716,0.040836,-0.000378,-0.0353,0.021078,-0.025614,0.005901,0.099031,0.044414,0.040757,0.005557,0.026288,-0.033177,0.033456,-0.040646
2,AGWDYYVVWM3DC3CASUZKXK67G6IA,5.0,2,1,0,2004,5.0,5.0,1,7,All Beauty,Zia,zia natural skincare ultimate body butter oz z...,5.0,-0.075446,0.0158,0.024032,0.086514,0.108334,0.05473,0.043414,0.013558,-0.064754,-0.060806,0.012195,-0.055281,-0.0172,0.017064,0.047662,0.048893,0.019811,0.062122,-0.103131,-0.061078,0.004012,0.068212,0.048807,0.012973,-0.059307,0.07194,-0.007858,0.028025,0.053615,-0.100136,0.034345,0.061005,0.061003,-0.036681,-0.079509,-0.074181,-0.045305,-0.043422,0.019549,-0.009011,0.004073,-0.044352,-0.063761,-0.020904,0.031073,0.002741,0.029336,0.058432,0.020265,0.042427,-0.094144,-0.074254,-0.09459,0.067398,0.005854,0.005595,-0.139864,-0.064971,-0.0348,0.043135,0.05428,-0.092407,0.022323,0.025334,-0.087268,-0.026591,-0.039973,-0.016886,-0.015378,-0.02926,-0.084137,-0.009898,0.066606,0.032481,-0.009783,0.062309,-0.004203,-0.079692,-0.014801,-0.002343,0.003219,0.01715,0.021392,0.129208,0.069875,0.055694,-0.032245,0.002558,-0.00873,0.029089,0.043024,0.027551,0.060292,0.064785,-0.037878,0.040015,0.016053,-0.115204,-0.074369,0.031653,-0.028505,-0.056095,0.000288,-0.082963,-0.088845,-0.012543,0.009321,-0.033793,-0.015188,0.086712,-0.027227,-0.066768,-0.095049,-0.090195,-0.066252,-0.052292,0.054677,-0.017856,-0.022573,-0.029404,0.005355,-0.043607,0.058916,-0.03438,-0.036544,0.07745,0.024636,1.64535e-32,-0.064313,0.081711,-0.040811,0.029563,0.028908,0.037567,-0.004204,0.013923,0.029733,-0.009011,-0.027386,0.098109,-0.034319,0.067972,-0.070653,0.00568,0.005058,-0.012358,0.003455,-0.035699,0.052247,0.072492,-0.043038,0.006855,-0.056841,-0.010953,0.055506,-0.017477,0.029037,-0.037909,-0.001086,-0.044231,-0.003493,-0.063695,-0.006788,0.06882,-0.018614,-0.101276,-0.021311,-0.061706,-0.016705,0.052838,-0.01067,0.059979,-0.003243,0.03215,-0.055243,0.00833,0.024824,-0.04392,-0.004334,0.044561,0.012299,0.046985,-0.057959,-0.038646,-0.087061,0.098138,0.014209,0.034037,-0.035806,-0.045216,-0.002774,0.017666,-0.017615,-0.011544,-0.072051,-0.074448,-0.055999,0.015431,-0.011382,0.042022,0.12154,0.075654,0.035981,-0.009845,0.030527,0.040518,-0.028157,-0.018301,-0.004102,0.047635,-0.000448,0.119162,-0.00647,-0.0298,-0.066694,0.025267,0.08147,-0.11513,-0.022441,-0.036249,0.05175,0.020745,-0.100363,-1.302331e-32,0.007391,-0.040184,0.038219,0.119239,0.096248,-0.029062,-0.056357,-0.018887,0.106817,-0.030114,0.084362,-0.024917,0.044392,-0.043688,-0.000576,0.037414,0.049277,0.008733,-0.019924,0.014701,-0.010245,0.082229,-0.009287,0.009448,0.013367,0.085483,0.051233,-0.02604,0.040134,0.073486,0.083384,-0.076605,-0.013766,-0.015062,-0.002707,-0.009877,0.016574,-0.064747,-0.068736,0.032391,0.065733,-0.119754,0.024826,0.074393,0.069487,-0.040001,-0.077218,-0.031246,0.003175,0.03766,0.067092,0.000587,-0.070662,-0.021764,0.091225,0.002126,0.068074,-0.089177,-0.003214,-0.011295,-0.01121,0.020086,0.006661,0.006521,0.009037,-0.001602,-0.010748,0.001599,-0.059548,0.008646,0.032124,0.069729,-0.117722,-0.04393,0.033536,0.035947,-0.022391,-0.066391,-0.053146,0.0804,-0.038722,-0.087736,-0.058192,0.025875,-0.000711,0.096642,-0.031124,0.065204,-0.002268,0.003724,-0.020794,-0.008226,-0.045851,0.057728,-0.03746,-3.765638e-08,0.020469,-0.047646,0.030597,0.024313,-0.011474,-0.038549,-0.038237,0.015017,0.032171,-0.03954,0.032481,0.124005,0.020863,0.07333,-0.024424,-0.000409,-0.010339,0.045334,-0.019715,-0.074031,-0.003412,0.014205,0.05206,-0.062263,0.043743,-0.063645,0.067646,0.016266,0.028358,0.010996,0.044007,-0.054451,0.076538,0.037829,-0.024158,-0.000976,0.065173,-0.035253,-0.075169,0.05988,0.001781,-0.039914,0.011279,-0.024552,-0.079646,-0.108989,0.010308,-0.006035,0.028617,-0.106053,-0.037662,0.009789,-0.000889,0.001835,0.019179,-0.004954,0.045433,0.00124,0.070354,-0.019198,-0.011166,-0.054289,0.06416,0.062675
3,AGWDYYVVWM3DC3CASUZKXK67G6IA,5.0,3,2,0,2005,4.0,5.0,1,15,All Beauty,Burt's Bees,burt bee marshmallow cream oz burt bee marshma...,5.0,-0.041619,0.033422,0.034966,0.053175,0.006273,0.036283,0.168141,-0.004048,0.040195,-0.06819,-0.014379,-0.103234,-0.024612,-0.003218,0.025046,0.074487,0.085506,0.007298,0.041811,-0.030919,0.094727,0.061391,0.053366,0.037182,-0.057703,0.109932,-0.044511,-0.046651,-0.035115,-0.026908,-0.004776,0.015368,0.049015,-0.087193,0.009593,-0.143599,0.018595,-0.022479,0.097711,0.069048,-0.014929,0.018838,-0.032903,-0.004032,-0.060656,-0.012385,0.039073,0.106798,0.015753,0.038851,-0.027762,-0.025948,-0.055998,-0.059938,0.038153,-0.051628,-0.029998,-0.003397,-0.051159,0.02579,-0.005016,-0.015099,0.027314,0.032414,0.001654,-0.018614,-0.056339,-0.030323,-0.002982,-0.033138,-0.086203,0.003989,0.041736,0.046425,0.033684,0.025491,0.081455,-0.01091,0.017895,0.04594,-0.04192,-0.048615,-0.01685,0.021514,0.034046,0.014899,0.017156,-0.044151,-0.062426,-0.044939,-0.042344,-0.05249,0.011528,0.000544,0.05334,-0.03033,-0.001575,-0.056311,-0.021006,0.128567,-0.003023,-0.046426,-0.011452,-0.066237,0.028228,0.010707,-0.076267,-0.009675,0.07238,0.071126,0.012749,-0.08062,-0.007349,0.032005,-0.028531,-0.045922,-0.026966,-0.009628,-0.03171,-0.01992,-0.041311,-0.03038,0.029098,0.027126,-0.16461,0.03161,0.056631,8.976863e-33,-0.047724,-0.023956,0.050375,-0.070748,0.087149,0.036448,-0.051588,0.047857,0.03707,0.01905,-0.027196,-0.031978,-0.033518,0.049851,-0.02093,-0.016566,-0.034514,-0.001197,-0.041537,-0.064412,-0.072898,0.093509,-0.053511,0.080723,-0.089367,0.048925,-0.002401,-0.058343,0.043327,0.04247,0.061766,-0.058367,0.02733,-0.055684,-0.085032,-0.09198,-0.011726,-0.051535,0.005222,-0.079566,0.03798,0.034069,0.012883,0.104098,-0.057803,0.032699,0.003326,0.073764,0.070654,-0.016908,-0.021269,0.011574,-0.011587,0.014755,-0.007818,-0.07347,-0.012244,0.037911,-0.018179,0.017845,-0.014338,0.022809,0.053757,-0.027907,-0.051064,-0.024855,0.005752,-0.034545,0.002562,-0.06211,-0.009049,0.033479,0.10181,-0.026792,-0.022813,-0.024219,0.151065,0.012344,-0.044449,-0.009495,-0.040062,0.032116,0.099566,0.018618,-0.064685,-0.053971,-0.072051,-0.052067,0.065208,0.032252,-0.140162,0.011928,-0.021896,0.043633,-0.086418,-8.028153e-33,0.025501,-0.037626,0.073033,0.023572,0.053012,-0.009518,-0.0009,0.040408,0.037349,-0.036234,-0.044152,-0.081057,0.099022,-0.004804,-0.017411,0.075297,-0.003287,0.062895,0.039312,0.052961,-0.049559,0.015631,-0.012066,0.016737,-0.055283,0.077352,0.072184,-0.076966,-0.023706,0.008358,0.082743,-0.056234,0.029324,-0.02509,-0.032607,0.041181,0.086479,-0.028731,-0.126566,0.069871,-0.016143,-0.059003,-0.030831,-0.008014,0.0518,-0.003757,-0.051667,-0.040285,-0.036589,0.042554,-0.014773,0.010094,-0.018745,0.019267,-0.052855,0.009376,0.054065,-0.017442,-0.007405,0.041753,0.004203,-0.036168,0.041089,-0.025134,0.027202,-0.00332,-0.004161,-0.002382,-0.040808,-0.021164,0.045569,0.012434,-0.028367,0.066224,0.09003,0.030052,-0.013001,-0.123732,-0.053929,-0.001722,-0.07783,0.030218,-0.017626,0.094826,-0.050817,0.05914,0.040747,-0.016159,-0.038461,0.091919,-0.022169,0.049063,-0.003204,-0.026379,0.065538,-2.717868e-08,0.041985,0.02751,0.022105,0.020417,0.054395,0.038776,-0.0229,-0.0055,-0.049066,-0.036443,-0.039568,-0.011637,-0.037958,0.017315,0.043659,-0.065498,-0.029373,-0.019755,0.015231,0.0124,-0.08296,0.070852,0.149861,-0.099472,-0.012311,-0.076425,0.073925,0.035608,-0.017283,0.056347,0.003962,0.096618,-0.079368,0.024491,-0.064881,-0.080211,-0.021465,-0.028824,-0.100636,0.056855,-0.004805,-0.011747,0.044322,-0.019214,0.044475,-0.082627,0.055245,0.057259,0.006468,-0.016708,-0.041179,0.054393,-0.022286,-0.004523,-0.017271,-0.046717,-0.062927,-0.016951,0.010836,0.028967,0.028294,-0.065174,0.088752,-0.063074
4,AGZZXSMMS4WRHHJRBUJZI4FZDHKQ,5.0,1,5,1,2005,4.0,5.0,1,154,All Beauty,Norelco,norelco xl norelco xl hightech cordcordless sh...,5.0,-0.007873,0.085747,0.079038,-0.077966,-0.036226,-0.028204,-0.030081,-0.047195,0.020267,-0.029837,0.102249,-0.070273,-0.025504,0.012199,-0.013949,-0.045892,0.014708,0.10599,-0.044376,-0.005595,0.074836,0.054547,-0.005004,-0.029091,-0.066644,-0.025364,0.009577,0.011334,-0.008653,-0.106351,0.089492,0.00284,0.036319,-0.05132,0.032008,-0.049219,0.056215,-0.011401,-0.001374,0.055893,-0.02496,-0.041497,-0.053812,-0.008113,0.006356,-0.017824,-0.052028,-0.03608,-0.069145,0.066654,-0.00475,-0.112307,0.035581,-0.012674,0.040071,0.050308,-0.053256,0.002896,-0.012844,0.004779,0.004775,-0.069209,0.004486,0.081242,-0.038673,0.067506,-0.01153,-0.084635,-0.041974,-0.023936,-0.132243,0.00148,-0.051973,0.028645,0.020383,0.00165,0.017516,0.071506,0.029496,-0.00669,-0.058708,-0.02086,-0.01429,0.09579,0.067977,-0.046593,-0.010516,0.013087,0.007385,-0.032492,0.003601,-0.039011,-0.038757,-0.036449,-0.075451,2e-05,-0.036869,0.142195,0.007643,0.016953,0.004909,-0.094332,-0.02885,-0.02038,-0.022389,0.004301,-0.067268,-0.008946,0.073835,0.005413,-0.000754,-0.041956,-0.097222,-0.040403,0.018701,-0.025239,0.023933,-0.022446,0.10587,-0.06847,0.019791,-0.04961,-0.0104,-0.027876,-0.050531,0.050577,0.023348,1.606204e-32,0.037394,0.043898,-0.013961,0.019544,-0.049991,0.038731,0.002656,0.014828,-0.010039,-0.055977,-0.001381,0.083965,-0.072793,0.073602,0.00828,-0.036994,0.052137,0.003933,-0.01557,-0.05726,-0.00042,0.085151,-0.014747,0.041651,0.090462,0.055205,0.012217,-0.098906,-0.016002,-0.004175,0.05796,-0.022675,0.034858,-0.031469,-0.042395,0.100574,0.020394,-0.039501,0.032212,-0.052131,-0.030327,0.022477,0.003389,-0.084792,-0.068332,-0.023384,-0.035687,0.105921,0.006454,0.093973,-0.064366,0.020235,0.048407,0.031186,-0.019618,-0.02183,-0.049967,0.024343,-0.06582,0.057007,0.0442,-0.020627,-0.024511,0.000317,0.058898,0.008259,0.113867,0.008943,0.060367,0.006985,-0.113365,0.125399,-0.024384,0.017943,-0.045251,0.012411,-0.08922,0.016533,0.008318,0.004416,0.01747,0.116646,0.048222,-0.030091,0.038331,-0.085566,0.083018,0.056113,-0.033085,-0.019212,0.055047,0.004597,-0.061625,0.056306,0.013676,-1.406202e-32,0.100685,-0.045126,-0.002443,0.087548,0.035048,0.05314,0.039781,-0.005008,-0.025948,-0.061375,0.035747,-0.030182,0.008687,-0.015568,0.027861,-0.022209,-0.075641,-0.06495,-0.051082,-0.051277,0.010712,0.035906,0.022495,0.002154,-0.082375,0.042009,-0.006401,-0.038205,-0.043029,0.061288,-0.045033,0.022779,-0.010471,0.114239,-0.019901,0.024107,-0.022307,0.092361,0.042736,0.002493,-0.024458,-0.014353,-0.013142,0.026174,-0.06673,-0.057686,-0.030189,-0.059577,-0.032921,0.068877,-0.081126,0.019753,-0.028021,0.092966,-0.059073,0.04859,-0.040395,-0.152875,-0.008632,-0.029737,-0.031905,0.024221,-0.071408,0.051792,-0.023349,-0.024485,-0.010114,-0.069862,0.01305,-0.04509,-0.069715,-0.02373,0.025218,-0.142731,-0.040555,-0.057841,-0.009694,-0.054208,-0.037678,0.036821,-0.033398,-0.021627,0.025353,0.087452,0.026422,0.022148,-0.008799,0.056296,-0.008643,0.004303,-0.012619,0.06486,-0.076109,0.103686,-0.043997,-4.142217e-08,-0.056249,0.04751,0.021762,0.01678,0.058136,0.034887,0.019782,0.034602,-0.055867,0.082683,0.133739,0.009945,-0.046278,-0.043201,0.003177,-0.013202,0.034103,0.106703,-0.015737,-0.080019,-0.058111,-0.004831,0.023565,0.023743,0.033857,-0.019684,-0.041388,0.128366,0.068452,0.057828,0.013872,0.004385,0.041692,-0.070591,0.018438,0.009708,-0.025061,0.028905,0.090401,0.021147,-0.023528,-0.037556,-0.041803,0.023611,0.0033,-0.087895,0.027701,-0.074206,-0.033942,0.088414,0.007964,0.001171,0.006071,0.035497,-0.075112,0.056553,0.024862,0.020386,0.023547,0.05883,0.060614,-0.098422,0.055729,-0.033769


In [42]:
train_df = train_df.drop(text_features, axis=1)
test_df = test_df.drop(text_features, axis=1)

# Train Model

In [43]:
features = user_review_features + product_features + categoricals + embeding_columns

In [44]:
y_train = train_df[target]
X_train = train_df.drop([target], axis=1)

y_test = test_df[target]
X_test = test_df.drop([target], axis=1)

In [49]:
categorical_indices = [X_train[features].columns.get_loc(col) for col in categoricals]
categorical_indices

[9, 10]

In [50]:
train_df = train_df.sort_values(by='user_id')
test_df = test_df.sort_values(by='user_id')

In [51]:
train_pool = Pool(
    data=train_df[features],
    label=train_df[target],
    group_id=train_df['user_id'].tolist(), 
    cat_features=categorical_indices
)

test_pool = Pool(
    data=test_df[features],
    label=test_df[target],
    group_id=test_df['user_id'].tolist(),
    cat_features=categorical_indices 
)


In [52]:
model = CatBoostRanker(
    loss_function='PairLogit',
    iterations=1000,
    #learning_rate=0.1,
    #depth=6,
    cat_features=categorical_indices
)

model.fit(train_pool,
        eval_set=test_pool,
        metric_period=30,
        use_best_model=True 
          
         )

Groupwise loss function. OneHotMaxSize set to 10
0:	learn: 0.6668149	test: 0.6684777	best: 0.6684777 (0)	total: 111ms	remaining: 1m 51s
30:	learn: 0.2929121	test: 0.3139320	best: 0.3139320 (30)	total: 1.71s	remaining: 53.6s
60:	learn: 0.2404357	test: 0.2676233	best: 0.2676233 (60)	total: 3.3s	remaining: 50.9s
90:	learn: 0.2165997	test: 0.2484674	best: 0.2484674 (90)	total: 4.93s	remaining: 49.3s
120:	learn: 0.2045301	test: 0.2432725	best: 0.2432725 (120)	total: 6.55s	remaining: 47.6s
150:	learn: 0.1925546	test: 0.2336399	best: 0.2336399 (150)	total: 8.22s	remaining: 46.2s
180:	learn: 0.1838758	test: 0.2286192	best: 0.2286192 (180)	total: 9.84s	remaining: 44.5s
210:	learn: 0.1768618	test: 0.2265012	best: 0.2265012 (210)	total: 11.4s	remaining: 42.6s
240:	learn: 0.1713312	test: 0.2248301	best: 0.2248301 (240)	total: 13s	remaining: 40.8s
270:	learn: 0.1664619	test: 0.2237424	best: 0.2237424 (270)	total: 14.5s	remaining: 39s
300:	learn: 0.1584116	test: 0.2185686	best: 0.2185686 (300)	total

<catboost.core.CatBoostRanker at 0x7d713ccaacf0>

In [53]:
feature_importances = model.get_feature_importance(train_pool)
for score, name in sorted(zip(feature_importances, features), reverse=True):
    print('{}: {}'.format(name, score))

rolling_avg_rating_product: 0.34717154502987063
rolling_avg_rating_user: 0.023617360333630072
rolling_review_count_product: 0.007801716286095425
rolling_review_count_user: 0.004556889765504224
product_embeddings_2: 0.0004390898369351359
product_embeddings_41: 0.0004277715162201434
helpful_vote: 0.00041590934700291154
product_embeddings_265: 0.0003968256721671587
product_embeddings_13: 0.0003601350723804464
product_embeddings_211: 0.0003526371483202895
product_embeddings_322: 0.00034305957363919143
product_embeddings_182: 0.00033366277523071836
product_embeddings_254: 0.0003167962340359376
product_embeddings_6: 0.000308839003129259
product_embeddings_325: 0.00030693182414166104
product_embeddings_109: 0.000303290930065786
product_embeddings_38: 0.00029821406633600417
product_embeddings_190: 0.0002963807297866744
product_embeddings_331: 0.00029384633144120126
product_embeddings_314: 0.000290796135021823
product_embeddings_199: 0.0002845296114136406
product_embeddings_244: 0.0002747570134

# Evaluate

In [54]:
test_df["predicted_score"] = model.predict(test_pool)

In [55]:
ndcg_scores = []
num_users = 0
for user_id in test_df['user_id'].unique():
    true_relevance = test_df[test_df['user_id'] == user_id][target].tolist()
    predicted_scores = test_df[test_df['user_id'] == user_id]['predicted_score'].tolist()
    if len(predicted_scores) > 1:
        user_ndcg = ndcg_score([true_relevance], [predicted_scores], k=10)
        ndcg_scores.append(user_ndcg)
        num_users += 1

average_ndcg = np.mean(ndcg_scores)
print(f"Average NDCG@10 across all users with mora than 1 rating ({num_users}, {num_users/test_df.user_id.unique().shape[0]}% of the test set): {average_ndcg:.4f}")


Average NDCG@10 across all users with mora than 1 rating (521, 0.6955941255006676% of the test set): 0.9961


In [56]:
def precision_recall_at_k(group, k=10):
    # sort by predicted_score descending
    group_sorted = group.sort_values("predicted_score", ascending=False)
    
    top_k = group_sorted.head(k)
    
    # number of relevant items in the top K
    relevant_in_top_k = top_k["relevant"].sum()
    
    # total relevant items for this user
    total_relevant = group["relevant"].sum()
    
    precision_k = relevant_in_top_k / k
    recall_k = relevant_in_top_k / total_relevant if total_relevant > 0 else 0.0
    
    return pd.Series({
        "precision@{}".format(k): precision_k,
        "recall@{}".format(k): recall_k
    })

def compute_precision_recall_at_k(df, user_col="user_id", k=10):
    """ 
    Compute mean precision@K and recall@K across all users. 
    """
    metrics_df = (
        df
        .groupby(user_col)
        [df.columns]
        .apply(lambda g: precision_recall_at_k(g, k))
    )

    return metrics_df.mean().to_dict()



In [57]:
test_df["relevant"] = (test_df["rating_number"] >= 4).astype(int)

test_df_sorted = (
    test_df
    .groupby("user_id", group_keys=False)
    [['user_id', 'relevant', 'predicted_score']] 
    .apply(lambda df: df.sort_values("predicted_score", ascending=False))
)

metrics_k10 = compute_precision_recall_at_k(test_df_sorted, user_col="user_id", k=10)
print(metrics_k10)

{'precision@10': 0.35193591455273626, 'recall@10': 0.950020205227441}
