# Negative sampling

More label data can tremendously help model to learn. In this notebook we would prepare negative samples for each user based on the unseen items. The unseen items are sampled based on how frequent they appear in the training dataset. This is an intentional choice to make the negative sample harder, hence potentially more useful. This would also force the model learn relevant patterns about user behaviors rather than biased by popularity.

# Set up

In [1]:
import os
import sys
from datetime import timedelta

import pandas as pd
from feast import FeatureStore
from loguru import logger
from pydantic import BaseModel

sys.path.insert(0, "..")
from src.id_mapper import IDMapper
from cfg.run_cfg import RunCfg
from src.ranker.negative_sampling import generate_negative_samples

# Controller

In [2]:
class Args(BaseModel):
    testing: bool = False
    run_name: str = "000-sequence-modeling"
    notebook_persist_dp: str = None
    random_seed: int = 41

    rc: RunCfg = RunCfg().init()

    user_col: str = "user_id"
    item_col: str = "parent_asin"
    rating_col: str = "rating"
    timestamp_col: str = "timestamp"

    neg_to_pos_ratio: int = 1

    def init(self):
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        os.makedirs(self.notebook_persist_dp, exist_ok=True)

        return self


args = Args().init()

print(args.model_dump_json(indent=2))

[32m2025-03-03 23:46:42.709[0m | [34m[1mDEBUG   [0m | [36mcfg.run_cfg[0m:[36minit[0m:[36m36[0m - [34m[1mSetting use_sbert_features=True requires running notebook 016-sentence-transformers[0m
[32m2025-03-03 23:46:42.709[0m | [34m[1mDEBUG   [0m | [36mcfg.run_cfg[0m:[36minit[0m:[36m40[0m - [34m[1mSetting use_item_tags_from_llm=True requires running notebook 040-retrieve-item-tags-from-llm[0m
[32m2025-03-03 23:46:42.710[0m | [34m[1mDEBUG   [0m | [36mcfg.run_cfg[0m:[36minit[0m:[36m43[0m - [34m[1mChanging use_item_tags_from_llm requires re-running notebook 002-features-v2 to get the new item_metadata_pipeline.dill file[0m


{
  "testing": false,
  "run_name": "000-sequence-modeling",
  "notebook_persist_dp": "/home/dvq/frostmourne/recsys-mvp/notebooks/data/000-sequence-modeling",
  "random_seed": 41,
  "rc": {
    "use_sbert_features": true,
    "use_item_tags_from_llm": true,
    "item_feature_cols": [
      "main_category",
      "categories",
      "price",
      "parent_asin_rating_cnt_365d",
      "parent_asin_rating_avg_prev_rating_365d",
      "parent_asin_rating_cnt_90d",
      "parent_asin_rating_avg_prev_rating_90d",
      "parent_asin_rating_cnt_30d",
      "parent_asin_rating_avg_prev_rating_30d",
      "parent_asin_rating_cnt_7d",
      "parent_asin_rating_avg_prev_rating_7d",
      "tags"
    ],
    "item_tags_from_llm_fp": "../data/item_tags_from_llm.parquet"
  },
  "user_col": "user_id",
  "item_col": "parent_asin",
  "rating_col": "rating",
  "timestamp_col": "timestamp",
  "neg_to_pos_ratio": 1
}


# Test implementation

In [3]:
# Sample input: List of (user_id, item_id) interactions
interactions = [
    (1, 101, 1, 1),
    (1, 102, 2, 2),
    (1, 103, 3, 4),
    (2, 101, 4, 1),
    (2, 104, 5, 2),
    (3, 105, 1, 1),
    (3, 106, 2, 5),
    # Add more interactions as needed
]

# Convert the list to a DataFrame for easier manipulation
df = pd.DataFrame(
    interactions,
    columns=["user_indice", "item_indice", args.rating_col, args.timestamp_col],
)

In [4]:
df

Unnamed: 0,user_indice,item_indice,rating,timestamp
0,1,101,1,1
1,1,102,2,2
2,1,103,3,4
3,2,101,4,1
4,2,104,5,2
5,3,105,1,1
6,3,106,2,5


In [5]:
neg_df = generate_negative_samples(df, neg_to_pos_ratio=5)

  0%|          | 0/7 [00:00<?, ?it/s]

In [6]:
neg_df

Unnamed: 0,user_indice,item_indice,rating,timestamp
0,1,106,0,1
0,1,104,0,1
0,1,105,0,1
1,1,104,0,2
1,1,105,0,2
1,1,106,0,2
2,1,106,0,4
2,1,105,0,4
2,1,104,0,4
3,2,105,0,1


# Load data

In [7]:
train_df = pd.read_parquet("../data/train_features.parquet")
val_df = pd.read_parquet("../data/val_features.parquet")
idm = IDMapper().load("../data/idm.json")

In [8]:
assert val_df[args.timestamp_col].min() > train_df[args.timestamp_col].max()
val_timestamp = train_df[args.timestamp_col].max() + timedelta(seconds=1)
logger.info(f"{val_timestamp=}")

[32m2025-03-03 23:46:43.673[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1mval_timestamp=Timestamp('2021-08-10 21:28:25.764000')[0m


In [9]:
full_df = pd.concat([train_df, val_df], axis=0)
full_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,timestamp_unix,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,...,user_rating_list_10_recent_asin_timestamp,item_sequence,item_sequence_ts,item_sequence_ts_bucket,tags,main_category,title,description,categories,price
0,AE225O22SA7DLBOGOEIFL7FT5VYQ,B0006B7DXA,5.0,2004-09-14 03:38:36.000,1095133116,1,5.00,1,5.0,0,...,,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[MMORPG, Fantasy RPG, Classic Game, Digital Ex...",Video Games,World of Warcraft Battle Chest - (Obsolete),[Experience the World of Warcraft! World of Wa...,"[Video Games, PC, Games]",
1,AE225O22SA7DLBOGOEIFL7FT5VYQ,B001LETH2Q,5.0,2004-10-13 23:01:27.000,1097708487,0,,0,,0,...,1095133116,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1095133116]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 4]","[MMORPG, Collector's Edition, Fantasy, Digital...",Video Games,World of Warcraft Collector's Edition,"[From the Manufacturer, We're very excited to ...","[Video Games, PC, Games]",999.99
2,AE225O22SA7DLBOGOEIFL7FT5VYQ,B0009XEC02,5.0,2005-08-26 21:05:52.000,1125090352,0,,0,,0,...,10951331161097708487,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, 1095133116, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 5, 5]","[Tactical, Turn-Based Strategy, Single Player,...",Video Games,Advance Wars: Dual Strike - Nintendo DS,[Advance Wars: Dual Strike is the latest in th...,"[Video Games, Legacy Systems, Nintendo Systems...",84.99
3,AE225O22SA7DLBOGOEIFL7FT5VYQ,B000NNDN1M,5.0,2007-04-13 21:47:03.000,1176500823,2,4.00,2,4.0,2,...,109513311610977084871125090352,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 352...","[-1, -1, -1, -1, -1, -1, -1, 1095133116, 10977...","[-1, -1, -1, -1, -1, -1, -1, 6, 6, 6]","[Platformer, Paper Craft, Adventure, RPG Eleme...",Video Games,Super Paper Mario,[The newest chapter of the Paper Mario story i...,"[Video Games, Legacy Systems, Nintendo Systems...",49.99
4,AE225O22SA7DLBOGOEIFL7FT5VYQ,B00136MBHA,5.0,2008-08-13 18:33:22.000,1218652402,1,5.00,0,,0,...,1095133116109770848711250903521176500823,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 352.0, 11...","[-1, -1, -1, -1, -1, -1, 1095133116, 109770848...","[-1, -1, -1, -1, -1, -1, 7, 7, 6, 6]","[Role-Playing Game, Action RPG, Stylish, Uniqu...",Video Games,The World Ends With You,"[Product description, Welcome to Shibuya. Wake...","[Video Games, Legacy Systems, Nintendo Systems]",99.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
957,AHZKTZHKO3Z6UYWEYMH4YL52K3LA,B0716CXJ1R,5.0,2021-11-14 04:54:45.568,1636865685,2,5.00,0,,0,...,"1605925289,1605926867,1605927826,1605928012,16...","[3670, 3945, 2970, 3537, 4407, 4453, 4002, 433...","[1605925289, 1605926867, 1605927826, 160592801...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 5]","[Action-Adventure, Collector's Edition, Darksi...",Video Games,Darksiders III - Collector's Edition - Xbox One,[],"[Video Games, Xbox One, Games]",149.99
958,AHZKTZHKO3Z6UYWEYMH4YL52K3LA,B07SM7G9CN,5.0,2021-11-14 04:55:34.529,1636865734,5,4.80,0,,0,...,"1605926867,1605927826,1605928012,1605928551,16...","[3945, 2970, 3537, 4407, 4453, 4002, 4337, 400...","[1605926867, 1605927826, 1605928012, 160592855...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 0]","[Platformer, Adventure, Family Friendly, Co-op...",Video Games,Donkey Kong Country: Tropical Freeze - Nintend...,[Barrel-blast into a critically acclaimed Donk...,"[Video Games, Nintendo Switch, Games]",52.49
959,AHZKTZHKO3Z6UYWEYMH4YL52K3LA,B081W1VBKN,5.0,2022-02-05 09:31:46.803,1644053506,3,4.00,0,,0,...,"1605927826,1605928012,1605928551,1619326697,16...","[2970, 3537, 4407, 4453, 4002, 4337, 4006, 357...","[1605927826, 1605928012, 1605928551, 161932669...","[6, 6, 6, 5, 5, 5, 5, 5, 5, 5]","[Action-Adventure, RPG Elements, Fantasy, Sing...",Video Games,Darksiders 2: Deathinitive Edition - Xbox One ...,"[What starts with War, ends in Death. Awakened...","[Video Games, Xbox One, Games]",14.99
960,AHZLVBGFP4FNOJGC33CZQSHUQXWA,B07H53PZY8,4.0,2021-10-17 23:51:36.799,1634514696,1,5.00,0,,0,...,"1424711004,1424711297,1424711461,1443068823,14...","[983, 703, 616, 3028, 660, 3599, 4534, 2804, 3...","[1424711004, 1424711297, 1424711461, 144306882...","[8, 8, 8, 8, 7, 7, 7, 7, 6, 6]","[Off-Road Simulator, Adventure, Open World, Pl...",Video Games,Mudrunner - American Wilds Edition - PlayStati...,[Mud Runner - American Wilds is the ultimate v...,"[Video Games, PlayStation 4, Games]",23.98


# Generate negative samples

In [10]:
full_features_df = full_df

In [11]:
meta_features = ["main_category", "title", "description", "categories", "price"]

if args.rc.use_item_tags_from_llm:
    meta_features.append("tags")

item_timestamp_features = [
    "parent_asin_rating_cnt_365d",
    "parent_asin_rating_avg_prev_rating_365d",
    "parent_asin_rating_cnt_90d",
    "parent_asin_rating_avg_prev_rating_90d",
    "parent_asin_rating_cnt_30d",
    "parent_asin_rating_avg_prev_rating_30d",
    "parent_asin_rating_cnt_7d",
    "parent_asin_rating_avg_prev_rating_7d",
]

item_features_df = full_features_df.drop_duplicates(subset=[args.item_col])[
    [args.item_col, "item_indice", *meta_features]
]

In [12]:
features = [
    "item_sequence",
    "user_rating_list_10_recent_asin_timestamp",
    "item_sequence_ts",
    "item_sequence_ts_bucket",
    "user_id",
    "user_rating_cnt_90d",
    "user_rating_avg_prev_rating_90d",
    "user_rating_list_10_recent_asin",
]

neg_df = generate_negative_samples(
    full_features_df,
    "user_indice",
    "item_indice",
    args.rating_col,
    neg_label=0,
    neg_to_pos_ratio=args.neg_to_pos_ratio,
    seed=args.random_seed,
    features=features,
)

neg_df = neg_df.pipe(
    lambda df: pd.merge(
        df, item_features_df, how="left", on="item_indice", validate="m:1"
    )
)

  0%|          | 0/165260 [00:00<?, ?it/s]

# Get item timestamp feature  for negative samples
The newly generated negative samples would have the new (timestamp, item_indice). To be able to get the item timestamp features like item popularity we need to call feature store

In [13]:
neg_df

Unnamed: 0,user_indice,item_indice,rating,timestamp,item_sequence,user_rating_list_10_recent_asin_timestamp,item_sequence_ts,item_sequence_ts_bucket,user_id,user_rating_cnt_90d,user_rating_avg_prev_rating_90d,user_rating_list_10_recent_asin,parent_asin,main_category,title,description,categories,price,tags
0,0,1444,0,2004-09-14 03:38:36.000,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....",,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",AE225O22SA7DLBOGOEIFL7FT5VYQ,1,,,B003A71XKQ,Video Games,KMD Xbox 360 Live Gaming Headset with Mic,[Take your Xbox Live experience to the next le...,[],6.99,"[Headset, Accessory, Communication, Xbox 360, ..."
1,0,374,0,2004-10-13 23:01:27.000,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....",1095133116,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1095133116]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 4]",AE225O22SA7DLBOGOEIFL7FT5VYQ,2,5.0,B0006B7DXA,B0009A4EV2,Video Games,Dragon Quest VIII: Journey of the Cursed King,"[From the Manufacturer, Dragon Quest VIII: Jou...","[Video Games, Legacy Systems, PlayStation Syst...",135.56,"[RPG, Turn-Based Combat, Fantasy, Adventure, L..."
2,0,3074,0,2005-08-26 21:05:52.000,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....",10951331161097708487,"[-1, -1, -1, -1, -1, -1, -1, -1, 1095133116, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 5, 5]",AE225O22SA7DLBOGOEIFL7FT5VYQ,1,,"B0006B7DXA,B001LETH2Q",B00ZM5OXD8,Video Games,Dishonored 2 - PlayStation 4,[Reprise your role as a supernatural assassin ...,"[Video Games, PlayStation 4, Games]",7.5,"[Stealth Action, Narrative-Driven, Supernatura..."
3,0,348,0,2007-04-13 21:47:03.000,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 352...",109513311610977084871125090352,"[-1, -1, -1, -1, -1, -1, -1, 1095133116, 10977...","[-1, -1, -1, -1, -1, -1, -1, 6, 6, 6]",AE225O22SA7DLBOGOEIFL7FT5VYQ,1,,"B0006B7DXA,B001LETH2Q,B0009XEC02",B000641ZC2,Video Games,Mario Power Tennis,[Mario Power Tennis brings the heroes and vill...,"[Video Games, Legacy Systems, Nintendo Systems...",57.72,"[Sports, Tennis, Multiplayer, Family-Friendly,..."
4,0,776,0,2008-08-13 18:33:22.000,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 352.0, 11...",1095133116109770848711250903521176500823,"[-1, -1, -1, -1, -1, -1, 1095133116, 109770848...","[-1, -1, -1, -1, -1, -1, 7, 7, 6, 6]",AE225O22SA7DLBOGOEIFL7FT5VYQ,1,,"B0006B7DXA,B001LETH2Q,B0009XEC02,B000NNDN1M",B001ELJE5G,Video Games,Guitar Hero III: Legends of Rock - Xbox 360,"[Product description, Product InformationThe t...","[Video Games, Legacy Systems, Xbox Systems, Xb...",62.99,"[Music Rhythm, Legacy Title, Multiplayer, Xbox..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165255,19494,507,0,2021-11-14 04:54:45.568,"[3670, 3945, 2970, 3537, 4407, 4453, 4002, 433...","1605925289,1605926867,1605927826,1605928012,16...","[1605925289, 1605926867, 1605927826, 160592801...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 5]",AHZKTZHKO3Z6UYWEYMH4YL52K3LA,1,,"B071YZVS75,B07DML9W41,B00V5V3E38,B01N6QKT7H,B0...",B000JVM256,Video Games,The Legend of Zelda: Majora's Mask - Collector...,[Game cartridge for the Nintendo 64 video game...,"[Video Games, Legacy Systems, Nintendo Systems...",120.0,"[Adventure, Classic, Fantasy, Collectible, Sin..."
165256,19494,2311,0,2021-11-14 04:55:34.529,"[3945, 2970, 3537, 4407, 4453, 4002, 4337, 400...","1605926867,1605927826,1605928012,1605928551,16...","[1605926867, 1605927826, 1605928012, 160592855...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 0]",AHZKTZHKO3Z6UYWEYMH4YL52K3LA,2,5.0,"B07DML9W41,B00V5V3E38,B01N6QKT7H,B08MBQ51KG,B0...",B00B3PDHBU,Video Games,Turtle Beach Ear Force XP510 BS-2290-01 5.1 Wi...,"[For the ultimate in gaming immersion, you nee...","[Video Games, Legacy Systems, PlayStation Syst...",,"[Gaming Headset, Wireless, Surround Sound, Com..."
165257,19494,2456,0,2022-02-05 09:31:46.803,"[2970, 3537, 4407, 4453, 4002, 4337, 4006, 357...","1605927826,1605928012,1605928551,1619326697,16...","[1605927826, 1605928012, 1605928551, 161932669...","[6, 6, 6, 5, 5, 5, 5, 5, 5, 5]",AHZKTZHKO3Z6UYWEYMH4YL52K3LA,3,5.0,"B00V5V3E38,B01N6QKT7H,B08MBQ51KG,B094WQR3H3,B0...",B00DQNF3PU,Video Games,Zumba Fitness World Party - Xbox One,[Only the premier global fitness brand can off...,"[Video Games, Xbox One, Games]",54.9,"[Fitness, Dance Game, Multiplayer, Exercise, A..."
165258,19497,2126,0,2021-10-17 23:51:36.799,"[983, 703, 616, 3028, 660, 3599, 4534, 2804, 3...","1424711004,1424711297,1424711461,1443068823,14...","[1424711004, 1424711297, 1424711461, 144306882...","[8, 8, 8, 8, 7, 7, 7, 7, 6, 6]",AHZLVBGFP4FNOJGC33CZQSHUQXWA,1,,"B001EYUU4W,B001CU4EMW,B000VTQ3LU,B00YOGZFCO,B0...",B0088TN7NW,Video Games,Just Dance 4,"[From the Manufacturer, Throw the ultimate par...","[Video Games, Legacy Systems, Xbox Systems]",32.95,"[Party Game, Dance, Family Friendly, Motion Co..."


In [14]:
store = FeatureStore(
    repo_path="..", fs_yaml_file="../feature_store_offline_server.yaml"
)

In [15]:
%%time
ts_features = [f"parent_asin_rating_stats:{feature}" for feature in item_timestamp_features]

neg_ts_features_df = store.get_historical_features(neg_df[[args.item_col, args.timestamp_col]].drop_duplicates(), ts_features).to_df()
assert neg_ts_features_df.duplicated().sum() == 0, display(neg_ts_features_df.loc[neg_ts_features_df.duplicated()])



Using timestamp as the event timestamp. To specify a column explicitly, please name it event_timestamp.
CPU times: user 58.7 ms, sys: 5.05 ms, total: 63.7 ms
Wall time: 19.7 s


In [16]:
neg_df = pd.merge(
    neg_df, neg_ts_features_df, on=[args.item_col, args.timestamp_col], how="left"
)
neg_df

Unnamed: 0,user_indice,item_indice,rating,timestamp,item_sequence,user_rating_list_10_recent_asin_timestamp,item_sequence_ts,item_sequence_ts_bucket,user_id,user_rating_cnt_90d,...,price,tags,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,parent_asin_rating_avg_prev_rating_30d,parent_asin_rating_cnt_7d,parent_asin_rating_avg_prev_rating_7d
0,0,1444,0,2004-09-14 03:38:36.000,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....",,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",AE225O22SA7DLBOGOEIFL7FT5VYQ,1,...,6.99,"[Headset, Accessory, Communication, Xbox 360, ...",,,,,,,,
1,0,374,0,2004-10-13 23:01:27.000,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....",1095133116,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1095133116]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 4]",AE225O22SA7DLBOGOEIFL7FT5VYQ,2,...,135.56,"[RPG, Turn-Based Combat, Fantasy, Adventure, L...",,,,,,,,
2,0,3074,0,2005-08-26 21:05:52.000,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....",10951331161097708487,"[-1, -1, -1, -1, -1, -1, -1, -1, 1095133116, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 5, 5]",AE225O22SA7DLBOGOEIFL7FT5VYQ,1,...,7.5,"[Stealth Action, Narrative-Driven, Supernatura...",,,,,,,,
3,0,348,0,2007-04-13 21:47:03.000,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 352...",109513311610977084871125090352,"[-1, -1, -1, -1, -1, -1, -1, 1095133116, 10977...","[-1, -1, -1, -1, -1, -1, -1, 6, 6, 6]",AE225O22SA7DLBOGOEIFL7FT5VYQ,1,...,57.72,"[Sports, Tennis, Multiplayer, Family-Friendly,...",1.0,5.000,0.0,,0.0,,0.0,
4,0,776,0,2008-08-13 18:33:22.000,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 352.0, 11...",1095133116109770848711250903521176500823,"[-1, -1, -1, -1, -1, -1, 1095133116, 109770848...","[-1, -1, -1, -1, -1, -1, 7, 7, 6, 6]",AE225O22SA7DLBOGOEIFL7FT5VYQ,1,...,62.99,"[Music Rhythm, Legacy Title, Multiplayer, Xbox...",16.0,3.875,4.0,4.75,0.0,,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165255,19494,507,0,2021-11-14 04:54:45.568,"[3670, 3945, 2970, 3537, 4407, 4453, 4002, 433...","1605925289,1605926867,1605927826,1605928012,16...","[1605925289, 1605926867, 1605927826, 160592801...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 5]",AHZKTZHKO3Z6UYWEYMH4YL52K3LA,1,...,120.0,"[Adventure, Classic, Fantasy, Collectible, Sin...",0.0,,0.0,,0.0,,0.0,
165256,19494,2311,0,2021-11-14 04:55:34.529,"[3945, 2970, 3537, 4407, 4453, 4002, 4337, 400...","1605926867,1605927826,1605928012,1605928551,16...","[1605926867, 1605927826, 1605928012, 160592855...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 0]",AHZKTZHKO3Z6UYWEYMH4YL52K3LA,2,...,,"[Gaming Headset, Wireless, Surround Sound, Com...",0.0,,0.0,,0.0,,0.0,
165257,19494,2456,0,2022-02-05 09:31:46.803,"[2970, 3537, 4407, 4453, 4002, 4337, 4006, 357...","1605927826,1605928012,1605928551,1619326697,16...","[1605927826, 1605928012, 1605928551, 161932669...","[6, 6, 6, 5, 5, 5, 5, 5, 5, 5]",AHZKTZHKO3Z6UYWEYMH4YL52K3LA,3,...,54.9,"[Fitness, Dance Game, Multiplayer, Exercise, A...",0.0,,0.0,,0.0,,0.0,
165258,19497,2126,0,2021-10-17 23:51:36.799,"[983, 703, 616, 3028, 660, 3599, 4534, 2804, 3...","1424711004,1424711297,1424711461,1443068823,14...","[1424711004, 1424711297, 1424711461, 144306882...","[8, 8, 8, 8, 7, 7, 7, 7, 6, 6]",AHZLVBGFP4FNOJGC33CZQSHUQXWA,1,...,32.95,"[Party Game, Dance, Family Friendly, Motion Co...",1.0,5.000,1.0,5.00,0.0,,0.0,


# Concating positive data with negative samples

In [17]:
full_features_df = (
    pd.concat([full_features_df, neg_df], axis=0)
    .reset_index(drop=True)
    .sample(frac=1, replace=False, random_state=args.random_seed)
)

In [18]:
key_cols = [
    args.user_col,
    args.item_col,
    "user_indice",
    "item_indice",
    "item_sequence",
    "item_sequence_ts_bucket",
    args.rating_col,
    args.timestamp_col,
]
assert (
    full_features_df[key_cols].isna().sum().sum() == 0
), "Null values found at key colums"

In [19]:
val_timestamp

Timestamp('2021-08-10 21:28:25.764000')

# Split back train test

In [20]:
to_unix_ts = lambda s: s.astype("int64") // 10**6
train_neg_df = full_features_df.loc[lambda df: df[args.timestamp_col].lt(val_timestamp)]
val_neg_df = full_features_df.loc[lambda df: df[args.timestamp_col].ge(val_timestamp)]

In [21]:
train_neg_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,timestamp_unix,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,...,user_rating_list_10_recent_asin_timestamp,item_sequence,item_sequence_ts,item_sequence_ts_bucket,tags,main_category,title,description,categories,price
251552,AG57LGJFCNNQJ6P6ABQAVUKXDUDA,B0015AARJI,0.0,2016-01-12 11:59:11.000,,76.0,4.592105,10.0,4.3,3.0,...,1452599936,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1452599936]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 0]","[Wireless Controller, Vibration Feedback, Play...",Video Games,PlayStation 3 Dualshock 3 Wireless Controller ...,"[Amazon.com, The Dualshock 3 wireless controll...","[Video Games, Legacy Systems, PlayStation Syst...",49.99
325041,AHWG4EGOV5ZDKPETL56MAYGPLJRQ,B0BMGHMP23,0.0,2016-04-18 19:26:20.000,,,,,,,...,"1449254540,1449256005,1449257733,1452715791,14...","[3028.0, 2742.0, 2755.0, 3159.0, 3101.0, 3036....","[1449254540, 1449256005, 1449257733, 145271579...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 5]","[Gaming Mouse, Wireless Connectivity, High Pre...",Computers,Logitech G502 Lightspeed Wireless Gaming Mouse...,[G502 is the best gaming mouse from Logitech G...,"[Video Games, PC, Accessories, Gaming Mice]",87.95
293086,AH5PTZ2U74OZ3HT6QVUWM4CV6OVQ,B009AP23NI,0.0,2016-02-10 18:45:08.000,,9.0,4.666667,0.0,,0.0,...,"1443454097,1455129080,1455129186,1455129499,14...","[-1.0, -1.0, 3234.0, 2508.0, 2318.0, 2964.0, 1...","[-1, -1, 1443454097, 1455129080, 1455129186, 1...","[-1, -1, 5, 1, 1, 0, 0, 0, 0, 0]","[Controller, Wii U, Gaming Accessory, Japanese...",Video Games,Nintendo Wii U Pro U Controller (Japanese Vers...,[Wii U PRO controller (black) (WUP-A-RSKA)],"[Video Games, Legacy Systems, Nintendo Systems...",43.99
52027,AFC5XTCF5D7J3NSDITB2Z26XWWYA,B001E8WQUY,5.0,2019-05-01 21:22:39.265,1.556746e+09,0.0,,0.0,,0.0,...,"1327120514,1377289907,1402605836,1402606396,14...","[1987.0, 4569.0, 2114.0, 1606.0, 2159.0, 2279....","[1327120514, 1377289907, 1402605836, 140260639...","[8, 8, 7, 7, 7, 7, 7, 7, 6, 6]","[Rhythm Game, Music Simulation, Party Game, Mu...",Video Games,Rock Band 2 - Nintendo Wii (Game only),"[Product description, Rock Band 2 lets you and...","[Video Games, Legacy Systems, Nintendo Systems...",28.49
48913,AF7LJQOIWF3Y3YD7SGOJ34MA5JPA,B001E8WQKY,5.0,2015-01-09 12:53:25.000,1.420808e+09,16.0,4.375000,8.0,4.5,4.0,...,14208077931420807991,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, 1420807793, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 0, 0]","[Survival Horror, Action, Co-op, Zombies, Thir...",Video Games,Resident Evil 5 - Xbox 360,[],"[Video Games, Legacy Systems, Xbox Systems, Xb...",29.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250960,AG4RATLNVLOKZCPXN67HKOAK65CA,B078FBVJMB,0.0,2015-10-31 18:25:09.000,,,,,,,...,1425233294,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1425233294]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 5]","[Co-op, Action-adventure, Online Multiplayer, ...",Video Games,A Way Out – PC Origin [Online Game Code],[From the creators of Brothers - A Tale of Two...,"[Video Games, PC, Games]",5.99
217058,AFBXO3BFWBJX6QS5NW73O37IXF2A,B0771ZXXV6,0.0,2011-03-08 02:06:38.000,,,,,,,...,12995495171299549928,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, 1299549517, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 0, 0]","[Controller, Joy-Con, Wireless, Nintendo Switc...",Video Games,Nintendo Joy-Con (R) - Neon Red - Nintendo Switch,[To be determined],"[Video Games, Nintendo Switch, Accessories, Co...",
323468,AHVANA5GZNJ45UABPXWZNAF4ECBQ,B00BBF6MO6,0.0,2015-02-15 05:31:04.000,,3.0,4.666667,0.0,,0.0,...,137041433213704147071370416530,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 137...","[-1, -1, -1, -1, -1, -1, -1, 1370414332, 13704...","[-1, -1, -1, -1, -1, -1, -1, 6, 6, 6]","[Action, Hack and Slash, Stylized Graphics, Si...",Video Games,Killer is Dead - Xbox 360,[Killer Is Dead is the latest title from the d...,"[Video Games, Legacy Systems, Xbox Systems, Xb...",39.82
132003,AHAVA5VKMJ3OMOLGDZ3W45CKXEWA,B00KTORA0K,5.0,2019-05-25 04:03:51.505,1.558757e+09,3.0,4.666667,1.0,5.0,1.0,...,"1431150669,1431150834,1432041664,1432041986,15...","[-1.0, -1.0, -1.0, 1657.0, 2074.0, 593.0, 583....","[-1, -1, -1, 1431150669, 1431150834, 143204166...","[-1, -1, -1, 7, 7, 7, 7, 5, 5, 0]","[Music, Dance, Family Fun, Motion Control, Par...",Video Games,Just Dance 2015 - Wii,[With more than 50 million copies of Just Danc...,"[Video Games, Legacy Systems, Nintendo Systems...",33.0


In [22]:
val_neg_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,timestamp_unix,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,...,user_rating_list_10_recent_asin_timestamp,item_sequence,item_sequence_ts,item_sequence_ts_bucket,tags,main_category,title,description,categories,price
165059,AHAQV7A4Z2Z5NJLIPMEPPXDNIRWQ,B07VHHPJBV,2.0,2022-04-10 16:30:15.820,1.649608e+09,7.0,4.000000,0.0,,0.0,...,"1375932925,1402155992,1447214140,1582560804,15...","[-1, -1, -1, -1, -1, 2275, 1464, 3096, 2478, 3...","[-1, -1, -1, -1, -1, 1375932925, 1402155992, 1...","[-1, -1, -1, -1, -1, 8, 8, 8, 6, 6]","[Gaming Mouse, Ergonomic, RGB Lighting, High D...",Computers,"Redragon M602 Griffin RGB Gaming Mouse, RGB Sp...",[Redragon M602 (White) GRIFFIN High-Precision ...,"[Video Games, PC, Accessories, Gaming Mice]",19.99
330244,AGVAG2GSFQZUAXMRSKKSGKEHGG5A,B00XR3YBM0,0.0,2022-01-19 20:57:42.960,,1.0,1.000000,1.0,1.00,0.0,...,"1357571879,1357572295,1456940615,1456940845,15...","[-1, -1, -1, 1736, 117, 1911, 3747, 4598, 3455...","[-1, -1, -1, 1357571879, 1357572295, 145694061...","[-1, -1, -1, 8, 8, 8, 8, 6, 6, 0]","[Sports, Wrestling, Single Player, Multiplayer...",Video Games,WWE 2K16 - PlayStation 4,"[WWE 2K16, Get in the ring and Raise Some Hell...","[Video Games, PlayStation 4, Games]",63.45
164871,AGHUHJSJLGWPS3JE2FD7D5GJODWQ,B00DBDPOZ4,5.0,2022-01-16 16:27:51.164,1.642350e+09,4.0,4.750000,0.0,,0.0,...,"1119634892,1126626823,1401130039,1401130628,14...","[-1, -1, -1, 228, 4526, 593, 2, 711, 790, 862]","[-1, -1, -1, 1119634892, 1126626823, 140113003...","[-1, -1, -1, 9, 9, 8, 8, 8, 8, 8]","[Charging Accessory, Xbox One, Battery Pack, C...",Video Games,Xbox One Play and Charge Kit,[Keep the action going with the Xbox One Play ...,"[Video Games, Xbox One, Accessories]",34.99
330291,AH4TWYG3FCAEPNJXS6E7KR247YMQ,B000034DC6,0.0,2021-11-25 20:51:44.194,,1.0,5.000000,1.0,5.00,1.0,...,"1427912467,1427912522,1427912554,1467916594,14...","[-1, -1, 117, 215, 589, 404, 2849, 981, 2718, ...","[-1, -1, 1427912467, 1427912522, 1427912554, 1...","[-1, -1, 8, 8, 8, 8, 8, 8, 7, 7]","[Vehicular Combat, Action, Multiplayer, PlaySt...",Video Games,Twisted Metal 4,"[Product description, Sweet Tooth and his pump...","[Video Games, Legacy Systems, PlayStation Syst...",69.98
329619,AEBEAZUAX3HMA7EF3BA6L2DK3LPA,B008HPAXZ2,0.0,2022-06-25 22:48:11.729,,0.0,,0.0,,0.0,...,"1494507012,1542751256,1557368493,1557883885,15...","[-1, -1, -1, 3577, 4566, 3740, 4579, 4554, 400...","[-1, -1, -1, 1494507012, 1542751256, 155736849...","[-1, -1, -1, 8, 7, 7, 7, 7, 6, 4]","[Screen Protector, 3DS XL, Accessory, Display ...",Video Games,HORI Nintendo 3DS XL Screen Protective Filter,[Officially licensed by Nintendo. This is the ...,"[Video Games, Legacy Systems, Nintendo Systems...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
330199,AGOX6SXJ74WKBFGBKKZTR6PNOICA,B004Q9SO4K,0.0,2022-03-05 06:30:59.489,,0.0,,0.0,,0.0,...,"1489926789,1523225104,1523225244,1545922551,15...","[-1, -1, -1, -1, 3312, 3126, 3373, 3944, 4130,...","[-1, -1, -1, -1, 1489926789, 1523225104, 15232...","[-1, -1, -1, -1, 7, 7, 7, 7, 6, 6]","[Sports, Football, Multiplayer, Competitive, L...",Video Games,NCAA Football 12 - Xbox 360,"[Product Description, NCAA Football 12 takes t...","[Video Games, Legacy Systems, Xbox Systems, Xb...",43.56
164950,AGRGN2RA4EOW3T4GFI4H76WHUGSA,B07JK9DFKH,5.0,2022-02-12 04:30:57.128,1.644640e+09,8.0,4.000000,1.0,3.00,1.0,...,"1492581053,1533061949,1533669564,1534492667,15...","[2890, 3687, 3719, 3423, 4113, 2911, 4213, 434...","[1492581053, 1533061949, 1533669564, 153449266...","[7, 7, 7, 7, 7, 6, 6, 6, 6, 6]","[HDMI Adapter, USB-C Hub, 4K Output, Multiport...",Cell Phones & Accessories,REAKA Upgraded USB Type C to HDMI Digital AV M...,[],"[Video Games, Nintendo Switch, Accessories, Ca...",22.99
164643,AFL2OB53OGEIQCA4CMFCTQV3FJJA,B00BCX2AZW,5.0,2022-01-13 15:10:43.458,1.642087e+09,0.0,,0.0,,0.0,...,"1530532852,1614361028,1620423075,1620724361,16...","[-1, -1, -1, -1, -1, 4367, 3520, 1316, 2436, 2...","[-1, -1, -1, -1, -1, 1530532852, 1614361028, 1...","[-1, -1, -1, -1, -1, 7, 5, 5, 5, 5]","[Action RPG, Multiplayer, Hunting, Fantasy Wor...",Video Games,Monster Hunter 3 Ultimate - Nintendo Wii U,[The popular fantasy action franchise Monster ...,"[Video Games, Legacy Systems, Nintendo Systems...",54.35
330048,AG6WZATM27WOFLQY4435O3P52SWA,B08JHZHWZ3,0.0,2021-11-23 19:07:31.054,,71.0,4.084507,4.0,4.25,1.0,...,"1609280894,1609280960,1609281003,1609281100,16...","[3844, 3017, 4284, 3813, 3345, 2376, 4629, 352...","[1609280894, 1609280960, 1609281003, 160928110...","[5, 5, 5, 5, 5, 5, 5, 5, 5, 0]","[Platformer, Remastered Classics, Multiplayer ...",Video Games,"Super Mario 3D All-Stars - Nintendo Switch, 17...",[Play three of Mario’s greatest 3D platforming...,"[Video Games, Nintendo Switch, Games]",


# Checks

In [23]:
user = val_neg_df.sample(n=1)[args.user_col].values[0]
logger.info(f"Checking user {user}...")
check_df = train_neg_df.loc[lambda df: df[args.user_col].eq(user)].sort_values(
    args.timestamp_col
)
assert (
    check_df[args.rating_col].gt(0).sum() * (args.neg_to_pos_ratio + 1)
    == check_df.shape[0]
), "Unexpected number of pos and neg samples"

[32m2025-03-03 23:48:06.621[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mChecking user AHSMK75Q6UN55FEZEDBUJ4ZKYXAA...[0m


In [24]:
val_check_df = val_neg_df.loc[lambda df: df[args.user_col].eq(user)].sort_values(
    args.timestamp_col
)
item = val_check_df.loc[lambda df: df[args.rating_col].gt(0)][args.item_col].values[0]
logger.info(f"Checking item {item}...")
assert (
    train_neg_df.loc[lambda df: df[args.item_col].eq(item)].shape[0] > 5
), f"Item {item} does not appear much in training data"

[32m2025-03-03 23:48:06.640[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mChecking item B07RBMZRP3...[0m


## Random eye-ball

In [25]:
check_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,timestamp_unix,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,...,user_rating_list_10_recent_asin_timestamp,item_sequence,item_sequence_ts,item_sequence_ts_bucket,tags,main_category,title,description,categories,price
154694,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B00Q6DC96S,5.0,2015-06-13 19:20:14.000,1434223000.0,1.0,1.0,1.0,1.0,0.0,...,,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[Action RPG, Digital Download, Challenging Gam...",Video Games,Dark Souls II: Scholar of the First Sin - Xbox...,"[The definitive edition of DARK SOULS II, incl...","[Video Games, Xbox One, Games]",
319954,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B00CISMP8M,0.0,2015-06-13 19:20:14.000,,69.0,4.73913,6.0,5.0,0.0,...,,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[Sports, Soccer, Multiplayer, Competitive, Rea...",Video Games,FIFA 14 - Xbox 360,[Experience the emotion of scoring great goals...,"[Video Games, Legacy Systems, Xbox Systems, Xb...",25.0
319955,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B000084318,0.0,2020-04-13 00:58:41.238,,1.0,5.0,0.0,,0.0,...,1434223214,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1434223214]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 7]","[Adventure, Action, Open World, Fantasy, Art S...",Video Games,The Legend of Zelda: The Wind Waker,"[Product Description, Continue Link's adventur...","[Video Games, Legacy Systems, Nintendo Systems...",116.98
154695,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B07624RBWB,5.0,2020-04-13 00:58:41.238,1586740000.0,84.0,4.714286,21.0,4.714286,2.0,...,1434223214,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1434223214]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 7]","[Controller, Pro, Wireless, Ergonomic Design, ...",Video Games,Nintendo Switch Pro Controller,[],"[Video Games, Nintendo Switch, Accessories, Co...",69.0
154696,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B09ZTV42CQ,5.0,2020-07-22 02:00:07.233,1595383000.0,4.0,4.5,1.0,5.0,0.0,...,14342232141586739521,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, 1434223214, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 8, 5]","[Controller Adapter, Wireless Connectivity, Ve...",Video Games,8Bitdo Adapter 2 USB Wireless Switch Controlle...,[],"[Video Games, Nintendo Switch]",19.99
319956,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B014R4KYMS,0.0,2020-07-22 02:00:07.233,,10.0,4.9,2.0,5.0,0.0,...,14342232141586739521,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1....","[-1, -1, -1, -1, -1, -1, -1, -1, 1434223214, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 8, 5]","[Action-Adventure, Single Player, Narrative Dr...",Video Games,Uncharted 4: A Thief's End - PlayStation 4,[Uncharted comes to the PlayStation 4.Uncharte...,"[Video Games, PlayStation 4, Games]",24.99
154697,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B0C3KYVDWT,5.0,2020-07-22 02:13:37.412,1595384000.0,75.0,4.68,15.0,4.466667,5.0,...,143422321415867395211595383207,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 290...","[-1, -1, -1, -1, -1, -1, -1, 1434223214, 15867...","[-1, -1, -1, -1, -1, -1, -1, 8, 5, 1]","[Storage, Accessory, MicroSDXC, High Capacity,...",Computers,"SanDisk 128GB microSDXC-Card, Licensed for Nin...","[With incredible speed, the officially license...","[Video Games, Nintendo Switch, Accessories]",14.99
319957,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B0036EWMIK,0.0,2020-07-22 02:13:37.412,,4.0,4.5,1.0,5.0,0.0,...,143422321415867395211595383207,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 290...","[-1, -1, -1, -1, -1, -1, -1, 1434223214, 15867...","[-1, -1, -1, -1, -1, -1, -1, 8, 5, 1]","[Action-Adventure, RPG Elements, Fantasy, Sing...",Video Games,Castlevania: Lords of Shadow,[Castlevania – Lords of Shadow is a dark and v...,"[Video Games, Legacy Systems, Xbox Systems, Xb...",42.2
319958,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B072K62L3S,0.0,2020-11-21 00:43:30.613,,0.0,,0.0,,0.0,...,1434223214158673952115953832071595384017,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 2909.0, 3...","[-1, -1, -1, -1, -1, -1, 1434223214, 158673952...","[-1, -1, -1, -1, -1, -1, 8, 5, 5, 5]","[Platformer, Adventure, Cute Characters, Famil...",Video Games,Super Lucky's Tale: Standard Edition - Xbox On...,[Welcome to Adventure! “Super Lucky’s Tale” is...,"[Video Games, Xbox One, Games]",19.99
154698,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B08D3XL1KF,5.0,2020-11-21 00:43:30.613,1605919000.0,35.0,4.857143,2.0,5.0,0.0,...,1434223214158673952115953832071595384017,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 2909.0, 3...","[-1, -1, -1, -1, -1, -1, 1434223214, 158673952...","[-1, -1, -1, -1, -1, -1, 8, 5, 5, 5]","[DLC, Fighting Game, Character Expansion, Mult...",Video Games,Super Smash Bros. Ultimate: Challenger Pack 2 ...,[The Hero from the DRAGON QUEST XI game joins ...,"[Video Games, Nintendo Switch, Games]",5.99


In [26]:
val_check_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,timestamp_unix,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,...,user_rating_list_10_recent_asin_timestamp,item_sequence,item_sequence_ts,item_sequence_ts_bucket,tags,main_category,title,description,categories,price
165194,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B07RBMZRP3,5.0,2021-11-11 05:18:12.971,1636608000.0,2.0,5.0,0.0,,0.0,...,"1434223214,1586739521,1595383207,1595384017,16...","[-1, -1, 2909, 3788, 4520, 4607, 4368, 788, 12...","[-1, -1, 1434223214, 1586739521, 1595383207, 1...","[-1, -1, 8, 6, 6, 6, 5, 5, 5, 5]","[Action RPG, Difficult Gameplay, Dark Fantasy,...",Video Games,Dark Souls II: Scholar of the First Sin - Play...,[Prepare to Die again in the complete Dark Sou...,"[Video Games, PlayStation 4, Games]",16.37
330454,AHSMK75Q6UN55FEZEDBUJ4ZKYXAA,B001JKTC9A,0.0,2021-11-11 05:18:12.971,,1.0,5.0,0.0,,0.0,...,"1434223214,1586739521,1595383207,1595384017,16...","[-1, -1, 2909, 3788, 4520, 4607, 4368, 788, 12...","[-1, -1, 1434223214, 1586739521, 1595383207, 1...","[-1, -1, 8, 6, 6, 6, 5, 5, 5, 5]","[Action-Adventure, Single-player, Story-driven...",Video Games,Uncharted 2: Among Thieves - Playstation 3,"[Product Description, Uncharted 2: Among Thiev...","[Video Games, Legacy Systems, PlayStation Syst...",19.99


# Persist

In [27]:
full_features_df.to_parquet(
    "../data/full_features_neg_sampling_df.parquet", index=False
)
train_neg_df.to_parquet("../data/train_features_neg_df.parquet", index=False)
val_neg_df.to_parquet("../data/val_features_neg_df.parquet", index=False)