## Controller

In [63]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [64]:
import os
import sys
from datetime import timedelta
from typing import Optional

import pandas as pd
from feast import FeatureStore
from loguru import logger
from pydantic import BaseModel

sys.path.insert(0, "..")
from src.utils.embedding_id_mapper import IDMapper 
from src.negative_sampling import generate_negative_samples

## Args

In [65]:
class Args(BaseModel):
    """Run configuration for this data-prep notebook.

    Fields hold plain defaults. Call :meth:`init` once after construction to
    derive ``notebook_persist_dp`` and create that directory on disk.
    """

    testing: bool = False
    run_name: str = "000-data-prep"
    # Filled in by init(); it is None until then, so the field must be Optional
    # (annotating it as plain `str` contradicted the None default).
    notebook_persist_dp: Optional[str] = None
    random_seed: int = 41

    # Column names used in the interaction frames.
    user_col: str = "user_indice"
    item_col: str = "item_indice"
    rating_col: str = "rating"
    timestamp_col: str = "timestamp"

    # Input parquet files; the relative paths are resolved against the working
    # directory at class-definition time.
    train_data_fp: str = os.path.abspath("../data_for_ai/interim/train_sample_interactions_16407u_features.parquet")
    val_data_fp: str = os.path.abspath("../data_for_ai/interim/val_sample_interactions_16407u_features.parquet")

    # Passed to generate_negative_samples as `neg_to_pos_ratio`.
    neg_to_pos_ratio: int = 1

    def init(self):
        """Resolve ``data/<run_name>`` to an absolute path, create it, and return self.

        Returns:
            This instance, so construction and initialisation can be chained.
        """
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        # exist_ok=True makes re-running the cell safe.
        os.makedirs(self.notebook_persist_dp, exist_ok=True)

        return self


args = Args().init()

print(args.model_dump_json(indent=2))

{
  "testing": false,
  "run_name": "000-data-prep",
  "notebook_persist_dp": "c:\\Users\\Trieu\\OneDrive\\Desktop\\recsys\\real_time_recsys\\notebooks\\data\\000-data-prep",
  "random_seed": 41,
  "user_col": "user_indice",
  "item_col": "item_indice",
  "rating_col": "rating",
  "timestamp_col": "timestamp",
  "train_data_fp": "c:\\Users\\Trieu\\OneDrive\\Desktop\\recsys\\real_time_recsys\\data_for_ai\\interim\\train_sample_interactions_16407u_features.parquet",
  "val_data_fp": "c:\\Users\\Trieu\\OneDrive\\Desktop\\recsys\\real_time_recsys\\data_for_ai\\interim\\val_sample_interactions_16407u_features.parquet",
  "neg_to_pos_ratio": 1
}


## Test beforehand


In [66]:
# Toy input: one tuple per interaction, laid out as
# (user index, item index, rating, timestamp).
toy_columns = ["user_indice", "item_indice", args.rating_col, args.timestamp_col]
interactions = [
    (1, 101, 1, 1),
    (1, 102, 2, 2),
    (1, 103, 3, 4),
    (2, 101, 4, 1),
    (2, 104, 5, 2),
    (3, 105, 1, 1),
    (3, 106, 2, 5),
    # Add more interactions as needed
]

# Wrap the tuples in a DataFrame so they can be passed to the sampler.
df = pd.DataFrame(data=interactions, columns=toy_columns)

df

Unnamed: 0,user_indice,item_indice,rating,timestamp
0,1,101,1,1
1,1,102,2,2
2,1,103,3,4
3,2,101,4,1
4,2,104,5,2
5,3,105,1,1
6,3,106,2,5


In [67]:
# Smoke-test the sampler on the toy frame. Only the frame and the ratio are
# passed, so column names and seed fall back to generate_negative_samples'
# defaults — NOTE(review): confirm those defaults match the toy frame's
# "user_indice"/"item_indice" columns.
# `neg_df` is reassigned further down by the full-data sampling run.
neg_df = generate_negative_samples(df, neg_to_pos_ratio=1)

  0%|          | 0/7 [00:00<?, ?it/s]

In [68]:
# Display the negatives sampled for the toy frame.
neg_df

Unnamed: 0,user_indice,item_indice,rating,timestamp
0,1,104,0,1
1,1,104,0,2
2,1,106,0,4
3,2,103,0,1
4,2,102,0,2
5,3,101,0,1
6,3,101,0,5


## Load data

In [69]:
# Read the train and validation interaction frames from the paths configured in args
# (train first, then validation).
train_df, val_df = (
    pd.read_parquet(parquet_fp)
    for parquet_fp in (args.train_data_fp, args.val_data_fp)
)
# Absolute path of the idm JSON file — presumably the IDMapper dump for this
# sample; TODO confirm.
idm_path = os.path.abspath("../data_for_ai/interim/idm_16407u.json")

In [70]:
# Sanity check on the split: the earliest validation timestamp must be strictly
# later than the latest training timestamp.
last_train_ts = train_df[args.timestamp_col].max()
first_val_ts = val_df[args.timestamp_col].min()
assert first_val_ts > last_train_ts

# One second after the last training interaction.
val_timestamp = last_train_ts + timedelta(seconds=1)
logger.info(f"{val_timestamp=}")

[32m2025-06-28 09:36:50.943[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1mval_timestamp=Timestamp('2020-12-26 23:06:04.454000')[0m


In [71]:
# Stack train and validation into one frame, tagging each row with its split in
# a `subset` column so the two can be separated again later.
labelled_frames = [
    split_df.assign(subset=split_name)
    for split_name, split_df in (("train", train_df), ("val", val_df))
]
full_df = pd.concat(labelled_frames, axis=0)
full_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,timestamp_unix,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,...,item_indice,item_sequence,item_sequence_ts,item_sequence_ts_bucket,main_category,title,description,categories,price,subset
0,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B06XKCPK5W,2.0,2012-06-11 16:41:10.000,1339432870,0.0,,0.0,,0.0,...,2905,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Computers,Targus CleanVu Cleaning Pads Touch Screen Devi...,[The Targus CleanVu Cleaning Pads for iPad off...,"[Electronics, Television & Video, Accessories,...",6.99,train
1,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B000CKVOOY,3.0,2012-08-02 02:04:13.000,1343873053,3.0,5.000000,1.0,5.000000,1.0,...,89,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 2905]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1339432870]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 5]",Computers,Arkon Folding Tablet Stand Compatible with App...,[IPM-TAB1 is a highly versatile folding tablet...,"[Electronics, Computers & Accessories, Tablet ...",14.95,train
2,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B006GWO5WK,5.0,2012-09-15 16:34:46.000,1347726886,0.0,,0.0,,0.0,...,758,"[-1, -1, -1, -1, -1, -1, -1, -1, 2905, 89]","[-1, -1, -1, -1, -1, -1, -1, -1, 1339432870, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 5, 5]",Amazon Devices,Amazon Kindle 9W PowerFast Adapter for Acceler...,[],"[Electronics, Computers & Accessories, Tablet ...",,train
3,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B008LURQ76,5.0,2013-01-03 23:08:45.000,1357254525,4.0,4.250000,2.0,5.000000,2.0,...,959,"[-1, -1, -1, -1, -1, -1, -1, 2905, 89, 758]","[-1, -1, -1, -1, -1, -1, -1, 1339432870, 13438...","[-1, -1, -1, -1, -1, -1, -1, 5, 5, 5]",Computers,amFilm (TM) Premium Screen Protector Film Clea...,[Introducing amFilm® Premium Screen Protector ...,"[Electronics, Computers & Accessories, Tablet ...",,train
4,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B00AQRUW4Q,4.0,2013-05-06 01:24:39.000,1367803479,3.0,4.333333,3.0,4.333333,1.0,...,1096,"[-1, -1, -1, -1, -1, -1, 2905, 89, 758, 959]","[-1, -1, -1, -1, -1, -1, 1339432870, 134387305...","[-1, -1, -1, -1, -1, -1, 5, 5, 5, 5]",Computers,Seagate Wireless Plus 1TB Portable Hard Drive ...,"[With Seagate Wireless Plus, you can enjoy you...","[Electronics, Computers & Accessories, Data St...",,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3474,AHIIISHZP6YAVVHMDEBLJ5CWZ7ZA,B0BZ62FQ13,3.0,2021-07-16 17:08:55.044,1626455335,3.0,5.000000,0.0,,0.0,...,4693,"[-1, -1, 3051, 1472, 1662, 1213, 819, 665, 261...","[-1, -1, 1360730296, 1432061059, 1456243479, 1...","[-1, -1, 8, 8, 8, 8, 8, 8, 7, 7]",Cell Phones & Accessories,"OtterBox iPhone SE 3rd & 2nd Gen, iPhone 8 & i...",[Commuter Series is the compact case that keep...,"[Electronics, Electronics Gift Guide - Top Tec...",,val
3475,AFTE3G43QHXWD3DJGDCI2DHEWQJQ,B08DMXDPW5,5.0,2021-01-14 01:48:09.423,1610588889,15.0,3.533333,15.0,3.533333,8.0,...,3923,"[-1, -1, -1, -1, -1, 3290, 4295, 3089, 3188, 3...","[-1, -1, -1, -1, -1, 1462221728, 1488063495, 1...","[-1, -1, -1, -1, -1, 7, 7, 7, 6, 6]",Amazon Devices,Echo (4th Gen) | Glacier White with Sengled Bl...,[],[],114.98,val
3476,AFENZZDPVUYFVBS47YDOWJCDYBSQ,B09XBT6DS9,4.0,2021-12-05 00:35:40.874,1638664540,2.0,5.000000,1.0,5.000000,0.0,...,4335,"[-1, -1, -1, 137, 209, 4564, 487, 1049, 1652, ...","[-1, -1, -1, 1232828952, 1305485716, 135513762...","[-1, -1, -1, 9, 9, 8, 8, 8, 7, 6]",Home Audio & Theater,Ultra Clarity Cables High Speed HDMI Extension...,[],"[Electronics, Television & Video, Accessories,...",10.99,val
3477,AFMBZYPDAXT5VO3ME67HW5Q5TAOQ,B097KBF8JK,5.0,2022-02-18 11:32:46.732,1645183966,,,,,,...,4147,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Computers,Corsair Vengeance LPX 128GB (4x32GB) DDR4 3200...,[Corsair Vengeance LPX],"[Electronics, Computers & Accessories, Compute...",279.99,val


## Exclude the item and user statistic features to try to avoid the data leakage problem

In [None]:
# Item- and user-level statistic features to remove from full_df, in the hope
# that excluding them avoids the data leakage problem.
stat_feature_cols = [
    "parent_asin_rating_cnt_365d",
    "parent_asin_rating_avg_prev_rating_365d",
    "parent_asin_rating_cnt_90d",
    "parent_asin_rating_avg_prev_rating_90d",
    "parent_asin_rating_cnt_30d",
    "parent_asin_rating_avg_prev_rating_30d",
    "parent_asin_rating_cnt_7d",
    "parent_asin_rating_avg_prev_rating_7d",
    "user_rating_cnt_90d",
    "user_rating_avg_prev_rating_90d",
    "user_rating_list_10_recent_asin",
    "user_rating_list_10_recent_asin_timestamp",
]

# full_df is reassigned to its own result, so re-running this cell used to raise
# KeyError on the columns that were already gone. errors="ignore" skips labels
# that are no longer present, which makes the cell safe to run more than once.
full_df = full_df.drop(columns=stat_feature_cols, errors="ignore")

In [72]:
# Inspect full_df's current column names.
list(full_df.columns)

['user_id',
 'parent_asin',
 'rating',
 'timestamp',
 'timestamp_unix',
 'parent_asin_rating_cnt_365d',
 'parent_asin_rating_avg_prev_rating_365d',
 'parent_asin_rating_cnt_90d',
 'parent_asin_rating_avg_prev_rating_90d',
 'parent_asin_rating_cnt_30d',
 'parent_asin_rating_avg_prev_rating_30d',
 'parent_asin_rating_cnt_7d',
 'parent_asin_rating_avg_prev_rating_7d',
 'user_rating_cnt_90d',
 'user_rating_avg_prev_rating_90d',
 'user_rating_list_10_recent_asin',
 'user_rating_list_10_recent_asin_timestamp',
 'user_indice',
 'item_indice',
 'item_sequence',
 'item_sequence_ts',
 'item_sequence_ts_bucket',
 'main_category',
 'title',
 'description',
 'categories',
 'price',
 'subset']

In [None]:
# Columns handed to the sampler as `features`. The item/user statistic columns
# are deliberately not listed.
# NOTE(review): judging by the saved output, these values are copied from the
# positive row onto its negative sample — confirm in generate_negative_samples.
passthrough_features = [
    "user_id",
    "parent_asin",
    "timestamp_unix",
    "item_sequence",
    "item_sequence_ts",
    "item_sequence_ts_bucket",
    "main_category",
    "title",
    "description",
    "categories",
    "price",
    "subset",
]

# Sample negatives for the combined train+val frame, using the configured seed
# and negative-to-positive ratio.
neg_df = generate_negative_samples(
    full_df,
    args.user_col,
    args.item_col,
    seed=args.random_seed,
    neg_to_pos_ratio=args.neg_to_pos_ratio,
    features=passthrough_features,
)

  0%|          | 0/130871 [00:00<?, ?it/s]

In [74]:
# Compare the (rows, columns) shape of the positives and the sampled negatives.
full_df.shape, neg_df.shape

((130871, 28), (130871, 28))

In [75]:
# Inspect the column names of the sampled negatives.
list(neg_df.columns)

['user_indice',
 'item_indice',
 'rating',
 'timestamp',
 'user_id',
 'parent_asin',
 'timestamp_unix',
 'parent_asin_rating_cnt_365d',
 'parent_asin_rating_avg_prev_rating_365d',
 'parent_asin_rating_cnt_90d',
 'parent_asin_rating_avg_prev_rating_90d',
 'parent_asin_rating_cnt_30d',
 'parent_asin_rating_avg_prev_rating_30d',
 'parent_asin_rating_cnt_7d',
 'parent_asin_rating_avg_prev_rating_7d',
 'user_rating_cnt_90d',
 'user_rating_avg_prev_rating_90d',
 'user_rating_list_10_recent_asin',
 'user_rating_list_10_recent_asin_timestamp',
 'item_sequence',
 'item_sequence_ts',
 'item_sequence_ts_bucket',
 'main_category',
 'title',
 'description',
 'categories',
 'price',
 'subset']

In [76]:
# Append the negatives to the positives and renumber the rows 0..n-1 ...
stacked_df = pd.concat([full_df, neg_df], axis=0).reset_index(drop=True)
# ... then shuffle every row (frac=1, without replacement) with the configured seed.
full_neg_df = stacked_df.sample(
    frac=1,
    replace=False,
    random_state=args.random_seed,
)

In [77]:
# Display the negatives sampled from the full frame.
neg_df

Unnamed: 0,user_indice,item_indice,rating,timestamp,user_id,parent_asin,timestamp_unix,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,...,user_rating_list_10_recent_asin_timestamp,item_sequence,item_sequence_ts,item_sequence_ts_bucket,main_category,title,description,categories,price,subset
0,3931,1395,0,2012-06-11 16:41:10.000,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B06XKCPK5W,1339432870,0.0,,0.0,...,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Computers,Targus CleanVu Cleaning Pads Touch Screen Devi...,[The Targus CleanVu Cleaning Pads for iPad off...,"[Electronics, Television & Video, Accessories,...",6.99,train
1,3931,285,0,2012-08-02 02:04:13.000,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B000CKVOOY,1343873053,3.0,5.000000,1.0,...,1339432870,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 2905]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1339432870]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 5]",Computers,Arkon Folding Tablet Stand Compatible with App...,[IPM-TAB1 is a highly versatile folding tablet...,"[Electronics, Computers & Accessories, Tablet ...",14.95,train
2,3931,3478,0,2012-09-15 16:34:46.000,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B006GWO5WK,1347726886,0.0,,0.0,...,13394328701343873053,"[-1, -1, -1, -1, -1, -1, -1, -1, 2905, 89]","[-1, -1, -1, -1, -1, -1, -1, -1, 1339432870, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 5, 5]",Amazon Devices,Amazon Kindle 9W PowerFast Adapter for Acceler...,[],"[Electronics, Computers & Accessories, Tablet ...",,train
3,3931,270,0,2013-01-03 23:08:45.000,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B008LURQ76,1357254525,4.0,4.250000,2.0,...,133943287013438730531347726886,"[-1, -1, -1, -1, -1, -1, -1, 2905, 89, 758]","[-1, -1, -1, -1, -1, -1, -1, 1339432870, 13438...","[-1, -1, -1, -1, -1, -1, -1, 5, 5, 5]",Computers,amFilm (TM) Premium Screen Protector Film Clea...,[Introducing amFilm® Premium Screen Protector ...,"[Electronics, Computers & Accessories, Tablet ...",,train
4,3931,669,0,2013-05-06 01:24:39.000,AEYGPUCRKH7G4VM22FM3VAKSQ23Q,B00AQRUW4Q,1367803479,3.0,4.333333,3.0,...,1339432870134387305313477268861357254525,"[-1, -1, -1, -1, -1, -1, 2905, 89, 758, 959]","[-1, -1, -1, -1, -1, -1, 1339432870, 134387305...","[-1, -1, -1, -1, -1, -1, 5, 5, 5, 5]",Computers,Seagate Wireless Plus 1TB Portable Hard Drive ...,"[With Seagate Wireless Plus, you can enjoy you...","[Electronics, Computers & Accessories, Data St...",,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3474,14144,1396,0,2021-07-16 17:08:55.044,AHIIISHZP6YAVVHMDEBLJ5CWZ7ZA,B0BZ62FQ13,1626455335,3.0,5.000000,0.0,...,"1360730296,1432061059,1456243479,1456243487,14...","[-1, -1, 3051, 1472, 1662, 1213, 819, 665, 261...","[-1, -1, 1360730296, 1432061059, 1456243479, 1...","[-1, -1, 8, 8, 8, 8, 8, 8, 7, 7]",Cell Phones & Accessories,"OtterBox iPhone SE 3rd & 2nd Gen, iPhone 8 & i...",[Commuter Series is the compact case that keep...,"[Electronics, Electronics Gift Guide - Top Tec...",,val
3475,7343,1017,0,2021-01-14 01:48:09.423,AFTE3G43QHXWD3DJGDCI2DHEWQJQ,B08DMXDPW5,1610588889,15.0,3.533333,15.0,...,"1462221728,1488063495,1495905967,1564232045,15...","[-1, -1, -1, -1, -1, 3290, 4295, 3089, 3188, 3...","[-1, -1, -1, -1, -1, 1462221728, 1488063495, 1...","[-1, -1, -1, -1, -1, 7, 7, 7, 6, 6]",Amazon Devices,Echo (4th Gen) | Glacier White with Sengled Bl...,[],[],114.98,val
3476,5497,3899,0,2021-12-05 00:35:40.874,AFENZZDPVUYFVBS47YDOWJCDYBSQ,B09XBT6DS9,1638664540,2.0,5.000000,1.0,...,"1232828952,1305485716,1355137628,1452428556,14...","[-1, -1, -1, 137, 209, 4564, 487, 1049, 1652, ...","[-1, -1, -1, 1232828952, 1305485716, 135513762...","[-1, -1, -1, 9, 9, 8, 8, 8, 7, 6]",Home Audio & Theater,Ultra Clarity Cables High Speed HDMI Extension...,[],"[Electronics, Television & Video, Accessories,...",10.99,val
3477,6427,2385,0,2022-02-18 11:32:46.732,AFMBZYPDAXT5VO3ME67HW5Q5TAOQ,B097KBF8JK,1645183966,,,,...,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Computers,Corsair Vengeance LPX 128GB (4x32GB) DDR4 3200...,[Corsair Vengeance LPX],"[Electronics, Computers & Accessories, Compute...",279.99,val


In [78]:
# split back to train and val and reset the index
train_neg_df = full_neg_df[full_neg_df["subset"] == "train"].drop(columns=["subset"]).reset_index(drop=True)
val_neg_df = full_neg_df[full_neg_df["subset"] == "val"].drop(columns=["subset"]).reset_index(drop=True)

In [79]:
# Spot check: show every row (positive and negative) of one user, ordered by timestamp.
single_user_rows = full_neg_df[full_neg_df["user_id"] == "AENOXSRSNC5VGY3JQKZQ5DD7HIUA"]
single_user_rows.sort_values(by=args.timestamp_col)

Unnamed: 0,user_id,parent_asin,rating,timestamp,timestamp_unix,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,...,item_indice,item_sequence,item_sequence_ts,item_sequence_ts_bucket,main_category,title,description,categories,price,subset
20470,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B001F42MKG,5.0,2016-03-13 16:26:42.000,1457886402,12.0,4.083333,4.0,3.75,3.0,...,218,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,Logitech Trackman Marble Trackball – Wired USB...,[With a sleek ambidextrous design that fits ei...,"[Electronics, Computers & Accessories, Compute...",,train
151341,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B001F42MKG,0.0,2016-03-13 16:26:42.000,1457886402,12.0,4.083333,4.0,3.75,3.0,...,4654,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,Logitech Trackman Marble Trackball – Wired USB...,[With a sleek ambidextrous design that fits ei...,"[Electronics, Computers & Accessories, Compute...",,train
151342,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B01I4TE612,0.0,2017-06-10 00:29:53.692,1497054593,22.0,4.727273,0.0,,0.0,...,119,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 218]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1457886402]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 6]",Cell Phones & Accessories,Garmin Portable Friction Mount - Frustration F...,"[Product description, Keep your device in easy...","[Electronics, GPS, Finders & Accessories, GPS ...",,train
20471,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B01I4TE612,5.0,2017-06-10 00:29:53.692,1497054593,22.0,4.727273,0.0,,0.0,...,2648,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 218]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1457886402]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 6]",Cell Phones & Accessories,Garmin Portable Friction Mount - Frustration F...,"[Product description, Keep your device in easy...","[Electronics, GPS, Finders & Accessories, GPS ...",,train
20472,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B00SG3CWGS,5.0,2017-06-10 00:30:32.698,1497054632,10.0,4.5,1.0,5.0,0.0,...,1950,"[-1, -1, -1, -1, -1, -1, -1, -1, 218, 2648]","[-1, -1, -1, -1, -1, -1, -1, -1, 1457886402, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 6, 0]",Cell Phones & Accessories,Garmin Nuvi 67LMT 6-Inch GPS Navigator,"[With bright 6” dual-orientation displays, spo...","[Electronics, GPS, Finders & Accessories, Spor...",199.0,train
151343,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B00SG3CWGS,0.0,2017-06-10 00:30:32.698,1497054632,10.0,4.5,1.0,5.0,0.0,...,4213,"[-1, -1, -1, -1, -1, -1, -1, -1, 218, 2648]","[-1, -1, -1, -1, -1, -1, -1, -1, 1457886402, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 6, 0]",Cell Phones & Accessories,Garmin Nuvi 67LMT 6-Inch GPS Navigator,"[With bright 6” dual-orientation displays, spo...","[Electronics, GPS, Finders & Accessories, Spor...",199.0,train
151344,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B00XIVH2LI,0.0,2017-06-10 00:31:25.121,1497054685,11.0,4.727273,0.0,,0.0,...,3678,"[-1, -1, -1, -1, -1, -1, -1, 218, 2648, 1950]","[-1, -1, -1, -1, -1, -1, -1, 1457886402, 14970...","[-1, -1, -1, -1, -1, -1, -1, 6, 0, 0]",Cell Phones & Accessories,Amazon Basics Hard Travel Carrying Case for 5 ...,"[Product Description, Amazon Basics Hard Trave...","[Electronics, GPS, Finders & Accessories, GPS ...",10.07,train
20473,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B00XIVH2LI,5.0,2017-06-10 00:31:25.121,1497054685,11.0,4.727273,0.0,,0.0,...,2163,"[-1, -1, -1, -1, -1, -1, -1, 218, 2648, 1950]","[-1, -1, -1, -1, -1, -1, -1, 1457886402, 14970...","[-1, -1, -1, -1, -1, -1, -1, 6, 0, 0]",Cell Phones & Accessories,Amazon Basics Hard Travel Carrying Case for 5 ...,"[Product Description, Amazon Basics Hard Trave...","[Electronics, GPS, Finders & Accessories, GPS ...",10.07,train
20474,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B0BGNG1294,5.0,2018-12-15 15:32:45.209,1544887965,90.0,4.833333,18.0,4.944444,9.0,...,4516,"[-1, -1, -1, -1, -1, -1, 218, 2648, 1950, 2163]","[-1, -1, -1, -1, -1, -1, 1457886402, 149705459...","[-1, -1, -1, -1, -1, -1, 6, 6, 6, 6]",Home Audio & Theater,"Amazon Basics HDMI Cable, 18Gbps High-Speed, 4...","[Product Description, Amazon Basics High-Speed...","[Electronics, Television & Video, Accessories,...",8.55,train
151345,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B0BGNG1294,0.0,2018-12-15 15:32:45.209,1544887965,90.0,4.833333,18.0,4.944444,9.0,...,4659,"[-1, -1, -1, -1, -1, -1, 218, 2648, 1950, 2163]","[-1, -1, -1, -1, -1, -1, 1457886402, 149705459...","[-1, -1, -1, -1, -1, -1, 6, 6, 6, 6]",Home Audio & Theater,"Amazon Basics HDMI Cable, 18Gbps High-Speed, 4...","[Product Description, Amazon Basics High-Speed...","[Electronics, Television & Video, Accessories,...",8.55,train


## Persist

In [None]:
# Write both splits (positives plus sampled negatives) to parquet under
# ../data_for_ai/interim, train first and then val.
output_frames = {
    "../data_for_ai/interim/train_sample_interactions_16407u_features_neg_seq_without_stats_item_user.parquet": train_neg_df,
    "../data_for_ai/interim/val_sample_interactions_16407u_features_neg_seq_without_stats_item_user.parquet": val_neg_df,
}
for output_fp, split_frame in output_frames.items():
    split_frame.to_parquet(output_fp)