## Controller

In [39]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [40]:
import os
import sys
from datetime import timedelta

import pandas as pd
from feast import FeatureStore
from loguru import logger
from pydantic import BaseModel

sys.path.insert(0, "..")
from src.utils.embedding_id_mapper import IDMapper 
from src.negative_sampling import generate_negative_samples

## Args

In [41]:
class Args(BaseModel):
    testing: bool = False
    run_name: str = "000-data-prep"
    notebook_persist_dp: str = None
    random_seed: int = 41

    user_col: str = "user_indice"
    item_col: str = "item_indice"
    rating_col: str = "rating"
    timestamp_col: str = "timestamp"

    train_data_fp: str = os.path.abspath("../data_for_ai/interim/train_sample_interactions_16407u_features.parquet")
    val_data_fp: str = os.path.abspath("../data_for_ai/interim/val_sample_interactions_16407u_features.parquet")

    neg_to_pos_ratio: int = 1

    def init(self):
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        os.makedirs(self.notebook_persist_dp, exist_ok=True)

        return self


args = Args().init()

print(args.model_dump_json(indent=2))

{
  "testing": false,
  "run_name": "000-data-prep",
  "notebook_persist_dp": "/home/dinhln/Desktop/real_time_recsys/notebooks/data/000-data-prep",
  "random_seed": 41,
  "user_col": "user_indice",
  "item_col": "item_indice",
  "rating_col": "rating",
  "timestamp_col": "timestamp",
  "train_data_fp": "/home/dinhln/Desktop/real_time_recsys/data_for_ai/interim/train_sample_interactions_16407u_features.parquet",
  "val_data_fp": "/home/dinhln/Desktop/real_time_recsys/data_for_ai/interim/val_sample_interactions_16407u_features.parquet",
  "neg_to_pos_ratio": 1
}


## Test beforhand


In [42]:
# Sample input: List of (user_id, item_id) interactions
interactions = [
    (1, 101, 1, 1),
    (1, 102, 2, 2),
    (1, 103, 3, 4),
    (2, 101, 4, 1),
    (2, 104, 5, 2),
    (3, 105, 1, 1),
    (3, 106, 2, 5),
    # Add more interactions as needed
]

# Convert the list to a DataFrame for easier manipulation
df = pd.DataFrame(
    interactions,
    columns=["user_indice", "item_indice", args.rating_col, args.timestamp_col],
)

df

Unnamed: 0,user_indice,item_indice,rating,timestamp
0,1,101,1,1
1,1,102,2,2
2,1,103,3,4
3,2,101,4,1
4,2,104,5,2
5,3,105,1,1
6,3,106,2,5


In [43]:
neg_df = generate_negative_samples(df, neg_to_pos_ratio=1)

  0%|          | 0/7 [00:00<?, ?it/s]

In [44]:
neg_df

Unnamed: 0,user_indice,item_indice,rating,timestamp
0,1,104,0,1
1,1,104,0,2
2,1,106,0,4
3,2,103,0,1
4,2,102,0,2
5,3,101,0,1
6,3,101,0,5


## Load data

In [45]:
train_df = pd.read_parquet(args.train_data_fp)
val_df = pd.read_parquet(args.val_data_fp)
idm_path = os.path.abspath("../data_for_ai/interim/idm_16407u.json")

In [46]:
assert val_df[args.timestamp_col].min() > train_df[args.timestamp_col].max()
val_timestamp = train_df[args.timestamp_col].max() + timedelta(seconds=1)
logger.info(f"{val_timestamp=}")

[32m2025-07-01 00:34:09.200[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1mval_timestamp=Timestamp('2020-12-26 23:06:04.454000')[0m


In [47]:
full_df = pd.concat([
    train_df.assign(subset="train"), val_df.assign(subset="val")], axis=0)
full_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,item_sequence,timestamp_unix,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,...,user_rating_list_10_recent_asin,user_rating_list_10_recent_asin_timestamp,item_sequence_ts,item_sequence_ts_bucket,main_category,title,description,categories,price,subset
0,AFZ4EK2LJ655XQKTEUELCARO6RYA,B00002EQCW,4.0,2003-01-23 03:28:15.000,8071,4,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",1043292495,0.0,,...,,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,NETGEAR FS105NA - Discontinued by Manufacturer,"[Product Description, The NETGEAR FS105NA Unma...","[Electronics, Computers & Accessories, Network...",57.89,train
1,AFY2C4YOUP2SSMM43HD2L3FIEFZA,B00008SCFL,5.0,2003-11-25 18:12:09.000,7935,36,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",1069783929,0.0,,...,,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,Netgear WGR614 Wireless-G Router,"[Product Description, NETGEAR's Cable/DSL 54 M...","[Electronics, Computers & Accessories, Network...",39.5,train
2,AHF3TGIOSTD2UCHF3MO4MIHFJ5NQ,B07KQWX947,5.0,2004-06-18 02:02:57.000,13705,3514,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",1087524177,0.0,,...,,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,Koss Porta Pro Black On Ear Headphones with Ca...,[New porta Pro active lifestyle headphone in B...,"[Electronics, Headphones, Earbuds & Accessorie...",39.99,train
3,AH5Z47PJ5RTSUL2RLCO2QITGIT4Q,B00008SCFL,4.0,2004-09-13 20:18:44.000,12730,36,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",1095106724,1.0,5.0,...,,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,Netgear WGR614 Wireless-G Router,"[Product Description, NETGEAR's Cable/DSL 54 M...","[Electronics, Computers & Accessories, Network...",39.5,train
4,AEX3L4NKDESOCGWOFNF63GRFGXCA,B00WUI8JN0,5.0,2004-10-22 14:26:12.000,3735,2103,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",1098455172,0.0,,...,,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Camera & Photo,Canon Advanced Two Lens Kit with 50mm f/1.4 an...,[Advance to a new realm of image quality with ...,"[Electronics, Camera & Photo, Lenses, Camera L...",1179.0,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3474,AEKUF6AOVWDWFYOKPWO2CV72PEDQ,B07QN33986,5.0,2022-02-19 01:32:51.519,2171,3626,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",1645234371,,,...,,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Computers,"Lamicall Tablet Stand Adjustable, Tablet Stand...",[],"[Electronics, Computers & Accessories, Tablet ...",19.99,val
3475,AFBTD25HPE4BE4LUFV3DTI2E2N2A,B07TMJ8S5Z,5.0,2022-02-19 16:49:57.966,5159,3699,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",1645289397,,,...,,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Amazon Devices,"Amazon Fire HD 8 tablet, 8"" HD display, 64 GB,...",[],[],119.99,val
3476,AEEQZRQBOFHFBFPYBX2BZ5WOI33A,B00007KDX6,5.0,2022-02-19 16:56:53.030,1396,32,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",1645289813,,,...,,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Home Audio & Theater,"Panasonic Portable AM / FM Radio, Battery Oper...","[Panasonic RF 2400 AM / FM Radio, Silver.]","[Electronics, Portable Audio & Video, Radios, ...",28.99,val
3477,AHLN6GKTKZE22AON34YAQXTGK63A,B0C682GZ5X,5.0,2022-02-19 17:28:55.519,14550,4772,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",1645291735,,,...,,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Computers,NETGEAR Nighthawk Cable Modem CM1200 - Compati...,[Nighthawk Multi-Gig speed Cable Modem by NETG...,"[Electronics, Computers & Accessories, Network...",203.99,val


## Try exclude the statictic feature about item and user and hope it can avoid the data leakage problem

In [48]:
# exclude these column in full_df 
#  'parent_asin_rating_cnt_365d',
#  'parent_asin_rating_avg_prev_rating_365d',
#  'parent_asin_rating_cnt_90d',
#  'parent_asin_rating_avg_prev_rating_90d',
#  'parent_asin_rating_cnt_30d',
#  'parent_asin_rating_avg_prev_rating_30d',
#  'parent_asin_rating_cnt_7d',
#  'parent_asin_rating_avg_prev_rating_7d',
#  'user_rating_cnt_90d',
#  'user_rating_avg_prev_rating_90d',
#  'user_rating_list_10_recent_asin',
#  'user_rating_list_10_recent_asin_timestamp'

full_df = full_df.drop(columns=[
    "parent_asin_rating_cnt_365d",
    "parent_asin_rating_avg_prev_rating_365d",
    "parent_asin_rating_cnt_90d",
    "parent_asin_rating_avg_prev_rating_90d",
    "parent_asin_rating_cnt_30d",
    "parent_asin_rating_avg_prev_rating_30d",
    "parent_asin_rating_cnt_7d",
    "parent_asin_rating_avg_prev_rating_7d",
    "user_rating_cnt_90d",
    "user_rating_avg_prev_rating_90d",
    "user_rating_list_10_recent_asin",
    "user_rating_list_10_recent_asin_timestamp"
])

In [49]:
list(full_df.columns)

['user_id',
 'parent_asin',
 'rating',
 'timestamp',
 'user_indice',
 'item_indice',
 'item_sequence',
 'timestamp_unix',
 'item_sequence_ts',
 'item_sequence_ts_bucket',
 'main_category',
 'title',
 'description',
 'categories',
 'price',
 'subset']

In [50]:
neg_df = generate_negative_samples(
    full_df,
    args.user_col,
    args.item_col,
    seed = args.random_seed,
    neg_to_pos_ratio=args.neg_to_pos_ratio,
    features= ['user_id',
            'parent_asin',
            'timestamp_unix',
            # 'parent_asin_rating_cnt_365d',
            # 'parent_asin_rating_avg_prev_rating_365d',
            # 'parent_asin_rating_cnt_90d',
            # 'parent_asin_rating_avg_prev_rating_90d',
            # 'parent_asin_rating_cnt_30d',
            # 'parent_asin_rating_avg_prev_rating_30d',
            # 'parent_asin_rating_cnt_7d',
            # 'parent_asin_rating_avg_prev_rating_7d',
            # 'user_rating_cnt_90d',
            # 'user_rating_avg_prev_rating_90d',
            # 'user_rating_list_10_recent_asin',
            # 'user_rating_list_10_recent_asin_timestamp',
            'item_sequence',
            'item_sequence_ts',
            'item_sequence_ts_bucket',
            'main_category',
            'title',
            'description',
            'categories',
            'price',
            'subset'],
)

  0%|          | 0/130871 [00:00<?, ?it/s]

In [51]:
full_df.shape, neg_df.shape

((130871, 16), (130871, 16))

In [52]:
list(neg_df.columns)

['user_indice',
 'item_indice',
 'rating',
 'timestamp',
 'user_id',
 'parent_asin',
 'timestamp_unix',
 'item_sequence',
 'item_sequence_ts',
 'item_sequence_ts_bucket',
 'main_category',
 'title',
 'description',
 'categories',
 'price',
 'subset']

In [53]:
full_neg_df = (
    pd.concat([full_df, neg_df], axis=0)
    .reset_index(drop=True)
    .sample(frac=1, replace = False, random_state=args.random_seed))

In [54]:
neg_df

Unnamed: 0,user_indice,item_indice,rating,timestamp,user_id,parent_asin,timestamp_unix,item_sequence,item_sequence_ts,item_sequence_ts_bucket,main_category,title,description,categories,price,subset
0,8071,1395,0,2003-01-23 03:28:15.000,AFZ4EK2LJ655XQKTEUELCARO6RYA,B00002EQCW,1043292495,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,NETGEAR FS105NA - Discontinued by Manufacturer,"[Product Description, The NETGEAR FS105NA Unma...","[Electronics, Computers & Accessories, Network...",57.89,train
1,7935,285,0,2003-11-25 18:12:09.000,AFY2C4YOUP2SSMM43HD2L3FIEFZA,B00008SCFL,1069783929,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,Netgear WGR614 Wireless-G Router,"[Product Description, NETGEAR's Cable/DSL 54 M...","[Electronics, Computers & Accessories, Network...",39.5,train
2,13705,3483,0,2004-06-18 02:02:57.000,AHF3TGIOSTD2UCHF3MO4MIHFJ5NQ,B07KQWX947,1087524177,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,Koss Porta Pro Black On Ear Headphones with Ca...,[New porta Pro active lifestyle headphone in B...,"[Electronics, Headphones, Earbuds & Accessorie...",39.99,train
3,12730,269,0,2004-09-13 20:18:44.000,AH5Z47PJ5RTSUL2RLCO2QITGIT4Q,B00008SCFL,1095106724,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,Netgear WGR614 Wireless-G Router,"[Product Description, NETGEAR's Cable/DSL 54 M...","[Electronics, Computers & Accessories, Network...",39.5,train
4,3735,673,0,2004-10-22 14:26:12.000,AEX3L4NKDESOCGWOFNF63GRFGXCA,B00WUI8JN0,1098455172,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Camera & Photo,Canon Advanced Two Lens Kit with 50mm f/1.4 an...,[Advance to a new realm of image quality with ...,"[Electronics, Camera & Photo, Lenses, Camera L...",1179.0,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3474,2171,1395,0,2022-02-19 01:32:51.519,AEKUF6AOVWDWFYOKPWO2CV72PEDQ,B07QN33986,1645234371,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Computers,"Lamicall Tablet Stand Adjustable, Tablet Stand...",[],"[Electronics, Computers & Accessories, Tablet ...",19.99,val
3475,5159,1030,0,2022-02-19 16:49:57.966,AFBTD25HPE4BE4LUFV3DTI2E2N2A,B07TMJ8S5Z,1645289397,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Amazon Devices,"Amazon Fire HD 8 tablet, 8"" HD display, 64 GB,...",[],[],119.99,val
3476,1396,3899,0,2022-02-19 16:56:53.030,AEEQZRQBOFHFBFPYBX2BZ5WOI33A,B00007KDX6,1645289813,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Home Audio & Theater,"Panasonic Portable AM / FM Radio, Battery Oper...","[Panasonic RF 2400 AM / FM Radio, Silver.]","[Electronics, Portable Audio & Video, Radios, ...",28.99,val
3477,14550,2383,0,2022-02-19 17:28:55.519,AHLN6GKTKZE22AON34YAQXTGK63A,B0C682GZ5X,1645291735,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Computers,NETGEAR Nighthawk Cable Modem CM1200 - Compati...,[Nighthawk Multi-Gig speed Cable Modem by NETG...,"[Electronics, Computers & Accessories, Network...",203.99,val


In [55]:
# split back to train and val and reset the index
train_neg_df = full_neg_df[full_neg_df["subset"] == "train"].drop(columns=["subset"]).reset_index(drop=True)
val_neg_df = full_neg_df[full_neg_df["subset"] == "val"].drop(columns=["subset"]).reset_index(drop=True)

In [56]:
# print the row has user_id=AENOXSRSNC5VGY3JQKZQ5DD7HIUA, sort by timestamp
full_neg_df[full_neg_df["user_id"] == "AENOXSRSNC5VGY3JQKZQ5DD7HIUA"].sort_values(by=args.timestamp_col)

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,item_sequence,timestamp_unix,item_sequence_ts,item_sequence_ts_bucket,main_category,title,description,categories,price,subset
185521,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B001F42MKG,0.0,2016-03-13 16:26:42.000,2546,3443,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",1457886402,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,Logitech Trackman Marble Trackball – Wired USB...,[With a sleek ambidextrous design that fits ei...,"[Electronics, Computers & Accessories, Compute...",,train
54650,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B001F42MKG,5.0,2016-03-13 16:26:42.000,2546,218,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",1457886402,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",All Electronics,Logitech Trackman Marble Trackball – Wired USB...,[With a sleek ambidextrous design that fits ei...,"[Electronics, Computers & Accessories, Compute...",,train
210569,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B01I4TE612,0.0,2017-06-10 00:29:53.692,2546,1844,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 218]",1497054593,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1457886402]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 6]",Cell Phones & Accessories,Garmin Portable Friction Mount - Frustration F...,"[Product description, Keep your device in easy...","[Electronics, GPS, Finders & Accessories, GPS ...",,train
79698,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B01I4TE612,5.0,2017-06-10 00:29:53.692,2546,2648,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 218]",1497054593,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1457886402]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 6]",Cell Phones & Accessories,Garmin Portable Friction Mount - Frustration F...,"[Product description, Keep your device in easy...","[Electronics, GPS, Finders & Accessories, GPS ...",,train
79699,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B00SG3CWGS,5.0,2017-06-10 00:30:32.698,2546,1950,"[-1, -1, -1, -1, -1, -1, -1, -1, 218, 2648]",1497054632,"[-1, -1, -1, -1, -1, -1, -1, -1, 1457886402, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 6, 0]",Cell Phones & Accessories,Garmin Nuvi 67LMT 6-Inch GPS Navigator,"[With bright 6” dual-orientation displays, spo...","[Electronics, GPS, Finders & Accessories, Spor...",199.0,train
210570,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B00SG3CWGS,0.0,2017-06-10 00:30:32.698,2546,3975,"[-1, -1, -1, -1, -1, -1, -1, -1, 218, 2648]",1497054632,"[-1, -1, -1, -1, -1, -1, -1, -1, 1457886402, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 6, 0]",Cell Phones & Accessories,Garmin Nuvi 67LMT 6-Inch GPS Navigator,"[With bright 6” dual-orientation displays, spo...","[Electronics, GPS, Finders & Accessories, Spor...",199.0,train
79700,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B00XIVH2LI,5.0,2017-06-10 00:31:25.121,2546,2163,"[-1, -1, -1, -1, -1, -1, -1, 218, 2648, 1950]",1497054685,"[-1, -1, -1, -1, -1, -1, -1, 1457886402, 14970...","[-1, -1, -1, -1, -1, -1, -1, 6, 0, 0]",Cell Phones & Accessories,Amazon Basics Hard Travel Carrying Case for 5 ...,"[Product Description, Amazon Basics Hard Trave...","[Electronics, GPS, Finders & Accessories, GPS ...",10.07,train
210571,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B00XIVH2LI,0.0,2017-06-10 00:31:25.121,2546,4807,"[-1, -1, -1, -1, -1, -1, -1, 218, 2648, 1950]",1497054685,"[-1, -1, -1, -1, -1, -1, -1, 1457886402, 14970...","[-1, -1, -1, -1, -1, -1, -1, 6, 0, 0]",Cell Phones & Accessories,Amazon Basics Hard Travel Carrying Case for 5 ...,"[Product Description, Amazon Basics Hard Trave...","[Electronics, GPS, Finders & Accessories, GPS ...",10.07,train
234361,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B0BGNG1294,0.0,2018-12-15 15:32:45.209,2546,1102,"[-1, -1, -1, -1, -1, -1, 218, 2648, 1950, 2163]",1544887965,"[-1, -1, -1, -1, -1, -1, 1457886402, 149705459...","[-1, -1, -1, -1, -1, -1, 6, 6, 6, 6]",Home Audio & Theater,"Amazon Basics HDMI Cable, 18Gbps High-Speed, 4...","[Product Description, Amazon Basics High-Speed...","[Electronics, Television & Video, Accessories,...",8.55,train
103490,AENOXSRSNC5VGY3JQKZQ5DD7HIUA,B0BGNG1294,5.0,2018-12-15 15:32:45.209,2546,4516,"[-1, -1, -1, -1, -1, -1, 218, 2648, 1950, 2163]",1544887965,"[-1, -1, -1, -1, -1, -1, 1457886402, 149705459...","[-1, -1, -1, -1, -1, -1, 6, 6, 6, 6]",Home Audio & Theater,"Amazon Basics HDMI Cable, 18Gbps High-Speed, 4...","[Product Description, Amazon Basics High-Speed...","[Electronics, Television & Video, Accessories,...",8.55,train


## Persit

In [57]:
train_neg_df.to_parquet("../data_for_ai/interim/train_sample_interactions_16407u_features_neg_seq_without_stats_item_user.parquet")
val_neg_df.to_parquet("../data_for_ai/interim/val_sample_interactions_16407u_features_neg_seq_without_stats_item_user.parquet")