# Prepare features


Prepare the necessary features and transformations


# Set up


In [1]:
import sys

import pandas as pd

sys.path.insert(0, "..")
from datasets import load_dataset

from src.cfg import ConfigLoader
from src.id_mapper import IDMapper, map_indice
from src.sequence.utils import generate_item_sequences
from src.viz import custom_style_plotly

custom_style_plotly()

# Controller


In [None]:
cfg = ConfigLoader("../cfg/common.yaml")
cfg

{
  "random_seed": 41,
  "root_dir": "/home/dvq/frostmourne/recsys-blog/1-seq-model",
  "data": {
    "hf_datasets": {
      "name": "McAuley-Lab/Amazon-Reviews-2023",
      "mcauley_variant": "Books"
    },
    "train_fp": "/home/dvq/frostmourne/recsys-blog/1-seq-model/data/train.parquet",
    "val_fp": "/home/dvq/frostmourne/recsys-blog/1-seq-model/data/val.parquet",
    "idm_fp": "/home/dvq/frostmourne/recsys-blog/1-seq-model/data/idm.json",
    "metadata_fp": "/home/dvq/frostmourne/recsys-blog/1-seq-model/data/metadata.parquet",
    "train_features_fp": "/home/dvq/frostmourne/recsys-blog/1-seq-model/data/train_features.parquet",
    "val_features_fp": "/home/dvq/frostmourne/recsys-blog/1-seq-model/data/val_features.parquet",
    "batch_recs_fp": null,
    "user_col": "user_id",
    "item_col": "parent_asin",
    "rating_col": "rating",
    "timestamp_col": "timestamp"
  },
  "sample": {
    "sample_users": 10000,
    "min_val_records": 5000,
    "min_user_interactions": 5,
    "min

# Load data


In [3]:
train_df = pd.read_parquet(cfg.data.train_fp)
val_df = pd.read_parquet(cfg.data.val_fp)

In [4]:
train_df

Unnamed: 0,user_id,parent_asin,rating,timestamp
32,AH6CATODIVPVUOJEWHRSRCSKAOHA,0399240462,5.0,1398818354000
34,AH6CATODIVPVUOJEWHRSRCSKAOHA,0547248288,5.0,1415484437000
35,AH6CATODIVPVUOJEWHRSRCSKAOHA,141694737X,5.0,1416395330000
37,AH6CATODIVPVUOJEWHRSRCSKAOHA,0374360979,5.0,1420650726000
40,AH6CATODIVPVUOJEWHRSRCSKAOHA,0671493205,3.0,1420651291000
...,...,...,...,...
8721774,AHQXEZQVLUATXYKVH46342AIYRPA,B01D4VFDTO,5.0,1458315354000
8721777,AHQXEZQVLUATXYKVH46342AIYRPA,B01N0TBGJC,5.0,1482962333000
8721778,AHQXEZQVLUATXYKVH46342AIYRPA,B01N32NQTH,5.0,1486058822000
8721779,AHQXEZQVLUATXYKVH46342AIYRPA,B06VSBSMQV,5.0,1488318530000


IDMapper is the class responsible for mapping original string indice to integer indice since our model expect the integer indexing.


In [5]:
user_ids = train_df[cfg.data.user_col].values
item_ids = train_df[cfg.data.item_col].values
unique_user_ids = list(set(user_ids))
unique_item_ids = list(set(item_ids))
idm = IDMapper()
idm.fit(unique_user_ids, unique_item_ids)

In [6]:
train_df = train_df.pipe(map_indice, idm, cfg.data.user_col, cfg.data.item_col)
val_df = val_df.pipe(map_indice, idm, cfg.data.user_col, cfg.data.item_col)

In [7]:
idm.save(cfg.data.idm_fp)
idm = IDMapper().load(cfg.data.idm_fp)

#### Persist metadata


In [None]:
metadata_raw = load_dataset(
    cfg.data.hf_datasets.name,
    f"raw_meta_{cfg.data.hf_datasets.mcauley_variant}",
    trust_remote_code=True,
)
metadata_raw_df = metadata_raw["full"].to_pandas()
metadata_raw_df

Loading dataset shards:   0%|          | 0/28 [00:00<?, ?it/s]

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Books,Chaucer,4.5,29,[],[],8.23,"{'hi_res': [None], 'large': ['https://m.media-...","{'title': [], 'url': [], 'user_id': []}",Peter Ackroyd (Author),"[Books, Literature & Fiction, History & Critic...","{""Publisher"": ""Chatto & Windus; First Edition ...",0701169850,,"Hardcover – Import, January 1, 2004",{'avatar': 'https://m.media-amazon.com/images/...
1,Books,Notes from a Kidwatcher,5.0,1,[Contains 23 selected articles by this influen...,"[About the Author, SANDRA WILDE, Ph.D., is wid...",3.52,"{'hi_res': [None], 'large': ['https://m.media-...","{'title': [], 'url': [], 'user_id': []}",Sandra Wilde (Editor),"[Books, Reference, Words, Language & Grammar]","{""Publisher"": ""Heinemann; First Edition (May 2...",0435088688,,First Edition,{'avatar': 'https://m.media-amazon.com/images/...
2,Books,Service: A Navy SEAL at War,4.7,3421,"[Marcus Luttrell, author of the #1 bestseller,...","[Review, Praise for SERVICE""An action-packed.....",17.17,"{'hi_res': [None], 'large': ['https://m.media-...","{'title': [], 'url': [], 'user_id': []}","Marcus Luttrell (Author), James D. Hornfischer","[Books, Biographies & Memoirs, Leaders & Notab...","{""Publisher"": ""Little, Brown and Company; 1st ...",0316185361,,"Hardcover – May 8, 2012",{'avatar': 'https://m.media-amazon.com/images/...
3,Books,Monstrous Stories #4: The Day the Mice Stood S...,4.4,40,"[Funny, light-hearted monster stories that are...",[],7.43,"{'hi_res': [None], 'large': ['https://m.media-...","{'title': [], 'url': [], 'user_id': []}",Dr. Roach (Author),"[Books, Children's Books, Science Fiction & Fa...","{""Publisher"": ""Scholastic Paperbacks; Reprint ...",0545425573,,"Paperback – October 29, 2013",
4,Buy a Kindle,Parker & Knight,4.5,381,"[From REMINGTON KANE, the author of The Taken!...",[],0.0,"{'hi_res': [None], 'large': ['https://m.media-...","{'title': [], 'url': [], 'user_id': []}",Remington Kane (Author) Format: Kindle Edition,"[Books, Mystery, Thriller & Suspense, Thriller...","{""Publication date"": ""May 18, 2014"", ""Language...",B00KFOP3RG,,Kindle Edition,{'avatar': 'https://m.media-amazon.com/images/...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4448176,Books,Please Excuse My Daughter,4.3,69,"[Look out for Julie's new book,, The Almost Le...","[About the Author, Julie Klam grew up in Bedfo...",36.06,"{'hi_res': [], 'large': [], 'thumb': [], 'vari...","{'title': [], 'url': [], 'user_id': []}",Julie Klam (Author),"[Books, Biographies & Memoirs, Community & Cul...","{""Publisher"": ""Riverhead Books; Reprint editio...",1594483574,,"Paperback – April 7, 2009",{'avatar': 'https://m.media-amazon.com/images/...
4448177,Books,Inside the Southeast Asian Kitchen: Foodlore a...,5.0,1,[A sumptuous gastronomic tour of ten Southeast...,[],75.0,"{'hi_res': [], 'large': [], 'thumb': [], 'vari...","{'title': [], 'url': [], 'user_id': []}",Artpostasia Pte Ltd. (Author),"[Books, Cookbooks, Food & Wine, Regional & Int...","{""Publisher"": ""Art Post Asia (January 1, 2007)...",9719317051,,"Paperback – January 1, 2007",
4448178,Books,Origin of Negative Dialectics,4.9,16,[Susan Buck-Morss examines and stresses the si...,"[About the Author, Susan Buck-Morss is Disting...",18.39,"{'hi_res': [], 'large': [], 'thumb': [], 'vari...","{'title': [], 'url': [], 'user_id': []}",Susan Buck-Morss (Author),"[Books, Politics & Social Sciences, Philosophy]","{""Publisher"": ""Free Press; Trade edition (Dece...",0029051509,,"Paperback – December 1, 1979",{'avatar': 'https://m.media-amazon.com/images/...
4448179,Books,Trails Illustrated National Parks Guadalupe Mo...,4.8,121,[],[],4.99,"{'hi_res': [], 'large': [], 'thumb': [], 'vari...","{'title': [], 'url': [], 'user_id': []}",,"[Books, Reference, Atlases & Maps]","{""Language"": ""English"", ""ISBN 10"": ""0925873039...",0925873039,,Map,{'avatar': 'https://m.media-amazon.com/images/...


In [None]:
sampled_metadata_df = metadata_raw_df.loc[
    lambda df: df[cfg.data.item_col].isin(set(idm.item_to_index.keys()))
]
sampled_metadata_df

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
489,Buy a Kindle,The Alibi,4.3,5532,"[In this suspenseful Southern thriller and #1,...","[Amazon.com Review, Sandra Brown's two previou...",7.99,"{'hi_res': [None], 'large': ['https://m.media-...","{'title': [], 'url': [], 'user_id': []}",Sandra Brown (Author) Format: Kindle Edition,"[Books, Mystery, Thriller & Suspense, Thriller...","{""Publisher"": ""Grand Central Publishing (Augus...",B00BEK6ZR2,,Kindle Edition,{'avatar': 'https://m.media-amazon.com/images/...
509,Buy a Kindle,Later,4.5,33092,"[“Part detective tale, part thriller…touching ...","[Review, #1 on The New York Times bestselling ...",9.99,"{'hi_res': [None], 'large': ['https://m.media-...","{'title': [], 'url': [], 'user_id': []}",Stephen King (Author) Format: Kindle Edition,"[Books, Mystery, Thriller & Suspense, Thriller...","{""Publisher"": ""Hard Case Crime (March 2, 2021)...",B08F4GYM8W,,Kindle Edition,{'avatar': 'https://m.media-amazon.com/images/...
754,Buy a Kindle,The Night Agent: A Novel,4.4,2731,[NOW ON NETFLIX! Starring Gabriel Basso and Lu...,"[Review, “, The Night Agent, is a whirlwind of...",8.99,"{'hi_res': [None], 'large': ['https://m.media-...","{'title': [], 'url': [], 'user_id': []}",Matthew Quirk (Author) Format: Kindle Edition,"[Books, Mystery, Thriller & Suspense, Thriller...","{""Publisher"": ""William Morrow (January 15, 201...",B07B7LB9TN,,Kindle Edition,{'avatar': 'https://m.media-amazon.com/images/...
1721,Buy a Kindle,The Word Is Murder: A Novel (A Hawthorne and H...,4.3,15516,"[""One of the most entertaining mysteries of th...","[From the Back Cover, One bright spring mornin...",14.49,"{'hi_res': [None], 'large': ['https://m.media-...",{'title': ['The Word Is Murder: A Novel (Detec...,Anthony Horowitz (Author) Format: Kindle Edi...,"[Books, Mystery, Thriller & Suspense, Thriller...","{""Publisher"": ""Harper; Reprint edition (June 5...",B072PQXYYJ,,Kindle Edition,{'avatar': 'https://m.media-amazon.com/images/...
1775,Buy a Kindle,The Hard Way Home (The Star and the Shamrock B...,4.7,10964,[Dublin 1950Liesl Bannon has never felt like s...,[],0.0,"{'hi_res': [None], 'large': ['https://m.media-...","{'title': [], 'url': [], 'user_id': []}",Jean Grainger (Author) Format: Kindle Edition,"[Books, Literature & Fiction, Genre Fiction]","{""Publication date"": ""June 29, 2020"", ""Languag...",B088HJ5312,,Kindle Edition,{'avatar': 'https://m.media-amazon.com/images/...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3531406,Books,Adult Coloring Book: Stress Relieving Patterns,4.5,4342,[STRESS RELIEVING | CALMING | RELAXING | CREAT...,[],6.91,"{'hi_res': [], 'large': [], 'thumb': [], 'vari...","{'title': [], 'url': [], 'user_id': []}","Blue Star Coloring (Author), Adult Coloring B...","[Books, Arts & Photography, History & Criticism]","{""Publisher"": ""Blue Star Coloring; Csm edition...",1941325122,,"Paperback – March 28, 2015",{'avatar': 'https://m.media-amazon.com/images/...
3531640,Books,Code Name Verity (Edgar Allen Poe Awards. Best...,4.3,5426,"[The beloved #1, New York Times, bestseller, a...","[Amazon.com Review, Amazon Best Teen Books of ...",9.19,"{'hi_res': [], 'large': [], 'thumb': [], 'vari...","{'title': [], 'url': [], 'user_id': []}",Elizabeth Wein (Author),"[Books, Teen & Young Adult, Literature & Fiction]","{""Publisher"": ""Little, Brown Books for Young R...",1423152190,,"Hardcover – May 15, 2012",{'avatar': 'https://m.media-amazon.com/images/...
3532411,Buy a Kindle,Cold Vengeance (Pendergast Book 11),4.6,3552,"[Twelve years ago, Special Agent Pendergast's ...","[Review, An exceptionally strong number of a b...",8.99,"{'hi_res': [], 'large': [], 'thumb': [], 'vari...","{'title': [], 'url': [], 'user_id': []}","Douglas Preston (Author), Lincoln Child (Auth...","[Books, Literature & Fiction, Genre Fiction]","{""Publisher"": ""Grand Central Publishing (Augus...",B0048EKF3Q,,Kindle Edition,{'avatar': 'https://m.media-amazon.com/images/...
3532800,Buy a Kindle,Ghost Fleet: A Novel of the Next World War,4.2,7400,"[""A novel that reads like science fiction but ...",[Excerpt. © Reprinted by permission. All right...,12.99,"{'hi_res': [], 'large': [], 'thumb': [], 'vari...","{'title': [], 'url': [], 'user_id': []}","P. W. Singer (Author), August Cole (Author) ...","[Books, Mystery, Thriller & Suspense, Thriller...","{""Publisher"": ""Mariner Books; Reprint edition ...",B00LZ7GOI4,,Kindle Edition,{'avatar': 'https://m.media-amazon.com/images/...


In [10]:
sampled_metadata_df.to_parquet(cfg.data.metadata_fp, index=False)

# Item sequence


Historical interactions is normally a very important feature in recommendations. Here we explicitly prepare the list of items a user interacted with regarding every row.


## Test implementation


In [11]:
# Sample DataFrame
data = {
    "user_indices": [0, 0, 1, 1, 1],
    "item_indices": [0, 1, 2, 3, 4],
    "timestamp": [0, 1, 2, 3, 4],
    "ratings": [1, 4, 5, 3, 2],
}

df = pd.DataFrame(data)

# Generate the item sequences
df_with_sequences = generate_item_sequences(
    df,
    user_col="user_indices",
    item_col="item_indices",
    timestamp_col="timestamp",
    sequence_length=3,
    padding=True,
    padding_value=-1,
)

df_with_sequences

Unnamed: 0,user_indices,item_indices,timestamp,ratings,item_sequence
0,0,0,0,1,"[-1.0, -1.0, -1.0]"
1,0,1,1,4,"[-1, -1, 0]"
2,1,2,2,5,"[-1.0, -1.0, -1.0]"
3,1,3,3,3,"[-1, -1, 2]"
4,1,4,4,2,"[-1, 2, 3]"


## Run with real data


In [12]:
full_df = pd.concat(
    [train_df.assign(source="train"), val_df.assign(source="val")],
    axis=0,
)
full_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,source
32,AH6CATODIVPVUOJEWHRSRCSKAOHA,0399240462,5.0,1398818354000,15591,1302,train
34,AH6CATODIVPVUOJEWHRSRCSKAOHA,0547248288,5.0,1415484437000,15591,1764,train
35,AH6CATODIVPVUOJEWHRSRCSKAOHA,141694737X,5.0,1416395330000,15591,2721,train
37,AH6CATODIVPVUOJEWHRSRCSKAOHA,0374360979,5.0,1420650726000,15591,952,train
40,AH6CATODIVPVUOJEWHRSRCSKAOHA,0671493205,3.0,1420651291000,15591,1943,train
...,...,...,...,...,...,...,...
414351,AHUJXB632AXB4QJF6Y25TYEEV2IA,B08L3B4VH4,5.0,1629146298574,18901,7366,val
414352,AHUJXB632AXB4QJF6Y25TYEEV2IA,B07SJRYDDH,5.0,1633143544897,18901,7104,val
414353,AHUJXB632AXB4QJF6Y25TYEEV2IA,B08H1TM3ZR,5.0,1643718266632,18901,7352,val
414354,AHUJXB632AXB4QJF6Y25TYEEV2IA,B01HZFB38U,5.0,1645724532943,18901,6172,val


In [14]:
df_with_sequences = generate_item_sequences(
    full_df,
    user_col=cfg.data.user_col,
    item_col="item_indice",
    timestamp_col=cfg.data.timestamp_col,
    sequence_length=cfg.train.sequence.sequence_length,
    padding=True,
    padding_value=-1,
)

In [15]:
with pd.option_context("display.max_colwidth", None):
    display(df_with_sequences[[cfg.data.user_col, "item_indice", "item_sequence"]])

Unnamed: 0,user_id,item_indice,item_sequence
4328616,AE224PFXAEAT66IXX43GRJSWHXCA,1251,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0]"
4328618,AE224PFXAEAT66IXX43GRJSWHXCA,3363,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1251]"
4328624,AE224PFXAEAT66IXX43GRJSWHXCA,499,"[-1, -1, -1, -1, -1, -1, -1, -1, 1251, 3363]"
4328627,AE224PFXAEAT66IXX43GRJSWHXCA,5410,"[-1, -1, -1, -1, -1, -1, -1, 1251, 3363, 499]"
4328634,AE224PFXAEAT66IXX43GRJSWHXCA,4639,"[-1, -1, -1, -1, -1, -1, 1251, 3363, 499, 5410]"
...,...,...,...
4627387,AHZZZ6UASY7CGOTGP5BH5637FMPA,5902,"[-1, -1, -1, -1, -1, 5149, 4095, 6073, 5821, 6081]"
4627389,AHZZZ6UASY7CGOTGP5BH5637FMPA,5564,"[-1, -1, -1, -1, 5149, 4095, 6073, 5821, 6081, 5902]"
4627390,AHZZZ6UASY7CGOTGP5BH5637FMPA,5567,"[-1, -1, -1, 5149, 4095, 6073, 5821, 6081, 5902, 5564]"
4627391,AHZZZ6UASY7CGOTGP5BH5637FMPA,5139,"[-1, -1, 5149, 4095, 6073, 5821, 6081, 5902, 5564, 5567]"


In [None]:
# Check sample user
user_id = df_with_sequences.sample(n=1)[cfg.data.user_col].values[0]

with pd.option_context("display.max_colwidth", None):
    display(
        df_with_sequences.loc[lambda df: df[cfg.data.user_col].eq(user_id)]
        .sort_values(cfg.data.timestamp_col)[
            [cfg.data.user_col, cfg.data.timestamp_col, "item_indice", "item_sequence"]
        ]
        .tail(10)
    )

Unnamed: 0,user_id,timestamp,item_indice,item_sequence
1337174,AEKAI523FFH6NANB6B22XWKSSGBA,1415887223000,4957,"[-1, -1, -1, -1, -1, -1, -1, 472, 5005, 5102]"
1337175,AEKAI523FFH6NANB6B22XWKSSGBA,1417560275000,5192,"[-1, -1, -1, -1, -1, -1, 472, 5005, 5102, 4957]"
1337176,AEKAI523FFH6NANB6B22XWKSSGBA,1441115494000,4361,"[-1, -1, -1, -1, -1, 472, 5005, 5102, 4957, 5192]"
1337177,AEKAI523FFH6NANB6B22XWKSSGBA,1446259519000,4537,"[-1, -1, -1, -1, 472, 5005, 5102, 4957, 5192, 4361]"
1337178,AEKAI523FFH6NANB6B22XWKSSGBA,1448800470000,5729,"[-1, -1, -1, 472, 5005, 5102, 4957, 5192, 4361, 4537]"
1337184,AEKAI523FFH6NANB6B22XWKSSGBA,1548204407025,3854,"[-1, -1, 472, 5005, 5102, 4957, 5192, 4361, 4537, 5729]"
1337186,AEKAI523FFH6NANB6B22XWKSSGBA,1553957011113,6709,"[-1, 472, 5005, 5102, 4957, 5192, 4361, 4537, 5729, 3854]"
1337187,AEKAI523FFH6NANB6B22XWKSSGBA,1556979951042,4428,"[472, 5005, 5102, 4957, 5192, 4361, 4537, 5729, 3854, 6709]"
1337189,AEKAI523FFH6NANB6B22XWKSSGBA,1565955350441,5007,"[5005, 5102, 4957, 5192, 4361, 4537, 5729, 3854, 6709, 4428]"
1337192,AEKAI523FFH6NANB6B22XWKSSGBA,1600140533014,6776,"[5102, 4957, 5192, 4361, 4537, 5729, 3854, 6709, 4428, 5007]"


# Persist


In [17]:
train_features_df = df_with_sequences.loc[lambda df: df["source"].eq("train")].drop(
    columns=["source"]
)
val_features_df = df_with_sequences.loc[lambda df: df["source"].eq("val")].drop(
    columns=["source"]
)

In [18]:
train_features_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,item_sequence
4328616,AE224PFXAEAT66IXX43GRJSWHXCA,0399159312,2.0,1373291889000,0,1251,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
4328618,AE224PFXAEAT66IXX43GRJSWHXCA,B000FA5TTW,1.0,1382077065000,0,3363,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1251]"
4328624,AE224PFXAEAT66IXX43GRJSWHXCA,030758836X,1.0,1424138603000,0,499,"[-1, -1, -1, -1, -1, -1, -1, -1, 1251, 3363]"
4328627,AE224PFXAEAT66IXX43GRJSWHXCA,B00MSRW6SM,4.0,1437924147000,0,5410,"[-1, -1, -1, -1, -1, -1, -1, 1251, 3363, 499]"
4328634,AE224PFXAEAT66IXX43GRJSWHXCA,B00A18VD7A,1.0,1464603674000,0,4639,"[-1, -1, -1, -1, -1, -1, 1251, 3363, 499, 5410]"
...,...,...,...,...,...,...,...
4627387,AHZZZ6UASY7CGOTGP5BH5637FMPA,B017GFRJZK,5.0,1508089337653,19733,5902,"[-1, -1, -1, -1, -1, 5149, 4095, 6073, 5821, 6..."
4627389,AHZZZ6UASY7CGOTGP5BH5637FMPA,B00RPM9MJ6,4.0,1521230143557,19733,5564,"[-1, -1, -1, -1, 5149, 4095, 6073, 5821, 6081,..."
4627390,AHZZZ6UASY7CGOTGP5BH5637FMPA,B00RU7SNP0,4.0,1534867184329,19733,5567,"[-1, -1, -1, 5149, 4095, 6073, 5821, 6081, 590..."
4627391,AHZZZ6UASY7CGOTGP5BH5637FMPA,B00HGSVGSY,4.0,1534867223318,19733,5139,"[-1, -1, 5149, 4095, 6073, 5821, 6081, 5902, 5..."


In [19]:
val_features_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,item_sequence
307936,AE23RLRV25THT7OZM4T4ZJ4BMYCA,0062409212,5.0,1646772001708,12,265,"[-1, -1, -1, -1, -1, 4136, 3643, 4566, 6057, 6..."
363910,AE26ASIO5XEEZELXTU3UK5Z4TS6A,B089GSVYJW,5.0,1631481525785,16,7315,"[-1, -1, -1, -1, 3748, 6813, 6696, 6497, 5464,..."
363911,AE26ASIO5XEEZELXTU3UK5Z4TS6A,B07WG8L7WC,4.0,1635636047615,16,7156,"[-1, -1, -1, 3748, 6813, 6696, 6497, 5464, 661..."
173811,AE27AOVHHMVINOYGUM6QCG2SSE6A,B083J7QQ89,5.0,1644968355783,21,7242,"[-1, -1, -1, -1, 1248, 1270, 2040, 3033, 767, ..."
242097,AE2CUFERDXJI4XNIGIGJOBDLCOTQ,B07CGCTSNW,5.0,1636754804021,39,6797,"[-1, -1, 4826, 4340, 4577, 5960, 5916, 6162, 3..."
...,...,...,...,...,...,...,...
137390,AHZLM4RDKSICEFEAYEQQRZW45BPA,B08CV9SPDQ,5.0,1657516186943,19672,7335,"[-1, -1, -1, -1, 6121, 5345, 5735, 6479, 6758,..."
180646,AHZLQPSPG675BABC5R5NJW6KG3WQ,B07D6PZ6P1,5.0,1635813519329,19673,6837,"[-1, -1, -1, -1, -1, 4387, 4340, 4578, 6829, 7..."
88385,AHZNQ34GWKKLJN53IDXLAX22OBJQ,B07ZJ2VHBB,5.0,1652978096579,19682,7208,"[-1, -1, -1, -1, -1, 4195, 7126, 7175, 6817, 6..."
88386,AHZNQ34GWKKLJN53IDXLAX22OBJQ,B00PG8UCGS,5.0,1654707732874,19682,5521,"[-1, -1, -1, -1, 4195, 7126, 7175, 6817, 6807,..."


In [20]:
train_features_df.to_parquet(cfg.data.train_features_fp, index=False)
val_features_df.to_parquet(cfg.data.val_features_fp, index=False)