# Store supporting features

# Set up

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json
import os
import sys
from typing import Optional

import pandas as pd
import redis
from dotenv import load_dotenv
from pydantic import BaseModel
from tqdm.auto import tqdm

load_dotenv()

sys.path.insert(0, "..")

from src.id_mapper import IDMapper

# Controller

In [3]:
class Args(BaseModel):
    """Run configuration for this notebook.

    Groups the Redis connection settings and key names, the input file paths,
    and the DataFrame column names used by the cells below. Call ``init()``
    after construction to resolve and create the per-run persist directory.
    """

    testing: bool = False
    run_name: str = "005-refactor"
    # Filled in by init(); it is None until then, hence Optional rather than str.
    notebook_persist_dp: Optional[str] = None
    random_seed: int = 41

    # How many of the most-interacted items are kept as popularity recommendations.
    top_K: int = 100

    redis_host: str = "localhost"
    redis_port: int = 6379
    # Per-user keys are this prefix immediately followed by the user id.
    redis_recent_key_prefix: str = "feature:user:recent_items:"
    # Single key holding the JSON payload of popular items.
    redis_popular_key: str = "output:popular"

    train_features_fp: str = "../data/train_features.parquet"
    val_features_fp: str = "../data/val_features.parquet"
    id_mapper_fp: str = "../data/idm.json"

    user_col: str = "user_id"
    item_col: str = "parent_asin"
    timestamp_col: str = "timestamp"

    def init(self):
        """Resolve ``notebook_persist_dp`` from ``run_name`` and create it on disk.

        The directory is ``data/<run_name>`` relative to the current working
        directory, stored as an absolute path. Creating it is idempotent.

        Returns:
            The same instance, so the call can be chained onto the constructor.
        """
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        os.makedirs(self.notebook_persist_dp, exist_ok=True)

        return self


# Build the run configuration, create its persist directory, and echo the
# resolved settings so they are recorded alongside the notebook outputs.
args = Args()
args.init()

print(args.model_dump_json(indent=2))

{
  "testing": false,
  "run_name": "005-refactor",
  "notebook_persist_dp": "/Users/dvq/frostmourne/fsds/fsds-recsys/chapters/l7/notebooks/data/005-refactor",
  "random_seed": 41,
  "top_K": 100,
  "redis_host": "localhost",
  "redis_port": 6379,
  "redis_recent_key_prefix": "feature:user:recent_items:",
  "redis_popular_key": "output:popular",
  "train_features_fp": "../data/train_features.parquet",
  "val_features_fp": "../data/val_features.parquet",
  "id_mapper_fp": "../data/idm.json",
  "user_col": "user_id",
  "item_col": "parent_asin",
  "timestamp_col": "timestamp"
}


# Load input data

In [4]:
# Read the train and validation feature tables, then the ID mapper.
train_features_df, val_features_df = (
    pd.read_parquet(features_fp)
    for features_fp in (args.train_features_fp, args.val_features_fp)
)
idm = IDMapper().load(args.id_mapper_fp)

# Stack both splits row-wise; the original row labels of each split are kept.
full_df = pd.concat([train_features_df, val_features_df], axis=0)
full_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,main_category,title,description,categories,price,item_sequence
0,AFSR5Q6AUWIXDCBJY3Z63SFP7PIQ,B00001KUII,5.0,948686983000,14397,1788,Video Games,Half-Life: Game of the Year Edition - PC,"[Product description, The critics agree. Half-...","[Video Games, PC, Games]",41.99,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
1,AEZGYAZLTQUUBN6DHM7OPECPKUYA,B00002EPZ2,5.0,949551425000,2739,3721,Video Games,Planescape: Torment - PC,"[Amazon.com, Explore Sigil, the City of Doors....","[Video Games, PC, Games]",14.99,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
2,AEKK2OBHEI2MK3EERXMCWLWIU3NQ,B00002NDRY,5.0,949807161000,17762,1841,Video Games,Age of Empires 2: Age of Kings - PC,"[Product description, Age of Empires II: Age o...","[Video Games, PC, Games]",64.88,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
3,AEXEI37RJQEQDQLYNH3QCJTF6A7Q,B001E91OQA,5.0,951150553000,7454,1261,Video Games,Roller Coaster Tycoon - PC,"[Amazon.com, Design your own roller coaster. S...","[Video Games, PC, Games]",40.0,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
4,AFSR5Q6AUWIXDCBJY3Z63SFP7PIQ,B001E91OQA,5.0,951269165000,14397,1261,Video Games,Roller Coaster Tycoon - PC,"[Amazon.com, Design your own roller coaster. S...","[Video Games, PC, Games]",40.0,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
...,...,...,...,...,...,...,...,...,...,...,...,...
957,AHJUZFMUESAEQBPC2QQMBDVUBYFQ,B0B1PB5L93,4.0,1657883331431,4826,3482,Computers,Razer Viper Ultimate Lightweight Wireless Gami...,[Forget about average and claim the unfair adv...,"[Video Games, PC, Accessories, Gaming Mice]",89.99,"[-1, -1, -1, 1238, 914, 1667, 4532, 1322, 1116..."
958,AFIXV7CY3OC6WI5DXCS3JAGP5SQA,B0C37RBK2R,5.0,1657887021161,7410,1483,Video Games,Xbox Series S,"[Introducing the Xbox Series S, the smallest, ...",[],279.0,"[3181, 2777, 386, 1165, 2724, 3426, 4416, 3470..."
959,AFDL3ZQE4ARYEEBBH2KAPMP4NSHQ,B0795GHTBC,5.0,1657910674213,15692,493,All Electronics,ivoler [3 Pack Screen Protector Tempered Glass...,[],"[Video Games, Nintendo Switch, Accessories, Fa...",9.39,"[-1, -1, -1, 2601, 3281, 48, 4100, 2489, 1864,..."
960,AEE72HLCWIZT2GKD7UZRXN36T27A,B0CB8LZT7K,5.0,1657928730786,12857,860,Video Games,Daydayup Switch Carrying Case Compatible with ...,[],"[Video Games, Legacy Systems, Nintendo Systems...",21.99,"[-1, 1304, 4442, 1273, 1489, 3229, 1662, 2182,..."


In [5]:
# Rank each user's interactions from newest (1) to oldest; ties on timestamp
# are broken by row order because of method="first".
recency_rank = full_df.groupby(args.user_col)[args.timestamp_col].rank(
    method="first", ascending=False
)

# Keep only the single most recent interaction per user.
latest_df = full_df.assign(recency=recency_rank)[lambda df: df["recency"] == 1]
latest_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,user_indice,item_indice,main_category,title,description,categories,price,item_sequence,recency
9,AFSR5Q6AUWIXDCBJY3Z63SFP7PIQ,B001EYUPAQ,5.0,961869703000,14397,1455,Video Games,Deus Ex: Game of the Year Edition - PC,"[Product description, Real Conspiracies...Seve...","[Video Games, PC, Games]",69.07,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1788.0, 1...",1.0
81,AGI2TRAJXLJFMCFCVRTE5TXJLZOA,B000038ABO,4.0,977439956000,7610,980,Video Games,Parasite Eve,"[Product description, One of them is a police ...","[Video Games, Legacy Systems, PlayStation Syst...",144.99,"[-1.0, -1.0, -1.0, -1.0, -1.0, 555.0, 3747.0, ...",1.0
110,AFHEODRO4ABX45Q62AGSEU5VR5SQ,B00004TCT3,5.0,979868084000,9213,3913,Video Games,"Pokemon, Silver Version","[Product Description, Pokemon Gold and Silver ...","[Video Games, Legacy Systems, Nintendo Systems...",124.95,"[-1.0, -1.0, -1.0, 3489.0, 1261.0, 3321.0, 104...",1.0
128,AHNLE4FJOHIHA3HUOA5PG4LM4EAA,B00000K3X9,5.0,982539267000,10963,1815,Video Games,Sonic Adventure - Sega Dreamcast,"[Product description, Sega's beloved blue masc...","[Video Games, Legacy Systems, Sega Systems, Se...",99.99,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1551.0, 2...",1.0
168,AGW5FRNMVFQGVQJWQYQGMDI6C2UQ,B00004U5VK,5.0,987548112000,12429,3702,Video Games,Onimusha Warlords,"[Product Description, Set during the medieval ...","[Video Games, Legacy Systems, PlayStation Syst...",7.49,"[-1.0, -1.0, -1.0, -1.0, 1551.0, 845.0, 218.0,...",1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
957,AHJUZFMUESAEQBPC2QQMBDVUBYFQ,B0B1PB5L93,4.0,1657883331431,4826,3482,Computers,Razer Viper Ultimate Lightweight Wireless Gami...,[Forget about average and claim the unfair adv...,"[Video Games, PC, Accessories, Gaming Mice]",89.99,"[-1, -1, -1, 1238, 914, 1667, 4532, 1322, 1116...",1.0
958,AFIXV7CY3OC6WI5DXCS3JAGP5SQA,B0C37RBK2R,5.0,1657887021161,7410,1483,Video Games,Xbox Series S,"[Introducing the Xbox Series S, the smallest, ...",[],279.0,"[3181, 2777, 386, 1165, 2724, 3426, 4416, 3470...",1.0
959,AFDL3ZQE4ARYEEBBH2KAPMP4NSHQ,B0795GHTBC,5.0,1657910674213,15692,493,All Electronics,ivoler [3 Pack Screen Protector Tempered Glass...,[],"[Video Games, Nintendo Switch, Accessories, Fa...",9.39,"[-1, -1, -1, 2601, 3281, 48, 4100, 2489, 1864,...",1.0
960,AEE72HLCWIZT2GKD7UZRXN36T27A,B0CB8LZT7K,5.0,1657928730786,12857,860,Video Games,Daydayup Switch Carrying Case Compatible with ...,[],"[Video Games, Legacy Systems, Nintendo Systems...",21.99,"[-1, 1304, 4442, 1273, 1489, 3229, 1662, 2182,...",1.0


# Load recently interacted items into Redis

In [6]:
# Connect to Redis and fail fast with an actionable message if it is not reachable.
r = redis.Redis(host=args.redis_host, port=args.redis_port, db=0, decode_responses=True)

# The message previously referenced `args.port`, which does not exist on Args
# and would have raised AttributeError instead of showing this hint.
redis_unreachable_msg = f"Redis at {args.redis_host}:{args.redis_port} is not running, please make sure you have started the Redis docker service"

try:
    redis_is_up = r.ping()
except redis.ConnectionError as exc:
    # ping() raises when the server cannot be reached, so attach the hint here
    # as well; the exception type is unchanged.
    raise redis.ConnectionError(redis_unreachable_msg) from exc

assert redis_is_up, redis_unreachable_msg

In [7]:
# Preview the three columns that feed each user's recent-item sequence.
preview_cols = [args.user_col, args.item_col, "item_sequence"]
latest_df[preview_cols]

Unnamed: 0,user_id,parent_asin,item_sequence
9,AFSR5Q6AUWIXDCBJY3Z63SFP7PIQ,B001EYUPAQ,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1788.0, 1..."
81,AGI2TRAJXLJFMCFCVRTE5TXJLZOA,B000038ABO,"[-1.0, -1.0, -1.0, -1.0, -1.0, 555.0, 3747.0, ..."
110,AFHEODRO4ABX45Q62AGSEU5VR5SQ,B00004TCT3,"[-1.0, -1.0, -1.0, 3489.0, 1261.0, 3321.0, 104..."
128,AHNLE4FJOHIHA3HUOA5PG4LM4EAA,B00000K3X9,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1551.0, 2..."
168,AGW5FRNMVFQGVQJWQYQGMDI6C2UQ,B00004U5VK,"[-1.0, -1.0, -1.0, -1.0, 1551.0, 845.0, 218.0,..."
...,...,...,...
957,AHJUZFMUESAEQBPC2QQMBDVUBYFQ,B0B1PB5L93,"[-1, -1, -1, 1238, 914, 1667, 4532, 1322, 1116..."
958,AFIXV7CY3OC6WI5DXCS3JAGP5SQA,B0C37RBK2R,"[3181, 2777, 386, 1165, 2724, 3426, 4416, 3470..."
959,AFDL3ZQE4ARYEEBBH2KAPMP4NSHQ,B0795GHTBC,"[-1, -1, -1, 2601, 3281, 48, 4100, 2489, 1864,..."
960,AEE72HLCWIZT2GKD7UZRXN36T27A,B0CB8LZT7K,"[-1, 1304, 4442, 1273, 1489, 3229, 1662, 2182,..."


In [8]:
# Store each user's recent-item history in Redis as a "__"-joined string of
# item ids under `<redis_recent_key_prefix><user_id>`.
#
# The SET commands are queued on a non-transactional pipeline and sent in one
# batch at the end, rather than paying one network round trip per user.
# Iterating the three needed columns directly also avoids building a Series
# per row, which is what iterrows() does.
recent_rows = zip(
    latest_df[args.user_col],
    latest_df[args.item_col],
    latest_df["item_sequence"],
)

with r.pipeline(transaction=False) as pipe:
    for user_id, latest_item_id, item_sequence in tqdm(
        recent_rows, total=latest_df.shape[0]
    ):
        # Entries equal to -1 are skipped (they look like padding — TODO confirm);
        # the remaining entries are item indices.
        prev_item_indices = [int(item) for item in item_sequence if item != -1]
        prev_item_ids = [idm.get_item_id(idx) for idx in prev_item_indices]
        # The latest interaction itself goes last, after the earlier items.
        updated_item_sequences = prev_item_ids + [latest_item_id]
        key = args.redis_recent_key_prefix + user_id
        value = "__".join(updated_item_sequences)
        pipe.set(key, value)

    # Flush all queued writes to the server.
    pipe.execute()

  0%|          | 0/19578 [00:00<?, ?it/s]

In [9]:
# Spot-check: read back the stored sequence for one user. The sample is seeded
# with args.random_seed so that re-running the notebook inspects the same user.
test_user_id = latest_df.sample(1, random_state=args.random_seed)[args.user_col].iloc[0]
r.get(args.redis_recent_key_prefix + test_user_id)

'B001UQ7042__B001QCWRY8__B001EYUOFM__B003LPUBHS__B014R4KYMS'

# Load popular items into Redis

In [10]:
# Count interactions per item across train and validation data, then keep the
# top_K items with the highest counts (highest first).
item_interaction_counts = full_df.groupby(args.item_col).size()
popular_recs = item_interaction_counts.sort_values(ascending=False).iloc[: args.top_K]
popular_recs

parent_asin
B01N3ASPNV    755
B07YBXFDYN    755
B0086VPUHI    720
B00BGA9WK2    652
B00BN5T30E    544
             ... 
B07YBX7Y3P    166
B00CMQTVK0    164
B004I1JTEK    164
B00HGLLRV2    164
B00CMQTVUA    163
Length: 100, dtype: int64

In [11]:
# Serialise the popular items as JSON with two parallel lists (item ids and
# their interaction counts) and store it under the popular-items key.
popular_payload = {
    "rec_item_ids": popular_recs.index.tolist(),
    "rec_scores": popular_recs.values.tolist(),
}

key = args.redis_popular_key
value = json.dumps(popular_payload)
r.set(key, value)

True

In [12]:
# Read the payload back from Redis and confirm it holds exactly top_K item ids.
stored_popular = json.loads(r.get(key))
assert len(stored_popular["rec_item_ids"]) == args.top_K