# Set up

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json
import os
import sys

sys.path.insert(0, "..")
import pandas as pd
import redis
from dotenv import load_dotenv
from pydantic import BaseModel
from tqdm.auto import tqdm

from src.id_mapper import IDMapper

load_dotenv()

True

# Controller

In [3]:
class Args(BaseModel):
    testing: bool = False
    run_name: str = "000-first-attempt"
    notebook_persist_dp: str = None
    random_seed: int = 41

    top_K: int = 100

    redis_host: str = "localhost"
    redis_port: int = 6379
    redis_recent_key_prefix: str = "feature:user:recent_items:"
    redis_popular_key: str = "output:popular"

    train_features_fp: str = "../data/train_features.parquet"
    val_features_fp: str = "../data/val_features.parquet"
    id_mapper_fp: str = "../data/idm.json"

    user_col: str = "user_id"
    item_col: str = "parent_asin"
    timestamp_col: str = "timestamp"

    def init(self):
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        os.makedirs(self.notebook_persist_dp, exist_ok=True)

        if redis_host := os.getenv("REDIS_HOST"):
            self.redis_host = redis_host
            self.redis_port = os.getenv("REDIS_PORT", self.redis_port)

        return self


args = Args().init()

print(args.model_dump_json(indent=2))

{
  "testing": false,
  "run_name": "000-first-attempt",
  "notebook_persist_dp": "/mnt/d/projects/recsys/notebooks/data/000-first-attempt",
  "random_seed": 41,
  "top_K": 100,
  "redis_host": "localhost",
  "redis_port": "6379",
  "redis_recent_key_prefix": "feature:user:recent_items:",
  "redis_popular_key": "output:popular",
  "train_features_fp": "../data/train_features.parquet",
  "val_features_fp": "../data/val_features.parquet",
  "id_mapper_fp": "../data/idm.json",
  "user_col": "user_id",
  "item_col": "parent_asin",
  "timestamp_col": "timestamp"
}


  PydanticSerializationUnexpectedValue(Expected `int` - serialized value may not be as expected [field_name='redis_port', input_value='6379', input_type=str])
  return self.__pydantic_serializer__.to_json(


# Load input data

In [4]:
if not os.path.exists(args.train_features_fp):
    s3 = init_s3_client()
    bucket_name = "data"
    train_key = "train_features.parquet"
    val_key = "val_features.parquet"
    idm_key = "idm.json"

    s3.download_file(bucket_name, train_key, args.train_features_fp)
    s3.download_file(bucket_name, val_key, args.val_features_fp)
    s3.download_file(bucket_name, idm_key, args.id_mapper_fp)

In [5]:
train_features_df = pd.read_parquet(args.train_features_fp)
val_features_df = pd.read_parquet(args.val_features_fp)
idm = IDMapper().load(args.id_mapper_fp)
full_df = pd.concat([train_features_df, val_features_df], axis=0)
full_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,timestamp_unix,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,...,user_rating_list_10_recent_asin,user_rating_list_10_recent_asin_timestamp,item_sequence,item_sequence_ts,item_sequence_ts_bucket,main_category,title,description,categories,price
0,AHATA6X6MYTC3VNBFJ3WIYVK257A,B0050SVNZ8,4.0,2011-11-21 14:27:44.000,1321885664,1,4.000000,1,4.000000,1,...,,,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1]",Video Games,Amazon Basics Carrying Case for Nintendo - New...,[],"[Video Games, Legacy Systems, Nintendo Systems...",
1,AHATA6X6MYTC3VNBFJ3WIYVK257A,B00LZVNWIA,4.0,2014-08-17 00:00:06.000,1408233606,2,3.500000,2,3.500000,2,...,B0050SVNZ8,1321885664,"[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1820]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 1321885664]","[-1, -1, -1, -1, -1, -1, -1, -1, -1, 6]",Computers,Logitech G402 Hyperion Fury FPS Gaming Mouse,[Logitech G402 Hyperion Fury FPS Gaming Mouse],"[Video Games, PC, Accessories, Gaming Mice]",
2,AHATA6X6MYTC3VNBFJ3WIYVK257A,B0BH98D8GL,5.0,2017-11-26 15:02:34.100,1511708554,2,3.500000,1,3.000000,0,...,"B0050SVNZ8,B00LZVNWIA",13218856641408233606,"[-1, -1, -1, -1, -1, -1, -1, -1, 1820, 2778]","[-1, -1, -1, -1, -1, -1, -1, -1, 1321885664, 1...","[-1, -1, -1, -1, -1, -1, -1, -1, 8, 7]",Computers,Logitech G433 7.1 Wired Gaming Headset with DT...,[Logitech G433 gaming headset is the premium a...,"[Video Games, Xbox One, Accessories, Headsets]",44.99
3,AHATA6X6MYTC3VNBFJ3WIYVK257A,B074RNL1RX,5.0,2017-11-27 03:26:14.174,1511753174,0,,0,,0,...,"B0050SVNZ8,B00LZVNWIA,B0BH98D8GL",132188566414082336061511708554,"[-1, -1, -1, -1, -1, -1, -1, 1820, 2778, 4549]","[-1, -1, -1, -1, -1, -1, -1, 1321885664, 14082...","[-1, -1, -1, -1, -1, -1, -1, 8, 7, 2]",Video Games,Razer Wolverine Ultimate Officially Licensed X...,[Play anywhere with the Razer Wolverine Ultima...,"[Video Games, PC, Accessories, Controllers]",64.98
4,AHATA6X6MYTC3VNBFJ3WIYVK257A,B089QYP649,5.0,2018-07-08 23:33:40.696,1531092820,26,4.153846,7,4.428571,2,...,"B0050SVNZ8,B00LZVNWIA,B0BH98D8GL,B074RNL1RX",1321885664140823360615117085541511753174,"[-1, -1, -1, -1, -1, -1, 1820, 2778, 4549, 3757]","[-1, -1, -1, -1, -1, -1, 1321885664, 140823360...","[-1, -1, -1, -1, -1, -1, 8, 7, 5, 5]",Video Games,Turtle Beach Stealth 600 Wireless Surround Sou...,[The Turtle Beach Stealth 600 is the latest wi...,"[Video Games, PlayStation 4, Accessories, Head...",168.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
957,AE3NRCMFIBBA2XVODR47YYNLKRDA,B001EYUQC8,5.0,2021-11-13 09:59:46.634,1636797586,1,3.000000,1,3.000000,0,...,"B000OLXX86,B000B9RI14,B0050SWQ86,B00CTKHXFO,B0...","1342793426,1362929993,1368549700,1392151606,14...","[-1, -1, -1, 543, 417, 1835, 2407, 3123, 1526,...","[-1, -1, -1, 1342793426, 1362929993, 136854970...","[-1, -1, -1, 8, 8, 8, 8, 8, 8, 7]",Video Games,007 Quantum Of Solace - Playstation 3,[James Bond is back to settle the score in Qua...,"[Video Games, Legacy Systems, PlayStation Syst...",44.49
958,AEV5TZDZQEP24PM3SZ7SNV4TR26Q,B01N3ASPNV,5.0,2022-06-17 07:42:54.083,1655451774,10,4.000000,0,,0,...,"B00HRH79H6,B00JM57VDS,B00VILBF0Y,B072C3VM5F",1423572849142357323014244889911523051352,"[-1, -1, -1, -1, -1, -1, 2614, 2683, 2975, 3691]","[-1, -1, -1, -1, -1, -1, 1423572849, 142357323...","[-1, -1, -1, -1, -1, -1, 8, 8, 8, 7]",All Electronics,amFilm Tempered Glass Screen Protector for Nin...,[],"[Video Games, Nintendo Switch, Accessories, Fa...",8.91
959,AELRDP5MCGSCANM6GWUXAMBN75LQ,B009AGXH64,5.0,2022-06-03 18:23:36.536,1654280616,1,5.000000,0,,0,...,"B07SSZGYNR,B011AH9A16,B001ELJE5Q,B003N63BPE,B0...","1618466381,1618466731,1623954406,1625231840,16...","[-1, -1, -1, -1, -1, 4168, 3101, 777, 1503, 3526]","[-1, -1, -1, -1, -1, 1618466381, 1618466731, 1...","[-1, -1, -1, -1, -1, 6, 6, 5, 5, 5]",Video Games,Nintendo Wii U Console - 32GB Black Deluxe Set,[Wii U is the next great gaming console from N...,"[Video Games, Legacy Systems, Nintendo Systems...",199.99
960,AHERXKLMQLGPQLW4ZLKD4IRLMZAA,B07M6RVMPJ,5.0,2021-11-27 00:36:11.015,1637973371,0,,0,,0,...,"B001EYUXUI,B001EYUY3Y,B00006FWUU,B003FMTZSI,B0...","1595304483,1595304825,1595304936,1606090839,16...","[1029, 1035, 220, 1470, 1019, 260, 273, 289, 1...","[1595304483, 1595304825, 1595304936, 160609083...","[6, 6, 6, 6, 5, 5, 5, 5, 5, 5]",Video Games,Mario Party: The Top 100 - Nintendo 3DS,[Ever partied with Mario? stuffed mouthfuls of...,"[Video Games, Legacy Systems, Nintendo Systems...",48.99


In [6]:
latest_df = full_df.assign(
    recency=lambda df: df.groupby(args.user_col)[args.timestamp_col].rank(
        method="first", ascending=False
    )
).loc[lambda df: df["recency"].eq(1)]
latest_df

Unnamed: 0,user_id,parent_asin,rating,timestamp,timestamp_unix,parent_asin_rating_cnt_365d,parent_asin_rating_avg_prev_rating_365d,parent_asin_rating_cnt_90d,parent_asin_rating_avg_prev_rating_90d,parent_asin_rating_cnt_30d,...,user_rating_list_10_recent_asin_timestamp,item_sequence,item_sequence_ts,item_sequence_ts_bucket,main_category,title,description,categories,price,recency
5,AHATA6X6MYTC3VNBFJ3WIYVK257A,B07DHNX18W,4.0,2020-11-02 20:18:55.046,1604348335,6,3.000000,1,2.000000,0,...,"1321885664,1408233606,1511708554,1511753174,15...","[-1, -1, -1, -1, -1, 1820, 2778, 4549, 3757, 4...","[-1, -1, -1, -1, -1, 1321885664, 1408233606, 1...","[-1, -1, -1, -1, -1, 8, 8, 6, 6, 6]",Computers,Razer Huntsman Elite Gaming Keyboard: Fast Key...,[Introduces the new Razer Opto-Mechanical swit...,"[Video Games, PC, Accessories, Gaming Keyboards]",219.99,1.0
13,AHV6QCNBJNSGLATP56JAWJ3C4G2A,B08N7Q1J4W,4.0,2019-11-15 13:52:20.147,1573825940,3,4.333333,3,4.333333,3,...,"1451860309,1538135312,1554755712,1556830735,15...","[-1, -1, -1, 3242, 4371, 4608, 4557, 4112, 416...","[-1, -1, -1, 1451860309, 1538135312, 155475571...","[-1, -1, -1, 7, 6, 5, 5, 5, 5, 5]",Video Games,PowerA FUSION Wired Fightpad for PlayStation 4...,[Control every fight with the FUSION by PowerA...,"[Video Games, PlayStation 4, Accessories, Cont...",,1.0
18,AHOEABHRAFWXIT4JZ5MKJ3FMASGA,B01I14IASK,5.0,2019-06-18 00:01:02.601,1560816062,2,5.000000,0,,0,...,1435767781143658507514493229571453836493,"[-1, -1, -1, -1, -1, -1, 2922, 562, 1795, 2910]","[-1, -1, -1, -1, -1, -1, 1435767781, 143658507...","[-1, -1, -1, -1, -1, -1, 7, 7, 7, 7]",Computers,KIMILAR Replacement Wired Infrared IR Ray Moti...,[],"[Video Games, Legacy Systems, Nintendo Systems...",7.49,1.0
23,AHBZRDFYB2FWUAO63DCSF2VSTJ2Q,B01N6QKT7H,4.0,2019-12-14 22:11:35.562,1576361495,16,4.625000,7,4.857143,1,...,1576360014157636006215763600771576361272,"[-1, -1, -1, -1, -1, -1, 3441, 3449, 3813, 3789]","[-1, -1, -1, -1, -1, -1, 1576360014, 157636006...","[-1, -1, -1, -1, -1, -1, 1, 1, 1, 0]",Video Games,Nintendo Joy-Con (L/R) - Gray,"[Introducing Joy-Con, controllers that make ne...","[Video Games, Nintendo Switch, Accessories, Co...",98.99,1.0
34,AEWRZLS7FBAX3BXANLXDKVV33CCA,B014R4KYMS,5.0,2016-05-27 08:55:22.000,1464339322,99,4.737374,99,4.737374,99,...,"1368219646,1371499982,1393374256,1394396097,14...","[2123, 2055, 2379, 1505, 2339, 2980, 2722, 419...","[1368219646, 1371499982, 1393374256, 139439609...","[7, 6, 6, 6, 6, 6, 6, 6, 5, 5]",Video Games,Uncharted 4: A Thief's End - PlayStation 4,[Uncharted comes to the PlayStation 4.Uncharte...,"[Video Games, PlayStation 4, Games]",24.99,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
957,AE3NRCMFIBBA2XVODR47YYNLKRDA,B001EYUQC8,5.0,2021-11-13 09:59:46.634,1636797586,1,3.000000,1,3.000000,0,...,"1342793426,1362929993,1368549700,1392151606,14...","[-1, -1, -1, 543, 417, 1835, 2407, 3123, 1526,...","[-1, -1, -1, 1342793426, 1362929993, 136854970...","[-1, -1, -1, 8, 8, 8, 8, 8, 8, 7]",Video Games,007 Quantum Of Solace - Playstation 3,[James Bond is back to settle the score in Qua...,"[Video Games, Legacy Systems, PlayStation Syst...",44.49,1.0
958,AEV5TZDZQEP24PM3SZ7SNV4TR26Q,B01N3ASPNV,5.0,2022-06-17 07:42:54.083,1655451774,10,4.000000,0,,0,...,1423572849142357323014244889911523051352,"[-1, -1, -1, -1, -1, -1, 2614, 2683, 2975, 3691]","[-1, -1, -1, -1, -1, -1, 1423572849, 142357323...","[-1, -1, -1, -1, -1, -1, 8, 8, 8, 7]",All Electronics,amFilm Tempered Glass Screen Protector for Nin...,[],"[Video Games, Nintendo Switch, Accessories, Fa...",8.91,1.0
959,AELRDP5MCGSCANM6GWUXAMBN75LQ,B009AGXH64,5.0,2022-06-03 18:23:36.536,1654280616,1,5.000000,0,,0,...,"1618466381,1618466731,1623954406,1625231840,16...","[-1, -1, -1, -1, -1, 4168, 3101, 777, 1503, 3526]","[-1, -1, -1, -1, -1, 1618466381, 1618466731, 1...","[-1, -1, -1, -1, -1, 6, 6, 5, 5, 5]",Video Games,Nintendo Wii U Console - 32GB Black Deluxe Set,[Wii U is the next great gaming console from N...,"[Video Games, Legacy Systems, Nintendo Systems...",199.99,1.0
960,AHERXKLMQLGPQLW4ZLKD4IRLMZAA,B07M6RVMPJ,5.0,2021-11-27 00:36:11.015,1637973371,0,,0,,0,...,"1595304483,1595304825,1595304936,1606090839,16...","[1029, 1035, 220, 1470, 1019, 260, 273, 289, 1...","[1595304483, 1595304825, 1595304936, 160609083...","[6, 6, 6, 6, 5, 5, 5, 5, 5, 5]",Video Games,Mario Party: The Top 100 - Nintendo 3DS,[Ever partied with Mario? stuffed mouthfuls of...,"[Video Games, Legacy Systems, Nintendo Systems...",48.99,1.0


# Load recent interacted items into Redis

In [7]:
r = redis.Redis(host=args.redis_host, port=args.redis_port, db=0, decode_responses=True)
assert (
    r.ping()
), f"Redis at {args.redis_host}:{args.port} is not running, please make sure you have started the Redis docker service"

In [8]:
latest_df[[args.user_col, args.item_col, "item_sequence"]]

Unnamed: 0,user_id,parent_asin,item_sequence
5,AHATA6X6MYTC3VNBFJ3WIYVK257A,B07DHNX18W,"[-1, -1, -1, -1, -1, 1820, 2778, 4549, 3757, 4..."
13,AHV6QCNBJNSGLATP56JAWJ3C4G2A,B08N7Q1J4W,"[-1, -1, -1, 3242, 4371, 4608, 4557, 4112, 416..."
18,AHOEABHRAFWXIT4JZ5MKJ3FMASGA,B01I14IASK,"[-1, -1, -1, -1, -1, -1, 2922, 562, 1795, 2910]"
23,AHBZRDFYB2FWUAO63DCSF2VSTJ2Q,B01N6QKT7H,"[-1, -1, -1, -1, -1, -1, 3441, 3449, 3813, 3789]"
34,AEWRZLS7FBAX3BXANLXDKVV33CCA,B014R4KYMS,"[2123, 2055, 2379, 1505, 2339, 2980, 2722, 419..."
...,...,...,...
957,AE3NRCMFIBBA2XVODR47YYNLKRDA,B001EYUQC8,"[-1, -1, -1, 543, 417, 1835, 2407, 3123, 1526,..."
958,AEV5TZDZQEP24PM3SZ7SNV4TR26Q,B01N3ASPNV,"[-1, -1, -1, -1, -1, -1, 2614, 2683, 2975, 3691]"
959,AELRDP5MCGSCANM6GWUXAMBN75LQ,B009AGXH64,"[-1, -1, -1, -1, -1, 4168, 3101, 777, 1503, 3526]"
960,AHERXKLMQLGPQLW4ZLKD4IRLMZAA,B07M6RVMPJ,"[1029, 1035, 220, 1470, 1019, 260, 273, 289, 1..."


In [9]:
for i, row in tqdm(latest_df.iterrows(), total=latest_df.shape[0]):
    prev_item_indices = [int(idx) for idx in row["item_sequence"] if idx != -1]
    prev_item_ids = [idm.get_item_id(idx) for idx in prev_item_indices]
    updated_item_sequences = prev_item_ids + [row[args.item_col]]
    user_id = row[args.user_col]
    key = args.redis_recent_key_prefix + user_id
    value = "__".join(updated_item_sequences)
    r.set(key, value)

  0%|          | 0/19578 [00:00<?, ?it/s]

In [10]:
test_user_id = latest_df.sample(1)[args.user_col].values[0]
r.get(args.redis_recent_key_prefix + test_user_id)

'B001UIG6M2__B003O6FV8S__B0039QWK0A__B001EYUVZK__B00HM3QAWK__B009DL2TBA__B01FSKACPY__B01MYUCFBK__B077TB64KB__B079FGT22K'

# Load popular items into Redis

In [11]:
popular_recs = (
    full_df.groupby(args.item_col).size().sort_values(ascending=False).head(args.top_K)
)
popular_recs

parent_asin
B01N3ASPNV    755
B07YBXFDYN    755
B0086VPUHI    720
B00BGA9WK2    652
B00BN5T30E    544
             ... 
B07YBX7Y3P    166
B00CMQTVK0    164
B004I1JTEK    164
B00HGLLRV2    164
B00CMQTVUA    163
Length: 100, dtype: int64

In [12]:
key = args.redis_popular_key
value = json.dumps(
    {
        "rec_item_ids": popular_recs.index.tolist(),
        "rec_scores": popular_recs.values.tolist(),
    }
)
r.set(key, value)

True

In [13]:
redis_data = json.loads(r.get(key))
print(redis_data)
assert len(redis_data["rec_item_ids"]) == args.top_K

{'rec_item_ids': ['B01N3ASPNV', 'B07YBXFDYN', 'B0086VPUHI', 'B00BGA9WK2', 'B00BN5T30E', 'B004RMK5QG', 'B00C1TTF86', 'B07YBWT3PK', 'B000N5Z2L4', 'B077GG9D5D', 'B087NNPYP3', 'B00KIWEMIG', 'B004LLHFAW', 'B00W9DHUBS', 'B00CJ9OTNE', 'B07624RBWB', 'B00BHRD4BM', 'B00BGA9X9W', 'B00K0NV5J2', 'B007CM0K86', 'B014R4KYMS', 'B019WRM1IA', 'B0015AARJI', 'B07DPK5NPD', 'B004HD55VK', 'B087SHFL9B', 'B00HVBPRUO', 'B09V5R5LSZ', 'B00DDILSBG', 'B07K3KHFSY', 'B00Z9TM72Q', 'B087NN2K41', 'B00DTY9B0O', 'B005GISQX4', 'B008CZN458', 'B00NOD0OTW', 'B00DJRLDMU', 'B01GY35HKE', 'B0088TN73M', 'B07YBX6T95', 'B00DBDPOZ4', 'B0BDWVBWC9', 'B08D3XL1KF', 'B00DC7G2W8', 'B07YBXFF5C', 'B07YBXFF99', 'B00HM1XPN4', 'B07PZ8NZSZ', 'B01GY35GIM', 'B0029LJIFG', 'B001D8Q5MA', 'B005OGKYVK', 'B001EYUQTQ', 'B00GOZSR96', 'B003ZDOFF0', 'B017QU5KLK', 'B00J5C3Z10', 'B00KCCNMYW', 'B004G5YI3U', 'B00VULDPCI', 'B00XBLQCLQ', 'B01IC2A28C', 'B00503E8S2', 'B00BZS9JV2', 'B002JTX7JQ', 'B00884S6YW', 'B00KSQHX1K', 'B001EYUSJ4', 'B00DB2BI8M', 'B002Z01QO2', 'B