In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import json
import os
import sys

import redis
from dotenv import load_dotenv
from loguru import logger
from pydantic import BaseModel
from tqdm.auto import tqdm

load_dotenv()

sys.path.insert(0, "..")

In [10]:
class Args(BaseModel):
    testing: bool = False
    run_name: str = "000-first-attempt"
    notebook_persist_dp: str = None
    random_seed: int = 41

    redis_host: str = os.getenv("REDIS_HOST", "localhost")
    redis_port: int = os.getenv("REDIS_PORT", 6379)
    redis_key_prefix: str = "output:i2i:"
    redis_popular_key: str = "output:popularitems"

    batch_recs_fp: str = "data/000-first-attempt/batch_recs.jsonl"
    popular_items_fp: str = "data/first-attempt/001-popular-baseline/top_popular_df.csv"

    def init(self):
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        os.makedirs(self.notebook_persist_dp, exist_ok=True)

        if redis_host := os.getenv("REDIS_HOST"):
            self.redis_host = redis_host
            self.redis_port = os.getenv("REDIS_PORT", self.redis_port)

        return self


args = Args().init()

print(args.model_dump_json(indent=2))

{
  "testing": false,
  "run_name": "000-first-attempt",
  "notebook_persist_dp": "/home/dinhln/Desktop/real_time_recsys/notebooks/data/000-first-attempt",
  "random_seed": 41,
  "redis_host": "138.2.61.6",
  "redis_port": "6379",
  "redis_key_prefix": "output:i2i:",
  "redis_popular_key": "output:popularitems",
  "batch_recs_fp": "data/000-first-attempt/batch_recs.jsonl",
  "popular_items_fp": "data/first-attempt/001-popular-baseline/top_popular_df.csv"
}


  PydanticSerializationUnexpectedValue(Expected `int` - serialized value may not be as expected [input_value='6379', input_type=str])
  return self.__pydantic_serializer__.to_json(


In [5]:
r = redis.Redis(host=args.redis_host, port=args.redis_port, db=0, decode_responses=True)
assert (
    r.ping()
), f"Redis at {args.redis_host}:{args.redis_port} is not running, please make sure you have started the Redis docker service"

In [None]:
def store_recommendations(file_path: str):
    with open(file_path, "r") as file:
        for line in tqdm(file):
            rec_data = json.loads(line)
            target_item = rec_data["target_item"]
            key = args.redis_key_prefix + target_item
            r.set(
                key,
                json.dumps(
                    {
                        "rec_item_ids": rec_data["rec_item_ids"],
                        "rec_scores": rec_data["rec_scores"],
                    }
                ),
            )


def get_recommendations(target_item):
    key = args.redis_key_prefix + target_item
    rec_data = r.get(key)
    if rec_data:
        return json.loads(rec_data)
    return None


def get_example_keys(count=5):
    keys = r.scan_iter(match=args.redis_key_prefix + "*", count=count)
    output = []
    for i, key in enumerate(keys, 1):
        output.append(key)
        if i >= count:
            return output

In [6]:
logger.info(f"Loading batch recs output from {args.batch_recs_fp}...")
store_recommendations(args.batch_recs_fp)

[32m2025-06-28 22:13:17.448[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mLoading batch recs output from data/000-first-attempt/batch_recs.jsonl...[0m


0it [00:00, ?it/s]

In [7]:
get_recommendations(get_example_keys()[0])

{'rec_item_ids': ['B098HS1NDV',
  'B00C8T5KOW',
  'B00KFD0SYK',
  'B08GZYBK6Z',
  'B01AI2YGK4',
  'B085HBM4Y5',
  'B08JYVX1W4',
  'B08R7HBWCF',
  'B0BC8CL92P',
  'B011LZY20G',
  'B016Y8XJCW',
  'B0C33Z5X4B',
  'B01DZS9UB8',
  'B07CK7LG51',
  'B088KQ1P52',
  'B0B46FYQVX',
  'B015ILEN80',
  'B00F0H8TOC',
  'B07CGB33W9',
  'B01MQPMLBR',
  'B09WKJQKGJ',
  'B00IAKLM54',
  'B06VX1YGQX',
  'B07M5GFPQN',
  'B00C30FCUI',
  'B08CKZ36N7',
  'B074N81CZN',
  'B0BZRY8JPY',
  'B07WNYLXM1',
  'B07QL36SDW',
  'B00KCRYIWM',
  'B015IK5URY',
  'B081K9C3Q1',
  'B01LXJA5JD',
  'B07C9H4JY8',
  'B08DKXZ9MS',
  'B07121TS59',
  'B07KF6WSL8',
  'B095CF8C2B',
  'B01AWBA8U8',
  'B09YDHLYRL',
  'B06XHXRXP1',
  'B076X3TTMK',
  'B01M0GB8CC',
  'B01N987LAZ',
  'B0BDJ2L28L',
  'B01MSNCDTA',
  'B0097BEFWC',
  'B0BYJSGL3D',
  'B007FUDKB4',
  'B09P4NGVWB',
  'B07VBJH9FW',
  'B07XWXYBW9',
  'B0713RRZMB',
  'B00JF2JM8K',
  'B01J2W4JUI',
  'B01LYHEC16',
  'B00QRPPCMI',
  'B01179M6DQ',
  'B07R5HM156',
  'B00F0R9V76',
  'B004Z

In [1]:
# Store popular items

In [6]:
import pandas as pd

popular_df = pd.read_csv(args.popular_items_fp)

popular_df

Unnamed: 0,parent_asin,size,score,rec_ranking
0,B01K8B8YA8,934,1.000000,1
1,B075X8471B,862,0.922912,2
2,B010BWYDYA,597,0.639186,3
3,B011BRUOMO,586,0.627409,4
4,B07S764D9V,544,0.582441,5
...,...,...,...,...
95,B01M3ULMWP,123,0.131692,96
96,B006GWO5WK,122,0.130621,97
97,B07BX4X77P,122,0.130621,98
98,B0BZ5KPQZK,122,0.130621,99


In [8]:
recs_item_ids = popular_df["parent_asin"].tolist()

score = popular_df["score"].tolist()

In [11]:
popular_recs = json.dumps({
    "rec_item_ids": recs_item_ids,
    "rec_scores": score
})

In [13]:
r.set(args.redis_popular_key, popular_recs)

True

In [14]:
r.get(args.redis_popular_key)

'{"rec_item_ids": ["B01K8B8YA8", "B075X8471B", "B010BWYDYA", "B011BRUOMO", "B07S764D9V", "B07GZFM1ZM", "B0BGNG1294", "B008J0Z9TA", "B083TH1B45", "B07P9V8GSH", "B00U3FPN4U", "B01MTF2Z37", "B0791TX5P5", "B0BW4PFM58", "B000WYVBR0", "B00CX5P8FC", "B00JO6RO8C", "B07KTYJ769", "B0BN74ZJDK", "B07456BG8N", "B07454F4JH", "B00BWF5U0M", "B0BS2ZMHCL", "B00PFQPX5Q", "B003NR57BY", "B00L0YLRUW", "B0043T7FXE", "B00EHFJGW2", "B00IVPU786", "B071R715MZ", "B00HEEOQBO", "B0BSF17PM2", "B00FB50S0Q", "B07HZLHPKP", "B08KZ1TZYB", "B06ZZCZS7R", "B08RLW7918", "B07H65KP63", "B08CLNX58K", "B00AJHDZSI", "B00FB50SBU", "B00IVPU7AO", "B07P374FF3", "B01I4TE612", "B075JZNHTD", "B00OQVZDJM", "B0BV93KQZR", "B08KG14KCT", "B09TWVB2TH", "B00N2ZDXW2", "B07WMB4XS4", "B0052SCU8U", "B0BB6Y5N3M", "B075KP9XHS", "B09Z3BM734", "B006JH8T3S", "B00TS6JL4S", "B09SXP5VB5", "B00OBRE5UE", "B01N75TWSA", "B08SCF2HC2", "B0BMQJYLQV", "B0BXYXD1Q2", "B0051VVOB2", "B004XC6GJ0", "B001FWYGJS", "B07F4P3JH7", "B003MTTJOY", "B009RUZ7TS", "B00OAJ412U", "