# 1. Checks based on Amazon data

## 1) Measuring market-entry progress per item (time to reach the 10th, 30th, 50th, and 100th interaction)

In [4]:
from pathlib import Path
import time
import polars as pl

# Input: review dump that home_summary() reads as newline-delimited JSON.
BASE = Path("/home/heek/edda_backbone/preprocess_raw/amazon/23/user_reviews/5_core")
SRC = BASE / "Home_and_Kitchen.json"

# Output: the directory is created eagerly (when this cell/module is executed),
# not lazily inside home_summary().
OUT_DIR = Path("/home/heek/aigs/NTS/reachtime_amazon")
OUT_DIR.mkdir(parents=True, exist_ok=True)
OUT_PATH = OUT_DIR / "home_itm_summary.csv"

# Interaction-count milestones n; the summary has one row per n.
N_LIST = [10, 30, 50, 100]


def now() -> float:
    """Return the current ``time.perf_counter()`` reading, in fractional seconds."""
    stamp = time.perf_counter()
    return stamp


def home_summary() -> None:
    """Summarize how long items take to reach their n-th interaction.

    Reads the newline-delimited JSON at ``SRC``, drops duplicate
    ``(user_id, item_id, ts)`` rows and, for every ``n`` in ``N_LIST``, reports:

    * ``count``       - number of items that have at least ``n`` interactions,
    * ``reach_rate``  - ``count`` divided by the number of distinct items,
    * ``mean_days`` / ``median_days`` - days elapsed between an item's first
      and n-th interaction, over the items that reached ``n``.

    The resulting table is printed and written to ``OUT_PATH`` as CSV.
    Returns early (after printing a message) when ``SRC`` does not exist or
    yields no rows.
    """
    if not SRC.exists():
        print(f"missing file: {SRC}")
        return

    t0 = now()

    # Lazy scan restricted to the three fields used below.
    scan = (
        pl.scan_ndjson(SRC)
        .select(
            pl.col("user_id").cast(pl.Utf8),
            pl.col("parent_asin").cast(pl.Utf8).alias("item_id"),
            pl.col("timestamp").cast(pl.Float64).alias("ts_raw"),
        )
    )

    df = (
        scan.with_columns(
            # Values above 1e11 are divided by 1000 - presumably epoch
            # milliseconds being normalized to seconds (TODO confirm).
            pl.when(pl.col("ts_raw") > 1e11)
            .then(pl.col("ts_raw") / 1000.0)
            .otherwise(pl.col("ts_raw"))
            .alias("ts_s")
        )
        .select(
            pl.col("user_id"),
            pl.col("item_id"),
            pl.col("ts_s").round(0).cast(pl.Int64).alias("ts"),
        )
        .unique(subset=["user_id", "item_id", "ts"])
        .collect(engine="streaming")
    )

    if df.is_empty():
        print("empty dataframe")
        return

    df_sorted = df.sort(["item_id", "ts"])
    # cc = zero-based position of each row within its item, in timestamp order.
    df_cc = df_sorted.with_columns(
        pl.int_range(0, pl.len()).over("item_id").alias("cc")
    )

    # A single aggregation yields first/last timestamp and row count per item.
    # Rows keep their frame order inside each group, so on the sorted frame
    # first()/last() are the earliest/latest timestamps.
    items = df_sorted.group_by("item_id").agg(
        pl.col("ts").first().alias("t0"),
        pl.col("ts").last().alias("t_last"),
        pl.len().alias("total_cnt"),
    )

    for n in N_LIST:
        # cc is zero-based, so the n-th interaction sits at cc == n - 1.
        tn = (
            df_cc.filter(pl.col("cc") == n - 1)
            .select(["item_id", pl.col("ts").alias(f"t{n}")])
        )
        # Left join: items with fewer than n interactions get a null t{n}.
        items = items.join(tn, on="item_id", how="left")

    total_items = items.height

    rows = []
    for n in N_LIST:
        days_col = f"days_{n}"
        # 86400 seconds per day.
        items = items.with_columns(
            ((pl.col(f"t{n}") - pl.col("t0")) / 86400.0)
            .cast(pl.Float64)
            .alias(days_col)
        )
        reached = items.filter(pl.col(f"t{n}").is_not_null())

        if reached.height == 0 or total_items == 0:
            rows.append(
                {
                    "n": n,
                    "count": 0,
                    "reach_rate": 0.0,
                    "mean_days": None,
                    "median_days": None,
                }
            )
            continue

        cnt, mean_days, median_days = reached.select(
            pl.len().alias("count"),
            pl.col(days_col).mean().alias("mean_days"),
            pl.col(days_col).median().alias("median_days"),
        ).row(0)

        rows.append(
            {
                "n": n,
                "count": cnt,
                "reach_rate": float(cnt) / float(total_items),
                "mean_days": mean_days,
                "median_days": median_days,
            }
        )

    summary = pl.DataFrame(rows).sort("n")
    summary.write_csv(OUT_PATH)

    t1 = now()
    print(summary)
    print(f"saved to {OUT_PATH}")
    print(f"elapsed: {t1 - t0:,.2f}s")


# Run the summary only when this cell/file is executed as the main program.
if __name__ == "__main__":
    home_summary()


shape: (4, 5)
┌─────┬────────┬────────────┬─────────────┬─────────────┐
│ n   ┆ count  ┆ reach_rate ┆ mean_days   ┆ median_days │
│ --- ┆ ---    ┆ ---        ┆ ---         ┆ ---         │
│ i64 ┆ i64    ┆ f64        ┆ f64         ┆ f64         │
╞═════╪════════╪════════════╪═════════════╪═════════════╡
│ 10  ┆ 440995 ┆ 0.577494   ┆ 531.173514  ┆ 344.449132  │
│ 30  ┆ 158671 ┆ 0.207784   ┆ 832.735617  ┆ 600.448206  │
│ 50  ┆ 95592  ┆ 0.12518    ┆ 987.320446  ┆ 742.468744  │
│ 100 ┆ 46410  ┆ 0.060775   ┆ 1215.037832 ┆ 939.190162  │
└─────┴────────┴────────────┴─────────────┴─────────────┘
saved to /home/heek/aigs/NTS/reachtime_amazon/home_itm_summary.csv
elapsed: 8.25s


## 2) Estimation based on items with at least 100 interactions

In [6]:
from pathlib import Path
import time
import polars as pl

# Input: review dump that home_summary_ge100() reads as newline-delimited JSON.
BASE = Path("/home/heek/edda_backbone/preprocess_raw/amazon/23/user_reviews/5_core")
SRC = BASE / "Home_and_Kitchen.json"

# Output: the directory is created eagerly (when this cell/module is executed),
# not lazily inside home_summary_ge100().
OUT_DIR = Path("/home/heek/aigs/NTS/reachtime_amazon")
OUT_DIR.mkdir(parents=True, exist_ok=True)
OUT_PATH = OUT_DIR / "home_itm_ge100_summary.csv"

# Interaction-count milestones n; the summary has one row per n.
N_LIST = [10, 30, 50, 100]


def now() -> float:
    """Return the current ``time.perf_counter()`` reading, in fractional seconds."""
    stamp = time.perf_counter()
    return stamp


def home_summary_ge100() -> None:
    """Summarize time-to-n-th-interaction for items with >= 100 interactions.

    Reads the newline-delimited JSON at ``SRC``, drops duplicate
    ``(user_id, item_id, ts)`` rows, keeps only items whose interaction count
    is at least 100 and, for every ``n`` in ``N_LIST``, reports the mean and
    median number of days between an item's first and n-th interaction.

    The resulting table is printed and written to ``OUT_PATH`` as CSV.
    Returns early (after printing a message) when ``SRC`` does not exist,
    yields no rows, or contains no item with at least 100 interactions.
    """
    if not SRC.exists():
        print(f"missing file: {SRC}")
        return

    t0 = now()

    # Lazy scan restricted to the three fields used below.
    scan = (
        pl.scan_ndjson(SRC)
        .select(
            pl.col("user_id").cast(pl.Utf8),
            pl.col("parent_asin").cast(pl.Utf8).alias("item_id"),
            pl.col("timestamp").cast(pl.Float64).alias("ts_raw"),
        )
    )

    df = (
        scan.with_columns(
            # Values above 1e11 are divided by 1000 - presumably epoch
            # milliseconds being normalized to seconds (TODO confirm).
            pl.when(pl.col("ts_raw") > 1e11)
            .then(pl.col("ts_raw") / 1000.0)
            .otherwise(pl.col("ts_raw"))
            .alias("ts_s")
        )
        .select(
            pl.col("user_id"),
            pl.col("item_id"),
            pl.col("ts_s").round(0).cast(pl.Int64).alias("ts"),
        )
        .unique(subset=["user_id", "item_id", "ts"])
        .collect(engine="streaming")
    )

    if df.is_empty():
        print("empty dataframe")
        return

    df_sorted = df.sort(["item_id", "ts"])
    # cc = zero-based position of each row within its item, in timestamp order.
    df_cc = df_sorted.with_columns(
        pl.int_range(0, pl.len()).over("item_id").alias("cc")
    )

    # A single aggregation yields first/last timestamp and row count per item.
    # Rows keep their frame order inside each group, so on the sorted frame
    # first()/last() are the earliest/latest timestamps.
    items = df_sorted.group_by("item_id").agg(
        pl.col("ts").first().alias("t0"),
        pl.col("ts").last().alias("t_last"),
        pl.len().alias("total_cnt"),
    )

    # Restrict to the well-observed items before attaching milestone columns,
    # so the joins below only touch rows that end up in the summary.
    core = items.filter(pl.col("total_cnt") >= 100)
    total_core = core.height

    if total_core == 0:
        print("no items with >= 100 interactions")
        return

    for n in N_LIST:
        # cc is zero-based, so the n-th interaction sits at cc == n - 1.
        tn = (
            df_cc.filter(pl.col("cc") == n - 1)
            .select(["item_id", pl.col("ts").alias(f"t{n}")])
        )
        # Left join: an item with fewer than n interactions gets a null t{n}.
        core = core.join(tn, on="item_id", how="left")

    print(f"items with >= 100 interactions: {total_core}")

    rows = []
    for n in N_LIST:
        days_col = f"days_{n}"
        # 86400 seconds per day.
        tmp = core.with_columns(
            ((pl.col(f"t{n}") - pl.col("t0")) / 86400.0)
            .cast(pl.Float64)
            .alias(days_col)
        )
        # Only relevant for milestones above 100; kept as a safety net.
        reached = tmp.filter(pl.col(f"t{n}").is_not_null())

        if reached.is_empty():
            rows.append(
                {
                    "n": n,
                    "mean_days": None,
                    "median_days": None,
                }
            )
            continue

        mean_days, median_days = reached.select(
            pl.col(days_col).mean().alias("mean_days"),
            pl.col(days_col).median().alias("median_days"),
        ).row(0)

        rows.append(
            {
                "n": n,
                "mean_days": mean_days,
                "median_days": median_days,
            }
        )

    summary = pl.DataFrame(rows).sort("n")
    summary.write_csv(OUT_PATH)

    t1 = now()
    print(summary)
    print(f"saved to {OUT_PATH}")
    print(f"elapsed: {t1 - t0:,.2f}s")


# Run the summary only when this cell/file is executed as the main program.
if __name__ == "__main__":
    home_summary_ge100()


items with >= 100 interactions: 46410
shape: (4, 3)
┌─────┬─────────────┬─────────────┐
│ n   ┆ mean_days   ┆ median_days │
│ --- ┆ ---         ┆ ---         │
│ i64 ┆ f64         ┆ f64         │
╞═════╪═════════════╪═════════════╡
│ 10  ┆ 315.394416  ┆ 157.355168  │
│ 30  ┆ 579.000792  ┆ 364.711539  │
│ 50  ┆ 762.539871  ┆ 521.012101  │
│ 100 ┆ 1215.037832 ┆ 939.190162  │
└─────┴─────────────┴─────────────┘
saved to /home/heek/aigs/NTS/reachtime_amazon/home_itm_ge100_summary.csv
elapsed: 7.78s


# 2. Preliminaries

## 1) Preliminary item-level estimation for the Home and Kitchen domain

In [7]:
from pathlib import Path
import json
from collections import Counter

# Review dump, parsed below one JSON object per non-blank line.
BASE = Path("/home/heek/edda_backbone/preprocess_raw/amazon/23/user_reviews/5_core")
FILE = BASE / "Home_and_Kitchen.json"

if not FILE.exists():
    raise FileNotFoundError(FILE)

print("Using JSON:", FILE)

# Pass 1: number of review lines per item ("parent_asin", falling back to "asin").
item_count = Counter()
with FILE.open("r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue
        d = json.loads(line)
        item_id = d.get("parent_asin") or d.get("asin")
        if item_id:
            item_count[item_id] += 1

# Without any counted item the threshold lookups below would fail with an
# opaque IndexError; fail early with an explicit message instead.
if not item_count:
    raise RuntimeError("item_count is empty")

counts_sorted = sorted(item_count.values())
n_items = len(counts_sorted)

# Thresholds are the counts found one third and two thirds of the way through
# the ascending list of per-item counts.
idx1 = n_items // 3
idx2 = (2 * n_items) // 3

thr1 = counts_sorted[idx1]
thr2 = counts_sorted[idx2]

def get_group(cnt):
    """Map an item's interaction count to group 0 (<= thr1), 1 (<= thr2) or 2."""
    if cnt <= thr1:
        return 0
    elif cnt <= thr2:
        return 1
    else:
        return 2

# Pass 2: for each group, the set of users with at least one item in that group.
user_sets = [set(), set(), set()]
with FILE.open("r", encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            continue
        d = json.loads(line)
        user = d.get("user_id")
        item_id = d.get("parent_asin") or d.get("asin")
        if not user or not item_id:
            continue

        cnt = item_count[item_id]
        group = get_group(cnt)
        user_sets[group].add(user)

# Users present in all three per-group sets.
users_all_three = set.intersection(*user_sets)

print("The Number of users who interacted items at least once in each 3 groups: ", len(users_all_three))


Using JSON: /home/heek/edda_backbone/preprocess_raw/amazon/23/user_reviews/5_core/Home_and_Kitchen.json
The Number of users who interacted items at least once in each 3 groups:  689755


In [9]:
from pathlib import Path
import json
from collections import Counter, defaultdict
import numpy as np
import pandas as pd
import math


# Review dump, parsed below one JSON object per non-blank line.
BASE = Path("/home/heek/edda_backbone/preprocess_raw/amazon/23/user_reviews/5_core")
FILE = BASE / "Home_and_Kitchen.json"

if not FILE.exists():
    raise FileNotFoundError(FILE)

print("Using JSON:", FILE)


# Pass 1: number of review lines per item ("parent_asin", falling back to "asin").
item_count = Counter()
with FILE.open("r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        d = json.loads(line)
        item_id = d.get("parent_asin") or d.get("asin")
        if item_id:
            item_count[item_id] += 1

if not item_count:
    raise RuntimeError("item_count is empty")

counts_sorted = sorted(item_count.values())
n_items = len(counts_sorted)

# Thresholds are the counts found one third and two thirds of the way through
# the ascending list of per-item counts.
idx1 = n_items // 3
idx2 = (2 * n_items) // 3

thr1 = counts_sorted[idx1]
thr2 = counts_sorted[idx2]

print(f"#items={n_items}, thr1={thr1}, thr2={thr2}")


def get_group_idx(cnt: int) -> int:
    """Map an item's interaction count to group index 0 (L), 1 (M) or 2 (H)."""
    if cnt <= thr1:
        return 0   # L
    elif cnt <= thr2:
        return 1   # M
    else:
        return 2   # H


# Pass 2: per user, the number of interactions falling into each item group,
# stored as [L, M, H].
user_group_cnts = defaultdict(lambda: [0, 0, 0])

with FILE.open("r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        d = json.loads(line)
        user_id = d.get("user_id")
        item_id = d.get("parent_asin") or d.get("asin")
        if not user_id or not item_id:
            continue

        cnt = item_count.get(item_id)
        if cnt is None:
            continue
        g_idx = get_group_idx(cnt)
        user_group_cnts[user_id][g_idx] += 1

# Keep only users with at least one interaction in every group.
filtered_users = {
    u: cnts for u, cnts in user_group_cnts.items()
    if all(c > 0 for c in cnts)
}

print("Number of users interacting across all three groups:", len(filtered_users))


def balance_key(cnts):
    """Sort key: (population std of the three counts, negated total).

    Sorting ascending therefore puts the most evenly spread users first and,
    among equally balanced users, those with more interactions first.
    """
    m = sum(cnts) / 3.0
    var = sum((c - m) ** 2 for c in cnts) / 3.0
    std = math.sqrt(var)
    total = sum(cnts)
    return (std, -total)


sorted_users = sorted(filtered_users.items(), key=lambda x: balance_key(x[1]))
top_k = 5000
top_users = sorted_users[:min(top_k, len(sorted_users))]

print("Selected top users:", len(top_users))

# With no selected user the transpose below is empty and group_counts[g]
# would raise an opaque IndexError; stop with an explicit message instead.
if not top_users:
    raise RuntimeError("no user interacts with all three item groups")

# Transpose [(L, M, H), ...] into three per-group tuples of counts.
group_counts = list(zip(*[cnts for _, cnts in top_users]))

rows = []
for g in range(3):
    arr = np.array(group_counts[g], dtype=np.int64)
    q1 = float(np.quantile(arr, 0.25))
    q3 = float(np.quantile(arr, 0.75))
    row = {
        "group": ["L", "M", "H"][g],
        "min": int(arr.min()),
        "Q1": q1,
        "mean": float(arr.mean()),
        "Q3": q3,
        "max": int(arr.max()),
    }
    rows.append(row)

df_stats = pd.DataFrame(rows, columns=["group", "min", "Q1", "mean", "Q3", "max"])
print("\n==== Top 5,000 balanced users: Group interaction statistics ====")
print(df_stats.to_string(index=False))


Using JSON: /home/heek/edda_backbone/preprocess_raw/amazon/23/user_reviews/5_core/Home_and_Kitchen.json
#items=763636, thr1=8, thr2=18
Number of users interacting across all three groups: 689755
Selected top users: 5000

==== Top 5,000 balanced users: Group interaction statistics ====
group  min  Q1   mean  Q3  max
    L    2 2.0 2.3786 3.0   19
    M    2 2.0 2.4306 3.0   19
    H    2 2.0 2.6722 3.0   18


## 2) Item popularity stats (items the selected users interacted with)

In [None]:
# IDs of the selected users; a set keeps the per-line membership test cheap.
top_user_ids = {entry[0] for entry in top_users}

# Collect every item that at least one selected user interacted with.
user_items = set()
with FILE.open("r", encoding="utf-8") as f:
    # Lazily parse each non-blank line into a record.
    records = (json.loads(text) for text in map(str.strip, f) if text)
    for d in records:
        if d.get("user_id") not in top_user_ids:
            continue
        item_id = d.get("parent_asin") or d.get("asin")
        if item_id:
            user_items.add(item_id)

print("\nNumber of unique items interacted by top users:", len(user_items))

# Global interaction counts of those items, split by popularity group.
vals_L, vals_M, vals_H = [], [], []

for it in user_items:
    cnt = item_count.get(it, 0)
    g_idx = get_group_idx(cnt)
    bucket = vals_L if g_idx == 0 else vals_M if g_idx == 1 else vals_H
    bucket.append(cnt)


def make_stats(arr):
    """Return min / Q1 / mean / Q3 / max of a sequence of counts.

    Args:
        arr: A sized sequence of numbers (list, tuple, NumPy array, ...) or
            ``None``. Values are converted to ``int64`` before aggregation.

    Returns:
        A dict with keys ``"min"``, ``"Q1"``, ``"mean"``, ``"Q3"``, ``"max"``.
        ``min``/``max`` are ints, the rest floats. Every value is ``None``
        when ``arr`` is ``None`` or empty.
    """
    # len() instead of truthiness: `not arr` is ambiguous (ValueError) for
    # NumPy arrays holding more than one element.
    if arr is None or len(arr) == 0:
        return {"min": None, "Q1": None, "mean": None, "Q3": None, "max": None}
    values = np.asarray(arr, dtype=np.int64)
    return {
        "min": int(values.min()),
        "Q1": float(np.quantile(values, 0.25)),
        "mean": float(values.mean()),
        "Q3": float(np.quantile(values, 0.75)),
        "max": int(values.max()),
    }


# Summary statistics of the global interaction counts, one dict per group.
stats_L, stats_M, stats_H = (
    make_stats(values) for values in (vals_L, vals_M, vals_H)
)

# One table row per group, in L / M / H order.
item_stat_rows = [
    {"group": label, **stats}
    for label, stats in zip("LMH", (stats_L, stats_M, stats_H))
]
df_item_stats = pd.DataFrame(
    item_stat_rows,
    columns=["group", "min", "Q1", "mean", "Q3", "max"],
)

print("\nItem popularity stats for items interacted by top 5,000 users")
print(df_item_stats.to_string(index=False))



Number of unique items interacted by top users: 35405

==== Item popularity stats for items interacted by top 5,000 users ====
group  min   Q1       mean    Q3   max
    L    5  5.0   6.350155   7.0     8
    M    9 10.0  12.916723  15.0    18
    H   19 41.0 392.749524 328.0 34666


## 3) Cold-item ratio check across the train/valid/test splits

In [18]:
import os
from collections import Counter

# Dataset whose split files are inspected; it selects the data directory below.
DOMAIN = "Home_and_Kitchen"
DATA_ROOT = f"/home/heek/aigs/NTS/data/{DOMAIN}"
MAP_DIR = os.path.join(DATA_ROOT, "maps")

# Split files are parsed by load_pairs(); the item map by load_item2id().
TRAIN_PATH = os.path.join(DATA_ROOT, "train.txt")
VALID_PATH = os.path.join(DATA_ROOT, "valid.txt")
TEST_PATH  = os.path.join(DATA_ROOT, "test.txt")
ITEM2ID_PATH = os.path.join(MAP_DIR, "item2id.txt")


def load_pairs(path):
    """Parse a split file of whitespace-separated ``user item`` integer pairs.

    Blank lines are skipped. Each remaining line must hold exactly two
    integer tokens; anything else raises ``ValueError``.

    Returns:
        ``(pairs, items)``: ``pairs`` is the list of ``(user, item)`` tuples
        in file order and ``items`` the list of item indices in the same order.
    """
    pairs = []
    with open(path, "r", encoding="utf-8") as handle:
        for record in map(str.strip, handle):
            if record:
                user_tok, item_tok = record.split()
                pairs.append((int(user_tok), int(item_tok)))
    items = [item for _, item in pairs]
    return pairs, items

def load_item2id(path):
    """Return the set of integer indices listed in an item-map file.

    Each non-blank line must hold exactly two whitespace-separated tokens.
    The first is ignored and the second is parsed as the integer index.
    Any other line shape raises ``ValueError``.
    """
    indices = set()
    with open(path, "r", encoding="utf-8") as handle:
        for record in map(str.strip, handle):
            if record:
                _name, raw_idx = record.split()
                indices.add(int(raw_idx))
    return indices


def main():
    """Print split sizes, item overlaps and cold-item counts.

    Loads the item map and the train/valid/test pair files, then reports the
    size of each split, how many distinct items the splits share, and how many
    valid/test items never occur in train.
    """
    all_item_indices = load_item2id(ITEM2ID_PATH)
    print(f"Total items in item2id.txt: {len(all_item_indices)}")
    print(f"Global item index range: [{min(all_item_indices)}, {max(all_item_indices)}]")
    print()

    train_pairs, train_item_seq = load_pairs(TRAIN_PATH)
    valid_pairs, valid_item_seq = load_pairs(VALID_PATH)
    test_pairs, test_item_seq = load_pairs(TEST_PATH)

    train_items, valid_items, test_items = map(
        set, (train_item_seq, valid_item_seq, test_item_seq)
    )

    # Items that are cold with respect to train: present in valid and/or test
    # but absent from train.
    cold_valid = valid_items.difference(train_items)
    cold_test = test_items.difference(train_items)
    cold_either = valid_items.union(test_items).difference(train_items)

    # Empty strings become the blank separator lines of the report.
    report = [
        "Basic stats",
        f"#train pairs: {len(train_pairs)}",
        f"#valid pairs: {len(valid_pairs)}",
        f"#test  pairs: {len(test_pairs)}",
        "",
        f"Unique items in train: {len(train_items)}",
        f"Unique items in valid: {len(valid_items)}",
        f"Unique items in test : {len(test_items)}",
        "",
        "Overlap between splits (by item index)",
        f"Items in test ∩ train: {len(test_items & train_items)}",
        f"Items in test ∩ valid: {len(test_items & valid_items)}",
        f"Items in valid ∩ train: {len(valid_items & train_items)}",
        f"Items in all three (train ∩ valid ∩ test): {len(train_items & valid_items & test_items)}",
        "",
        "Items not appearing in train (cold w.r.t train)",
        f"Items only in valid (not in train): {len(cold_valid)}",
        f"Items only in test  (not in train): {len(cold_test)}",
        f"Items in (valid ∪ test) but not in train: {len(cold_either)}",
        "",
    ]
    print("\n".join(report))


# Run the split report only when this cell/file is executed as the main program.
if __name__ == "__main__":
    main()

Total items in item2id.txt: 29284
Global item index range: [0, 29283]

Basic stats
#train pairs: 33304
#valid pairs: 2036
#test  pairs: 1949

Unique items in train: 23587
Unique items in valid: 1618
Unique items in test : 1510

Overlap between splits (by item index)
Items in test ∩ train: 1510
Items in test ∩ valid: 425
Items in valid ∩ train: 1618
Items in all three (train ∩ valid ∩ test): 425

Items not appearing in train (cold w.r.t train)
Items only in valid (not in train): 0
Items only in test  (not in train): 0
Items in (valid ∪ test) but not in train: 0



# 3. Item metadata embeddings

## 1) where to fill

In [20]:
import json
import random
from pathlib import Path

# Location of the filtered item-metadata JSON.
META_PATH = Path("/home/heek/edda_backbone/preprocess_raw/amazon/23/filtered_data/f_item_meta/f_meta_Home_and_Kitchen.json")

# The whole file is parsed as a single JSON document.
with META_PATH.open("r", encoding="utf-8") as f:
    meta = json.load(f)

# meta is expected to be a list of dict entries; index them by item id
# ("parent_asin", falling back to "asin"). Non-dict entries are skipped.
item_dict = {}
for entry in meta:
    if not isinstance(entry, dict):
        continue

    item_id = entry.get("parent_asin") or entry.get("asin")
    if item_id:
        item_dict[item_id] = entry

# Sample up to 5 random items; capping the size avoids the ValueError that
# random.sample raises when asked for more elements than are available.
sample_items = random.sample(list(item_dict), min(5, len(item_dict)))

# Print the text fields of each sampled item.
for item in sample_items:
    info = item_dict[item]

    main_cat = info.get("main_category")
    cats = info.get("categories")
    title = info.get("title")
    desc = info.get("description")

    print("=" * 80)
    print(f"item_id      : {item}")
    print(f"main_category: {main_cat}")
    print(f"categories   : {cats}")
    print(f"title        : {title}")
    print(f"description  : {desc}")
# Closing rule, printed once after the last item.
print("=" * 80)


item_id      : B009EA1AH4
main_category: Amazon Home
categories   : ['Home & Kitchen', 'Bath', 'Bathroom Accessories', 'Shower Curtains, Hooks & Liners', 'Shower Curtains']
title        : LORRAINE HOME FASHIONS Blaze Shower Curtain, 70 by 72-Inch, Platinum
description  : ['Jacquard woven fabric shower curtain in flame stich motif with lurex thread accent. great design for the bathroom.']
item_id      : B0BLRSD96N
main_category: Appliances
categories   : ['Home & Kitchen', 'Vacuums & Floor Care', 'Vacuum Parts & Accessories', 'Filters', 'Upright Filters']
title        : Bogda Filter Replacement (6 Pack) Compatible with Shark ION Robot RV700_N RV720_N RV851WV RV850 and Shark IQ Robot R101AE RV1001AE UR1005AE Vacuum Cleaner Replace Part #RVFFK950
description  : []
item_id      : B01MY8FHE9
main_category: Amazon Home
categories   : ['Home & Kitchen', 'Bedding', "Kids' Bedding", 'Bedding Sets & Collections', 'Bedspread & Coverlet Sets']
title        : Urban Habitat Kids Trixie Twin/Twin XL 

## 2) Embedding Result Check

In [None]:
import os
import pickle
import numpy as np


# Pickle file that main() loads; it must hold the keys "field_emb", "idx2item" and "meta".
PKL_PATH = "/home/heek/aigs/NTS/data/Home_and_Kitchen/itm_txt_emb/itm_txt_emb_home.pkl"

def main():
    """Print the first 8 embedding values of every field for a few sampled items.

    Loads the pickle at ``PKL_PATH``, reports the item count, embedding
    dimension and field names, then prints the first 8 values of each field
    embedding for up to 10 items. The sample is drawn without replacement
    after seeding NumPy's global RNG with 0.
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserializing; only point PKL_PATH at files from a trusted source.
    with open(PKL_PATH, "rb") as f:
        data = pickle.load(f)

    field_embs = data["field_emb"]
    idx2item = data["idx2item"]
    meta = data["meta"]

    fields = meta["fields"]         # ["main_category", "categories", "title", "description"]
    n_items = len(idx2item)
    dim = meta["dim"]

    print(f"#items = {n_items}, dim = {dim}")
    print(f"fields = {fields}")
    print()

    np.random.seed(0)
    # Cap the sample size: choice(..., replace=False) raises ValueError when
    # asked for more indices than there are items.
    sample_size = min(10, n_items)
    sample_indices = np.random.choice(n_items, size=sample_size, replace=False)

    for idx in sample_indices:
        item_id = idx2item[idx]
        print("=" * 60)
        print(f"item_idx = {idx}, item_id = {item_id}")
        for field in fields:
            emb = field_embs[field][idx]   # (dim,)
            head_str = ", ".join([f"{v:.4f}" for v in emb[:8]])
            print(f"  [{field}] emb[:8] = [{head_str}]  ...")
        print()

# Run the embedding check only when this cell/file is executed as the main program.
if __name__ == "__main__":
    main()


#items = 29284, dim = 64
fields = ['main_category', 'categories', 'title', 'description']

item_idx = 7723, item_id = B00GY8ZIRM
  [main_category] emb[:8] = [-0.0203, 0.1814, -0.3674, 0.0737, 0.2086, 0.0422, -0.0488, -0.0788]  ...
  [categories] emb[:8] = [-0.1256, 0.0142, -0.3931, 0.0204, 0.1085, -0.0790, -0.0891, -0.1803]  ...
  [title] emb[:8] = [-0.0897, 0.1412, -0.3264, -0.0217, 0.0063, -0.0372, -0.0409, -0.1941]  ...
  [description] emb[:8] = [0.0345, 0.1020, -0.3936, -0.0524, 0.0989, -0.0729, -0.0055, -0.0258]  ...

item_idx = 24614, item_id = B09KRC6P9Z
  [main_category] emb[:8] = [-0.0203, 0.1814, -0.3674, 0.0737, 0.2086, 0.0422, -0.0488, -0.0788]  ...
  [categories] emb[:8] = [-0.0665, 0.1022, -0.3586, -0.0197, 0.1052, -0.1370, -0.1925, -0.0668]  ...
  [title] emb[:8] = [-0.0647, 0.0577, -0.4199, -0.0764, 0.0075, -0.1266, -0.1279, 0.0130]  ...
  [description] emb[:8] = [-0.0284, 0.0823, -0.3962, -0.0458, 0.0589, -0.1307, -0.0472, 0.0757]  ...

item_idx = 23971, item_id = B098