# Item Lookup (ASIN → metadata)

Provide a list of Amazon item IDs (ASINs) and this notebook will fetch their metadata (title, category, brand, price) from the SNAP metadata file.



In [None]:
# Amazon item IDs to look up -- replace these with the ASINs returned by the API.
requested_asins = (
    "B007WTAJTO", "B003ES5ZUU", "B00DR0PDNE", "B0019EHU8G", "B002WE6D44",
    "B003ELYQGG", "B0002L5R78", "B009SYZ8OC", "B00BGGDVOO", "B002V88HFE",
)

# Keep only the first occurrence of each ASIN; dict keys remember insertion
# order, so the original ordering of the list is preserved.
ASINS = [*dict.fromkeys(requested_asins)]
ASINS


In [None]:
from pathlib import Path
import requests

# Remote location of the gzipped metadata file and the local path it is cached at.
RAW_META_URL = "https://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Electronics.json.gz"
meta_path = Path("../data/raw/meta_Electronics.json.gz")
meta_path.parent.mkdir(parents=True, exist_ok=True)

if not meta_path.exists():
    print(f"Downloading metadata to {meta_path} ...")
    # Stream into a sibling ".part" file and rename it only once the transfer
    # has finished. Writing straight to meta_path would leave a truncated
    # archive behind after an interrupted download, and the exists() check
    # above would then treat that broken file as a valid cache on the next run.
    partial_path = meta_path.with_name(meta_path.name + ".part")
    try:
        with requests.get(RAW_META_URL, stream=True, timeout=60) as r:
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                # 1 MiB chunks keep memory use flat regardless of file size.
                for chunk in r.iter_content(chunk_size=1 << 20):
                    if chunk:
                        f.write(chunk)
        partial_path.replace(meta_path)
    finally:
        # After a successful rename the partial file no longer exists; after a
        # failure or interrupt this removes the incomplete download.
        if partial_path.exists():
            partial_path.unlink()
    print("Download complete.")
else:
    print(f"Found existing metadata: {meta_path}")
meta_path


In [None]:
import pandas as pd

# Metadata fields reported for each requested ASIN.
# NOTE(review): confirm the file really exposes a "category" key (some SNAP
# metadata dumps name it "categories"); if it does not, this column is all-NaN.
cols_keep = ["asin", "title", "category", "brand", "price"]
found = []
remaining = set(ASINS)  # ASINs not located yet; shrinks as matches are found

# Stream the metadata file in chunks to avoid loading it entirely into memory.
# The reader is used as a context manager so the underlying file handle is
# closed even when the loop stops early.
with pd.read_json(meta_path, lines=True, compression="gzip", chunksize=100_000) as reader:
    for chunk in reader:
        if "asin" not in chunk.columns:
            continue
        # Columns are inferred per chunk from the keys present in its records,
        # so an optional field can be absent from a whole chunk. reindex fills
        # such columns with NaN instead of raising KeyError (and returns a new
        # frame, so no explicit copy is needed).
        hits = chunk[chunk["asin"].isin(remaining)].reindex(columns=cols_keep)
        if not hits.empty:
            found.append(hits)
            remaining.difference_update(hits["asin"])
        if not remaining:
            # Every requested ASIN has been located; skip the rest of the file.
            break

if found:
    # Keep one row per ASIN so the count below cannot exceed the request size.
    df_items = pd.concat(found, ignore_index=True).drop_duplicates(subset="asin")
    # Preserve input order of ASINS
    order = {asin: i for i, asin in enumerate(ASINS)}
    df_items["_ord"] = df_items["asin"].map(order)
    df_items = df_items.sort_values("_ord").drop(columns=["_ord"]).reset_index(drop=True)
else:
    df_items = pd.DataFrame(columns=cols_keep)

print(f"Found {len(df_items)} / {len(ASINS)} items.")
if remaining:
    # List the ASINs that were not located so they can be followed up.
    print(f"Missing ASINs: {sorted(remaining)}")
df_items


If some items are missing, they may not be present in the downloaded metadata file (`meta_Electronics.json.gz`). You can also inspect the reviews for those ASINs to get summaries.
