# LLM Playground: Tagging items

# Set up

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import asyncio
import concurrent.futures
import functools
import os
import sys
import time
from typing import List, Optional

import pandas as pd
from datasets import load_dataset
from dotenv import load_dotenv
from loguru import logger
from openai import OpenAI
from pydantic import BaseModel

load_dotenv()

sys.path.insert(0, "..")

from llm.tagger import PROMPT as ITEM_TAGGING_PROMPT
from llm.tagger import SYSTEM_PROMPT
from llm.utils import load_extracted

# Controller

In [3]:
class Args(BaseModel):
    """Run configuration for the item-tagging notebook.

    Call ``init()`` after construction to resolve and create the directory
    that holds this run's artifacts.
    """

    testing: bool = False
    run_name: str = "040-llm-tag-items"
    # Absolute path of the run's artifact directory; None until init() is called.
    notebook_persist_dp: Optional[str] = None
    random_seed: int = 41

    user_col: str = "user_id"
    item_col: str = "parent_asin"
    rating_col: str = "rating"
    timestamp_col: str = "timestamp"

    # Metadata columns sent to the LLM alongside the item id.
    selected_cols: List[str] = ["title", "categories"]

    skip_processed: bool = False
    # Path to a JSONL file of previously extracted outputs, or None when there
    # is none. Optional so that None is a valid value when passed explicitly.
    processed_fp: Optional[str] = None

    def init(self):
        """Resolve ``data/<run_name>`` to an absolute path, create it, and return self."""
        self.notebook_persist_dp = os.path.abspath(f"data/{self.run_name}")
        os.makedirs(self.notebook_persist_dp, exist_ok=True)

        return self


# Build the run configuration; init() also creates the artifact directory.
args = Args().init()

# Echo the resolved configuration as JSON so the run settings are recorded in the output.
print(args.model_dump_json(indent=2))

{
  "testing": false,
  "run_name": "040-llm-tag-items",
  "notebook_persist_dp": "/home/dvquys/frostmourne/tmp/recsys-mvp/notebooks/data/040-llm-tag-items",
  "random_seed": 41,
  "user_col": "user_id",
  "item_col": "parent_asin",
  "rating_col": "rating",
  "timestamp_col": "timestamp",
  "selected_cols": [
    "title",
    "categories"
  ],
  "skip_processed": false,
  "processed_fp": null
}


# Load data

In [4]:
# Load the "raw_meta_Video_Games" configuration of the Amazon Reviews 2023 dataset.
# NOTE(review): trust_remote_code=True presumably allows the dataset repository's
# own loading script to run locally — confirm this is acceptable.
metadata_raw = load_dataset(
    "McAuley-Lab/Amazon-Reviews-2023", "raw_meta_Video_Games", trust_remote_code=True
)
# Convert the "full" split to a pandas DataFrame and display it.
metadata_df = metadata_raw["full"].to_pandas()
metadata_df

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Video Games,Dash 8-300 Professional Add-On,5.0,1,[Features Dash 8-300 and 8-Q300 ('Q' rollout l...,[The Dash 8-300 Professional Add-On lets you p...,,"{'hi_res': [None], 'large': ['https://m.media-...","{'title': [], 'url': [], 'user_id': []}",Aerosoft,"[Video Games, PC, Games]","{""Pricing"": ""The strikethrough price is the Li...",B000FH0MHO,,,
1,Video Games,Phantasmagoria: A Puzzle of Flesh,4.1,18,[Windows 95],[],,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Sierra,"[Video Games, PC, Games]","{""Best Sellers Rank"": {""Video Games"": 137612, ...",B00069EVOG,,,
2,Video Games,NBA 2K17 - Early Tip Off Edition - PlayStation 4,4.3,223,[The #1 rated NBA video game simulation series...,[Following the record-breaking launch of NBA 2...,58.0,{'hi_res': ['https://m.media-amazon.com/images...,{'title': ['NBA 2K17 - Kobe: Haters vs Players...,2K,"[Video Games, PlayStation 4, Games]","{""Release date"": ""September 16, 2016"", ""Best S...",B00Z9TLVK0,,,
3,Video Games,Nintendo Selects: The Legend of Zelda Ocarina ...,4.9,22,[Authentic Nintendo Selects: The Legend of Zel...,[],37.42,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Amazon Renewed,"[Video Games, Legacy Systems, Nintendo Systems...","{""Best Sellers Rank"": {""Video Games"": 51019, ""...",B07SZJZV88,,,
4,Video Games,Thrustmaster Elite Fitness Pack for Nintendo Wii,3.0,3,"[Includes (9) Total Accessories, Pedometer, Wi...",[The Thrustmaster Motion Plus Elite Fitness Pa...,,"{'hi_res': [None, None, None, None, None, None...","{'title': [], 'url': [], 'user_id': []}",THRUSTMASTER,"[Video Games, Legacy Systems, Nintendo Systems...","{""Release date"": ""November 1, 2009"", ""Pricing""...",B002WH4ZJG,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137264,,Story of Seasons: Pioneers Of Olive Town (Nint...,4.5,397,[A wild world of discovery - tame the wilderne...,"[Product Description, Inspired by Tales of you...",31.04,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Marvelous Europe,"[Video Games, Nintendo Switch, Games]","{""Release date"": ""March 26, 2021"", ""Best Selle...",B09XQJS4CZ,,,
137265,Video Games,MotoGP 18 (PC DVD) UK IMPORT REGION FREE,4.0,1,[Brand new game engine - MotoGP18 has been reb...,[Become the champion of the 2018 MotoGP Season...,,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Milestone,"[Video Games, Game Genre of the Month]","{""Pricing"": ""The strikethrough price is the Li...",B07DGPTGNV,,,
137266,Cell Phones & Accessories,Century Accessory Soft Silicone Protective Ski...,2.9,19,"[Easy access to all buttons, controls and port...",[This soft case cover will add a splash of col...,,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Century Accessory,"[Video Games, Legacy Systems, Xbox Systems, Xb...","{""Package Dimensions"": ""2.76 x 2.76 x 0.2 inch...",B00HUWCQBW,,,
137267,,Hasbro Interactive Mr. Potato Head Activity Pa...,3.9,5,[],"[Amazon.com, Everyone's favorite master-of-dis...",,"{'hi_res': [None, 'https://m.media-amazon.com/...","{'title': [], 'url': [], 'user_id': []}",Hasbro,"[Video Games, PC, Games]","{""Release date"": ""July 24, 1999"", ""Best Seller...",B00002S9MH,,,


In [5]:
# Preview 10 seeded-random items as records holding only the item id and the selected columns.
metadata_df.sample(10, random_state=args.random_seed)[
    [args.item_col, *args.selected_cols]
].to_dict(orient="records")

[{'parent_asin': 'B09ZYFYT5J',
  'title': 'Gliging 120Pcs/Set MX Switch Films Mechanical Keyboard Switches stabilizer Switch Film Repair for Cherry MX kailh Gateron Switch',
  'categories': array(['Video Games', 'PC', 'Accessories', 'Gaming Keyboards'],
        dtype=object)},
 {'parent_asin': 'B00GZ6TX7E',
  'title': 'NEW HOLLAND SKYLINE [Xbox 360]',
  'categories': array(['Video Games', 'Legacy Systems', 'Xbox Systems', 'Xbox 360',
         'Interactive Gaming Figures'], dtype=object)},
 {'parent_asin': 'B09QZ2RZLV',
  'title': 'HORI Gaming Headset (Pikachu POP) for Nintendo Switch & Switch Lite - Officially Licensed by Nintendo & Pokemon Company International - Nintendo Switch',
  'categories': array(['Video Games', 'Nintendo Switch', 'Accessories'], dtype=object)},
 {'parent_asin': 'B000021Y1B',
  'title': 'Rampage 2: Universal Tour',
  'categories': array(['Video Games', 'Legacy Systems', 'Nintendo Systems', 'Nintendo 64',
         'Games'], dtype=object)},
 {'parent_asin': 'B07MM

# Call OpenAI to get tags

In [6]:
# Same seeded 10-item sample, kept as a list of records to use as the LLM input.
sampled_items = metadata_df.sample(10, random_state=args.random_seed)[
    [args.item_col, *args.selected_cols]
].to_dict(orient="records")

In [7]:
# Render the tagging prompt with the sampled records and display it for inspection.
prompt = ITEM_TAGGING_PROMPT.format(input_list=sampled_items)
prompt

'\nFor each of the following items, generate a set of tags that capture the main features, target audience, platform compatibility, and item type (e.g., accessory, game, hardware).\n\nReturn the output as a list of JSON objects, where each object includes the original item `title`, `parent_asin`, and a list of tags.\n\nFocus on adding tags that help shoppers easily find these items based on genre, brand associations, and product functionality.\n\nIf possible, add new information that is not obvious from the item title.\n\nDo not include the tags that are similar to those in the input categories, for example do not include tags about gaming platforms as output.\n\n<EXAMPLE>\n\nExample item titles with JSON input and output:\n\nInput:  \n[\n    { "parent_asin": "B09ZYFYT5J", "title": "Gliging 120Pcs/Set MX Switch Films Mechanical Keyboard Switches stabilizer Switch Film Repair for Cherry MX kailh Gateron Switch" },\n    { "parent_asin": "B00GZ6TX7E", "title": "NEW HOLLAND SKYLINE [Xbox 3

In [8]:
# Constructed with no explicit arguments; presumably picks up credentials from the
# environment populated by load_dotenv() — verify.
client = OpenAI()


# Structured-output schema for a single tagged item, defined with Pydantic:
# the item's id, its title, and the list of generated tags.
class ItemTag(BaseModel):
    parent_asin: str
    item_title: str
    tags: List[str]


# Top-level structured-output schema: the list of tagged items is wrapped in an
# object instead of being returned as a bare list.
class ItemTaggingOutput(BaseModel):
    items: List[ItemTag]  # Wrap list in an "items" field

In [9]:
def retrieve_item_tags_from_llm(item_metadata: List[dict]) -> List[dict]:
    """Ask the LLM to tag one batch of items and return the tags as plain dicts.

    Args:
        item_metadata: Item records that are formatted into the tagging prompt.

    Returns:
        One dict per item returned by the model, with the fields of ``ItemTag``
        (``parent_asin``, ``item_title``, ``tags``).

    Raises:
        ValueError: If the response carries no parsed structured output.
    """
    prompt = ITEM_TAGGING_PROMPT.format(input_list=item_metadata)

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": SYSTEM_PROMPT,
            },
            {"role": "user", "content": prompt},
        ],
        response_format=ItemTaggingOutput,
    )

    message = completion.choices[0].message
    tagged_items = message.parsed
    if tagged_items is None:
        # Without this guard a missing parse surfaces as an opaque AttributeError.
        raise ValueError(
            "LLM response contained no parsed output "
            f"(refusal={getattr(message, 'refusal', None)!r})"
        )

    # model_dump() is the pydantic v2 replacement for the deprecated .dict().
    return tagged_items.model_dump()["items"]

In [10]:
# Smoke test: tag the 10 sampled items and show the result with untruncated columns.
extracted_tags = retrieve_item_tags_from_llm(sampled_items)
with pd.option_context("display.max_colwidth", None):
    display(pd.DataFrame(extracted_tags))

Unnamed: 0,parent_asin,item_title,tags
0,B09ZYFYT5J,Gliging 120Pcs/Set MX Switch Films Mechanical Keyboard Switches stabilizer Switch Film Repair for Cherry MX kailh Gateron Switch,"[Keyboard Accessory, Mechanical Keyboard, Switch Film, Custom Modding, Gaming Peripheral]"
1,B00GZ6TX7E,NEW HOLLAND SKYLINE [Xbox 360],"[Racing Game, Simulation, Legacy Gaming, Brand Association, Interactive Play]"
2,B09QZ2RZLV,HORI Gaming Headset (Pikachu POP) for Nintendo Switch & Switch Lite - Officially Licensed by Nintendo & Pokemon Company International - Nintendo Switch,"[Gaming Headset, Pikachu, Anime Merchandise, Officially Licensed, Audio Accessory]"
3,B000021Y1B,Rampage 2: Universal Tour,"[Action Game, Retro Gaming, Multiplayer, Destruction, Classic]"
4,B07MMP45FS,Warner Home Video Lego Jurassic World PS4,"[Action-Adventure, Family-Friendly, Lego Game, Jurassic Park, Cooperative Gameplay]"
5,B00EEH3U2K,Skulls of the Shogun: Bone-a-Fide Edition [Online Game Code],"[Strategy Game, Turn-Based Combat, Indie Game, Digital Download, Historical Setting]"
6,B001A33GMU,Mysteryville - PC (Jewel case),"[Puzzle Game, Mystery, Casual Gaming, Single Player, Adventure]"
7,B0BV24K2CD,"Wireless Controller Replacement for WII Controller,GLOWANT 2 Pack Wii Remote Controller Compatible with Wii/Wii U Console with Wrist Strap and Silicone Case","[Controller Accessory, Wii-Compatible, Wireless Technology, Multiple Packs, Gaming Accessories]"
8,B00UBRKJFI,Wolfenstein The Old Blood [ceroZ],"[First-Person Shooter, Action, Narrative-Driven, Single Player, Historic Setting]"
9,B003C3V6WI,Hard Drive Data Migration Transfer Cable Kit For XBOX 360 [Electronics],"[Data Transfer, Console Accessory, Hardware Utility, Storage Solution, Electronics]"


# Get sample items

In [11]:
# Load the training feature table; the path is relative to the notebook's working directory.
train_df = pd.read_parquet("../data/train_features_neg_df.parquet")

In [12]:
# Distinct item ids that appear in the training table.
all_items = train_df[args.item_col].unique()
logger.info(f"{len(all_items)=}")

[32m2024-11-26 21:00:06.509[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mlen(all_items)=4630[0m


In [13]:
# Keep only the metadata rows whose item id appears in the training table.
is_train_item = metadata_df[args.item_col].isin(all_items)
all_items_df = metadata_df.loc[is_train_item]

In [14]:
# Display the filtered metadata table.
all_items_df

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
2,Video Games,NBA 2K17 - Early Tip Off Edition - PlayStation 4,4.3,223,[The #1 rated NBA video game simulation series...,[Following the record-breaking launch of NBA 2...,58.0,{'hi_res': ['https://m.media-amazon.com/images...,{'title': ['NBA 2K17 - Kobe: Haters vs Players...,2K,"[Video Games, PlayStation 4, Games]","{""Release date"": ""September 16, 2016"", ""Best S...",B00Z9TLVK0,,,
15,Video Games,"Warhammer 40,000 Dawn of War Game of the Year ...",4.0,68,[Real-time strategy game based on the popular ...,"[From the Manufacturer, This Game of The Year ...",29.95,"{'hi_res': [None, 'https://m.media-amazon.com/...","{'title': [], 'url': [], 'user_id': []}",THQ,"[Video Games, PC, Games]","{""Release date"": ""September 20, 2005"", ""Best S...",B001EYUX4Y,,,
46,Video Games,Polk Audio Striker Zx Xbox One Gaming Headset ...,3.9,169,[Powered by 40 years of audio heritage and tun...,[Our ProFit Comfort system creates a lightweig...,,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Polk Audio,"[Video Games, Xbox One, Accessories, Headsets]","{""Release date"": ""October 5, 2014"", ""Best Sell...",B00OLOQGAY,,,
63,Video Games,The Legend of Heroes: Trails in the Sky - Sony...,4.4,91,[After a brief hiatus since its last release i...,"[Product Description, In the peaceful town of ...",185.0,{'hi_res': ['https://m.media-amazon.com/images...,{'title': ['The Legend of Heroes: Trails in th...,Xseed Games,"[Video Games, Legacy Systems, PlayStation Syst...","{""Release date"": ""March 29, 2011"", ""Best Selle...",B004BV5O0U,,,
65,Video Games,Harry Potter: Goblet of Fire - Sony PSP,3.6,38,[All the Magic of the Movie - Characters model...,"[From the Manufacturer, Be Harry Potter in a n...",19.43,"{'hi_res': [None, 'https://m.media-amazon.com/...","{'title': [], 'url': [], 'user_id': []}",Electronic Arts,"[Purchase Circles, Geography, United States, M...","{""Release date"": ""September 8, 2006"", ""Best Se...",B001ELJEA6,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118437,Video Games,Resident Evil 4,4.6,1133,[],[In Resident Evil 4 you'll know a new type of ...,44.98,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Capcom,"[Video Games, Legacy Systems, Nintendo Systems...","{""Release date"": ""June 19, 2007"", ""Best Seller...",B000P46NKC,,,
118457,All Electronics,ivoler Carrying Storage Case for Nintendo Swit...,4.8,13702,[Deluxe Travel Carrying Case. Specially design...,[],26.39,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",ivoler,"[Video Games, Legacy Systems, Nintendo Systems...","{""Product Dimensions"": ""11.2 x 4 x 9 inches"", ...",B076GYVWRY,,,
118459,Video Games,EastVita New Charger Dock + 4 x Battery for Ni...,3.9,676,[Include: 1x Remote Controller Charger 4 x 280...,[Charger Dock + 4 x Battery for Wii Remote],,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",EastVita,"[Video Games, Legacy Systems, Nintendo Systems...","{""Pricing"": ""The strikethrough price is the Li...",B004Y2VAVS,,,
118510,Video Games,Mario & Luigi: Partners In Time,4.6,607,"[Players use the top screen to study the land,...",[Mario and Luigi: Partners In Time an insane s...,99.99,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Nintendo,"[Video Games, Legacy Systems, Nintendo Systems...","{""Release date"": ""November 28, 2005"", ""Best Se...",B000B8J7K0,,,


# Batch extraction

In [15]:
# Local aliases for the resume-related settings in the run configuration.
processed_fp = args.processed_fp
skip_processed = args.skip_processed

## Processed outputs

In [16]:
# Decide which items still need tagging and which file the outputs go to.
# NOTE(review): previously processed items are excluded only when
# `skip_processed` is False, which reads inverted relative to the flag's name —
# confirm the intended polarity.
can_resume = (
    not skip_processed and processed_fp is not None and os.path.exists(processed_fp)
)
if not can_resume:
    # Fresh run: process every item and write to the run's default output file.
    to_process_df = all_items_df.copy()
    output_file = f"{args.notebook_persist_dp}/{args.run_name}.jsonl"
else:
    # Resume: drop the items already present in the processed file and keep
    # writing to that same file.
    logger.info(f"Loading processed data from {processed_fp}...")
    processed_json = load_extracted(processed_fp)
    processed_df = pd.DataFrame(processed_json)
    already_done = all_items_df[args.item_col].isin(processed_df[args.item_col])
    to_process_df = all_items_df.loc[~already_done]
    logger.info(
        f"{all_items_df.shape=} - {processed_df.shape=} = {to_process_df.shape=}"
    )
    output_file = processed_fp

## Run

In [17]:
run_datetime = int(time.time())
run_name = f"run__{args.run_name}__batch__{run_datetime}"
log_file = f"{args.notebook_persist_dp}/{args.run_name}.log"
pipeline_metadata = dict()

# Make the cell idempotent: remove the sinks registered by a previous execution
# of this cell. Otherwise every re-run stacks another pair of sinks and each
# "[OUTPUT]" record is written to the JSONL file multiple times.
for _sink_id in globals().get("_llm_tagging_sink_ids", []):
    try:
        logger.remove(_sink_id)
    except ValueError:
        # The sink was already removed elsewhere (e.g. a bare logger.remove()).
        pass

_llm_tagging_sink_ids = [
    # Successful extractions: serialized records appended to the output file.
    logger.add(
        output_file,
        filter=lambda record: "[OUTPUT]" in record["message"],
        level="DEBUG",
        serialize=True,
        mode="a",
    ),
    # Error records tagged "[COLLECT]" go to a separate log file.
    logger.add(
        log_file,
        filter=lambda record: "[COLLECT]" in record["message"],
        level="ERROR",
        serialize=True,
    ),
]

2

In [18]:
# Group items into fixed-size batches; each batch becomes one prompt sent to the LLM
num_inputs_per_batch = 10
iterator_group_df = (
    to_process_df.reset_index(drop=True)
    .reset_index()  # expose the fresh positional index as an "index" column
    .assign(group=lambda df: df["index"] // num_inputs_per_batch)  # batch number per row
    .groupby(["group"])[[args.item_col, *args.selected_cols]]
    .apply(lambda df: df.to_dict(orient="records"))  # one list of records per batch
    .to_frame("item_metadata")
)
iterator_group_df

Unnamed: 0_level_0,item_metadata
group,Unnamed: 1_level_1
0,"[{'parent_asin': 'B00Z9TLVK0', 'title': 'NBA 2..."
1,"[{'parent_asin': 'B00BN5T30E', 'title': 'Assas..."
2,"[{'parent_asin': 'B00YM7AKLG', 'title': 'FIFA ..."
3,"[{'parent_asin': 'B003QCJLRM', 'title': 'Zumba..."
4,"[{'parent_asin': 'B07BHXMYV6', 'title': 'Valky..."
...,...
458,"[{'parent_asin': 'B001EYUQC8', 'title': '007 Q..."
459,"[{'parent_asin': 'B000Q6J51S', 'title': 'Phoen..."
460,"[{'parent_asin': 'B078S9S2XF', 'title': 'Shant..."
461,"[{'parent_asin': 'B07HPF7PRT', 'title': 'POUND..."


In [19]:
# Plain Python list of batches; each element is the list of item records for one prompt.
input_list = iterator_group_df["item_metadata"].values.tolist()

In [20]:
# In testing mode only the first 10 batches are sent to the LLM.
if args.testing:
    logger.info("testing=True so only process 10 prompts")
    input_list = input_list[:10]

num_prompts = len(input_list)
logger.info(f"Number of prompts to be batch-run: {num_prompts}")
pipeline_metadata["num_prompts_processed"] = num_prompts

[32m2024-11-26 21:00:06.708[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mNumber of prompts to be batch-run: 463[0m


In [21]:
# Size of the thread pool used to issue LLM requests in parallel.
max_workers = 4


def retrieve_item_tags_from_llm_wrapper(fn):
    """Wrap ``fn`` so that each call's input and result are logged.

    On success a DEBUG record tagged ``[OUTPUT]`` is emitted, with the input
    batch and the LLM result bound as extra fields. On failure an ERROR record
    tagged ``[COLLECT]`` is emitted with the input batch and the traceback, and
    the original exception is re-raised unchanged.

    Args:
        fn: Callable taking the batch of item metadata as its first positional
            argument (or as the ``item_metadata`` keyword).

    Returns:
        The wrapped callable; it returns whatever ``fn`` returns.
    """

    @functools.wraps(fn)
    def __wrapper(*fn_args, **fn_kwargs):
        # Resolve the batch before calling so it is available for the error record too.
        input_item_metadata = (
            fn_args[0] if fn_args else fn_kwargs.get("item_metadata")
        )
        try:
            llm_results = fn(*fn_args, **fn_kwargs)
        except Exception:
            # Record which batch failed so it can be retried, then propagate.
            (
                logger.opt(exception=True)
                .bind(input_item_metadata=input_item_metadata)
                .error("[COLLECT] LLM extraction failed")
            )
            raise
        (
            logger.opt(lazy=True)
            .bind(input_item_metadata=input_item_metadata, llm_extracted=llm_results)
            .debug("[OUTPUT] LLM Extracted successfully")
        )
        return llm_results

    return __wrapper


async def retrieve_item_tags_from_llm_async():
    """Tag every batch in ``input_list`` concurrently on a thread pool.

    Each batch is passed to the logging-wrapped ``retrieve_item_tags_from_llm``
    in a ``ThreadPoolExecutor`` with ``max_workers`` threads.

    Returns:
        The per-batch results, in the same order as ``input_list``.
    """
    # Inside a coroutine, get_running_loop() is the preferred way to obtain the loop.
    loop = asyncio.get_running_loop()

    # Build the wrapped callable once rather than once per batch.
    logged_retrieve = retrieve_item_tags_from_llm_wrapper(retrieve_item_tags_from_llm)

    # Use a ThreadPoolExecutor to run the blocking calls in parallel
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            loop.run_in_executor(executor, logged_retrieve, input_item_metadata)
            for input_item_metadata in input_list
        ]

        # Wait for all tasks to complete
        results = await asyncio.gather(*futures)

    return results

In [22]:
# Time the whole batch run. t1 is assigned in `finally` so the elapsed time is
# still available if the run raises part-way through.
t0 = time.perf_counter()
logger.debug(f"Outputing to {output_file}...")
try:
    logger.info(
        f"""To monitor the progress, run in a separate CLI: \n```\nwatch -n 0.1 "grep -wF 'LLM Extracted successfully' {output_file} | wc -l"\n```"""
    )
    # Top-level await: relies on the notebook executing the cell inside an event loop.
    results = await retrieve_item_tags_from_llm_async()
finally:
    t1 = time.perf_counter()

[32m2024-11-26 21:00:06.730[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [34m[1mOutputing to /home/dvquys/frostmourne/tmp/recsys-mvp/notebooks/data/040-llm-tag-items/040-llm-tag-items.jsonl...[0m
[32m2024-11-26 21:00:06.731[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mTo monitor the progress, run in a separate CLI: 
```
watch -n 0.1 "grep -wF 'LLM Extracted successfully' /home/dvquys/frostmourne/tmp/recsys-mvp/notebooks/data/040-llm-tag-items/040-llm-tag-items.jsonl | wc -l"
```[0m
[32m2024-11-26 21:00:13.001[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m__wrapper[0m:[36m11[0m - [34m[1m[OUTPUT] LLM Extracted successfully[0m
[32m2024-11-26 21:00:14.423[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m__wrapper[0m:[36m11[0m - [34m[1m[OUTPUT] LLM Extracted successfully[0m
[32m2024-11-26 21:00:14.510[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m__wrapper[0m:[36m11[0m - [34m[1m[OU

In [23]:
# Report total wall-clock time and the mean time per batch.
# max(..., 1) keeps the average defined when there was nothing to process.
run_duration = t1 - t0
logger.info(
    f"Run duration: {run_duration:,.2f}s, on avg. {run_duration / max(len(input_list), 1):,.2f}s per batch"
)

[32m2024-11-26 21:13:25.728[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mRun duration: 798.97s, on avg. 1.73s per batch[0m


In [24]:
# Read back what was written to the output file, using the project helper from llm.utils.
logger.info(f"Checking output...")
output_json = load_extracted(output_file)

[32m2024-11-26 21:13:25.738[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mChecking output...[0m


In [25]:
# Tabulate the extracted records and display them with untruncated column contents.
extracted_df = pd.DataFrame(output_json)
with pd.option_context("display.max_colwidth", None):
    display(extracted_df)

Unnamed: 0,parent_asin,item_title,tags
0,B00BN5T30E,Assassin's Creed IV Black Flag - PC,"[Action-Adventure, Open World, Historical Fiction, Single Player, Pirate Theme]"
1,B07KXS41QL,Nyko Charge Block Duo - 2 Port Controller Charging Station with Patented Charging Dongles and Micro-USB/AC Power Cord for PlayStation 4,"[Charging Station, Dual Charger, Controller Accessory, Micro-USB, Convenience]"
2,B00XY0IVK4,NHL 16 - PlayStation 4,"[Sports, Hockey, Multiplayer, Realistic Gameplay, Franchise Mode]"
3,B078GLDBZX,Street Fighter 30th Anniversary Collection - Nintendo Switch Standard Edition,"[Fighting, Multiplayer, Classic Games, Arcade Mode, Retro Collection]"
4,B01GY35HKE,Mass Effect Andromeda Deluxe - Xbox One,"[Action RPG, Sci-Fi, Narrative Driven, Space Exploration, Single Player]"
...,...,...,...
4623,B000P46NKC,Resident Evil 4,"[Survival Horror, Action-adventure, Classic, Legacy, Narrative-driven]"
4624,B076GYVWRY,"ivoler Carrying Storage Case for Nintendo Switch/For Switch OLED Model (2021),Portable Travel All Protective Hard Messenger Bag Soft Lining 18Games for Switch Console Pro Controller Accessories Black","[Storage Case, Travel Accessory, Protective Case, Switch Accessories, Portable]"
4625,B004Y2VAVS,EastVita New Charger Dock + 4 x Battery for Nintendo Wii Remote,"[Charging Dock, Battery Pack, Wii Remote, Accessory Bundle, Convenience]"
4626,B000B8J7K0,Mario & Luigi: Partners In Time,"[Role-playing Game, Platformer, Nintendo DS, Adventure, Cooperative Gameplay]"


# Persist

In [26]:
# Write the extracted tags table to parquet without the DataFrame index.
# The path is relative to the notebook's working directory.
output_fp = "../data/item_tags_from_llm.parquet"
extracted_df.to_parquet(output_fp, index=False)