# Finding a product by description using an LLM

In [None]:
import json
import re
import transformers
import torch

In [None]:
# Extract the list of product identifiers from the LLM response


def extract_list(text):
    """Return the first bracketed list of integers found in *text*.

    The LLM is asked to answer with something like ``[1,2,3]``, but models
    frequently pad the list with spaces (``[ 1, 2 ]`` or ``[1 , 2]``), so
    whitespace is tolerated after ``[``, before ``]`` and around commas.

    Args:
        text: Raw text produced by the model.

    Returns:
        A list of ints taken from the first matching ``[...]`` group, or
        ``None`` when no such group exists (e.g. the model said
        ``'Not found'``).
    """
    match = re.search(r"\[\s*(\d+(?:\s*,\s*\d+)*)\s*\]", text)
    if match is None:
        return None

    # int() ignores surrounding whitespace, so no explicit strip is needed.
    return [int(num) for num in match.group(1).split(",")]

In [5]:
# Numbering the list of products

def enumerate_list(products):
    """Assign sequential 1-based ``"ID"`` values to the products, in place.

    Each dict in *products* receives an ``"ID"`` key, and its slot in the
    list is then replaced by a new dict holding the same items with ``"ID"``
    as the first key. Nothing is returned.
    """
    for position, entry in enumerate(products):
        entry["ID"] = position + 1
        # "ID" is listed first so it leads the key order of the new dict;
        # unpacking `entry` afterwards re-assigns the same value.
        products[position] = {"ID": entry["ID"], **entry}

In [14]:
# String representation of the dictionary list

def dicts_to_str(products):
    """Render the products as text, one product per line.

    Every line has the form::

        ID:<id> Name:<name> Description:<desc> Applications:<a, b> Product Categories:<a, b> Target Industries:<a, b> 

    (each field is followed by a single space, including the last one).
    Lines are separated by ``"\\n"`` with no trailing newline.

    Args:
        products: Iterable of dicts with the keys ``"ID"``, ``"Name"``,
            ``"Description"``, ``"APPLICATIONS"``, ``"PRODUCT CATEGORIES"``
            and ``"TARGET INDUSTRIES"``; the last three are iterables of
            strings.

    Returns:
        The joined text, or an empty string when *products* is empty.
    """
    lines = []

    for product in products:
        fields = (
            ("ID:", str(product["ID"])),
            ("Name:", product["Name"]),
            ("Description:", product["Description"]),
            ("Applications:", ", ".join(product["APPLICATIONS"])),
            ("Product Categories:", ", ".join(product["PRODUCT CATEGORIES"])),
            ("Target Industries:", ", ".join(product["TARGET INDUSTRIES"])),
        )
        lines.append("".join(label + value + " " for label, value in fields))

    # Joining (rather than appending "\n" per product) keeps the last line
    # free of a trailing newline and makes the empty case return "".
    return "\n".join(lines)

In [19]:
# Creating a database for product search

def creat_db(products):
    """Build the text "database" that is embedded in the LLM prompt.

    The products are first numbered in place via ``enumerate_list`` and then
    rendered to a single string by ``dicts_to_str``, which is returned.
    """
    enumerate_list(products)
    catalogue_text = dicts_to_str(products)
    return catalogue_text
    

In [None]:

# Load the raw product records from the JSON file.
with open("products_DB_(EN).json", encoding="utf-8") as f:
    data = json.load(f)

# creat_db numbers the entries of `data` in place and returns their string
# rendering, so `products` is a str (one product per line), not a list.
products = creat_db(data)

In [None]:
# Loading a model and creating a pipeline

# Identifier of the model handed to the pipeline below.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Text-generation pipeline for `model_id`; the model is requested in
# bfloat16 via `model_kwargs` and placed on the "cuda" device.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
)

In [None]:
# Creating a prompt

# Chat messages for the model. The system message embeds `products` (the
# catalogue string built by creat_db) and asks for a bare list of matching
# product IDs, or 'Not found'; the user message carries the search query.
# NOTE: a literal brace inside an f-string must be doubled; the placeholder
# itself uses single braces: {products}.
messages = [
    {"role": "system", "content": f"You are a useful assistant in the search. Your task is to find the names of the products from the list \n{products} \n that correspond to user content, return a python list containing the product IDs that correspond to user content, output without explanation, if you can't find corresponding product return 'Not found', \n example: [1,2,3,...,n] "},
    {"role": "user", "content": "I need a device used for heat measurements."},
]

# Render `messages` through the tokenizer's chat template. tokenize=False
# requests the result as text rather than token ids, and
# add_generation_prompt=True asks the template to end with the cue for the
# assistant's reply.
prompt = pipeline.tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True
)

# Token ids passed as `eos_token_id` to the generation call: the tokenizer's
# EOS token plus the id of "<|eot_id|>" (presumably the model's end-of-turn
# marker - confirm against the model card).
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]




In [None]:
# Finding a product

# Run generation on the rendered prompt: at most 500 new tokens, stopping on
# any id in `terminators`, with sampling enabled (temperature 0.2, top_p 0.9).
# The EOS id is also supplied as the padding token id.
outputs = pipeline(
    prompt,
    max_new_tokens=500,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.2,
    top_p=0.9,
    pad_token_id=pipeline.tokenizer.eos_token_id
)

# Drop the first len(prompt) characters of the generated text - assumes the
# pipeline returns the prompt followed by the completion (TODO confirm).
answer = outputs[0]["generated_text"][len(prompt):]

# extract_list returns None when the answer contains no bracketed ID list.
Product_IDs = extract_list(answer)

print(f"Found products: {Product_IDs}")
print(f"LLM answer {answer}")