Note: you may need to restart the kernel to use updated packages.


In [43]:
import json

In [38]:
from langchain_community.document_loaders import AsyncHtmlLoader

# Listing pages to fetch; every later step works on the documents loaded from them.
urls = [
    "https://www.justnsnparts.com/rfq/3m-purification-inc/4330012137798/12838-21-50-0015/",
    "https://www.ebay.com/itm/312574229877",
]
loader = AsyncHtmlLoader(urls)
docs = loader.load()

Fetching pages: 100%|##########| 2/2 [00:00<00:00,  3.13it/s]


In [19]:
import os
from dotenv import load_dotenv
from openai import AzureOpenAI

# load_dotenv() runs before the lookups below so they can see any values it loads.
load_dotenv()

AOAI_ENDPOINT = os.getenv("AOAI_ENDPOINT")
AOAI_KEY = os.getenv("AOAI_KEY")

# Pass the values read above instead of querying the environment a second time.
client = AzureOpenAI(
    azure_endpoint=AOAI_ENDPOINT,
    api_key=AOAI_KEY,
    api_version="2024-05-01-preview",
)

In [42]:
# Imported here so this cell runs on its own regardless of which cells ran before it.
import os
from pathlib import Path

# Directory holding the *.prompt files. Set RESEARCHER_PROMPTS_DIR to point somewhere
# else without editing this cell; the default is the location this notebook was
# written against, so existing setups keep working unchanged.
PROMPTS_DIR = Path(
    os.getenv("RESEARCHER_PROMPTS_DIR", "/home/xinlinjing/Repos/researcher/prompts")
)

# Read as UTF-8 explicitly: without an encoding argument the platform's locale
# encoding is used, which can differ between machines.
EXTRACT_ITM_SYS_PROMPT = (PROMPTS_DIR / "extract_item_info_sys.prompt").read_text(encoding="utf-8")
COMPARE_ITM_SYS_PROMPT = (PROMPTS_DIR / "compare_items_sys.prompt").read_text(encoding="utf-8")

In [None]:
from langchain_community.document_transformers import Html2TextTransformer
# One transformer instance is applied to all documents loaded into `docs`.
html2text = Html2TextTransformer()
# docs_transformed is what the extraction loop iterates over; get_item_info reads each
# entry's page_content. NOTE(review): presumably this turns raw HTML into plain text
# (going by the class name) — confirm against the langchain_community docs.
docs_transformed = html2text.transform_documents(docs)

In [48]:
def get_item_info(url, doc):
    """Extract item details from one scraped page via the chat model.

    Sends EXTRACT_ITM_SYS_PROMPT as the system message and the page text,
    prefixed with its source URL, as the user message. The request asks for
    a JSON-object response.

    Args:
        url: Address the page was fetched from; included in the user message.
        doc: Object whose ``page_content`` attribute holds the page text.

    Returns:
        The message content of the first choice in the completion.
    """
    system_message = {
        "role": "system",
        "content": [{"type": "text", "text": EXTRACT_ITM_SYS_PROMPT}],
    }
    page_text = (
        "The following information is extracted from the website: "
        + url
        + ":\n"
        + doc.page_content
    )
    user_message = {
        "role": "user",
        "content": [{"type": "text", "text": page_text}],
    }

    # Request settings are gathered in one mapping and unpacked into the call.
    request_options = dict(
        model="gpt-4o",
        max_tokens=800,
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
        stream=False,
        response_format={"type": "json_object"},
    )
    completion = client.chat.completions.create(
        messages=[system_message, user_message],
        **request_options,
    )
    return completion.choices[0].message.content
def get_llm_response(item_infos, user_query):
    """Ask the chat model to compare the extracted items against the user's query.

    Sends COMPARE_ITM_SYS_PROMPT as the system message and a user message
    containing the query followed by the item records serialized as JSON.

    Args:
        item_infos: Extracted item records. When this is a list or tuple,
            entries that are JSON text (such as the strings returned by
            get_item_info) and decode to an object or array are decoded
            first, so the prompt carries plain JSON instead of JSON escaped
            inside JSON strings. Other entries are passed through unchanged.
        user_query: Free-text description of the item being looked for.

    Returns:
        The message content of the first choice in the completion.
    """
    def _decode_json_text(info):
        # Only replace strings that hold a JSON object/array; anything else stays as given.
        if isinstance(info, str):
            try:
                decoded = json.loads(info)
            except ValueError:
                return info
            if isinstance(decoded, (dict, list)):
                return decoded
        return info

    if isinstance(item_infos, (list, tuple)):
        items = [_decode_json_text(info) for info in item_infos]
    else:
        items = item_infos

    # The newline keeps the end of the query (often a part number) from fusing
    # with the sentence that follows it.
    user_text = (
        "The user asks: " + user_query
        + "\nThe following information is extracted: " + json.dumps(items)
    )

    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": COMPARE_ITM_SYS_PROMPT}],
        },
        {
            "role": "user",
            "content": [{"type": "text", "text": user_text}],
        },
    ]

    # Generate the completion
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=800,
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
        stream=False,
    )
    return completion.choices[0].message.content

In [40]:
# Run the extraction once per page and collect each result in item_infos.
# zip() pairs urls[i] with docs_transformed[i] and stops at the shorter of the two;
# assumes docs_transformed keeps the same order as urls — TODO confirm.
item_infos = []
for url, doc in zip(urls, docs_transformed):
    item_info = get_item_info(url, doc)
    item_infos.append(item_info)
    # Echo each result as soon as it comes back.
    print(item_info)

{
  "URL": "https://www.justnsnparts.com/rfq/3m-purification-inc/4330012137798/12838-21-50-0015/",
  "Company": "3M Purification Inc",
  "Price": null,
  "Dimensions": null,
  "Item Description": "The 12838-21-50-0015 is a filter fluid part manufactured by 3M Purification Inc, available for immediate shipping."
}

{
  "URL": "https://www.ebay.com/itm/312574229877",
  "Company": "AMF",
  "Price": 90.66,
  "Dimensions": null,
  "Item Description": "The AMF 12838-21-50-0015 Pneumatic Filter is a used 1\" NPT filter with a 125 PSI capacity."
}


In [49]:
# Free-text description of the part being researched; sent to the model as the user's question.
user_query = "FILTER-STRAINER, STEEL ELEMENT, MODEL EGS, 5' LONG, 0.0015' SPACING  AMF CUNO INC  12838-21-50-0015"
# Pass the query and the per-page extraction results to the comparison prompt, then print the reply.
llm_response = get_llm_response(item_infos, user_query)
print(llm_response)

Here's a comparison table based on the provided information:

| URL                                                               | Company             | Price  | Dimensions | Item Description                                                                                   |
|-------------------------------------------------------------------|---------------------|--------|------------|---------------------------------------------------------------------------------------------------|
| [Just NSN Parts](https://www.justnsnparts.com/rfq/3m-purification-inc/4330012137798/12838-21-50-0015/) | 3M Purification Inc | Null   | Null       | The 12838-21-50-0015 is a filter fluid part manufactured by 3M Purification Inc, available for immediate shipping. |
| [eBay](https://www.ebay.com/itm/312574229877)                     | AMF                 | 90.66  | Null       | The AMF 12838-21-50-0015 Pneumatic Filter is a used 1" NPT filter with a 125 PSI capacity.                        |

### Recomm