In [26]:
from llm_model import CienaGPTChatModel
import pydantic
import os
import requests
from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain_core.messages import SystemMessage
from langchain.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from pydantic import BaseModel, Field
from bs4 import BeautifulSoup

In [27]:
from typing import List, Mapping
from langchain.output_parsers import PydanticOutputParser
class ProductDetails(BaseModel):
    '''
        This class is used to capture additional product details related to Mobile
    '''
    RAM: int = Field(descriptoin="RAM of the mobile")
    ROM: int = Field(description="ROM of the mobile")
    Battery: int = Field(description="Battery capacity of the mobile")
    Processor: str = Field(description="CPU Processor of the mobile")

class Product(BaseModel):
    '''
        This class is used to capture primary product details of Mobile
    '''
    Name: str = Field(description="Name of the mobile.")
    Price: int = Field(description="Price of the mobile.")
    Details: ProductDetails = Field(description="Additional features of the Mobile")

class Products(BaseModel):
    '''
        This class is used to store the collection/list of Mobiles 
    '''
    Mobiles: list[Product] = Field("List of mobiles listed in the text")
    
class BPAHelpPage(BaseModel):
    header: str = Field(description="Header or Title of the page")
    p: List[str] = Field(description="List of p tags")
    li: Mapping[str, List[str]] = Field(description="Dictionary of lists with the list content")
    table: List[str] = Field(description="table tags as github markdown")

In [28]:
output_parser = PydanticOutputParser(pydantic_object = Products)
format_instructions = output_parser.get_format_instructions()

In [29]:
human_text = "{input}"
messages = [
    SystemMessage(
        content='Extract all the products from the user input. Do not use markdown for the output.' + format_instructions
    ),
    HumanMessagePromptTemplate.from_template(human_text)
]
prompt = ChatPromptTemplate.from_messages(messages)

In [30]:
llm = CienaGPTChatModel(model_name='test-model', model_id = '5', base_address='https://gptapi.ciena.com/', okta_domain='ciena.okta.com', okta_custom_scope='api', client_secret='uNU1c9Vl01WfkBdQDUivZr0O1pR3qxJpe5js3d1PJhwqGO63WEvqvOXgBux9dRyi', client_id='0oa1x6yhddqyoeoTY0h8')

In [31]:
chain = prompt | llm | output_parser

In [32]:
with open('input.html') as f:
    input_text = f.read()
print(input_text)
soup = BeautifulSoup(input_text, "html.parser")
html_input = soup.body.text


<!doctype html>
<html lang="en">
<head>
    <link href="https://rukminim2.flixcart.com" rel="preconnect"/>
    <link rel="stylesheet" href="//static-assets-web.flixcart.com/fk-p-linchpin-web/fk-cp-zion/css/atlas.chunk.8dd48d.css"/>
    <link rel="stylesheet" href="//static-assets-web.flixcart.com/fk-p-linchpin-web/fk-cp-zion/css/app_modules.chunk.c48a12.css"/>
    <link rel="stylesheet" href="//static-assets-web.flixcart.com/fk-p-linchpin-web/fk-cp-zion/css/app.chunk.1def6f.css"/>
    <meta http-equiv="Content-type" content="text/html; charset=utf-8"/>
    <meta http-equiv="X-UA-Compatible" content="IE=Edge"/>
    <meta property="fb:page_id" content="102988293558"/>
    <meta property="fb:admins" content="658873552,624500995,100000233612389"/>
    <link rel="shortcut icon" href="https://static-assets-web.flixcart.com/www/promos/new/20150528-140547-favicon-retina.ico"/>
    <link type="application/opensearchdescription+xml" rel="search" href="/osdd.xml?v=2"/>
    <meta property="og:type

In [33]:
products = chain.invoke({"input":html_input})

success: 200
{'result': {'id': 0, 'oktaUserId': None, 'openAiId': 'chatcmpl-A9FvSj8vjM3ARAjVewIiG9pd2XdyX', 'model': 'gpt-4o-2024-05-13', 'title': None, 'choices': [{'message': {'content': '{\n  "Mobiles": [\n    {\n      "Name": "SAMSUNG Galaxy S23 Ultra 5G (Cream, 512 GB)",\n      "Price": 134999,\n      "Details": {\n        "RAM": 12,\n        "ROM": 512,\n        "Battery": 5000,\n        "Processor": "Qualcomm Snapdragon 8 Gen 2"\n      }\n    },\n    {\n      "Name": "SAMSUNG Galaxy S23 Ultra 5G (Phantom Black, 512 GB)",\n      "Price": 109990,\n      "Details": {\n        "RAM": 12,\n        "ROM": 512,\n        "Battery": 5000,\n        "Processor": "Qualcomm Snapdragon 8 Gen 2"\n      }\n    },\n    {\n      "Name": "SAMSUNG Galaxy S23 Ultra 5G (Green, 256 GB)",\n      "Price": 98900,\n      "Details": {\n        "RAM": 12,\n        "ROM": 256,\n        "Battery": 5000,\n        "Processor": "Qualcomm Snapdragon 8 Gen 2"\n      }\n    },\n    {\n      "Name": "SAMSUNG Galaxy 

In [34]:
products

Products(Mobiles=[Product(Name='SAMSUNG Galaxy S23 Ultra 5G (Cream, 512 GB)', Price=134999, Details=ProductDetails(RAM=12, ROM=512, Battery=5000, Processor='Qualcomm Snapdragon 8 Gen 2')), Product(Name='SAMSUNG Galaxy S23 Ultra 5G (Phantom Black, 512 GB)', Price=109990, Details=ProductDetails(RAM=12, ROM=512, Battery=5000, Processor='Qualcomm Snapdragon 8 Gen 2')), Product(Name='SAMSUNG Galaxy S23 Ultra 5G (Green, 256 GB)', Price=98900, Details=ProductDetails(RAM=12, ROM=256, Battery=5000, Processor='Qualcomm Snapdragon 8 Gen 2')), Product(Name='SAMSUNG Galaxy S23 Ultra 5G (Green, 512 GB)', Price=119990, Details=ProductDetails(RAM=12, ROM=512, Battery=5000, Processor='Qualcomm Snapdragon 8 Gen 2')), Product(Name='SAMSUNG Galaxy S23 Ultra 5G (Green, 1 TB)', Price=154999, Details=ProductDetails(RAM=12, ROM=1024, Battery=5000, Processor='Qualcomm Snapdragon 8 Gen 2')), Product(Name='SAMSUNG Galaxy S23 Ultra 5G (Cream, 1 TB)', Price=154999, Details=ProductDetails(RAM=12, ROM=1024, Battery=

In [35]:
products.Mobiles[0]

Product(Name='SAMSUNG Galaxy S23 Ultra 5G (Cream, 512 GB)', Price=134999, Details=ProductDetails(RAM=12, ROM=512, Battery=5000, Processor='Qualcomm Snapdragon 8 Gen 2'))

In [36]:
import pandas as pd
pd.read_html('input.html')

ImportError: Missing optional dependency 'html5lib'.  Use pip or conda to install html5lib.