# Model output of evaluated PDFs for ground truth value comparison

In [1]:
from pathlib import Path
import os
import getpass

import pytesseract
from pdf2image import convert_from_path

# LangChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA, ConversationalRetrievalChain

In [2]:

# Input your openAI API key
os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')
DOCUMENT_DIR = Path("./EvaluatedPDF")

In [3]:
class TesseractDocument:
    def __init__(self, source, page_content):
        self.metadata = {'source': source}
        self.page_content = page_content

In [4]:
def load_documents_tesseract(pdf_name, parent_dir):
    """
    Load the documents from the temp directory and extract text from them using Tesseract.
    
    """
    pdf_path = parent_dir/pdf_name
    # Convert PDF to images
    images = convert_from_path(pdf_path.as_posix()) # Explicitly convert the path to a string

    documents = []

    
    for image in images:
        # Extract text from each image (each page) 
        text = pytesseract.image_to_string(image)
        print(text)
        # create TesseractDocumen object for each pages
        doc = TesseractDocument(source=pdf_name, page_content=text)
        documents.append(doc)

    return documents

In [5]:
def split_documents(documents):
    """
    Split the documents into chunks of text.
    documents: list of Document objects
    """
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)
    return texts

In [6]:
def load_to_vectordb(pdf_name, splited_docs, clear_up = False):

    # Download embeddings from OpenAI
    embeddings = OpenAIEmbeddings()

    # get vectore database
    db = Chroma.from_documents(splited_docs, embeddings)

    # get current information for the vector database 
    db_info = db.get()
    source = [data['source'] for data in db_info['metadatas']]
    doc_ids =  db_info['ids']
   
   # clear up irrelevant(old) documents in the vector database
    if clear_up: 
        delete_ids = []
        id_source = tuple(zip(doc_ids, source))
        for i in range(len(id_source)):
            if pdf_name not in id_source[i][1]:
                # delete chunks if their pdf source in the vector database is not the target pdf
                delete_ids.append(id_source[i][0])
        
        print("deleted ids: ", delete_ids)
        if len(delete_ids) != 0: 
            print("Deleting folloing ids in the vector database: ", delete_ids)
            db._collection.delete(ids=delete_ids)
            db_info = db.get()
        print([data['source'] for data in db_info['metadatas']])


    return db

In [7]:
# Main function to process PDF and ask questions
def process_pdf_and_ask(pdf_name, parent_dir, clear_up = False):

    # ------------------------Preprocess documents----------------------------#
    # load pdf files as tesseract a list of objects
    documents = load_documents_tesseract(pdf_name, parent_dir)
    print(documents)

    # split the loaded documents into different chunks
    splited_docs = split_documents(documents)
    
    # create vector database retriever
    db = load_to_vectordb(pdf_name, splited_docs, clear_up)
    num_chunks = len(db.get()["ids"])

    retriever = db.as_retriever(search_kwargs={"k": min(4, num_chunks)})
 
    # print("count after", db._collection.count())

    
    #-----------------------Initialize OpenAI LLM and load qa Chain--------------#
    llm = ChatOpenAI( model_name="gpt-3.5-turbo", temperature=0)

    chain = RetrievalQA.from_chain_type(
    llm,
    retriever=retriever
    )

    return chain

## Different Systematic Messages

In [8]:
system_message1 = "You are a helpful assistant. Extract the information from this document. Say you do not know if you do not know. Please provide the answer in the following format:  { 'attribute': 'attribute Name', 'value': 'attribute value', 'note': 'context to where and why you extracted the value' }, { 'attribute': 'attribute Name', 'value': 'attribute value', 'context': 'context to where and why you extracted the value'}"

system_message2 = "You are a helpful assistant. Extract the information from this document. If you don't find any relevant information, \
                please answer: { 'attribute': 'attribute Name', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document' }.\
                if you found any information, please provide the answer in the following format:  { 'attribute': 'attribute Name', 'value': 'attribute value', 'note': 'context to where and why you extracted the value' }, { 'attribute': 'attribute Name', 'value': 'attribute value', 'context': 'context to where and why you extracted the value' }"


system_message3 = "You are a helpful assistant. Extract the information from this document. If you don't find any relevant information, \
                please answer: { 'attribute': 'attribute Name', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document' }.\
                if you found any information, please provide the answer in the following format:  { 'attribute': 'attribute Name', 'value': 'attribute value', 'note': 'context to where and why you extracted the value' }.\
                if you found multiple values for the same attribute, please provided answers in the nested dictonaries as following:\
                {'attribute': 'attribute Name', 'value': [{'attribute': 'attribute subtype 1', 'value': 'attribute subtype 1 value', 'note': 'context to where and why you extracted the attribute subtype 1 value' },\
                                                            {'attribute': 'attribute subtype 2', 'value': 'attribute subtype 2 value', 'note': 'context to where and why you extracted the attribute subtype 2 value' }] }"



## Different Prompts

General and simple prompt:

In [9]:
def simple_prompt_answers(chain, system_message):
    attribute_list = ["Supplier Name", "Product Type", "Dimensions", "Orientation", "Current Rating", "Voltage", \
                      "Frequency", "Impedance", "Capacitance", "Temperature"]
    
    for attribute in  attribute_list:
        simple_prompt = f"Instruction: I am looking for {attribute} in this document."
        query = "Please extract any information or a range of values for the attribute. Please help me to find as much information as possible."
        question = system_message + "\n" + simple_prompt + query

        result = chain({"query": question})
        print(result["result"])


Cusomized and complex prompt:

In [11]:
def complex_prompt_answers(chain, system_message):

    # customized questions for each attribute
    supplier_name_question = "I am looking for Supplier Name attribute in the document. The supplier name usually is a company name which indicates where the document is from. It usually can be found at the top or bottom of the document."
    Product_Type_question = "I am looking for Product Type attribute in the document. The Product Type can be indicated in the product name. If you cannot find any information for product type, you can also give me answers for categoies or division of product (e.g. whether it is capacitor or resistor)."
    dimension_question = "I am looking for Dimension attribute for product 0402(1E)  in the document. This is typically a measurement of size, can be presented as a numeric for length(L), width (W) and tall(T) with a unit of length (e.g., mm, cm, m, inches). Give me the answer with unit specified"
    orientation_question = "I am looking for Orientation attribute in the document. The orientation typically refers to the position, alignment, or directioin of the product or component."
    Current_Rating_question = "I am looking for Current Rating attribute in the document."
    Voltage_question = "I am looking for Voltage attribute in the document. Voltages typically listed in units of volts (V) or kilovolts (kV)."
    Frequency_question = "I am looking for Frequency attribute in the document. Frequency refers to the number of cycles an electronic signal completes in one second, and it is typically expressed in hertz (Hz), kilohertz (kHz), or megahertz (MHz)."
    Impedance_question = "I am looking for the Impedance attribute in the document. Impedance is a measure of how much a circuit resists the flow of an alternating current (AC) and is typically expressed in ohms (Ω). It can also provide insights into other properties of the circuit such as resonant frequency and Q factor."
    capacitance_question = "I am looking for the Capacitance attribute in the document. Capacitance refers to the ability of a component or circuit to store and release electrical energy and is measured in farads (F), often represented in microfarads (μF), nanofarads (nF), or picofarads (pF)."
    temperature_question = "I am looking for the Temperature attribute in the document. They are typically numerica values measured in degrees Celsius (°C) or Fahrenheit (°F)."

    complex_prompt = [supplier_name_question, Product_Type_question, dimension_question, orientation_question, \
                  Current_Rating_question, Voltage_question, Frequency_question, Impedance_question, capacitance_question, temperature_question]
    
    for prompt in  complex_prompt:
        query = "Please extract any information or a range of values for the attribute. Please help me to find as much information as possible."
        question = system_message + "\n" + prompt + query

        result = chain({"query": question})
        print(result["result"])


## Test PDFs

### PDF1: gs-12-1565.pdf

simple prompts:


In [31]:
pdf_name = "gs-12-1565.pdf"  

chain = process_pdf_and_ask(pdf_name, DOCUMENT_DIR)
simple_prompt_answers(chain, system_message1)

Created a chunk of size 530, which is longer than the specified 500
Created a chunk of size 533, which is longer than the specified 500
Created a chunk of size 1407, which is longer than the specified 500
Created a chunk of size 502, which is longer than the specified 500
Created a chunk of size 525, which is longer than the specified 500
Created a chunk of size 582, which is longer than the specified 500
Created a chunk of size 546, which is longer than the specified 500
Created a chunk of size 568, which is longer than the specified 500
Created a chunk of size 552, which is longer than the specified 500
Created a chunk of size 836, which is longer than the specified 500
Created a chunk of size 1362, which is longer than the specified 500
Created a chunk of size 1099, which is longer than the specified 500


{'ids': ['d06fbfb6-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc01a-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc02e-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc042-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc060-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc06a-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc07e-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc092-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc0a6-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc0ba-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc0c4-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc0d8-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc0ec-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc100-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc10a-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc11e-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc132-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc146-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc150-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc16e-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc178-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc18c-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc1a0-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc1b4-8e2a-11ee-8ca3-7e0641d3cee1', 'd06fc1be-8e2a-11ee-8ca3-7e0641

In [19]:
simple_prompt_answers(chain, system_message2)

{ 'attribute': 'Supplier Name', 'value': 'Not found', 'note': 'No information about the supplier name is mentioned in the document' }
{ 'attribute': 'Product Type', 'value': 'EXAMAX2™ and EXAMEZZ2™ Connector System', 'context': 'The document mentions the EXAMAX2™ and EXAMEZZ2™ Connector System as the product type.' }
{ 'attribute': 'Sample Quantity and Description', 'value': 'See Table 2 for connector sample sizes', 'context': 'The connector sample sizes are shown in Table 2' },
{ 'attribute': 'Equipment Calibration', 'value': 'Calibration system in accordance with ANSI Z-540 and ISO 9000', 'context': 'All test equipment and inspection facilities used in the performance of any test shall be maintained in a calibration system in accordance with ANSI Z-540 and ISO 9000' }
{ 'attribute': 'Orientation', 'value': 'RIGHT ANGLE RECEPTACLE (RAR) DIRECT MATE ORTHOGONAL (DMO): RIGHT ANGLE WITH VERTICAL HEADER (VH) ORTHOGONAL HEADER (RAOH) WITH RAR', 'context': 'The document mentions the orientat

In [32]:
simple_prompt_answers(chain, system_message3)

{'attribute': 'Operating Voltage Rating', 'value': '< 30 VAC RMS / DC', 'note': 'Extracted from section 3.1. Operating Voltage Rating'}
{'attribute': 'Non-Agency Voltage Rating', 'value': '150 Vac RMS or Voc Maximum', 'note': 'Extracted from section 3.1. Operating Voltage Rating'}
{'attribute': 'Operating Current Rating', 'value': 'See AICC Power Design Guide GS-20-0456', 'note': 'Extracted from section 3.2 Operating Current Rating'}
{'attribute': 'Operating Temperature Range', 'value': '-55 to 85 °C', 'note': 'Extracted from section 3.3. Operating Temperature Range'}
{'attribute': 'Product Type', 'value': 'EXAMAX2™ and EXAMEZZ2™ Connector System', 'note': 'Extracted from the document title'}
{'attribute': 'Operating Voltage Rating', 'value': '< 30 VAC RMS / DC', 'note': 'Extracted from section 3.1. Operating Voltage Rating'}
{'attribute': 'Operating Current Rating', 'value': 'See AICC Power Design Guide GS-20-0456', 'note': 'Extracted from section 3.2 Operating Current Rating'}
{'attr

Complex prompts:

In [34]:
complex_prompt_answers(chain, system_message1)

{ 'attribute': 'Supplier Name', 'value': 'Amphenol ICC', 'context': 'The document is titled "GS-12-1565 PRODUCT SPECIFICATION Amphenol ICC"' }
{ 'attribute': 'Product Type', 'value': 'EXAMAX2™ and EXAMEZZ2™ Connector System', 'context': 'The document mentions the EXAMAX2™ and EXAMEZZ2™ Connector System in the title page and throughout the document.' }
I'm sorry, but I couldn't find any information related to dimensions or numeric measurements in the provided document.
{ 'attribute': 'Orientation', 'value': 'Right Angle', 'context': 'The document mentions "Right Angle Receptacle (RAR)" and "Right Angle Orthogonal Header (RAOH)" which indicate a right angle orientation.' }, { 'attribute': 'Orientation', 'value': 'Vertical', 'context': 'The document mentions "Vertical Header (VH)" and "Vertical Receptacle (VR)" which indicate a vertical orientation.' }, { 'attribute': 'Orientation', 'value': 'Orthogonal', 'context': 'The document mentions "Right Angle Orthogonal Header (RAOH)" which indic

In [42]:
complex_prompt_answers(chain, system_message2)

{ 'attribute': 'Supplier Name', 'value': 'Amphenol ICC', 'context': 'The document is titled "GS-12-1565 PRODUCT SPECIFICATION Amphenol ICC" and the authorized by date is signed by S. Minich, indicating that Amphenol ICC is the supplier.' }
{ 'attribute': 'Product Type', 'value': 'EXAMAX2™ and EXAMEZZ2™ Connector System', 'context': 'The document mentions the EXAMAX2™ and EXAMEZZ2™ Connector System as the product being specified.' }
{ 'attribute': 'Dimension', 'value': 'Not found', 'note': 'No information about dimensions is mentioned in the document' }
{ 'attribute': 'Orientation', 'value': 'Right angle', 'context': 'The document mentions "BACKPLANE: RIGHT ANGLE RECEPTACLE (RAR) DIRECT MATE ORTHOGONAL (DMO): RIGHT ANGLE WITH VERTICAL HEADER (VH) ORTHOGONAL HEADER (RAOH) WITH RAR" which indicates that the orientation of the connector system is right angle.' }
{ 'attribute': 'Current Rating', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document'

# Documents with ground truth values
## Capacitors

### PDF2: KGM15CR51E106K-DATA.pdf

simple prompts:


In [None]:
pdf_name = "KGM15CR51E106K-DATA.pdf" 

chain2 = process_pdf_and_ask(pdf_name, DOCUMENT_DIR, clear_up=True)

In [42]:
simple_prompt_answers(chain2, system_message1)

I'm sorry, but I couldn't find any information about the Supplier Name in the given document.
I'm sorry, but I couldn't find any information about the Product Type in the given document.
{ 'attribute': 'Dimension', 'value': '1608(JIS) / 0603(EIA) L ~ w', 'note': 'Extracted from the document' }
I'm sorry, but I couldn't find any information about the orientation in the given document.
I'm sorry, but I couldn't find any information about the Current Rating in the given document.
{ 'attribute': 'Unit', 'value': 'mm', 'note': 'Unit of dimension' },
{ 'attribute': 'DC Bias Voltage', 'value': '20V', 'note': 'DC Bias Voltage value' },
{ 'attribute': 'AC Voltage', 'value': '20V', 'note': 'AC Voltage value' },
{ 'attribute': 'Temperature Range', 'value': '-55deg to 85deg', 'note': 'Temperature Range value' },
{ 'attribute': 'Temperature Coefficient', 'value': '+/-15%', 'note': 'Temperature Coefficient value' },
{ 'attribute': 'Capacitance', 'value': '10uF', 'note': 'Capacitance value' },
{ 'att

In [13]:

simple_prompt_answers(chain2, system_message2)

Created a chunk of size 647, which is longer than the specified 500


{ 'attribute': 'Supplier Name', 'value': 'KYOCERA', 'note': 'The document mentions KYOCERA as the supplier' }
{ 'attribute': 'Product Type', 'value': 'Not found', 'note': 'No information about the product type is mentioned in the document' }
{ 'attribute': 'Dimension', 'value': '1608(JIS) / 0603(EIA) L ~ w', 'note': 'The document mentions the dimensions as 1608(JIS) / 0603(EIA) L ~ w' }
{ 'attribute': 'Orientation', 'value': 'Not found', 'note': 'No information about orientation is mentioned in the document' }
{ 'attribute': 'Current Rating', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document' }
{ 'attribute': 'Voltage', 'value': '25Vdc', 'context': 'The Rated Voltage is mentioned as 25Vdc' }
{ 'attribute': 'Frequency', 'value': '20 MHz', 'note': 'Frequency (MHz) mentioned in the document' },
{ 'attribute': 'Frequency', 'value': '10 MHz', 'note': 'Frequency (MHz) mentioned in the document' },
{ 'attribute': 'Frequency', 'value': '1 MHz', 'no

In [29]:
complex_prompt_answers(chain2, system_message2)

{ 'attribute': 'Supplier Name', 'value': 'KYOCERA', 'context': 'KYOCERA is mentioned at the bottom of the document' }
{ 'attribute': 'Product Type', 'value': 'Capacitor', 'context': 'The document mentions "Capacitance" and "Capacitor Division" in the context of the information provided.' }
{ 'attribute': 'Dimension', 'value': '1608(JIS) / 0603(EIA) L ~ w', 'note': 'The document mentions the dimensions as 1608(JIS) / 0603(EIA) for length (L) and width (w).' }
{ 'attribute': 'Orientation', 'value': 'Not found', 'note': 'No information about orientation is mentioned in the document' }
{ 'attribute': 'Current Rating', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document' }
{ 'attribute': 'Voltage', 'value': '25Vdc', 'context': 'The document mentions a "Rated Voltage" of 25Vdc.' }
{ 'attribute': 'Frequency', 'value': '20 MHz', 'context': 'Frequency (MHz) in the Capacitor Division' }, { 'attribute': 'Frequency', 'value': '10 MHz', 'context': 'Freque

In [43]:
complex_prompt_answers(chain2, system_message3)

{'attribute': 'Supplier Name', 'value': 'KYOCERA', 'note': 'The supplier name is mentioned at the bottom of the document'}
{'attribute': 'Product Type', 'value': 'Capacitor', 'note': 'The document mentions "Capacitor Division" in the context'}
{'attribute': 'Dimension', 'value': '1608(JIS) / 0603(EIA) L ~ w', 'note': 'The document mentions the dimensions as 1608(JIS) / 0603(EIA) L ~ w, indicating the size of the component in millimeters.'}
{'attribute': 'Orientation', 'value': 'Not found', 'note': 'No information about orientation is mentioned in the document'}
{'attribute': 'Current Rating', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document'}
{'attribute': 'Voltage', 'value': '25Vdc', 'note': 'The document mentions a rated voltage of 25Vdc.'}
{'attribute': 'Frequency', 'value': '20 MHz', 'note': 'Extracted from the "Frequency (MHz)" column in the document'}
{'attribute': 'Impedance', 'value': 'Z(0Vdc)', 'note': 'Extracted from the S parame

## PDF3: vzh680m2atr-1313s.pdf

In [None]:
pdf_name = "vzh680m2atr-1313s.pdf" 

chain3 = process_pdf_and_ask(pdf_name, DOCUMENT_DIR, clear_up=True)

In [39]:
complex_prompt_answers(chain3, system_message2)

{ 'attribute': 'Supplier Name', 'value': 'LELON ELECTRONICS CORP.', 'context': 'Found in the Lelon P/N section of the document' }
{ 'attribute': 'Product Type', 'value': 'Capacitor', 'context': 'The document mentions "Capacitor" in the product name "VZH680M2ATR-1313S"' }
{ 'attribute': 'Dimension', 'value': '12.5mm x 13.5mm', 'context': 'DIAGRAM OF DIMENSIONS' }
{ 'attribute': 'Orientation', 'value': 'Not found', 'note': 'No information about orientation is mentioned in the document' }
{ 'attribute': 'Current Rating', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document' }
{ 'attribute': 'Rated voltage', 'value': '100 V', 'context': 'The document mentions "68 uF / 100 V" in the Lelon P/N section.' }
{ 'attribute': 'Frequency', 'value': '50, 60, 120, 1k, 10k', 'context': 'Ripple Current & Frequency (Hz)}' }
{ 'attribute': 'Impedance', 'value': '< 0.320', 'context': 'Impedance value is mentioned in the "Items Performance" section of the document

In [40]:
complex_prompt_answers(chain3, system_message3)

{'attribute': 'Supplier Name', 'value': 'LELON ELECTRONICS CORP.', 'note': 'The supplier name is mentioned in the Lelon P/N section of the document.'}
{'attribute': 'Product Type', 'value': 'Capacitor', 'note': 'The document mentions "Capacitance" and "Cap." in the product name, indicating that the product is a capacitor.'}
{'attribute': 'Dimension', 'value': {'Length': '12.5 mm', 'Width': '10 mm', 'Height': '13.5 mm'}}
{'attribute': 'Orientation', 'value': 'Not found', 'note': 'No information about orientation is mentioned in the document'}
{'attribute': 'Current Rating', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document'}
{'attribute': 'Voltage', 'value': '100 V', 'note': 'The document mentions a rated voltage of 100 V for the capacitor.'}
{'attribute': 'Frequency', 'value': '50, 60 120 1k 10k up', 'note': 'Frequency values mentioned in the document'}
{'attribute': 'Impedance', 'value': '< 0.320 (100kHz, 20°C)', 'note': 'Extracted from th

## Resistors
### PDF4: RK73H.pdf

In [12]:
pdf_name = "RK73H.pdf" 

chain4 = process_pdf_and_ask(pdf_name, DOCUMENT_DIR, clear_up=False)

=O \\ RK7SH

precision O.5°%o, 190 tolerance
thick film chip resistor

E
features RoH.

» Products with lead-free terminations meet COMPLIANT
EU RoHS requirements. EU RoHS regulation
is not intended for Pb-glass contained in electrode,
resistor element and glass.
+ AEC-Q200 Tested: 0201 (1H), 0402 (1E), 0603 (1u),
0805 (2A), 1206 (2B), 1210 (2E), 2010 (2H/W2h), 2512

IN

KOA SPEER ELECTRONICS, INC.

(8A/W3A/W3A2)
Type* Dimensions inches (mm)
\(Inch Size Code) L WwW c d t

.016+.0008].008+.0008) .004+.001 | .004+.001 |.005+.0008

uP
dimensions and construction (01005) (0.440.02) | (0.2+0.02) | (0.140.03) | (0.11+0.03) | (0.1340.02)

1H .024+.001 | .012+.001 | .004+.002 | .006+.002 | .009+.001
Lowy t are (0201) (0.6£0.03) | (0.3+0.03) | (0.140.05) | (0.15+0.05) | (0.23£0.03)
002
wi Z| 1E soa 008.004 | -01 “Soa
+ i
7 . ami Stang (0402) | .039 302 | oz+.002 | (9240-1) | (0.25 +245) | 914.002
f " 0.1 0.5+0.05, 0.35+0.05,
P Nn EAT | (19 ‘fis? | ) o1z004 | 0122.06 | )
J 9 (0402) (0.25+0.1) | 

Created a chunk of size 3286, which is longer than the specified 500
Created a chunk of size 1076, which is longer than the specified 500
Created a chunk of size 710, which is longer than the specified 500


RK7SH

precision O.5°%o, 190 tolerance
thick film chip resistor

= OWN

KOA SPEER ELECTRONICS, INC.

applications and ratings
Rated

Rated Resistance Range Maximum Maximum 9;
meta - fone Ambient Terminal ag Ds0.5% Fei% Working Overload Ces
emp. Temp. E-24, E-96 E-24, E-96" Voltage Voltage Range
roles) 0.03w - “ = Oe = ~ 20v 30V -55°C to 125°C
+200 - : i
R201) Oat +400 “= = “ i a av sv
+100 109 - 1MQ. 109 - 1MQ.
rto402) ow 2200 _ 1.09 - 9.760, 1.02MQ - 10M 78
ow #100 1.02kQ - 1MO 1.02kO - 1M 100v
RK73H1J #200 = 1.02MQ - 10MO 75V
(0603) +100 100 - 1k 109 - 1k
Ost 2200 = 1.00 - 9.760
#100 100 - 1MQ 100 - 1MaQ
ri0e0s) 0.25W +200 = 1.00 - 9.760 150V 200V
#400 = 7.02M0 - 70M
#100 100 - Ma 100 - 1Ma
See 0.25W 70°C 125°C 2200 = 1.00 - 9.769, 1.02M0 - 5.6M0
#400 = 5.62MQ - 10MQ -B5°C to +155°C
#100 100 - MQ 100 - 1MQ
a 0.5wW +200 = 1.00 - 9.760, 1.02MQ - 5.6MQ 200V 400V
#400 = 5.62MQ - 10MQ
#100 102 - 1MaQ 100 - 1MQ
Sugaaetald o75w #200 = 1.00 - 9.769, 1.02MQ - 5.6MO
(2010) #400 = 5.62MQ - 10MO

In [14]:
complex_prompt_answers(chain4, system_message2)

{ 'attribute': 'Supplier Name', 'value': 'Not found', 'note': 'No information about the supplier name is mentioned in the document' }
{ 'attribute': 'Product Type', 'value': 'Resistor', 'context': 'RK73H 2H, 3A and 3A2 are also still available (different “d” dimensions = 0.4 +0.2/-0.1mm)' }
{ 'attribute': 'Dimension', 'value': '0.25 inches (6.35 mm) x 0.125 inches (3.175 mm)', 'context': 'Dimensions for 0402 (1E) resistor are mentioned as 0.25 inches (6.35 mm) x 0.125 inches (3.175 mm)' }
{ 'attribute': 'Orientation', 'value': 'Not found', 'note': 'No information about orientation is mentioned in the document' }
{ 'attribute': 'Current Rating', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document' }
{ 'attribute': 'Voltage', 'value': '20V', 'context': 'The maximum applicable voltage is equal to the max. overload voltage' },
{ 'attribute': 'Voltage', 'value': '30V', 'context': 'The maximum applicable voltage is equal to the max. overload voltag

In [15]:
complex_prompt_answers(chain4, system_message3)

{'attribute': 'Supplier Name', 'value': 'Not found', 'note': 'No information about the supplier name is mentioned in the document'}
{'attribute': 'Product Type', 'value': 'Resistor', 'note': 'The document mentions RK73H 1F-1J, RK73H 2A-2E, RK73H W2H-W3A2, RK73H 1F-2A, RK73H 2B-W3A2 as product types, which are all resistors.'}
{'attribute': 'Dimension', 'value': [{'attribute': 'Length', 'value': '0.6 mm'}, {'attribute': 'Width', 'value': '0.3 mm'}, {'attribute': 'Height', 'value': '0.14 mm'}], 'note': 'Extracted from the dimensions and construction section for 0402(1E)'}
{'attribute': 'Orientation', 'value': 'Not found', 'note': 'No information about orientation is mentioned in the document'}
{'attribute': 'Current Rating', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document'}
{'attribute': 'Voltage', 'value': [{'attribute': 'Rated voltage', 'value': '20V', 'note': 'Extracted from the context: Rated voltage = Power rating x resistance value or

### PDF 5: IMX8MMIEC.pdf

In [58]:
pdf_name = "IMX8MMIEC.pdf" 

chain5 = process_pdf_and_ask(pdf_name, DOCUMENT_DIR, clear_up=True)

Created a chunk of size 1797, which is longer than the specified 500
Created a chunk of size 790, which is longer than the specified 500
Created a chunk of size 2312, which is longer than the specified 500
Created a chunk of size 744, which is longer than the specified 500
Created a chunk of size 1047, which is longer than the specified 500
Created a chunk of size 1741, which is longer than the specified 500
Created a chunk of size 984, which is longer than the specified 500
Created a chunk of size 949, which is longer than the specified 500
Created a chunk of size 1443, which is longer than the specified 500
Created a chunk of size 1583, which is longer than the specified 500
Created a chunk of size 597, which is longer than the specified 500
Created a chunk of size 1057, which is longer than the specified 500
Created a chunk of size 1282, which is longer than the specified 500
Created a chunk of size 1189, which is longer than the specified 500
Created a chunk of size 636, which is l

deleted ids:  ['fbfbd138-8f01-11ee-8d93-7e0641d3cee1', 'fbfbd192-8f01-11ee-8d93-7e0641d3cee1', 'fbfbd1a6-8f01-11ee-8d93-7e0641d3cee1']
Deleting folloing ids in the vector database:  ['fbfbd138-8f01-11ee-8d93-7e0641d3cee1', 'fbfbd192-8f01-11ee-8d93-7e0641d3cee1', 'fbfbd1a6-8f01-11ee-8d93-7e0641d3cee1']
['IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf', 'IMX8MMIEC.pdf',

In [68]:
complex_prompt_answers(chain5, system_message2)

{ 'attribute': 'Supplier Name', 'value': 'NXP Semiconductors', 'context': 'The document is from NXP Semiconductors' }
{ 'attribute': 'Product Type', 'value': 'Not found', 'note': 'No information about the product type is mentioned in the document' }
{ 'attribute': 'Dimension', 'value': '14 x 14 mm', 'context': 'Table 68. 14 x 14mm, 0.5 mm pitch ball map' }
{ 'attribute': 'Orientation', 'value': 'Not found', 'note': 'No information about orientation is mentioned in the document' }
{ 'attribute': 'Current Rating', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document' }
{ 'attribute': 'Voltage', 'value': 'Not found', 'note': 'No information about voltage is mentioned in the document' }
{ 'attribute': 'Clock Frequency (Low Speed)', 'value': '0 - 400 kHz', 'note': 'The document states that in Low-Speed mode, the card clock must be lower than 400 kHz.' },
{ 'attribute': 'Clock Frequency (SD/SDIO Full Speed/High Speed)', 'value': '0 - 25/50 MHz', 'no

In [69]:
complex_prompt_answers(chain5, system_message3)

{'attribute': 'Supplier Name', 'value': 'Not found', 'note': 'No information about the supplier name is mentioned in the document'}
{'attribute': 'Product Type', 'value': 'Not found', 'note': 'No information about the product type is mentioned in the document'}
{'attribute': 'Dimension', 'value': '14 x 14 mm', 'note': 'Extracted from the document as the package size'}
{'attribute': 'Orientation', 'value': 'Not found', 'note': 'No information about orientation is mentioned in the document'}
{'attribute': 'Current Rating', 'value': 'Not found', 'note': 'No information about current rating is mentioned in the document'}
{'attribute': 'Voltage', 'value': [{'attribute': 'High-level output voltage', 'value': '0.8xVpp - Vpp', 'note': 'Extracted from the "Electrical characteristics" section'}, {'attribute': 'Low-level output voltage', 'value': '0 - 0.2xVpp', 'note': 'Extracted from the "Electrical characteristics" section'}, {'attribute': 'High-level input voltage', 'value': '0.7xVpp - Vpp + 0