In [1]:
import os
# Base directory: the input PDFs are read from here, and the same directory is
# passed as the image output directory and later scanned for .jpg files.
path = "./ml/input/"

In [2]:
# Names of the PDF files (appended to `path`) that are partitioned below.
file_name = ["enrollment-api.pdf", "pet-api.pdf"]

In [3]:
from unstructured.partition.pdf import partition_pdf

# Partition every PDF listed in `file_name` with identical settings and collect
# all resulting elements in a single list (in the order of `file_name`).
# Looping avoids one copy-pasted call per file, so adding a PDF to `file_name`
# is enough to have it processed.
raw_pdf_elements = []
for pdf_name in file_name:
    raw_pdf_elements.extend(
        partition_pdf(
            filename=path + pdf_name,
            extract_images_in_pdf=True,
            infer_table_structure=True,
            chunking_strategy="by_title",
            max_characters=4000,
            new_after_n_chars=3800,
            combine_text_under_n_chars=2000,
            # The same directory is scanned for .jpg files when images are indexed.
            image_output_dir_path=path,
        )
    )

Some weights of the model checkpoint at microsoft/table-transformer-structure-recognition were not used when initializing TableTransformerForObjectDetection: ['model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
# Sort the partitioned elements into table strings and text-chunk strings,
# based on the fully qualified class name that appears in the element's type.
tables = []
texts = []
for element in raw_pdf_elements:
    type_repr = str(type(element))
    if "unstructured.documents.elements.Table" in type_repr:
        tables.append(str(element))
        continue
    if "unstructured.documents.elements.CompositeElement" in type_repr:
        texts.append(str(element))

# Report how many tables and text chunks were found.
print(len(tables))
print(len(texts))

1
6


In [6]:
import os
import uuid

import chromadb
import numpy as np
from langchain.vectorstores import Chroma
from langchain_experimental.open_clip import OpenCLIPEmbeddings
from PIL import Image as _PILImage

# Create chroma
vectorstore = Chroma(
    collection_name="metlife_rag_clip", embedding_function=OpenCLIPEmbeddings()
)

# Get image URIs with .jpg extension only
image_uris = sorted(
    os.path.join(path, image_name)
    for image_name in os.listdir(path)
    if image_name.endswith(".jpg")
)

# Add images (skip the call when no image was found in `path`)
if image_uris:
    vectorstore.add_images(uris=image_uris)

# Add documents: index the extracted tables together with the text chunks.
# The prompt tells the model that its context contains "Text and tables",
# so the table strings must be retrievable as well.
documents = texts + tables
if documents:
    vectorstore.add_texts(texts=documents)

# Make retriever
retriever = vectorstore.as_retriever()

In [7]:
import base64
import io
from io import BytesIO

import numpy as np
from PIL import Image


def resize_base64_image(base64_string, size=(128, 128)):
    """
    Decode a Base64 image, resize it, and return it re-encoded as Base64.

    Args:
    base64_string (str): Base64 string of the original image.
    size (tuple): Target size passed to the resize call, as (width, height).

    Returns:
    str: Base64 string of the resized image, saved in the source image's format.
    """
    # Base64 text -> raw bytes -> PIL image
    source = Image.open(io.BytesIO(base64.b64decode(base64_string)))

    # Resize with the LANCZOS filter and write the result into an in-memory
    # buffer, keeping the format detected on the source image.
    out = io.BytesIO()
    source.resize(size, Image.LANCZOS).save(out, format=source.format)

    # Raw bytes -> Base64 text
    return base64.b64encode(out.getvalue()).decode("utf-8")


def is_base64(s):
    """
    Check if a string is Base64 encoded.

    The check is a round trip: the string is decoded strictly and re-encoded,
    and must come back byte-for-byte identical.

    Args:
    s: Candidate value. Anything that is not a non-empty ``str`` is rejected.

    Returns:
    bool: True if ``s`` is a non-empty, canonically encoded Base64 string.

    Note:
    This is a heuristic. Short plain text made only of Base64 alphabet
    characters with a length that is a multiple of four (e.g. ``"abcd"``)
    still passes.
    """
    # An empty string round-trips trivially but carries no payload; treating
    # it as Base64 would route empty documents to the image-resizing path.
    if not isinstance(s, str) or not s:
        return False
    try:
        # validate=True rejects characters outside the Base64 alphabet instead
        # of silently discarding them before decoding.
        return base64.b64encode(base64.b64decode(s, validate=True)) == s.encode()
    except ValueError:
        # binascii.Error (bad padding / invalid characters) and the Unicode
        # errors raised for non-ASCII input are all ValueError subclasses.
        return False


def split_image_text_types(docs):
    """
    Separate retrieved documents into Base64 images and plain texts.

    Args:
    docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
    dict: ``{"images": [...], "texts": [...]}`` where images are Base64
    strings resized to (250, 250) and texts are the untouched contents.
    """
    images, text = [], []
    for doc in docs:
        content = doc.page_content  # Extract Document contents
        if not is_base64(content):
            text.append(content)
            continue
        # Resize image to avoid OAI server error; result is a base64 encoded str
        images.append(resize_base64_image(content, size=(250, 250)))
    return {"images": images, "texts": text}

In [8]:
from operator import itemgetter

from langchain.chat_models import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough,RunnableParallel


def prompt_func(dict):
    """
    Build the chat messages for the multimodal question-answering prompt.

    Args:
    dict: Mapping with a "question" entry and a "context" entry; the context
        holds "texts" (list of strings) and "images" (list of Base64 strings).

    Returns:
    list: A single-element list containing one HumanMessage whose content is
    the optional image part followed by the text part.
    """
    context = dict["context"]
    format_texts = "\n".join(context["texts"])

    parts = []
    if context["images"]:
        # Only the first retrieved image is attached to the message.
        first_image = context["images"][0]
        parts.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{first_image}"},
            }
        )

    prompt_text = f"""Answer the question based only on the following context, which can include text, tables, and the below image:
                Question: {dict["question"]}
                Text and tables:
                {format_texts}
                """
    parts.append({"type": "text", "text": prompt_text})

    return [HumanMessage(content=parts)]

In [9]:
# Chat model used to answer from the assembled prompt; the API key is read
# from the OPENAI_API_KEY environment variable.
model = ChatOpenAI(
    temperature=0,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    model="gpt-4-vision-preview",
    max_tokens=2048,
)

# RAG pipeline
# Step 1: retrieve documents and split them into images and texts.
retrieve_and_split = retriever | RunnableLambda(split_image_text_types)
# Step 2: build the prompt, call the model, and parse the reply to a string.
answer_from_context = prompt_func | model | StrOutputParser()

# The chain returns both the model's answer ("response") and the retrieved
# context ("context") for the question passed in.
chain = (
    {
        "context": retrieve_and_split,
        "question": RunnablePassthrough(),
    }
    | RunnableParallel(
        {
            "response": answer_from_context,
            "context": itemgetter("context"),
        }
    )
)



In [10]:
from IPython.display import HTML, display


def plt_img_base64(img_base64):
    """
    Display a Base64-encoded image in the notebook output.

    Args:
    img_base64 (str): Base64 image data, embedded in a JPEG data URI.
    """
    # Wrap the Base64 data in an <img> tag and render that HTML.
    display(HTML(f'<img src="data:image/jpeg;base64,{img_base64}" />'))

In [11]:
response = chain.invoke("Give me a list of all Enrollment API URLs")
print(response["response"])
# Show each retrieved image in turn (use the loop variable, not index 0).
for image in response['context']['images']:
    plt_img_base64(image)

Based on the provided context, the list of all Enrollment API URLs are:

1. Test Environment URL for LIMRA Enrollment Delivery API:
   - https://qa.api.metlife.com/metlife/qa/gvwb/enrollmentServices/api/v1/enrollments

2. Production Environment URL for LIMRA Enrollment Delivery API:
   - https://api.metlife.com/metlife/production/gvwb/enrollmentServices/api/v1/enrollments

3. Test Environment URL for Security/Token API:
   - https://qa.api.metlife.com/metlife/qa/authorization/token

4. Production Environment URL for Security/Token API:
   - https://api.metlife.com/metlife/production/authorization/token

(Note: The context provided also includes URLs for the MetLife Pet Insurance API, but since the question specifically asks for Enrollment API URLs, those have not been included in the list.)


In [12]:
response = chain.invoke("Give me a list of all Pet API URLs")
print(response["response"])
# Show each retrieved image in turn (use the loop variable, not index 0).
for image in response['context']['images']:
    plt_img_base64(image)

Based on the provided context, the list of all Pet API URLs are as follows:

1. Catalog API:
   - Test Environment URL: https://qa.api.metlife.com/metlife/qa/channel/catalogsservices/tpe/v1/tenants/US/views/products/catalogs?q=product.typeCode==1100
   - Production Environment URL: https://api.metlife.com/metlife/production/channel/catalogsservices/tpe/v1/tenants/US/views/products/catalogs?q=product.typeCode==1100

2. Create Application API:
   - Test Environment URL: https://qa.api.metlife.com/metlife/qa/channel/applicationsservices/tpe/v1/tenants/US/products/applications/create
   - Production Environment URL: https://api.metlife.com/metlife/production/channel/applicationsservices/tpe/v1/tenants/US/products/applications/create

3. Generate Quote API:
   - Test Environment URL: https://qa.api.metlife.com/metlife/qa/channel/quoteservices/tpe/v1/tenants/US/products/quotes/generate
   - Production Environment URL: https://api.metlife.com/metlife/production/channel/quoteservices/tpe/v1/te

In [13]:
response = chain.invoke("What should be my first step to integrate with Metlife?")
print(response["response"])
# Show each retrieved image in turn (use the loop variable, not index 0).
for image in response['context']['images']:
    plt_img_base64(image)

Based on the provided context, the first step to integrate with Metlife would be to make a call to the security/token API to obtain a temporary token.


In [14]:
response = chain.invoke("How do I get access to MetLife APIs")
print(response["response"])
# Show each retrieved image in turn (use the loop variable, not index 0).
for image in response['context']['images']:
    plt_img_base64(image)

To get access to MetLife APIs, you need to follow a two-step process:

1. Make a call to the security/token API to obtain a temporary token.
2. Use the obtained token to make a call to the functional API.

For detailed steps on calling the Token API and the Functional API, refer to the API Integration Guide provided by MetLife.
