# Generate a list of synthetic question and answers using Multimodal indexes 

***This notebook works best with the `conda_python3` on the `ml.t3.large` instance***.

---

This notebook does as follows:

1. Iterates through each pdf file/page and generates a list of questions and answers to it

1. All question and answers are saved in a CSV and `JSON` file format

## Step 1. Setup

Install the required Python packages and import the relevant files.

In [35]:
import sys
!{sys.executable} -m pip install -r requirements.txt



In [36]:
# import necessary libraries to run this notebook
import os
import io
import ray
import sys
import json
import time
import yaml
import glob
import boto3
import base64
import logging
import requests
import botocore
import sagemaker
import opensearchpy
import numpy as np
import pandas as pd
import globals as g
from PIL import Image
from pathlib import Path
from typing import List, Dict
from litellm import completion ## support for text generation models on bedrock
from IPython.display import Image
from urllib.parse import urlparse
from botocore.auth import SigV4Auth
from pandas.core.series import Series
from sagemaker import get_execution_role
from botocore.awsrequest import AWSRequest
from utils import get_cfn_outputs, get_text_embedding, get_llm_response
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth

In [37]:
logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

In [38]:
if ray.is_initialized():
    ray.shutdown()
ray.init()

2024-06-03 13:07:11,829	INFO worker.py:1752 -- Started a local Ray instance.


0,1
Python version:,3.10.14
Ray version:,2.10.0


[36m(async_get_inference pid=6343)[0m [2024-06-03 13:07:16,775] p6343 {1731223211.py:4} INFO - row 1/10, model_id=anthropic.claude-3-sonnet-20240229-v1:0
[36m(async_get_inference pid=6343)[0m [2024-06-03 13:07:16,794] p6343 {531889130.py:28} INFO - Invoking bedrock/anthropic.claude-3-sonnet-20240229-v1:0......
[36m(async_get_inference pid=6343)[0m [2024-06-03 13:07:16,828] p6343 {credentials.py:1075} INFO - Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
[36m(async_get_inference pid=6343)[0m [92m13:07:16 - LiteLLM:INFO[0m: utils.py:1298 - [92m
[36m(async_get_inference pid=6343)[0m 
[36m(async_get_inference pid=6343)[0m POST Request Sent from LiteLLM:
[36m(async_get_inference pid=6343)[0m curl -X POST \
[36m(async_get_inference pid=6343)[0m https://bedrock-runtime.us-west-2.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/invoke \
[36m(async_get_inference pid=6343)[0m -H 'Content-Type: application/json' -H 'X-Amz-Date: 20240603T130716

In [39]:
# global constants
CONFIG_FILE_PATH = "config.yaml"
# read the config yaml file
fpath = CONFIG_FILE_PATH
with open(fpath, 'r') as yaml_in:
    config = yaml.safe_load(yaml_in)
logger.info(f"config read from {fpath} -> {json.dumps(config, indent=2)}")

[2024-06-03 13:07:13,256] p10461 {3725478428.py:7} INFO - config read from config.yaml -> {
  "app_name": "multi-modal-rag-bedrock",
  "aws": {
    "region": "us-west-2",
    "cfn_stack_name": "multi-modal-revised",
    "os_service": "aoss"
  },
  "pdf_dir_info": {
    "source_pdf_dir": "pdf_data",
    "pdf_img_path": "images",
    "pdf_txt_path": "text_files",
    "pdf_extracted_data": "pdf_extracted_data",
    "json_img_dir": "pdf_img_json_dir",
    "json_txt_dir": "pdf_text_json_dir",
    "bucket_prefix": "multimodal",
    "bucket_img_prefix": "img",
    "qna_dir": "question_answer_files",
    "image_format": "JPEG",
    "prompts": "prompt_templates"
  },
  "run_steps": {
    "1_data_prep_pdf_files.ipynb": true,
    "1_data_prep_slide_deck.ipynb": false,
    "2_data_ingestion.ipynb": true,
    "3_rag_inference.ipynb": true,
    "4_rag_evaluation.ipynb": true
  },
  "metrics_dir": {
    "dir_name": "metrics",
    "text_and_image_raw_content": "all_content_description.csv",
    "eval_

In [40]:
!pygmentize globals.py

[33m"""[39;49;00m
[33mGlobal variables used throughout the code.[39;49;00m
[33m"""[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mos[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mboto3[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36msagemaker[39;49;00m[37m[39;49;00m
[37m[39;49;00m
BUCKET_PREFIX: [36mstr[39;49;00m = [33m"[39;49;00m[33mmultimodal[39;49;00m[33m"[39;49;00m[37m[39;49;00m
BUCKET_EMB_PREFIX: [36mstr[39;49;00m = [33mf[39;49;00m[33m"[39;49;00m[33m{[39;49;00mBUCKET_PREFIX[33m}[39;49;00m[33m/osi-embeddings-json[39;49;00m[33m"[39;49;00m[37m[39;49;00m
BUCKET_IMG_PREFIX: [36mstr[39;49;00m = [33mf[39;49;00m[33m"[39;49;00m[33m{[39;49;00mBUCKET_PREFIX[33m}[39;49;00m[33m/img[39;49;00m[33m"[39;49;00m[37m[39;49;00m
BUCKET_PDF_IMG_PREFIX: [36mstr[39;49;00m = [33mf[39;49;00m[33m"[39;49;00m[33m{[39;49;00mBUCKET_PREFIX[33m}[39;49;00m[33m/pdf_img[39;49;00m[33m"[39;49;00m[37m[39;49;00m
BUC

### Get all text descriptions for `Image` only responses in a Dataframe

Here, we fetch responses from the image index, image index to get a question bank created that acts as a curated dataset

In [41]:
## Represents extracted all metric files
image_json_content = [pos_json for pos_json in os.listdir(config['pdf_dir_info']['json_img_dir']) if pos_json.endswith('.json')]
# Get relative file paths by joining directory path with each file name
image_json_files = [os.path.join(config['pdf_dir_info']['json_img_dir'], file) for file in image_json_content]
logger.info(f"there are {len(image_json_files)} image files in {config['pdf_dir_info']['json_img_dir']}")

[2024-06-03 13:07:14,080] p10461 {741134680.py:5} INFO - there are 49 image files in pdf_img_json_dir


In [42]:
# view the image only df
image_index_contents = []
for f in image_json_files:
    image_index_contents.append(json.loads(Path(f).read_text()))
img_index_df = pd.DataFrame(image_index_contents)
# img_index_df = img_index_df.rename(columns={'file_name': 'image_file_name', 'text': 'image_text', 'page_number': 'image_page_number'})
img_index_df.head(10)

Unnamed: 0,file_type,file_name,text,entities,page_number
0,.jpg,Boeing_rec_page_2.jpg,"Based on the image, here are the entities I ca...","Based on the image, here are the entities I ca...",2
1,.jpg,Intel_rec_page_5.jpg,"Based on the image, here are the entities I ca...","Based on the image, here are the entities I ca...",5
2,.jpg,Cisco_rec_page_2.jpg,"Based on the image, here is a list of the enti...","Based on the image, here is a list of the enti...",2
3,.jpg,Microsoft_rec_page_2.jpg,"Based on the image, here are the entities I ca...","Based on the image, here are the entities I ca...",2
4,.jpg,tesla_rec_page_2.jpg,"Based on the image, here are the relevant enti...","Based on the image, here are the relevant enti...",2
5,.jpg,Cisco_rec_page_5.jpg,"Based on the image, here are the relevant enti...","Based on the image, here are the relevant enti...",5
6,.jpg,Boeing_rec_page_1.jpg,"Based on the image, here is a list of the key ...","Based on the image, here is a list of the key ...",1
7,.jpg,Microsoft_rec_page_3.jpg,"Based on the image, here is a list of entities...","Based on the image, here is a list of entities...",3
8,.jpg,Amazon_rec_page_6.jpg,"Based on the image, here are the relevant enti...","Based on the image, here are the relevant enti...",6
9,.jpg,Intel_rec_page_7.jpg,"Based on the image, here is a list of relevant...","Based on the image, here is a list of relevant...",7


### Get all text descriptions for `Text` only responses in a Dataframe

In [43]:
# Now, we fetch responses from text only index responses
text_json_content = [pos_json for pos_json in os.listdir(config['pdf_dir_info']['json_txt_dir']) if pos_json.endswith('.json')]
# Get relative file paths by joining directory path with each file name
text_json_files = [os.path.join(config['pdf_dir_info']['json_txt_dir'], file) for file in text_json_content]
logger.info(f"there are {len(text_json_files)} image files in {config['pdf_dir_info']['json_txt_dir']}")

[2024-06-03 13:07:14,114] p10461 {1048578250.py:5} INFO - there are 49 image files in pdf_text_json_dir


In [44]:
# view the image only df
text_index_contents = []
for f in text_json_files:
    text_index_contents.append(json.loads(Path(f).read_text()))
txt_index_df = pd.DataFrame(text_index_contents)
# txt_index_df = txt_index_df.rename(columns={'file_name': 'txt_file_name', 'text': 'text', 'page_number': 'txt_page_number'})
txt_index_df.head(10)

Unnamed: 0,file_type,file_name,text,page_number,entities
0,.txt,AMD_rec_text_7,METHODOLOGY & DISCLAIMERSMETHODOLOGY & DISCLAI...,7,"METHODOLOGY, DISCLAIMERSMETHODOLOGY, DISCLAIME..."
1,.txt,APPLE_rec_text_4,APPLE INCAPPLE INCAPPLE INCAPPLE INCNASDAQ: AA...,4,"AAPL, AAPL AAPL, AAPL NotesAnalyst, NotesAnaly..."
2,.txt,Microsoft_rec_text_7,METHODOLOGY & DISCLAIMERSMETHODOLOGY & DISCLAI...,7,"METHODOLOGY, DISCLAIMERSMETHODOLOGY, DISCLAIME..."
3,.txt,Cisco_rec_text_6,METHODOLOGY & DISCLAIMERSMETHODOLOGY & DISCLAI...,6,"METHODOLOGY, DISCLAIMERSMETHODOLOGY, DISCLAIME..."
4,.txt,Microsoft_rec_text_2,MICROSOFT CORPMICROSOFT CORPMICROSOFT CORPMICR...,2,"MICROSOFT, CORPMICROSOFT, CORPMICROSOFT CORPMI..."
5,.txt,Amazon_rec_text_4,AMAZON.COM INCAMAZON.COM INCAMAZON.COM INCAMAZ...,4,"AMZN, AMZN AMZN, AMZN NotesAnalyst, NotesAnaly..."
6,.txt,AMD_rec_text_5,ADVANCED MICRO DEVICES INCADVANCED MICRO DEVIC...,5,"ADVANCED, MICRO, AMD, AMD AMD, AMD NotesAnalys..."
7,.txt,Microsoft_rec_text_1,MICROSOFT CORPMICROSOFT CORPMICROSOFT CORPMICR...,1,"MICROSOFT, CORPMICROSOFT, CORPMICROSOFT CORPMI..."
8,.txt,Cisco_rec_text_3,CISCO SYSTEMS INCCISCO SYSTEMS INCCISCO SYSTEM...,3,"CISCO, SYSTEMS, INCCISCO, INCCISCO INCCISCO, C..."
9,.txt,APPLE_rec_text_2,APPLE INCAPPLE INCAPPLE INCAPPLE INCNASDAQ: AA...,2,"AAPL, AAPL AAPL, AAPL NotesAnalyst, NotesAnaly..."


#### View all of the content from text AND image only indexes

In [45]:
merged_text_and_image_df = pd.concat([txt_index_df, img_index_df], ignore_index=True)
merged_text_and_image_df.reset_index(drop=True, inplace=True)
merged_text_and_image_df = merged_text_and_image_df.rename(columns={'text': 'content_description'})
merged_text_and_image_df = merged_text_and_image_df.sort_values(by=['file_type', 'file_name', 'page_number'])
merged_text_and_image_df.head(10)

Unnamed: 0,file_type,file_name,content_description,page_number,entities
65,.jpg,AMD_rec_page_1.jpg,"Based on the image, here are the relevant enti...",1,"Based on the image, here are the relevant enti..."
82,.jpg,AMD_rec_page_2.jpg,"Based on the image, here are the key entities ...",2,"Based on the image, here are the key entities ..."
80,.jpg,AMD_rec_page_3.jpg,"Based on the image, here are the relevant enti...",3,"Based on the image, here are the relevant enti..."
94,.jpg,AMD_rec_page_4.jpg,"Based on the image, here are the key entities ...",4,"Based on the image, here are the key entities ..."
88,.jpg,AMD_rec_page_5.jpg,"Based on the image, here are the relevant enti...",5,"Based on the image, here are the relevant enti..."
90,.jpg,AMD_rec_page_6.jpg,"Based on the image, here are the relevant enti...",6,"Based on the image, here are the relevant enti..."
97,.jpg,AMD_rec_page_7.jpg,"Based on the image, here are the relevant enti...",7,"Based on the image, here are the relevant enti..."
76,.jpg,APPLE_rec_page_1.jpg,"Based on the image, here's a list of the key e...",1,"Based on the image, here's a list of the key e..."
69,.jpg,APPLE_rec_page_2.jpg,"Based on the image, here are the key entities ...",2,"Based on the image, here are the key entities ..."
73,.jpg,APPLE_rec_page_3.jpg,"Based on the image, here is a list of entities...",3,"Based on the image, here is a list of entities..."


In [46]:
# convert the raw data from both text and image only index and save in a metrics dir
metrics_dir: str = config['metrics_dir']['dir_name']
os.makedirs(metrics_dir, exist_ok=True)
all_content_description_file: str = os.path.join(metrics_dir, config['metrics_dir']['text_and_image_raw_content'])
merged_text_and_image_df.to_csv(all_content_description_file, index=True)
logger.info(f"all intial content description on description from text and image only indexes are saved in: {all_content_description_file}")

[2024-06-03 13:07:14,204] p10461 {3039535643.py:6} INFO - all intial content description on description from text and image only indexes are saved in: metrics/all_content_description.csv


## Generate `Question-Answer` banks for each content described by (Image index and Text index) and generate a `5-10 Pairs` of Question and Answers based on the `Content Description`

In [47]:
def llm_QA_generator(model_id: str, prompt: str):
    # represents the service name
    service_name: str = "bedrock"
    # represents creating the bedrock model to invoke the litellm api for response for titan, llama and claude
    bedrock_model: str = f"{service_name}/{model_id}"
    # represents the current aws region
    aws_region = boto3.Session().region_name 
    # initialize the response dict
    ret = dict(exception = None,
               prompt = prompt,
               completion = None,
               file_name = None,
               # initializing to 0 since none type throws an error later, this is used to calculate price per token input/output on ODT pricing
               completion_token_count = 0,
               # initializing to 0 since none type throws an error later
               prompt_token_count=0,
               input_token_price = None, 
               output_token_pricing = None,
               model_id = model_id)
    body = ret['prompt']
    os.environ["AWS_REGION_NAME"] = aws_region
    parameters = config['inference_parameters_for_qna_generation']
    temperature = parameters.get('temperature', 0.1)
    caching = parameters.get('caching', False)
    max_tokens = parameters.get("max_tokens", 500)
    try:
        # Represents calling the litellm completion/messaging api utilizing the completion/embeddings API
        logger.info(f"Invoking {bedrock_model}......")
        response = completion(model=bedrock_model,
                              messages=[{ "content": body,"role": "user"}],
                              temperature=temperature,
                              max_tokens=max_tokens,
                              caching=caching)
        # iterate through the entire model response
        for idx, choice in enumerate(response.choices):
            # extract the message and the message's content from litellm
            if choice.message and choice.message.content:
                # extract the response from the dict
                ret["completion"] = choice.message.content.strip()
        # Extract number of input and completion prompt tokens (this is the same structure for embeddings and text generation models on Amazon Bedrock)
        ret['QnA_prompt_token_count'] = response.usage.prompt_tokens
        ret['QnA_completion_token_count'] = response.usage.completion_tokens
    except Exception as e:
        logger.error(f"Exception occurred during invoking {model_id}, exception={e}")
        ret['exception'] = e
    logger.info(f"completion: {ret['completion']}")
    return ret

In [48]:
config['QnA_generator_prompt']

'"Human: Based on the text description provided in <text_desc></text_desc> tags, generate a list of five to 10 questions. Only refer to the context in the <text_desc> tags, and do not provide questions that are not related to the context provided. Your response should be in a JSON format containing two elements: \'question\' and \'answer\'. The question should be directly related to the context provided in the <text_desc> tags and the answer should be the answer to that question from the <text_desc> context. Do not make up an answer.If you do not know the answer to the question just say that you don\'t know the answer. Don\'t try to make up an answer or a question. Refer to the context below:\n<text_desc>\n{context}\n</text_desc>\nAssistant: Sure, here are a list of Questions and Answers generated from the context in JSON format:"\n'

In [49]:
def get_inference(i: int, row: Dict, total: int) -> Dict:
    # save all the responses from the model in a dictionary
    resp: Dict = {}
    logger.info(f"row {i}/{total}, model_id={config['bedrock_model_info']['claude_sonnet_model_id']}")
    model_id = config['bedrock_model_info']['claude_sonnet_model_id']
    # create the payload for model inference
    prompt = config['QnA_generator_prompt'].format(context=row['content_description'])
    # generate the chapter title based on the given chapter in the prompt 
    resp = llm_QA_generator(model_id, prompt)
    resp['file_type'] = row['file_type']
    resp['page_number'] = row['page_number']
    resp['file_type'] = row['file_type']
    resp['file_name'] = row['file_name']
    resp['content_description'] = row['content_description']
    resp['page_number'] = row['page_number']
    # calculate the input and output token price for all of the calls
    resp['QnA_input_token_price'] = (resp['prompt_token_count']/1000) * config['bedrock_model_info']['claude_input_tokens_pricing']
    logger.info(f"The price for {resp['prompt_token_count']} tokens for {model_id} for filename={row['file_name']} is {resp['QnA_input_token_price']}")
    resp['QnA_output_token_price'] = (resp['QnA_prompt_token_count']/1000) * config['bedrock_model_info']['claude_output_tokens_pricing']
    logger.info(f"The price for {resp['QnA_completion_token_count']} tokens for {model_id} for filename={row['file_name']} is {resp['QnA_output_token_price']}")
    dir_path = os.path.join(config['pdf_dir_info']['qna_dir'], row['file_name'], model_id.replace(":", "-"))
    os.makedirs(dir_path, exist_ok=True)
    fpath = os.path.join(dir_path, f"question_answers_{row['file_name']}.json")
    logger.info(f"writing response={resp} to {fpath}")
    Path(fpath).write_text(json.dumps(resp, default=str, indent=2))
    logger.info(f"response {i}: {resp}")
    return resp

In [50]:
@ray.remote
def async_get_inference(i: int, row: Dict, total: int) -> Dict:
    logging.basicConfig(format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.INFO)
    logger = logging.getLogger(__name__)
    return get_inference(i, row, total)

In [51]:
# merged_text_and_image_df = pd.DataFrame(merged_text_and_image_df)
# merged_text_and_image_json = merged_text_and_image_df.to_json(orient='records')
merged_text_and_image_df = json.loads(merged_text_and_image_df.to_json(orient='records'))
n: int = config['parallel_inference_count']
resp_list: List = []
st = time.perf_counter()
logger.info(f"------ Generating QnA bank using {config['bedrock_model_info']['claude_sonnet_model_id']} -----")
list_of_lists = [merged_text_and_image_df[i * n:(i + 1) * n] for i in range((len(merged_text_and_image_df) + n - 1) // n )]
logger.info(f"split input list of size {len(merged_text_and_image_df)} into {len(merged_text_and_image_df)} lists")
for idx, l in enumerate(list_of_lists):
    logger.info(f"getting inference for list {idx+1}/{len(list_of_lists)}, size of list={len(l)} ")
    resp_list.extend(ray.get([async_get_inference.remote(i+1, e, len(l)) for i, e in enumerate(l)]))
elapsed_time = time.perf_counter() - st
logger.info(f"------ model={config['bedrock_model_info']['claude_sonnet_model_id']} completed in {elapsed_time} ------ ")

[2024-06-03 13:07:14,282] p10461 {3167352693.py:7} INFO - ------ Generating QnA bank using anthropic.claude-3-sonnet-20240229-v1:0 -----
[2024-06-03 13:07:14,284] p10461 {3167352693.py:9} INFO - split input list of size 98 into 98 lists
[2024-06-03 13:07:14,285] p10461 {3167352693.py:11} INFO - getting inference for list 1/10, size of list=10 
[2024-06-03 13:07:55,943] p10461 {3167352693.py:11} INFO - getting inference for list 2/10, size of list=10 
[2024-06-03 13:08:22,535] p10461 {3167352693.py:11} INFO - getting inference for list 3/10, size of list=10 
[2024-06-03 13:08:53,086] p10461 {3167352693.py:11} INFO - getting inference for list 4/10, size of list=10 
[2024-06-03 13:09:25,285] p10461 {3167352693.py:11} INFO - getting inference for list 5/10, size of list=10 
[2024-06-03 13:09:57,729] p10461 {3167352693.py:11} INFO - getting inference for list 6/10, size of list=10 
[2024-06-03 13:10:32,620] p10461 {3167352693.py:11} INFO - getting inference for list 7/10, size of list=10 


### Save all the `question-answer` files to a dataframe for further analytics/evaluations

In [52]:
## Represents extracted all metric files
qna_fpath = os.path.join(config['pdf_dir_info']['qna_dir'], "**", "*", "*.json")
qna_files = glob.glob(qna_fpath, recursive=True)
logger.info(f"there are {len(qna_files)} files in {qna_fpath}")

[2024-06-03 13:12:51,006] p10461 {1787632173.py:4} INFO - there are 98 files in question_answer_files/**/*/*.json


In [53]:
qna = []
for f in qna_files:
    qna.append(json.loads(Path(f).read_text()))
df = pd.DataFrame(qna)
df = df.drop(columns=['exception', 'prompt', 'completion_token_count', 'prompt_token_count', 'input_token_price', 'output_token_pricing', 
                     'QnA_input_token_price', 'QnA_output_token_price', 'file_type'])
df = df.sort_values(by=['page_number', 'file_name'])
logger.info(f"all metrics data is read into a dataframe of shape {df.shape}")
count = df.shape[0]
metrics_dir: str = config['metrics_dir']['dir_name']
qna_csv_fpath = os.path.join(metrics_dir, config['metrics_dir']['QnA_bank'])
df.to_csv(qna_csv_fpath, index=False)
df.head(20)

[2024-06-03 13:12:51,043] p10461 {248952128.py:8} INFO - all metrics data is read into a dataframe of shape (98, 7)


Unnamed: 0,completion,file_name,model_id,QnA_prompt_token_count,QnA_completion_token_count,page_number,content_description
46,"{\n ""question"": ""What is the name of the comp...",AMD_rec_page_1.jpg,anthropic.claude-3-sonnet-20240229-v1:0,816,602,1,"Based on the image, here are the relevant enti..."
49,"{\n ""question"": ""What is the Argus rating for...",AMD_rec_text_1,anthropic.claude-3-sonnet-20240229-v1:0,2836,566,1,ADVANCED MICRO DEVICES INCADVANCED MICRO DEVIC...
2,"{\n ""question"": ""What is the Twelve Month Rat...",APPLE_rec_page_1.jpg,anthropic.claude-3-sonnet-20240229-v1:0,1280,639,1,"Based on the image, here's a list of the key e..."
22,"{\n ""questions"": [\n {\n ""question"": ...",APPLE_rec_text_1,anthropic.claude-3-sonnet-20240229-v1:0,2738,746,1,APPLE INCAPPLE INCAPPLE INCAPPLE INCNASDAQ: AA...
88,"[\n {\n ""question"": ""What is the Twelve Mo...",Amazon_rec_page_1.jpg,anthropic.claude-3-sonnet-20240229-v1:0,1111,571,1,"Based on the image, here is a list of entities..."
64,"{\n ""question"": ""What is Amazon.com Inc?"",\n ...",Amazon_rec_text_1,anthropic.claude-3-sonnet-20240229-v1:0,2759,560,1,AMAZON.COM INCAMAZON.COM INCAMAZON.COM INCAMAZ...
55,"{\n ""question"": ""What is Argus' Twelve Month ...",Boeing_rec_page_1.jpg,anthropic.claude-3-sonnet-20240229-v1:0,1012,557,1,"Based on the image, here is a list of the key ..."
58,"{\n ""question"": ""What is the Argus rating for...",Boeing_rec_text_1,anthropic.claude-3-sonnet-20240229-v1:0,2732,558,1,BOEING COBOEING COBOEING COBOEING CONYSE: BA ...
73,"{\n ""question"": ""What is Argus' twelve month ...",Cisco_rec_page_1.jpg,anthropic.claude-3-sonnet-20240229-v1:0,816,470,1,Here is a list of entities present in the imag...
37,"{\n ""question"": ""What is the Argus rating for...",Cisco_rec_text_1,anthropic.claude-3-sonnet-20240229-v1:0,2733,536,1,CISCO SYSTEMS INCCISCO SYSTEMS INCCISCO SYSTEM...


## Clean Up
