# Retrieval Augmented Generation (RAG) inference

***This notebook works best with the `conda_python3` kernel on an `ml.t3.xlarge` instance***.

---


## Step 1: Setup

Install the required Python packages and import the relevant modules.

In [15]:
# Install the dependencies listed in requirements.txt using the pip that
# belongs to the interpreter running this kernel (sys.executable), so the
# packages land in the environment the notebook actually imports from.
import sys
!{sys.executable} -m pip install -r requirements.txt

Collecting git+https://github.com/haotian-liu/LLaVA.git@v1.1.1 (from -r requirements.txt (line 2))
  Cloning https://github.com/haotian-liu/LLaVA.git (to revision v1.1.1) to /tmp/pip-req-build-c3q0x3hc
  Running command git clone --filter=blob:none --quiet https://github.com/haotian-liu/LLaVA.git /tmp/pip-req-build-c3q0x3hc
  Running command git checkout -q 1619889c712e347be1cb4f78ec66e7cf414ac1a6
  Resolved https://github.com/haotian-liu/LLaVA.git to commit 1619889c712e347be1cb4f78ec66e7cf414ac1a6
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone


In [16]:
import os
import io
import sys
import json
import glob
import uuid
import boto3
import base64
import logging
import requests
import botocore
import sagemaker
import numpy as np
import pandas as pd
import globals as g
from PIL import Image
from pathlib import Path
from typing import List, Dict
from utils import get_bucket_name
from IPython.display import Image
from utils import get_cfn_outputs
from urllib.parse import urlparse
from botocore.auth import SigV4Auth
from pandas.core.series import Series
from sagemaker import get_execution_role
from botocore.awsrequest import AWSRequest
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth

In [17]:
# Print globals.py with syntax highlighting so the configuration values
# referenced as `g.<NAME>` in later cells can be inspected inline.
!pygmentize globals.py

[33m"""[39;49;00m
[33mGlobal variables used throughout the code.[39;49;00m
[33m"""[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mos[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mboto3[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36msagemaker[39;49;00m[37m[39;49;00m
[37m[39;49;00m
[37m# model deployment[39;49;00m[37m[39;49;00m
HF_MODEL_ID: [36mstr[39;49;00m = [33m"[39;49;00m[33mliuhaotian/llava-v1.5-13b[39;49;00m[33m"[39;49;00m[37m[39;49;00m
LLM_ENGINE: [36mstr[39;49;00m = [33m"[39;49;00m[33mdeepspeed[39;49;00m[33m"[39;49;00m[37m[39;49;00m
[37m[39;49;00m
[37m# HF_TASK: str = "question-answering"[39;49;00m[37m[39;49;00m
[37m# TRANSFORMERS_VERSION: str = "4.28.1"[39;49;00m[37m[39;49;00m
[37m# PYTORCH_VERSION: str = "2.0.0"[39;49;00m[37m[39;49;00m
[37m# PYTHON_VERSION: str = "py310"[39;49;00m[37m[39;49;00m
[37m[39;49;00m
[37m# S3 bucket strucutre, we use the default sagemaker bucket in the curren

In [18]:
# Configure root logging at INFO level; every record carries a timestamp,
# the process id, the source file and line, and the level name.
LOG_FORMAT = '[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

# Logger named after the current module, used by the cells below.
logger = logging.getLogger(__name__)

In [19]:
# Resolve the bucket name through the project helper, keyed by the stack
# name configured in globals.py.
cfn_stack_name = g.CFN_STACK_NAME
bucket_name: str = get_bucket_name(cfn_stack_name)

# Low-level S3 client used to list the image objects.
s3 = boto3.client(service_name='s3')

## Step 2: Inference with LLaVA 13B

In [20]:
# Load the name of the deployed endpoint from the file referenced by
# g.ENDPOINT_FILENAME. Surrounding whitespace (for example a trailing
# newline left by an editor or `echo`) is stripped: it is not valid in a
# SageMaker endpoint name and would make invoke_endpoint fail validation.
endpoint_name = Path(g.ENDPOINT_FILENAME).read_text().strip()
logger.info(f"llava13b endpoint {endpoint_name}")

[2024-02-08 22:57:24,927] p14877 {953591467.py:2} INFO - llava13b endpoint llava-djl-2024-02-08-22-47-21-804-12xl-endpoint


In [21]:
# One SageMaker session for the notebook; both boto3 clients below are
# taken from it so they share its configuration.
sagemaker_session = sagemaker.Session()
sm_client, sm_runtime_client = (
    sagemaker_session.sagemaker_client,
    # Runtime client: the one whose invoke_endpoint is called for inference.
    sagemaker_session.sagemaker_runtime_client,
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [22]:
# Text instruction sent alongside every image in the inference request.
prompt: str = "Describe the image in detail"

In [None]:
# Describe every image stored under g.BUCKET_IMG_PREFIX: list the objects
# page by page and, for each one, send its S3 URI together with the text
# prompt to the endpoint, then print the raw response and the decoded body.
paginator = s3.get_paginator('list_objects_v2')
pages = paginator.paginate(Bucket=bucket_name, Prefix=g.BUCKET_IMG_PREFIX)
for page in pages:
    # list_objects_v2 omits the 'Contents' key entirely when a page holds no
    # objects (e.g. nothing under the prefix), so fall back to an empty list
    # instead of raising KeyError.
    for obj in page.get('Contents', []):
        obj_key = obj["Key"]
        # NOTE(review): takes the third '/'-separated segment of the key up to
        # its first '.'; presumably keys look like '<a>/<b>/<file>.<ext>' -
        # confirm against g.BUCKET_IMG_PREFIX. Kept unchanged because later
        # cells may rely on this variable.
        obj_name = obj_key.split("/")[2].split(".")[0]

        # Build the S3 URI explicitly. os.path.join is meant for filesystem
        # paths: it uses the OS separator and drops the bucket altogether if
        # the key happens to start with '/'.
        image_s3_uri = f"s3://{bucket_name}/{obj_key}"

        payload = json.dumps(
            {
                "text": [prompt],
                "input_image_s3": image_s3_uri,
            }
        ).encode('utf-8')
        response = sm_runtime_client.invoke_endpoint(
            EndpointName=endpoint_name,
            ContentType="application/json",
            Body=payload
        )
        print(response)
        # The body is a stream and can be consumed only once, so read it a
        # single time.
        print(response['Body'].read().decode("utf-8"))
        

{'ResponseMetadata': {'RequestId': '3faa49b1-db97-40f5-8c16-65c6cd54e007', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '3faa49b1-db97-40f5-8c16-65c6cd54e007', 'x-amzn-invoked-production-variant': 'variant1', 'date': 'Thu, 08 Feb 2024 22:57:40 GMT', 'content-type': 'application/json', 'content-length': '276', 'connection': 'keep-alive'}, 'RetryAttempts': 0}, 'ContentType': 'application/json', 'InvokedProductionVariant': 'variant1', 'Body': <botocore.response.StreamingBody object at 0x7f1a1e122680>}
The image features a blue and pink gradient background with the words "Summit 2023" written in white. The text is positioned towards the center of the image, with the word "Summit" on top and "2023" below it. The overall design gives a sense of a modern and stylish event.</s>
{'ResponseMetadata': {'RequestId': 'be3ae591-2c94-4412-a40f-c2d0baa91597', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'be3ae591-2c94-4412-a40f-c2d0baa91597', 'x-amzn-invoked-production-vari