In [None]:
# Install the packages listed in requirements.txt with the interpreter that runs
# this kernel (`sys.executable`), so they land in the same environment the
# following cells import from.
import sys
!{sys.executable} -m pip install -r requirements.txt


In [None]:
import os
import io
import sys
import json
import glob
import faiss
import boto3
import botocore
import base64
import logging
import requests
import numpy as np
import pandas as pd
from PIL import Image
from globals import *
from typing import List
from botocore.auth import SigV4Auth
from faiss import write_index, read_index
from langchain.llms.bedrock import Bedrock
from botocore.awsrequest import AWSRequest
from faiss.swigfaiss_avx2 import IndexFlatIP
from pandas.core.series import Series
from urllib.parse import urlparse
from sagemaker.huggingface.model import HuggingFaceModel, HuggingFacePredictor
from sagemaker import get_execution_role
import numpy as np
import numpy
from typing import Dict
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth


# Root logging configuration: INFO and above, each record prefixed with
# timestamp, process id, source file:line and level.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] p%(process)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
)

# Logger used by the cells and helper functions below.
logger = logging.getLogger(__name__)


In [None]:
# Execute globals.py inside this notebook's own namespace (`-i`), so the names
# it defines become notebook globals.
# NOTE(review): later cells use FMC_URL, FMC_MODEL_ID, ACCEPT_ENCODING,
# CONTENT_ENCODING, IMAGE_DIR and BUCKET_NAME without defining them here;
# presumably they come from globals.py - confirm.
%run -i globals.py

In [None]:
# Connection settings for the OpenSearch Serverless ('aoss') collection.
region = 'us-east-1'
service = 'aoss'
host = 'g87vqnxql0c42iqjaenh.us-east-1.aoss.amazonaws.com' # cluster endpoint, for example: my-test-domain.us-east-1.aoss.amazonaws.com

# S3 client (default credentials) and Bedrock runtime client pointed at FMC_URL.
s3 = boto3.client('s3')
bedrock = boto3.client(service_name="bedrock-runtime", region_name=region, endpoint_url=FMC_URL)

# Requests to OpenSearch are SigV4-signed with the credentials of the
# "SMNotebook" profile.
session = boto3.Session(profile_name="SMNotebook")
credentials = session.get_credentials()
auth = AWSV4SignerAuth(credentials, region, service)

# HTTPS client for the vector index, with certificate verification enabled.
os_client = OpenSearch(
    hosts=[{'host': host, 'port': 443}],
    http_auth=auth,
    connection_class=RequestsHttpConnection,
    use_ssl=True,
    verify_certs=True,
    pool_maxsize=20,
)


In [None]:
# Read the SageMaker endpoint name stored in ./endpointname.txt.
# Surrounding whitespace (typically a trailing newline) is stripped: left in
# place it would become part of the endpoint name and no endpoint would match.
with open("./endpointname.txt", 'r') as endpoint:
    endpoint_name = endpoint.read().strip()

# Fail here, with a clear message, rather than at the first predict() call.
if not endpoint_name:
    raise ValueError("./endpointname.txt is empty; expected the name of a deployed SageMaker endpoint")

# Client-side handle used to invoke the endpoint.
predictor = HuggingFacePredictor(endpoint_name)

In [None]:
def download_img_from_s3(s3_img_path:str) -> str:
    """Download an image from S3 into IMAGE_DIR and return the local file path.

    Args:
        s3_img_path: Location of the image. For an ``s3://bucket/key`` URI the
            bucket is taken from the URI itself; for any other form (for
            example a bare ``/key`` path) the module-level ``BUCKET_NAME`` is
            used. The object key is the path component without its leading "/".

    Returns:
        Path of the downloaded file: ``IMAGE_DIR`` joined with the base name
        of ``s3_img_path``.

    Raises:
        Exception: whatever ``s3.download_fileobj`` or the local file
            operations raise; a partially written local file is removed
            before the error propagates.
    """
    s3_path = urlparse(s3_img_path, allow_fragments=False)

    # Honour the bucket named in an s3:// URI instead of always assuming BUCKET_NAME.
    if s3_path.scheme == "s3" and s3_path.netloc:
        bucket = s3_path.netloc
    else:
        bucket = BUCKET_NAME
    img_key = s3_path.path[1:]  # drop the leading "/" of the URL path

    # open(..., 'wb') does not create missing directories.
    if IMAGE_DIR:
        os.makedirs(IMAGE_DIR, exist_ok=True)
    local_img_path = os.path.join(IMAGE_DIR, os.path.basename(s3_img_path))

    try:
        with open(local_img_path, 'wb') as local_img:
            s3.download_fileobj(bucket, img_key, local_img)
    except Exception:
        # Do not leave an empty/truncated image behind for later cells to pick up.
        if os.path.exists(local_img_path):
            os.remove(local_img_path)
        raise

    return local_img_path

In [None]:
def get_embeddings(text: str) -> "numpy.ndarray | None":
    """Embed ``text`` with the Bedrock model identified by ``FMC_MODEL_ID``.

    Args:
        text: The text to embed; it is sent as the ``inputText`` field of the
            request body.

    Returns:
        A float32 array holding the response's ``embedding`` list as its only
        row, or ``None`` when the request fails or the response contains no
        ``embedding`` field. Failures are logged, not raised, so callers must
        check for ``None``.
    """
    # Only text is sent by this helper.
    body = json.dumps({"inputText": text})

    try:
        response = bedrock.invoke_model(
            body=body,
            modelId=FMC_MODEL_ID,
            accept=ACCEPT_ENCODING,
            contentType=CONTENT_ENCODING,
        )
        response_body = json.loads(response.get("body").read())
        vector = response_body.get("embedding")
        if vector is None:
            # Name the real problem instead of surfacing a numpy conversion error.
            raise ValueError("model response has no 'embedding' field")
        # Wrap in a list so the result is a single-row 2-D array.
        return np.asarray([vector], dtype=np.float32)
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(f"exception while encoding text={text}, exception={e}")
        return None



In [None]:
# Natural-language question used both for retrieval and as the prompt for the image model.
search_text = "What is the third step in the AI/ML fly wheel"

# Embedding of the text query. The variable name is kept because later cells read it.
image_embedding = get_embeddings(search_text)

# get_embeddings() logs and returns None on failure; stop here with a clear
# message instead of letting the next cell fail on `image_embedding[0]`.
if image_embedding is None:
    raise RuntimeError(f"could not compute an embedding for search_text={search_text!r}; see the log output above")

In [None]:
# k-NN query: fetch the single nearest neighbour of the query embedding in the
# "vector_embedding" field.
query = {
    "size": 1,
    "query": {
        "knn": {
            "vector_embedding": {
                "vector": image_embedding[0].tolist(),
                "k": 1
            }
        }
    }
}
try:
    image_based_search_response = os_client.search(body=query, index="llava-slides-index")
except Exception:
    # Re-raise after logging: swallowing the error here would leave
    # `image_based_search_response` undefined (or stale from an earlier run)
    # for the cells that follow.
    logger.exception("k-NN search against index 'llava-slides-index' failed")
    raise

In [None]:
# Take the image path stored with the best-matching document.
hits = image_based_search_response.get('hits', {}).get('hits') or []
if not hits:
    # An empty or missing hit list would otherwise fail with an opaque IndexError/TypeError.
    raise LookupError("the k-NN search returned no hits, so there is no image to analyze")

s3_img_path = hits[0].get('_source', {}).get('image_path')
print(s3_img_path)

In [None]:
# Sample slide, used only when the search result carried no image path.
# Previously this value unconditionally replaced the path retrieved from the
# index, so the retrieval step had no effect on the answer.
FALLBACK_S3_IMG_PATH = "s3://sagemaker-us-east-1-205088436647/multimodal/img/CMP301_TrainDeploy_E1_20230607_SPEdited_image_12.jpg"
if not s3_img_path:
    s3_img_path = FALLBACK_S3_IMG_PATH

print(f"going to analyze image \"{s3_img_path}\"")

# Request payload for the endpoint: the image location, the question to
# answer about it, and the sampling temperature.
data = {
    "image" : s3_img_path,
    "question" : search_text,
    "temperature" : 0.1,
}

output = predictor.predict(data)
print(f"Image=\"{s3_img_path}\"\nDescription: {output}\n\n")