## Scenario: Semantic Search for Images

### Amazon Bedrock Multimodal Embedding Model used for vectorization
### FAISS in-memory vector database used to store and search vectors

In [None]:
## Install Packages - do it once and then restart the kernel

!pip -q install langchain 
!pip install -q botocore --upgrade
!pip install -q boto3 --upgrade
!pip install -q faiss-cpu

In [None]:
## Import Libraries

import boto3, json
import faiss 
import numpy as np
import base64 
import os

In [None]:
## Initiate Bedrock Client
## modelId, accept and contentType are passed to invoke_model() by getvector();
## region is only used here to create the client.

region = "us-east-1"
modelId = 'amazon.titan-embed-image-v1' 
accept = 'application/json'
contentType = 'application/json'

# Runtime client shared by every embedding request made in this notebook
bedrockclient = boto3.client('bedrock-runtime',region_name=region)

In [None]:
## Common Reusable Functions

## Read an image file and return its contents as a base64 string
def getimagestring(imagefile):
    """Return the bytes of ``imagefile`` base64-encoded as text.

    Args:
        imagefile: Path of the file to read; it is opened in binary mode.

    Returns:
        str: The base64 encoding of the file's bytes, decoded as UTF-8.
    """
    with open(imagefile, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf8')

## Generate a vector embedding for an image using the Bedrock model
def getvector(imagestring):
    """Request an embedding for a base64-encoded image from Bedrock.

    Relies on the module-level ``bedrockclient``, ``modelId``, ``accept`` and
    ``contentType`` values.

    Args:
        imagestring: Base64 text of the image; sent as the "inputImage" field
            of the JSON request body.

    Returns:
        The value stored under "embedding" in the JSON response body.
    """
    request_payload = json.dumps({"inputImage": imagestring})
    reply = bedrockclient.invoke_model(
        body=request_payload,
        modelId=modelId,
        accept=accept,
        contentType=contentType,
    )
    # The response body is a stream: read it once, then parse the JSON.
    raw_body = reply.get('body').read()
    return json.loads(raw_body)["embedding"]

## Create a FAISS index (1024 dimensions unless told otherwise)
def getfaissindex(vector_dimension=1024):
    """Create an empty flat L2 FAISS index.

    Args:
        vector_dimension: Length of every vector the index will hold. It must
            equal the length of the embeddings that are added and searched.
            Defaults to 1024, the value this notebook previously hard-coded.

    Returns:
        A new ``faiss.IndexFlatL2`` with no vectors in it.
    """
    return faiss.IndexFlatL2(vector_dimension)

## Add a vector to the FAISS index
def addvectortofaissindex(index, vector):
    """Append a single embedding to ``index``.

    Args:
        index: Object exposing ``add(rows)``, e.g. the index returned by
            ``getfaissindex()``.
        vector: One embedding as a flat sequence of numbers.

    Returns:
        None. ``index`` is modified in place.
    """
    # FAISS stores float32 data, while a plain list of Python floats would
    # become float64; convert explicitly and wrap as one row of shape (1, d).
    row = np.asarray([vector], dtype=np.float32)
    index.add(row)

## Search the FAISS index for the nearest neighbours of an image vector
def searchfaissindex(index, imagevector, k=3):
    """Return the index positions of the ``k`` nearest stored vectors.

    Args:
        index: Object exposing ``search(queries, k)``, e.g. the index returned
            by ``getfaissindex()``.
        imagevector: Query embedding as a flat sequence of numbers.
        k: Number of neighbours to request. Defaults to 3, the value this
            notebook previously hard-coded.

    Returns:
        The second element of the ``index.search`` result: the neighbour
        positions, one row per query (a single row here). The distances
        returned alongside them are discarded.
    """
    # FAISS expects float32 queries shaped (n_queries, d).
    query = np.asarray([imagevector], dtype=np.float32)
    _distances, neighbor_ids = index.search(query, k=k)
    return neighbor_ids

In [None]:
## Create the FAISS index that the cells below add vectors to and search

myindex =  getfaissindex()

In [None]:
## Loop over the **dbimages** folder, vectorize each .jpg image and add it to the FAISS index
## For each image added to the index, record its file name in the **imagearray** list,
## so that imagearray[n] names the n-th vector added to the index

path = "./dbimages"
# Empty the index first: without this, re-running the cell would keep the
# vectors from the previous run while imagearray starts over, and index
# positions would no longer line up with imagearray.
myindex.reset()
imagearray = []
for image in os.listdir(path):
    if image.endswith(".jpg"):
        myimage = os.path.join(path, image)
        imgstr = getimagestring(myimage)
        myvector = getvector(imgstr)
        # Add and append together so both stay in the same order.
        addvectortofaissindex(myindex, myvector)
        imagearray.append(image)

In [None]:
## Check number of vectors added to the FAISS index
## (the indexing cell adds one vector per name it appends to imagearray)

myindex.ntotal

In [None]:
## Semantic search for a particular image in the FAISS index using the image's vector
## The query image goes through the same getimagestring() -> getvector() steps
## as the indexed images; searchresult is read by the next cell.

searchimage = "./testimages/test3.jpg"
searchimagestr =  getimagestring(searchimage)
searchimagevector = getvector(searchimagestr)
searchresult = searchfaissindex(myindex,searchimagevector)

In [None]:
## Loop over the search result to get the names of the matching images,
## using the **imagearray** list to check if similar images are returned

for imageindex in searchresult[0]:
    # FAISS pads the result with -1 when the index holds fewer vectors than
    # were requested; skip those so imagearray[-1] is not printed as a match.
    if imageindex < 0:
        continue
    print(imagearray[imageindex])