# IGDB Embeddings pgvector Demo - Inference & Search with Gradio

In [1]:
!pip install "psycopg[binary]" gradio --quiet

In [2]:
import sagemaker
import boto3
import json
import pandas as pd
import psycopg
from sagemaker.huggingface.model import HuggingFacePredictor
import gradio as gr

In [3]:
# Secrets Manager client (used further down to fetch the database secret)
secretsmanager = boto3.client(service_name="secretsmanager")

# SageMaker session (used further down for the tag lookup and the predictor)
sess = sagemaker.Session()

## Retrieve all variables from the Notebook Instance's tags

In [4]:
# Get this Notebook Instance's ARN from the instance metadata file.
# Parsed with the json module so the cell does not depend on the jq CLI.
with open('/opt/ml/metadata/resource-metadata.json') as metadata_file:
    NOTEBOOK_ARN = json.load(metadata_file)['ResourceArn']

# Get the tags of this Notebook Instance
tags = sess.sagemaker_client.list_tags(ResourceArn=NOTEBOOK_ARN)['Tags']

# Index the tags by key so each variable is a plain lookup
tag_values = {tag['Key']: tag['Value'] for tag in tags}

# Fail with the name of the missing tag instead of an anonymous IndexError
missing_tags = [key for key in ('VAR_DB_SECRET_ARN', 'VAR_MODEL_ENDPOINT') if key not in tag_values]
if missing_tags:
    raise KeyError(f"Notebook Instance {NOTEBOOK_ARN} is missing required tag(s): {', '.join(missing_tags)}")

DB_SECRET_ARN = tag_values['VAR_DB_SECRET_ARN']
MODEL_ENDPOINT = tag_values['VAR_MODEL_ENDPOINT']

## Inference with Model Endpoint

In [5]:
# Get database credentials from Secrets Manager.
# Reuses the `secretsmanager` client created at the top of the notebook.
db_secret = secretsmanager.get_secret_value(
    SecretId=DB_SECRET_ARN
)

# The secret payload is a JSON document; never display it, it contains the password.
db_secret_string = json.loads(db_secret['SecretString'])
db_host = db_secret_string['host']
db_port = db_secret_string['port']
db_user = db_secret_string['username']
db_pass = db_secret_string['password']

In [6]:
# Attach a predictor to the already-deployed endpoint named in MODEL_ENDPOINT
predictor = HuggingFacePredictor(
    endpoint_name=MODEL_ENDPOINT,
    sagemaker_session=sess,
)

In [7]:
# Create a simple GUI frontend with Gradio
def formatter_image(hash):
    """Render an IGDB image hash as an HTML ``<img>`` tag.

    Args:
        hash: Image identifier placed into the IGDB ``t_screenshot_med`` image URL.
            The parameter name shadows the builtin ``hash``; it is kept so
            existing callers are unaffected.

    Returns:
        str: An ``<img>`` element whose ``src`` is the ``.jpg`` URL for the hash.
    """
    from html import escape  # local import keeps this cell self-contained

    # Escape the value so quotes or angle brackets cannot break out of the attribute
    safe_hash = escape(str(hash), quote=True)
    return f'<img src="https://images.igdb.com/igdb/image/upload/t_screenshot_med/{safe_hash}.jpg">'

def formatter_url(url):
    """Render a URL as an HTML link labelled "Link to IGDB".

    Args:
        url: Target address of the link.

    Returns:
        str: An ``<a>`` element that opens ``url`` in a new tab.
    """
    from html import escape  # local import keeps this cell self-contained

    # The href must be quoted and escaped: an unquoted attribute ends at the first
    # space or '>' in the URL, and a raw quote could inject extra attributes.
    safe_url = escape(str(url), quote=True)
    return f'<a href="{safe_url}" target="_blank" rel="noopener noreferrer">Link to IGDB</a>'

def query(inp):
    """Search the ``igdb`` table for the games closest to a free-text query.

    The text is sent to the model endpoint to obtain an embedding, the database
    is asked for the 5 rows whose ``description_embeddings`` are nearest to it
    (pgvector ``<->`` operator), and the rows are rendered as an HTML table.

    Args:
        inp: Query text entered in the UI.

    Returns:
        str: HTML markup. A table of the matches (header only when nothing
        matched), or a short prompt when ``inp`` is blank.
    """
    from html import escape  # local import keeps this cell self-contained

    # Nothing to embed: skip the endpoint and database round-trips
    if not inp or not inp.strip():
        return "<p>Please enter a query.</p>"

    # Change the question to embedding
    prediction = predictor.predict(data={"inputs": inp})
    vector = prediction['vectors']

    # Query
    with psycopg.connect(host=db_host, user=db_user, password=db_pass, port=db_port, connect_timeout=10, autocommit=True) as conn:
        with conn.cursor() as cur:
            # The embedding is passed as-is and cast server-side with ::vector.
            # An earlier attempt that passed np.array(vector) failed.
            # Ref: https://github.com/pgvector/pgvector-python/issues/4#issuecomment-1474930998
            cur.execute("""SELECT igdb_id, name, summary, description, url, artwork_hash, screenshot_hash
                             FROM igdb 
                            ORDER BY description_embeddings <-> %s::vector limit 5;""", 
                        (vector,))

            responses = cur.fetchall()

    # Passing the column names to the constructor also works for an empty result
    # set; assigning 7 names to an empty frame afterwards raises ValueError.
    column_names = ["IGDB ID", "Name", "Summary", "Description", "IGDB Page", "Artwork", "Screenshot"]
    result = pd.DataFrame(responses, columns=column_names)

    def _escape_text(value):
        """Escape a database text value for safe inclusion in the HTML table."""
        return escape(str(value))

    # escape=False is required so the <a>/<img> markup from the formatters is kept;
    # the free-text columns are therefore escaped individually.
    table_html = result.to_html(escape=False, formatters={
        "Name": _escape_text,
        "Summary": _escape_text,
        "Description": _escape_text,
        "IGDB Page": formatter_url,
        "Artwork": formatter_image,
        "Screenshot": formatter_image,
    })
    return table_html

with gr.Blocks() as demo:
    # Input: free-text query box with one clickable example
    query_box = gr.Textbox(label="Query")
    gr.Examples(inputs=query_box, examples=[["Zelda: Tears of the Kingdom"]])

    # Trigger and output area
    submit_btn = gr.Button("Submit")
    results_panel = gr.HTML()

    # On click, run query() on the textbox content and show the returned HTML
    submit_btn.click(query, query_box, results_panel, api_name="query")

demo.launch()

Running on local URL:  http://127.0.0.1:7860
Sagemaker notebooks may require sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Running on public URL: https://e54cb7fa83a24a0e84.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


