# Run embedding search in Postgres


In [1]:
# Install dependencies (uncomment to run; %pip installs into the active kernel's environment)
# %pip install sqlalchemy psycopg2 pandas

# Start the docker container via
# make run

In [2]:
import pandas as pd
from sqlalchemy import create_engine, text
# Shared SQLAlchemy engine for every query in this notebook.
# Connects as user "postgres" to the "postgres" database on localhost:5432 with SSL disabled.
engine = create_engine(
    'postgresql://postgres@localhost:5432/postgres?sslmode=disable'
)

def execute(query: str):
    """Run one SQL statement on the notebook's engine inside its own transaction.

    Args:
        query: The SQL to run. A plain string is wrapped in ``sqlalchemy.text``;
            an already-built SQLAlchemy executable is passed through unchanged.
            Note that ``text()`` interprets ``:name`` as a bind parameter, so a
            literal colon followed by a name must be escaped as ``\\:``.

    Returns:
        The result object produced by ``Connection.execute``. The connection is
        released before this function returns, so use ``pd.read_sql`` when the
        rows themselves are needed.
    """
    # Connection.execute() no longer accepts a raw str in SQLAlchemy 2.0
    # (and warns about it in 1.4), so wrap plain strings in text().
    statement = text(query) if isinstance(query, str) else query

    # engine.begin() opens a connection and a transaction together: it commits
    # when the block exits normally and rolls back if an exception is raised.
    with engine.begin() as conn:
        return conn.execute(statement)


In [3]:
# Peek at the images table: image_id, image reference and the raw embedding vector.
pd.read_sql(sql="SELECT * FROM images", con=engine)

Unnamed: 0,image_id,image,embedding
0,1,(http://farm2.staticflickr.com/1129/4726871278...,"[0.07276332, 0.57534903, 0.40776607, 0.2994129..."
1,2,(http://farm4.staticflickr.com/3726/9457732891...,"[0.26861557, 0.58947176, 0.30805334, 0.5611011..."


# Show all models available

In [4]:
# List the models currently registered in ml.models.
pd.read_sql(sql="SELECT * FROM ml.models", con=engine)

Unnamed: 0,name,flavor,model_type,uri,options
0,ssd,pytorch,ssd,,{}
1,embedding,pytorch,features,,{}


In [5]:
# Train a PCA model from the `embedding` column of the `images` table.
# NOTE(review): the two leading 'pca' arguments are presumably the model name
# and the model type -- confirm against the ml.train() signature.
execute(
    "SELECT ml.train('pca', 'pca', 'images', 'embedding')"
)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fb008855ae0>

In [6]:
# Re-list ml.models to check that the newly trained model was registered.
pd.read_sql(sql="SELECT * FROM ml.models", con=engine)

Unnamed: 0,name,flavor,model_type,uri,options
0,ssd,pytorch,ssd,,{}
1,embedding,pytorch,features,,{}
2,pca,sklearn,pca,/tmp/models/pca/c631b1c0-048b-45ce-af82-b61f35...,


In [7]:
# Enable the `vector` extension and add a 2-dimensional vector column to `images`.
# Both statements use IF NOT EXISTS so the cell can be re-run (e.g. after
# Restart & Run All) without failing on an already-existing column.
execute("CREATE EXTENSION IF NOT EXISTS vector")
execute("ALTER TABLE images ADD COLUMN IF NOT EXISTS idx vector(2)")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7faf9c5fcd90>

In [8]:
# Populate the vector column with the PCA-reduced embeddings first: an IVFFlat
# index derives its list centroids from the rows present at build time, so it
# should be created only after the column holds data.
execute("UPDATE images SET idx = ml.pca(embedding);")

# Build the vector embedding index (L2 distance). The index is named and created
# with IF NOT EXISTS so re-running the cell does not add a duplicate index.
execute(
    "CREATE INDEX IF NOT EXISTS images_idx_ivfflat_l2 "
    "ON images USING ivfflat (idx vector_l2_ops);"
)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7faf9c5fdcf0>

In [9]:
# Search based on the embedding: return the image nearest to the query vector.
# `<->` is pgvector's L2-distance operator, matching the `vector_l2_ops` operator
# class of the index on `idx`; the cosine operator `<=>` cannot use that index.

pd.read_sql("SELECT image FROM images ORDER BY idx <-> '[1, 2]' limit 1;", con=engine)

Unnamed: 0,image
0,(http://farm4.staticflickr.com/3726/9457732891...
