Skip to content

dqii/lantern-python

Repository files navigation

lantern-python

Lantern support for Python.

It is based on pgvector's Python client.

This library adds support for Django, SQLAlchemy, SQLModel, and Peewee. Psycopg 3, Psycopg 2, and asyncpg work with Lantern out of the box, so you do not need to install this library to use them.

Build Status

Installation

Run:

pip install lantern-django

And follow the instructions for your database library:

Django

Create a migration to enable the extension(s)

from django.db import migrations
from lantern_django import LanternExtension, LanternExtrasExtension

class Migration(migrations.Migration):
    operations = [
        LanternExtension(),
        LanternExtrasExtension(),
    ]

Add an embedding field to your model

from django.db import models
from django.contrib.postgres.fields import ArrayField

class Book(models.Model):
    book_embedding = ArrayField(models.FloatField(), size=128)

Insert a vector

book = Book(book_embedding=[1, 2, 3])

Find nearest rows with L2Distance, CosineDistance, or HammingDistance

from lantern_django import L2Distance

Book.objects.order_by(L2Distance('book_embedding', [3, 1, 2]))[:5]

Add a vector index

from django.db import models
from lantern_django import HnswIndex

class Book(models.Model):
    class Meta:
        indexes = [
            HnswIndex(
                name='book_embedding_index',
                fields=['book_embedding'],
                m=2,
                ef_construction=10,
                ef=4,
                dim=3,
                opclasses=['dist_l2sq_ops']
            )
        ]

Generate one-off embeddings (note: this requires the Lantern Extras extension to be enabled in addition to Lantern)

from django.db import models
from lantern_django import TextEmbedding

results = Book.objects.annotate(
    text_embedding=TextEmbedding('BAAI/bge-small-en', 'My text input')
)
for result in results:
    print(result.text_embedding)

TODO: SQLAlchemy

Enable the extension(s)

session.execute(text('CREATE EXTENSION IF NOT EXISTS lantern'))
session.execute(text('CREATE EXTENSION IF NOT EXISTS lantern_extras'))

Add a vector column

from pgvector.sqlalchemy import Vector

class Item(Base):
    embedding = mapped_column(Vector(3))

Insert a vector

item = Item(embedding=[1, 2, 3])
session.add(item)
session.commit()

Get the nearest neighbors to a vector

session.scalars(select(Item).order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5))

Also supports max_inner_product and cosine_distance

Get the distance

session.scalars(select(Item.embedding.l2_distance([3, 1, 2])))

Get items within a certain distance

session.scalars(select(Item).filter(Item.embedding.l2_distance([3, 1, 2]) < 5))

Average vectors

from sqlalchemy.sql import func

session.scalars(select(func.avg(Item.embedding))).first()

Also supports sum

Add an approximate index

index = Index('my_index', Item.embedding,
    postgresql_using='hnsw',
    postgresql_with={'m': 16, 'ef_construction': 64},
    postgresql_ops={'embedding': 'vector_l2_ops'}
)

index.create(engine)

Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance

TODO: SQLModel

Enable the extension

session.exec(text('CREATE EXTENSION IF NOT EXISTS vector'))

Add a vector column

from pgvector.sqlalchemy import Vector
from sqlalchemy import Column

class Item(SQLModel, table=True):
    embedding: List[float] = Field(sa_column=Column(Vector(3)))

Insert a vector

item = Item(embedding=[1, 2, 3])
session.add(item)
session.commit()

Get the nearest neighbors to a vector

session.exec(select(Item).order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5))

Also supports max_inner_product and cosine_distance

Psycopg 3

Enable the extension

conn.execute('CREATE EXTENSION IF NOT EXISTS lantern')

Create a table

conn.execute('CREATE TABLE items (id bigserial PRIMARY KEY, embedding REAL[3])')

Insert a vector

embedding = [1, 2, 3]
conn.execute('INSERT INTO items (embedding) VALUES (%s)', (embedding,))

Get the nearest neighbors to a vector

conn.execute('SELECT * FROM items ORDER BY embedding <-> %s LIMIT 5', (embedding,)).fetchall()

Psycopg 2

Enable the extension

cur = conn.cursor()
cur.execute('CREATE EXTENSION IF NOT EXISTS lantern')
cur.execute('CREATE EXTENSION IF NOT EXISTS lantern_extras')

Create a table

cur.execute('CREATE TABLE items (id bigserial PRIMARY KEY, embedding REAL[3])')

Insert a vector

embedding = [1, 2, 3]
cur.execute('INSERT INTO items (embedding) VALUES (%s)', (embedding,))

Get the nearest neighbors to a vector

cur.execute('SELECT * FROM items ORDER BY embedding <-> %s LIMIT 5', (embedding,))
cur.fetchall()

asyncpg

Enable the extension

await conn.execute('CREATE EXTENSION IF NOT EXISTS lantern')
await conn.execute('CREATE EXTENSION IF NOT EXISTS lantern_extras')

Create a table

await conn.execute('CREATE TABLE books (id SERIAL PRIMARY KEY, embedding REAL[3])')

Insert a vector

embedding = [1, 2, 3]
await conn.execute('INSERT INTO books (embedding) VALUES ($1)', embedding)

Get the nearest neighbors to a vector

await conn.fetch('SELECT * FROM books ORDER BY embedding <-> $1 LIMIT 5', embedding)

TODO: Peewee

Add a vector column

from pgvector.peewee import VectorField

class Item(BaseModel):
    embedding = VectorField(dimensions=3)

Insert a vector

item = Item.create(embedding=[1, 2, 3])

Get the nearest neighbors to a vector

Item.select().order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5)

Also supports max_inner_product and cosine_distance

Get the distance

Item.select(Item.embedding.l2_distance([3, 1, 2]).alias('distance'))

Get items within a certain distance

Item.select().where(Item.embedding.l2_distance([3, 1, 2]) < 5)

Average vectors

from peewee import fn

Item.select(fn.avg(Item.embedding)).scalar()

Also supports sum

Add an approximate index

Item.add_index('embedding vector_l2_ops', using='hnsw')

Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance

About

No description, website, or topics provided.

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors

Languages