In [None]:
import pandas as pd

# Read the blog posts from the CSV file. The 'embedding' column holds text, so a
# converter turns every cell of that column back into a Python object on load.
# NOTE(review): pd.eval evaluates the cell text as an expression, which is only
# acceptable for a CSV you trust. Consider ast.literal_eval or json.loads if the
# file can come from somewhere else.
csv_options = {
    'delimiter': ',',
    'quotechar': '"',
    'converters': {'embedding': pd.eval},
}
posts = pd.read_csv('blog-with-embeddings.csv', **csv_options)

# Show the first rows as a quick sanity check of the parsed data.
posts.head()

In [None]:
import redis
from redis.commands.search.field import (TextField, VectorField)
from redis.commands.search.indexDefinition import (IndexDefinition, IndexType)

# Create a Redis client communicating with the local Dragonfly instance.
client = redis.Redis()

# Create an index 'posts', using the TEXT type for 'title', and the VECTOR type for 'embedding'.
# Only HASH keys whose name starts with 'post:' are covered by the index.
try:
    client.ft("posts").create_index(
        fields=[TextField("title"), VectorField("embedding", "FLAT", {"DIM": "1536"})],
        definition=IndexDefinition(prefix=["post:"], index_type=IndexType.HASH),
    )
except redis.ResponseError as err:
    # Re-running this cell against a server that still holds the index must not
    # abort the notebook; every other server error is still raised.
    # If the schema above was changed, drop the old index first with
    # client.ft("posts").dropindex() so that the new definition takes effect.
    if "Index already exists" not in str(err):
        raise

In [None]:
import numpy as np

# Store blog post data as HASH values in Dragonfly.
# Since the index is created for all keys with the 'post:' prefix, these documents will be indexed.
# The HSET commands are queued on a non-transactional pipeline and sent together,
# which avoids one blocking network round trip per post.
with client.pipeline(transaction=False) as pipe:
    for i, post in posts.iterrows():
        # The vector field is written as the raw bytes of a float32 array.
        embedding_bytes = np.array(post['embedding']).astype(np.float32).tobytes()
        # Every column of the row becomes a hash field; 'embedding' is replaced by its byte form.
        pipe.hset(f"post:{i}", mapping={**post, 'embedding': embedding_bytes})
    pipe.execute()

In [None]:
import os

from redis.commands.search.query import Query
import openai

# How to get an OpenAI API key: https://platform.openai.com/docs/api-reference/introduction
# NOTE: Do not share your API key with anyone, do not commit it to git, do not hardcode it in your code.
# The key is therefore read from the OPENAI_API_KEY environment variable instead of
# being written into the notebook.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")
openai.api_key = api_key

EMBEDDING_MODEL = "text-embedding-ada-002"
# Number of nearest neighbours to retrieve; also used as the page size so that
# every KNN hit is returned in a single page.
TOP_K = 3

# Create a vector for a query string using the OpenAI API.
query = "How to switch from a multi node redis setup to Dragonfly"
query_vec = openai.embeddings.create(input=query, model=EMBEDDING_MODEL).data[0].embedding

# Compose a search query for Dragonfly.
# '$query_vector' is a query parameter filled in from `params` below; the distance
# of each hit is exposed under the alias 'vector_score'.
query_expr = (
    Query(f"*=>[KNN {TOP_K} @embedding $query_vector AS vector_score]")
    .return_fields("title", "vector_score")
    .paging(0, TOP_K)
)
# The query vector is sent as float32 bytes, the same encoding used when the posts were stored.
params = {"query_vector": np.array(query_vec).astype(dtype=np.float32).tobytes()}

# Search by query.
docs = client.ft("posts").search(query_expr, params).docs
for rank, doc in enumerate(docs, start=1):
    print(rank, doc.vector_score, doc.title)

In [None]:
# Ask the server for the details of the 'posts' index; as the last expression
# of the cell, the result is displayed as the cell output.
posts_index = client.ft("posts")
posts_index.info()