In [16]:
import redis

# Basic connection to Redis Stack running locally
client = redis.Redis(host='127.0.0.1', port=6379, decode_responses=True)

# Test the connection
try:
    response = client.ping()
    print("Connection to Redis successful!")
    print("Redis PING response:", response)
except redis.exceptions.ConnectionError as e:
    print(f"Connection to Redis failed: {e}")

client.set("foo", "bar")
client.get("foo")


Connection to Redis successful!
Redis PING response: True


'bar'

In [5]:
import json


with open('sections.json', 'r') as file:
    sections = json.load(file)  # Ensure this is valid JSON

pipeline = client.pipeline()
for i, section in enumerate(sections, start=1):
    redis_key = f"section:{i:05}"
    
    # Use Path.root_path() instead of "$" or "."
    pipeline.json().set(redis_key, "$", section)

try:
    res = pipeline.execute()
    print(f"Inserted {len(res)} documents successfully!")
except Exception as e:
    print(f"Error: {e}")





Inserted 1541 documents successfully!


In [6]:
import numpy as np
from openai import OpenAI
import os
from dotenv import load_dotenv


load_dotenv(dotenv_path="c:/Users/benja/startup_projects/civgen/.env")
openai_client = OpenAI(
  api_key=os.getenv("OPENAI_API_KEY")
)

# Retrieve keys from Redis
keys = sorted(client.keys("section:*"))
content = client.json().mget(keys, "$.content")

# Flatten list and filter invalid entries

content = [item for sublist in content for item in sublist]  # Flatten
content = [text for text in content if isinstance(text, str) and text.strip()]  # Filter non-strings and empty strings

if not content:
    raise ValueError("No valid content available for generating embeddings.")

# Call OpenAI's embedding API
response = openai_client.embeddings.create(
    input=content,
    model="text-embedding-3-large"
)

# Extract embeddings using correct attribute access
embeddings = [item.embedding for item in response.data]

# Convert embeddings to float32 and list format
embeddings_array = np.array(embeddings, dtype=np.float32).tolist()

VECTOR_DIMENSION = len(embeddings_array[0]) if embeddings_array else 0
print(f"Generated {len(embeddings_array)} embeddings with dimension {VECTOR_DIMENSION}")


Generated 1530 embeddings with dimension 3072


In [7]:
pipeline = client.pipeline()
for key, embedding in zip(keys, embeddings):
  #note that its named section embeddings and not content embeddings
  pipeline.json().set(key, "$.section_embeddings", embedding)
pipeline.execute()


[True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,

In [8]:
res = client.json().get("section:00002")
print(res)


{'pdf': '46.2.1.pdf', 'section': '46.2-341.', 'content': '1 et seq.) and a driver privilege card issued\npursuant to', 'section_embeddings': [0.015789968892931938, -0.020682981237769127, -0.0015857360558584332, -0.01252251584082842, -0.03989560902118683, 0.003841300029307604, -0.02811643667519093, 0.019196288660168648, -0.02917836047708988, 0.03992828354239464, 0.0035860303323715925, 0.008025682531297207, 0.01160762831568718, -0.02314990758895874, -0.032756220549345016, 0.03688954934477806, -0.02481631003320217, 0.015920666977763176, 0.006257173605263233, 0.021826589480042458, -0.007972586899995804, -0.0008597486885264516, -0.035517219454050064, 0.019735420122742653, -0.000513858103659004, -0.0049011800438165665, -0.01932698674499989, 0.018902217969298363, -0.011967048048973083, -0.013854002580046654, -0.007617251016199589, 0.02051960863173008, -0.010104600340127943, -0.021516181528568268, 0.034242913126945496, -0.0025036863517016172, 0.006244920659810305, 0.024865321815013885, -0.0793

In [9]:
import redis
from redis.commands.search.field import (
    TextField,
    VectorField,
)

from redis.commands.search.indexDefinition import IndexDefinition, IndexType

schema = (
  TextField("$.pdf", no_stem=True, as_name="pdf"),
  TextField("$.section", no_stem=True, as_name="section"),
  TextField("$.content", no_stem=True, as_name="content"),
  VectorField(
    "$.section_embeddings",
    "FLAT",
    {
      "TYPE": "FLOAT32",
      "DIM": VECTOR_DIMENSION,
      "DISTANCE_METRIC": "COSINE",
    },
    as_name="vector"
  )
)

definition = IndexDefinition(prefix=["section:"], index_type=IndexType.JSON)
res = client.ft("idx:section_vss").create_index(fields=schema, definition=definition)


In [10]:
info = client.ft("idx:section_vss").info()
num_docs = info["num_docs"]
indexing_failures = info["hash_indexing_failures"]
print(f"{num_docs} documents indexed with {indexing_failures} failures")





1541 documents indexed with 0 failures


In [15]:
import numpy as np
from redis.commands.search.query import Query

# 1. Encode ONE query
query_text = "How do I register my vehicle in Virginia?"

query_vector = np.array(
    openai_client.embeddings.create(
        input=query_text,
        model="text-embedding-3-large"
    ).data[0].embedding, dtype=np.float32
)

# 2. Define query
query = (
    Query('(*)=>[KNN 10 @vector $query_vector AS vector_score]')
    .sort_by('vector_score')
    .return_fields('vector_score', 'pdf', 'section', 'content')
    .dialect(2)
)

# 3. Execute search
result = client.ft('idx:section_vss').search(
    query,
    {'query_vector': query_vector.tobytes()}  # Convert NumPy array to bytes
)

# 4. Process results
for doc in result.docs:
    print(f"Score: {doc.vector_score}", doc.pdf, doc.section, doc.content)


Score: 0.406598091125 46.2.6.pdf 46.2-644.3. Acquisition of all-terrain vehicle or off-road motorcycle by dealer
Any dealer licensed under § 46.2-1508 who acquires an all-terrain vehicle or off-road motorcycle
for resale shall be exempt from the titling requirements of this title.
  
Any dealer transferring an all-terrain vehicle or off-road motorcycle titled under this title shall
assign the title to the new owner or, in the case of a new all-terrain vehicle or off-road
motorcycle, assign the certificate of origin.
  
2006, c. 896;2015, c. 615.
    
Article 2. Titling Vehicles
Score: 0.466176390648 46.2.6.pdf 46.2-2000. b. Thirty-three dollars for each motor home if the motor home weighs 4,000 pounds or less,
provided that it is not used for the transportation of passengers for compensation and is not kept
or used for rent or for hire, or is not operated under a lease without a chauffeur.
  
2. a. Twenty-eight dollars for each private passenger car that weighs more than 4,000 pounds,
