In [None]:
import weaviate
from weaviate.config import AdditionalConfig
from weaviate.classes.config import Configure

import json
import pandas as pd
import numpy as np
import os
import dotenv

# Load variables from a .env file (if one is found) into the process
# environment, then read the Azure OpenAI settings used by later cells.
dotenv.load_dotenv()

AZURE_OPENAI_API_KEY = os.environ.get('AZURE_OPENAI_API_KEY')
AZURE_OPENAI_ENDPOINT = os.environ.get('AZURE_OPENAI_ENDPOINT')
# NOTE(review): unlike its siblings, this one is read from a variable without
# the AZURE_ prefix — confirm that is intended.
AZURE_OPENAI_API_VERSION = os.environ.get('OPENAI_API_VERSION')
AZURE_OPENAI_RESOURCE_NAME = os.environ.get('AZURE_OPENAI_RESOURCE_NAME')

# Connect to the local Weaviate instance (HTTP on 8083, gRPC on 50051).
# The Azure key is currently NOT sent with the connection; to send it, pass
#     headers={"X-Azure-Api-Key": AZURE_OPENAI_API_KEY}
# to connect_to_local (this was left disabled on purpose).
client = weaviate.connect_to_local(port=8083, grpc_port=50051)

# Quick readiness check before running the remaining cells.
print(client.is_ready())


In [None]:
# Location of the parquet file holding the pre-split recipe chunks.
# Resolved from the RECIPES_PARQUET_PATH environment variable (which may also
# be set in the .env file loaded above) so the notebook does not depend on one
# user's home directory. The default is relative to the kernel's working
# directory — assumes the file sits alongside this notebook; set
# RECIPES_PARQUET_PATH if it lives elsewhere.
DATA_PATH = os.getenv('RECIPES_PARQUET_PATH', 'splitted_with_vectors.parquet')

data = pd.read_parquet(DATA_PATH)
data

In [None]:
COLLECTION_NAME = "recipes"

# Start from a clean slate: if a collection with this name already exists
# (names are compared case-insensitively), drop it before re-creating it.
existing_collections = {
    name.lower() for name in client.collections.list_all().keys()
}
if COLLECTION_NAME.lower() in existing_collections:
    client.collections.delete(COLLECTION_NAME)

# Named vector "vector", built from the "chunk" property by the Azure OpenAI
# text2vec integration using the endpoint/resource read from the environment.
azure_vector_config = Configure.NamedVectors.text2vec_azure_openai(
    name="vector",
    source_properties=["chunk"],
    base_url=AZURE_OPENAI_ENDPOINT,
    resource_name=AZURE_OPENAI_RESOURCE_NAME,
    deployment_id="text-embedding-3-large",
)

collection = client.collections.create(
    COLLECTION_NAME,
    vectorizer_config=[azure_vector_config],
)

# Display the resulting collection configuration as a dict.
collection.config.get().to_dict()


In [None]:
COLLECTION_NAME = "recipes"
collection = client.collections.get(COLLECTION_NAME)

# Convert the rows to plain Python dicts via a JSON round-trip. Only the
# columns actually used below are serialized, so any other (possibly large)
# columns in `data` never go through JSON.
source_objects = json.loads(
    data[["filename", "chunk", "chunk_sha"]].to_json(orient="records")
)

with collection.batch.dynamic() as batch:
    for src_obj in source_objects:
        properties_obj = {
            "filename": src_obj["filename"],
            "chunk": src_obj["chunk"],
        }

        # The model provider integration will automatically vectorize the object
        batch.add_object(
            properties=properties_obj,
            # chunk_sha doubles as the object id, so it must be a valid UUID string.
            uuid=src_obj["chunk_sha"],
        )

# Per-object errors (e.g. a failed vectorization) do not raise from the batch
# context manager; they are collected on the collection and must be inspected
# explicitly, otherwise a partial import goes unnoticed.
failed_objects = collection.batch.failed_objects
if failed_objects:
    print(
        f"{len(failed_objects)} of {len(source_objects)} objects failed to import. "
        f"First error: {failed_objects[0].message}"
    )


In [None]:

# Semantic search over the collection; the query text is vectorized
# automatically by the model provider integration. Keep the two best matches.
response = collection.query.near_text(query="Como hacer empanadas", limit=2)

# Print the text of each returned chunk.
for match in response.objects:
    print(match.properties["chunk"])

In [None]:
# Walk over every object stored in the collection and print its id
# together with its properties.
for stored_object in collection.iterator():
    print(stored_object.uuid, stored_object.properties)

In [None]:
# Delete a single object from the collection by its UUID.
# NOTE(review): the id is hard-coded; presumably it is one of the `chunk_sha`
# values used as object ids at import time — confirm before re-running.
collection.data.delete_by_id(
    '9c95627f-1e0c-5a5d-8bdd-eb4e9cbb7da0'
)

In [None]:
# Re-bind `collection` to the "recipes" collection — presumably so the cells
# below can be run without re-executing the creation/import cells.
COLLECTION_NAME = "recipes"
collection = client.collections.get(COLLECTION_NAME)

In [None]:
# Print the id and properties of each object currently in the collection
# (useful to confirm the effect of the delete above).
for stored_object in collection.iterator():
    print(stored_object.uuid, stored_object.properties)

In [None]:
# Clean-up: drop the collection named by COLLECTION_NAME from the Weaviate instance.
client.collections.delete(COLLECTION_NAME)