In [None]:
import weaviate
from weaviate.config import AdditionalConfig
from weaviate.classes.config import Configure

import json
import pandas as pd
import numpy as np
import os
import dotenv


from langchain_weaviate.vectorstores import WeaviateVectorStore
from langchain_openai import AzureOpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_openai.chat_models import AzureChatOpenAI  

# Load environment variables from the .env file in the user's home directory.
# python-dotenv does not expand "~" on its own, so the path is expanded here;
# with the raw "~/.env" string the file is not found and nothing is loaded.
dotenv.load_dotenv(os.path.expanduser("~/.env"))

# Azure OpenAI settings taken from the environment (None when a variable is unset).
# NOTE(review): the API key is read from OPENAI_API_KEY, not an AZURE_-prefixed
# variable like the others — confirm this is intentional.
AZURE_OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
AZURE_OPENAI_ENDPOINT = os.getenv('AZURE_OPENAI_ENDPOINT')
AZURE_OPENAI_API_VERSION = os.getenv('AZURE_OPENAI_API_VERSION')
AZURE_OPENAI_RESOURCE_NAME = os.getenv('AZURE_OPENAI_RESOURCE_NAME')

# Connect to the local Weaviate instance, passing the API key as a request header.
client = weaviate.connect_to_local(
    port=8083,
    grpc_port=50051,
    headers = {
        "X-Azure-Api-Key": AZURE_OPENAI_API_KEY,
    },
)

# Print the readiness status reported by the client.
print(client.is_ready())


In [None]:
# Read the comparison results from the working directory and show them
# as the cell's rich output.
df = pd.read_parquet(path="compared.parquet")
df

In [None]:
# Show the first rows with the notebook's rich DataFrame rendering instead of
# printing the whole frame as plain text.
df.head()

In [None]:
from weaviate.classes.query import Filter


In [None]:
# Print every object in the "recipes" collection: its UUID followed by its properties.
COLLECTION_NAME = "recipes"
collection = client.collections.get(name=COLLECTION_NAME)

for stored_object in collection.iterator():
    object_id, object_properties = stored_object.uuid, stored_object.properties
    print(object_id, object_properties)

In [None]:
COLLECTION_NAME = "recipes"
collection = client.collections.get(name=COLLECTION_NAME)

# Delete the stored object for each distinct document listed in the DataFrame.
# The ID filter uses the loop variable so every document is actually targeted;
# a fixed UUID here would delete the same object on every iteration.
# NOTE(review): assumes df.document_sha holds the Weaviate object UUIDs — confirm.
for document_uuid in df.document_sha.unique():
    collection.data.delete_many(
        where=Filter.by_id().contains_any([str(document_uuid)])
    )

In [None]:
from weaviate.util import generate_uuid5
# Try generate_uuid5 on a sample string; the returned value is shown as the cell output.
generate_uuid5("hola")

In [None]:
# Delete the "recipes" collection from the connected Weaviate instance.
# NOTE(review): presumably this removes every object stored in it — run deliberately.
client.collections.delete(name="recipes")

In [None]:
import pandas as pd
# Load the preprocessed recipe chunks from the notebook's working directory.
# A relative path keeps the notebook portable instead of tying it to one user's
# home directory; it assumes the kernel runs from the notebooks folder, as the
# earlier read of "compared.parquet" already does.
df = pd.read_parquet("preprocessed.parquet")


# Show only the identifying columns and the chunk text.
df.loc[:,["recipe_uuid", "recipe_name", "chunk_uuid", "chunk"]]

In [None]:
import pandas as pd
# Load the comparison results from the notebook's working directory.
# A relative path keeps the notebook portable instead of tying it to one user's
# home directory; it assumes the kernel runs from the notebooks folder, as the
# earlier read of "compared.parquet" already does.
df = pd.read_parquet("compared.parquet")


# Show only the identifying columns and the chunk text.
df.loc[:,["recipe_uuid", "recipe_name", "chunk_uuid", "chunk"]]

In [None]:
from weaviate.util import generate_uuid5


In [None]:
# Sample recipe text (this version lists 2 eggs) used as the input for a UUID.
text = """
    Cheese Cake Recipe
    Ingredients:
    - 2 cups of cream cheese
    - 1 cup of sugar
    - 2 eggs
    - 1 cup of vanilla extract

    Instructions:
    1. Mix all ingredients in a bowl
    2. Pour mixture into a baking pan
    3. Bake at 350 degrees for 30 minutes
"""

# Derive a UUID from the full recipe text; the value is shown as the cell output.
generate_uuid5(text)

In [None]:
# Same sample recipe text as the 2-egg version, except it lists 3 eggs,
# so the resulting UUID can be compared with the one from the 2-egg text.
text = """
    Cheese Cake Recipe
    Ingredients:
    - 2 cups of cream cheese
    - 1 cup of sugar
    - 3 eggs
    - 1 cup of vanilla extract

    Instructions:
    1. Mix all ingredients in a bowl
    2. Pour mixture into a baking pan
    3. Bake at 350 degrees for 30 minutes
"""

# Derive a UUID from the modified recipe text; the value is shown as the cell output.
generate_uuid5(text)

In [None]:



# Two small in-memory sample recipes, each with a name, an ingredient list
# and free-text instructions.
files_to_process = [
    dict(
        recipe_name="Avo & Toast",
        ingredients="- Avocado\n- Bread",
        instructions="Mash the avocado and spread it on toasted bread.",
    ),
    dict(
        recipe_name="Sushi",
        ingredients="- Rice\n- Fish",
        instructions="Cook the rice. Cut the fish in pieces and make rolls.",
    ),
]


In [None]:

# Split each recipe into two text chunks, one for the ingredients and one for
# the instructions, each prefixed with the recipe name.
chunks = []
for recipe in files_to_process:
    name = recipe['recipe_name']
    # Fall back to placeholder text when a section is missing from the recipe.
    ingredients_text = recipe.get('ingredients', 'No ingredients found')
    instructions_text = recipe.get('instructions', 'No instructions found')

    chunk_texts = (
        f"{name} \n Ingredients: {ingredients_text}",
        f"{name} \n Instructions: {instructions_text}",
    )
    chunks.extend({"recipe_name": name, "chunk": chunk_text} for chunk_text in chunk_texts)

# Show the resulting chunks as a table.
pd.DataFrame(chunks)