In [None]:
import weaviate
from weaviate.config import AdditionalConfig
from weaviate.classes.config import Configure

import json
import pandas as pd
import numpy as np
import os
import dotenv


from langchain_weaviate.vectorstores import WeaviateVectorStore
from langchain_openai import AzureOpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_openai.chat_models import AzureChatOpenAI  

# Load Azure OpenAI settings from a .env file. The file is located by walking
# up from the current working directory rather than through an absolute,
# machine-specific path; set DOTENV_PATH to point at a specific file instead.
dotenv.load_dotenv(os.getenv("DOTENV_PATH") or dotenv.find_dotenv(usecwd=True))

AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
AZURE_OPENAI_RESOURCE_NAME = os.getenv("AZURE_OPENAI_RESOURCE_NAME")

# Fail early with a readable message instead of sending a None header value
# to Weaviate.
if not AZURE_OPENAI_API_KEY:
    raise RuntimeError(
        "AZURE_OPENAI_API_KEY is not set; define it in the environment or in "
        "a .env file (optionally referenced through DOTENV_PATH)."
    )

# Connect to the local Weaviate instance (port 8083, gRPC port 50051) and
# forward the Azure OpenAI key as a request header.
client = weaviate.connect_to_local(
    port=8083,
    grpc_port=50051,
    headers={
        "X-Azure-Api-Key": AZURE_OPENAI_API_KEY,
    },
)

# Quick connectivity check.
print(client.is_ready())


In [None]:
# Weaviate collection queried by this notebook.
COLLECTION_NAME = "recipes"

# Embedding model handed to the vector store.
embeddings = AzureOpenAIEmbeddings(model="text-embedding-3-large")

# Vector store bound to the existing Weaviate client; "chunk" is passed as the
# text key.
vector_store_options = dict(
    client=client,
    index_name=COLLECTION_NAME,
    text_key="chunk",
    embedding=embeddings,
)
db = WeaviateVectorStore(**vector_store_options)

In [None]:
# Prompt text with two placeholders: {context} and {question}.
# Built from adjacent literals so the exact whitespace (including the trailing
# space before the line break in the last paragraph) stays visible.
prompt_template = (
    "Text: {context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Answer the question based on the text provided. "
    "If the text doesn't contain the answer, \n"
    "reply that the answer is not available."
)

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Keyword arguments bundle carrying the custom prompt.
chain_type_kwargs = dict(prompt=PROMPT)

# Show the raw template text as the cell output.
prompt_template



In [None]:

from langchain import hub

# Optional (disabled): restrict retrieval to one source document by handing a
# where-filter to the retriever through ``search_kwargs``.
# source_file = "netherlands-wikipedia-article-text.pdf"
# where_filter = {
#       "operator": "Equal",
#       "path": ["source"],
#       "valueText": source_file
#   }
# retriever = db.as_retriever(search_kwargs={"where_filter": where_filter})

# Prompt pulled from the LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")

# Unfiltered retriever over the vector store.
retriever = db.as_retriever()

# Chat model client; model and deployment share the same name here.
chat_model_name = "gpt-35-turbo"
openai_client = AzureChatOpenAI(
    model_name=chat_model_name,
    deployment_name=chat_model_name,
    api_version=AZURE_OPENAI_API_VERSION,
)

# Retrieval QA chain combining the chat model, the retriever and the hub prompt.
qa_chain = RetrievalQA.from_llm(
    openai_client,
    prompt=prompt,
    retriever=retriever,
)

qa_chain.invoke("How to make chilaquiles?")



In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Hub reference of the RAG prompt; the full text is published at
# https://smith.langchain.com/hub/rlm/rag-prompt
rag_prompt_ref = "rlm/rag-prompt"
prompt = hub.pull(rag_prompt_ref)


def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents concatenated in order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


# LCEL pipeline: the incoming question is sent to the retriever (whose
# documents are flattened into one string by format_docs) and is also passed
# through unchanged; both feed the prompt, then the chat model, and the model
# output is finally parsed into a plain string.
qa_chain = (
    {
        "context": db.as_retriever() | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    # The chain needs a configured model *instance*. Piping the bare
    # AzureChatOpenAI class would treat it as a plain callable and fail when
    # invoked, so reuse the client created earlier in the notebook.
    | openai_client
    | StrOutputParser()
)

# NOTE(review): this question looks unrelated to the "recipes" collection;
# confirm it is the intended example query.
qa_chain.invoke("What are autonomous agents?")

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Chat-style retrieval prompt from the LangChain Hub; the full text is at
# https://smith.langchain.com/hub/langchain-ai/retrieval-qa-chat
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

# Chain that passes the retrieved documents to the chat model with the prompt.
combine_docs_chain = create_stuff_documents_chain(
    llm=openai_client,
    prompt=retrieval_qa_chat_prompt,
)

# Full retrieval chain: fetch documents from the vector store, then hand them
# to the combine-documents chain.
rag_chain = create_retrieval_chain(
    retriever=db.as_retriever(),
    combine_docs_chain=combine_docs_chain,
)

a = rag_chain.invoke({"input": "how to make pavo?"})

In [None]:
# Display the value returned by the rag_chain.invoke call.
a