In [1]:
from langchain_chroma import Chroma
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
import os
from dotenv import load_dotenv

# Load the NVIDIA NIM API key from the environment; load_dotenv() first pulls
# in any variables defined in a local .env file.
load_dotenv()
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")

# Chat model that writes the final answer. The key is passed explicitly so the
# chat model and the embedding model are configured the same way instead of one
# of them depending on an implicit environment lookup.
llm = ChatNVIDIA(
    model="meta/llama-3.1-70b-instruct",
    api_key=NVIDIA_API_KEY,
)

# Embedding model used by the vector store.
# NOTE(review): truncate="NONE" presumably makes over-long inputs fail rather
# than being silently truncated -- confirm against the NVIDIAEmbeddings docs.
embeddings_model = NVIDIAEmbeddings(
    model="nvidia/llama-3.2-nv-embedqa-1b-v1",
    api_key=NVIDIA_API_KEY,
    truncate="NONE",
)

# Chroma vector store configured with an on-disk directory and the embedding
# model above.
persist_directory = "./chroma_db"
vectorstore = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings_model,
)

In [None]:
# Prompt text for the RAG chain. The {context} and {question} placeholders are
# the two template variables supplied by the chain at run time.
template = """  
    Use the following pieces of context to answer the question at the end.
    Sometimes, the user will only give a simple description of the bird,
    or they have meet a bird somewhere, in this case,
    you need to find the most suitable birds from provided context according to user input,
    and express the characteristics of these birds.
    Please only use and describe the bird name inside the context.
    Do not explain birds not mentioned in the provided context!
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Use three sentences maximum and keep the answer as concise as possible.
    
    {context}

    Question: {question}

    Helpful Answer:
    """

# Build a PromptTemplate from the text above.
custom_rag_prompt = PromptTemplate.from_template(template)

# Wrap the vector store's relevance-score search as a runnable with k=3 bound.
# NOTE(review): presumably this returns the top 3 (Document, score) pairs for
# the query string -- confirm against the Chroma / RunnableLambda docs.
retriever = RunnableLambda(vectorstore.similarity_search_with_relevance_scores).bind(k=3)

# Module-level records of the most recent retrieval, kept so later cells can
# inspect which documents (and scores) backed the last answer.
retrieved_docs_list = []
similarity_list = []


def format_save_docs(data):
    """Format retrieved documents as prompt context and record them.

    Args:
        data: Iterable of ``(doc, score)`` pairs, where ``doc`` exposes a
            ``page_content`` string attribute.

    Returns:
        A string that starts with a blank-line separator and contains each
        document's ``page_content`` followed by a blank-line separator.
        For empty input this is just ``"\\n\\n"``.

    Side effects:
        Replaces the contents of ``retrieved_docs_list`` and
        ``similarity_list`` (in place) with the documents and scores from
        this call, so both lists always describe the same retrieval.
    """
    # Materialise once so generators can be traversed more than once.
    pairs = list(data)

    # Reset BOTH records; previously only the document list was cleared, so
    # scores piled up across calls and no longer lined up with the documents.
    retrieved_docs_list.clear()
    similarity_list.clear()

    # Mutate in place (no rebinding) so other references to these lists stay
    # valid and no `global` declaration is needed.
    retrieved_docs_list.extend(doc for doc, _ in pairs)
    similarity_list.extend(score for _, score in pairs)

    return "\n\n" + "".join(doc.page_content + "\n\n" for doc, _ in pairs)

# Inputs for the prompt: "context" is the retriever output piped through
# format_save_docs, and "question" is the chain input passed through unchanged.
rag_inputs = {
    "context": retriever | format_save_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: build the prompt inputs, render the prompt, call the chat model.
rag_chain = rag_inputs | custom_rag_prompt | llm

# for chunk in rag_chain.stream("Blue Bird by the Pond's Edge."):
#     print(chunk, end="", flush=True)

In [3]:
# Ad-hoc look at the raw retrieval results for a sample query.
# NOTE(review): this calls similarity_search_with_score, while the chain's
# retriever uses similarity_search_with_relevance_scores -- the two score
# scales may differ; confirm which one is intended here.
vectorstore.similarity_search_with_score("blue bird.")

[(Document(metadata={'binomialName': 'Alcedo atthis', 'birdName': 'Common Kingfisher', 'macaulayID': '26854431', 'url': 'https://ebird.org/species/comkin1/JP-13'}, page_content='Common Kingfisher, also called Alcedo atthis, is Beautiful little blue-and-orange bird with a long, pointed bill. Often rather shy and inconspicuous despite bright plumage. Found along rivers, streams, lakes, and ponds—almost any fresh or brackish habitat with small fish. Often perches quietly in trees over water; most often seen in very fast low flight as a turquoise flash over the water, usually flying away. Easily detected once its high, shrill whistled call is learned, even if the bird itself is hidden. The only small blue kingfisher over much of its range.'),
  0.6716244220733643),
 (Document(metadata={'binomialName': 'Eurystomus orientalis', 'birdName': 'Dollarbird', 'macaulayID': '382216911', 'url': 'https://ebird.org/species/dollar1/JP-13'}, page_content='Dollarbird, also called Eurystomus orientalis, i

In [4]:
# Run the full RAG chain on a sample query.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, so no
# completed answer is recorded -- re-run to confirm the chain finishes.
rag_chain.invoke("blue bird.")

KeyboardInterrupt: 

In [None]:
# for doc in retrieved_docs_list:
#     print("Metadata:", doc.metadata['macaulayID'])