In [1]:
import os
from pprint import pprint

from dotenv import load_dotenv  # used to store secret stuff like API keys or configuration values
from langchain import hub
from langchain.chat_models import init_chat_model
from langchain_core.output_parsers import StrOutputParser
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_qdrant import QdrantVectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a local .env file (if one exists) BEFORE reading them
# with os.getenv below; otherwise QDRANT_URL / QDRANT_KEY are None on a fresh
# kernel unless they were already exported in the process environment.
load_dotenv()

# Model Settings
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Qdrant server URL and API key, read from the environment
URL = os.getenv("QDRANT_URL")
API_KEY = os.getenv("QDRANT_KEY")
# Qdrant dimension of the collection
DIMENSION = 384
# Qdrant collection name
COLLECTION_NAME = "PWD_SENTENCE_TRANSFORMERS"
METRIC_NAME = "COSINE"

In [3]:
# Embedding model: runs on the CPU and leaves the vectors un-normalized.
model_name = MODEL_NAME
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=False)
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

In [4]:
# Open the existing Qdrant collection as a vector store, using the embedding
# model defined above and the URL / API key from the configuration cell.
qdrant_connection = {"url": URL, "api_key": API_KEY}
qdrant = QdrantVectorStore.from_existing_collection(
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
    **qdrant_connection,
)

In [5]:
# Similarity-search retriever configured with k = 4.
retrieval_options = {"k": 4}
retriever = qdrant.as_retriever(
    search_type="similarity",
    search_kwargs=retrieval_options,
)

In [None]:
# Alternative sample query. The following cell reassigns `question`, so this
# value is not the one passed to the retriever unless that cell is skipped.
question = "What are the types of Public Private Participation ?"

In [6]:
# Query used by the retrieval and generation cells that follow.
question: str = "What is anodic index ?"

In [7]:
# Run the retriever on `question` and keep the returned documents.
docs = retriever.invoke(question)

In [8]:
# Number of documents the retriever returned for the question.
len(docs)

4

In [9]:
def initialize_llm(model="azure_openai:gpt-4o", azure_deployment="gpt4o"):
    """Create the chat model used for answer generation.

    With the default arguments this builds the ``azure_openai:gpt-4o`` model
    against the ``gpt4o`` deployment, exactly as before; pass other values to
    use a different model or deployment.

    Args:
        model: Model identifier forwarded as the first argument to
            ``init_chat_model``.
        azure_deployment: Deployment name forwarded to ``init_chat_model``
            as ``azure_deployment``.

    Returns:
        tuple: ``(llm, metadata)`` where ``llm`` is whatever
        ``init_chat_model`` returns and ``metadata`` is the label string
        ``"CRAG, <azure_deployment>"``.
    """
    # Load a local .env file before the client is built
    # (presumably it holds the Azure OpenAI endpoint / key -- confirm).
    load_dotenv()

    llm = init_chat_model(model, azure_deployment=azure_deployment)
    # Short label identifying this run configuration.
    metadata = f"CRAG, {azure_deployment}"
    return llm, metadata

In [10]:
# Build the chat model and its label string for use by the cells below.
llm, metadata = initialize_llm()

In [11]:
### Generate

# `hub` and `StrOutputParser` are imported in the notebook's top import cell,
# so every dependency is visible in one place.

# Prompt: the "rlm/rag-prompt" template pulled from the LangChain Hub.
# It is invoked later with the keys "context" and "question".
prompt = hub.pull("rlm/rag-prompt")


# Post-processing
def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        str: All page contents concatenated with ``"\\n\\n"`` between them
        (an empty string when ``docs`` is empty).
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)


# Chain: prompt -> chat model -> string output parser
output_parser = StrOutputParser()
rag_chain = prompt | llm | output_parser



In [12]:
# Assemble the context with the shared `format_docs` helper instead of
# re-implementing the join inline, so documents are separated by a blank line.
documents = format_docs(docs)
# Fill the prompt with the context and the question, then generate the answer.
response = rag_chain.invoke({"context": documents, "question": question})

In [13]:
# Pretty-print the generated answer.
pprint(response)

('The anodic index is a scale that measures the tendency of metals to corrode; '
 'it indicates the relative electrochemical potential of different metals. '
 'When selecting materials for environments with varying exposure levels, the '
 'difference in anodic index between two metals should not exceed specific '
 'voltage limits to prevent corrosion. This difference limits typically range '
 'from 0.15 V for harsh environments to 0.50 V for controlled environments.')
