Vector stores and retrievers 
 
 We will cover:
 * Documents
 * Vector stores
 * Retrievers

In [1]:
from langchain_core.documents import Document

from langchain.schema import Document

# Knowledge base for the demo: a structured hospital profile followed by a
# short free-text summary. Both entries carry the same provenance metadata.
documents = [
    Document(
        page_content=body,
        # A new dict is built on every iteration, so the two documents never
        # share a single metadata object.
        metadata={
            "source": "ananthapuri_official",
            "region": "Kerala",
            "category": "hospital_info",
        },
    )
    for body in (
        # Structured profile: one "Field: value" entry per line, every line
        # (including the last one) terminated by a newline.
        "".join(
            entry + "\n"
            for entry in (
                "Name: Ananthapuri Hospital and Research Institute (AHRI)",
                "Location: Thiruvananthapuram, Kerala",
                "Type: Multi-specialty tertiary care hospital",
                "Accreditation: NABH (National Accreditation Board for Hospitals)",
                "Established: [Not specified]",
                "Bed Capacity: 400+",
                "Emergency Services: 24x7 available",
                (
                    "Specialties: Cardiology, Neurology, Nephrology, Oncology, Gastroenterology, Orthopedics, Pediatrics, "
                    "General Surgery, Urology, Internal Medicine"
                ),
                (
                    "Facilities: ICU, Emergency Department, Diagnostic Services, Blood Bank, Dialysis Unit, Pharmacy, "
                    "Physiotherapy, Deluxe Inpatient Rooms, Medical Imaging (MRI, CT, Ultrasound), Robotic Surgery"
                ),
                "Medical Education: Yes — training, internships, CME programs",
                "Technology: Minimally Invasive Surgery, Robotic Surgery",
                "Website: www.ananthapurihospitals.com",
                "Phone: +91-471-2579900",
                "Address: NH Bypass Road, Chacka, Thiruvananthapuram, Kerala - 695024",
            )
        ),
        # Free-text summary of the same hospital.
        (
            "It specializes in various fields including Cardiology, Neurology, Nephrology, Oncology, "
            "ANd this hospital has very good facilities for patients. "
        ),
    )
]



In [6]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Fail early with a readable message when the key is missing, rather than
# handing None to the chat client.
groq_api = os.getenv("GROQ_API_KEY")
if not groq_api:
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to your environment or .env file."
    )

# os.environ only accepts string values, so assigning the result of
# os.getenv() directly raises TypeError when HF_TOKEN is absent. Re-export
# the token only when it is actually available.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    os.environ["HF_TOKEN"] = hf_token

# Chat model used for the RAG chain; low temperature and a capped answer length.
llm = ChatGroq(groq_api_key=groq_api, model="Llama3-8b-8192", temperature=0.1, max_tokens=512)

In [7]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used to vectorise both the stored documents and the queries.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

In [8]:
## Vector store
from langchain_chroma import Chroma

# The collection is persisted to disk ("chroma_db"). Without explicit ids each
# run of this cell stores the documents again under freshly generated ids, so
# the index grows with duplicates. Stable, position-based ids let a re-run
# write to the same entries instead.
# NOTE(review): a "chroma_db" directory left over from earlier runs may already
# hold duplicates under old ids — delete it once for a clean start.
vectorstore = Chroma.from_documents(
    documents,
    embeddings,
    ids=[f"hospital-doc-{position}" for position in range(len(documents))],
    persist_directory="chroma_db",
)
vectorstore


<langchain_chroma.vectorstores.Chroma at 0x196ea3fce30>

In [10]:
# Look up the stored documents most similar to the query text. No result count
# is passed, so the default is used (4, per the warning in the cell output).
vectorstore.similarity_search(query="hospital speciality")

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


[Document(page_content='It specializes in various fields including Cardiology, Neurology, Nephrology, Oncology, ANd this hospital has very good facilities for patients. ', metadata={'category': 'hospital_info', 'region': 'Kerala', 'source': 'ananthapuri_official'}),
 Document(page_content='Name: Ananthapuri Hospital and Research Institute (AHRI)\nLocation: Thiruvananthapuram, Kerala\nType: Multi-specialty tertiary care hospital\nAccreditation: NABH (National Accreditation Board for Hospitals)\nEstablished: [Not specified]\nBed Capacity: 400+\nEmergency Services: 24x7 available\nSpecialties: Cardiology, Neurology, Nephrology, Oncology, Gastroenterology, Orthopedics, Pediatrics, General Surgery, Urology, Internal Medicine\nFacilities: ICU, Emergency Department, Diagnostic Services, Blood Bank, Dialysis Unit, Pharmacy, Physiotherapy, Deluxe Inpatient Rooms, Medical Imaging (MRI, CT, Ultrasound), Robotic Surgery\nMedical Education: Yes — training, internships, CME programs\nTechnology: Min

In [12]:
## async query 
await  vectorstore.asimilarity_search("hospital speciality") # it will return the most similar document to the query.

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


[Document(page_content='It specializes in various fields including Cardiology, Neurology, Nephrology, Oncology, ANd this hospital has very good facilities for patients. ', metadata={'category': 'hospital_info', 'region': 'Kerala', 'source': 'ananthapuri_official'}),
 Document(page_content='Name: Ananthapuri Hospital and Research Institute (AHRI)\nLocation: Thiruvananthapuram, Kerala\nType: Multi-specialty tertiary care hospital\nAccreditation: NABH (National Accreditation Board for Hospitals)\nEstablished: [Not specified]\nBed Capacity: 400+\nEmergency Services: 24x7 available\nSpecialties: Cardiology, Neurology, Nephrology, Oncology, Gastroenterology, Orthopedics, Pediatrics, General Surgery, Urology, Internal Medicine\nFacilities: ICU, Emergency Department, Diagnostic Services, Blood Bank, Dialysis Unit, Pharmacy, Physiotherapy, Deluxe Inpatient Rooms, Medical Imaging (MRI, CT, Ultrasound), Robotic Surgery\nMedical Education: Yes — training, internships, CME programs\nTechnology: Min

In [13]:
# Same lookup, but every hit comes back as a (Document, score) pair.
vectorstore.similarity_search_with_score(query="hospital speciality")

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


[(Document(page_content='It specializes in various fields including Cardiology, Neurology, Nephrology, Oncology, ANd this hospital has very good facilities for patients. ', metadata={'category': 'hospital_info', 'region': 'Kerala', 'source': 'ananthapuri_official'}),
  0.9359505418944805),
 (Document(page_content='Name: Ananthapuri Hospital and Research Institute (AHRI)\nLocation: Thiruvananthapuram, Kerala\nType: Multi-specialty tertiary care hospital\nAccreditation: NABH (National Accreditation Board for Hospitals)\nEstablished: [Not specified]\nBed Capacity: 400+\nEmergency Services: 24x7 available\nSpecialties: Cardiology, Neurology, Nephrology, Oncology, Gastroenterology, Orthopedics, Pediatrics, General Surgery, Urology, Internal Medicine\nFacilities: ICU, Emergency Department, Diagnostic Services, Blood Bank, Dialysis Unit, Pharmacy, Physiotherapy, Deluxe Inpatient Rooms, Medical Imaging (MRI, CT, Ultrasound), Robotic Surgery\nMedical Education: Yes — training, internships, CME 

### Retrievers
Retrievers are Runnables: they implement the standard Runnable methods, so they can be incorporated into LCEL chains.

In [16]:
from typing import List, Tuple
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

# Wrap the store's search method as a Runnable and pin k=1, so each query
# yields a single document.
retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)

# Run several queries in one call; one result list comes back per query.
retriever.batch(
    [
        "hospital speciality",
        "location",
    ]
)

[[Document(page_content='It specializes in various fields including Cardiology, Neurology, Nephrology, Oncology, ANd this hospital has very good facilities for patients. ', metadata={'category': 'hospital_info', 'region': 'Kerala', 'source': 'ananthapuri_official'})],
 [Document(page_content='It specializes in various fields including Cardiology, Neurology, Nephrology, Oncology, ANd this hospital has very good facilities for patients. ', metadata={'category': 'hospital_info', 'region': 'Kerala', 'source': 'ananthapuri_official'})]]

Vector stores implement an `as_retriever` method that generates a Retriever, specifically a `VectorStoreRetriever`. These retrievers include `search_type` and `search_kwargs` attributes that identify which method of the underlying vector store should be called and how to parameterize it.

In [20]:
# Retriever created by the vector store itself: similarity search, one result per query.
rectriver = vectorstore.as_retriever(
    search_kwargs=dict(k=1),
    search_type="similarity",
)

In [21]:
# Send both queries through the vector-store retriever in a single batch call.
rectriver.batch(
    ["hospital speciality", "location"]
)

[[Document(page_content='It specializes in various fields including Cardiology, Neurology, Nephrology, Oncology, ANd this hospital has very good facilities for patients. ', metadata={'category': 'hospital_info', 'region': 'Kerala', 'source': 'ananthapuri_official'})],
 [Document(page_content='It specializes in various fields including Cardiology, Neurology, Nephrology, Oncology, ANd this hospital has very good facilities for patients. ', metadata={'category': 'hospital_info', 'region': 'Kerala', 'source': 'ananthapuri_official'})]]

In [None]:
## RAG prompt
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Template text with two placeholders, {question} and {context}, that are
# filled in when the prompt is formatted.
message = (
    "\n"
    "Anser the question based on the context provided.\n"
    "\n"
    "{question}\n"
    "Context: {context}\n"
)

# The prompt consists of a single human-role message built from the template.
prompt = ChatPromptTemplate.from_messages([("human", message)])

In [23]:
def format_docs(docs):
    """Join the page_content of the retrieved documents into one plain-text string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string
            (the Documents returned by the retriever).

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever yields Document objects; formatting them first keeps their
# repr and metadata out of the prompt's {context} slot. A plain function piped
# after a Runnable is wrapped into a Runnable by LCEL.
rag_chain = {"context": retriever | format_docs, "question": RunnablePassthrough()} | prompt | llm

# The chain returns the chat model's message; show its text as the cell output.
response = rag_chain.invoke("tell me about ananthapuri hospital")
response.content