In [None]:
!pip install -q python-dotenv langid langgraph openai==0.28 langchain_community

In [None]:
!pip install -q numpy scikit-learn torch transformers langchain_chroma langchain_huggingface

In [None]:
# Instruction snippets, one entry per patient attribute ("category").
# Each instruction string is written as adjacent literals, one sentence per
# line, which Python concatenates into a single string.
stored_texts = [
    {
        "category": "age",
        "instructions": (
            "For patients under 18, use a reassuring tone. "
            "Start the email with 'Dear Guardian'. "
            "Use words like 'little one', 'concerned about their health', 'take care'. "
            "For patients aged 18-40, use a more casual tone. "
            "Start the email with the patient's name, e.g., 'Hi Martin'. "
            "You may say things like 'Hope you're doing well', 'take care', 'feel free to ask'. "
            "Make it friendly and warm. "
            "For patients aged over 60, use a formal tone. "
            "For patients aged over 60, Start the email with 'Dear Mr./Ms. [Name]'. "
            "Use words like 'respectfully', 'kindly', 'wishing you good health'."
        ),
    },
    {
        "category": "annual_income",
        "instructions": (
            "If annual_income < $250K, emphasize the option to use the case manager "
            "for help with making appointments/obtaining prescription."
        ),
    },
]

# Sample patient record; its keys are matched against the categories in stored_texts.
patientdata = dict(
    patient_name="Diana",
    age=66,
    facility="Edward Hospital",
    department="Cardiology",
    annual_income="$300K",
)

# Sample questionnaire answers: (question, selected option, free-text comment).
feedback_data = [
    {"question": question, "option": option, "comments": comments}
    for question, option, comments in (
        ("How do you feel today?", "Better", "not well"),
        ("Do you have any questions?", "No", "yes"),
    )
]

In [None]:
import chromadb
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint

# Chroma client shared by the cells below; no path is given, so chromadb's
# default persistence location is used.
client = chromadb.PersistentClient()
# Splitter used to chunk instruction text before it is stored.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)


# Model and embeddings setup
# all-MiniLM-L6-v2 is a general sentence-transformers model, not a BGE model.
# The BGE-specific wrapper is deprecated and prepends a BGE retrieval
# instruction to every query, so the generic wrapper is used instead.
model = "all-MiniLM-L6-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {"normalize_embeddings": True}
embedding = HuggingFaceEmbeddings(
    model_name=model,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

  embedding = HuggingFaceBgeEmbeddings(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
def create_collection(text, category):
    """Chunk ``text`` and store the chunks in a fresh Chroma collection.

    Any existing collection named ``category`` is deleted first, so the
    returned collection holds only the chunks of ``text``.

    Args:
        text: Instruction text; split with the module-level ``text_splitter``.
        category: Collection name; also recorded in every chunk's metadata.

    Returns:
        The chromadb collection. It is left empty when ``text`` yields no chunks.
    """
    chunks = text_splitter.split_text(text)
    ids = [str(idx) for idx in range(len(chunks))]
    metadatas = [{"category": category, "id": chunk_id} for chunk_id in ids]

    # Depending on the chromadb release, list_collections() yields Collection
    # objects or plain collection names; accept both.
    existing_names = [getattr(col, "name", col) for col in client.list_collections()]

    if category in existing_names:
        client.delete_collection(name=category)
        print(f"Existing collection '{category}' deleted.")

    collection = client.get_or_create_collection(
        category,
        metadata={"hnsw:space": "cosine"}
    )

    if not chunks:
        # Nothing to store; avoid calling add() with empty lists.
        print(f"No text to add to collection '{category}'.")
        return collection

    collection.add(
        documents=chunks,
        ids=ids,
        # Store the per-chunk metadata with the documents.
        metadatas=metadatas,
        # Embed with the same model the retrievers use for queries instead of
        # Chroma's built-in default embedder.
        embeddings=embedding.embed_documents(chunks),
    )
    print(f"New data added to collection '{category}'.")

    return collection

In [None]:
def initialize_llm(queries, collection_name):
    """Ask an LLM to quote the stored instructions relevant to ``queries``.

    Builds a RetrievalQA chain over the Chroma collection ``collection_name``
    and runs it once.

    Args:
        queries: Query string, e.g. ``"age 66"``.
        collection_name: Name of the Chroma collection to search.

    Returns:
        The ``"result"`` entry of the chain's output.
    """
    # Initialize the LLM
    llm = HuggingFaceEndpoint(repo_id="microsoft/Phi-3.5-mini-instruct", temperature=0.1)

    # Define the prompt template
    template = """
    You are an assistant designed to help retrieve relevant instructions.
    When given a question, retrieve all relevant instructions from the context. Do not generate email. Return the instruction as is.

    Context: {context}
    Question: {question}
    Answer:
    """
    prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"]
    )

    vectordb = Chroma(embedding_function=embedding, persist_directory="./chroma", client=client, collection_name=collection_name)
    retriever = vectordb.as_retriever()

    # RetrievalQA runs the retriever itself and fills {context} from the
    # retrieved documents, so the chain only needs the query.
    chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type_kwargs={"prompt": prompt})

    result = chain.invoke({"query": queries})

    return result["result"]

In [None]:
def delete_collection(client, collection_name):
    """Best-effort removal of ``collection_name`` via ``client``.

    Prints a confirmation on success. Any exception raised by the client is
    reported on stdout instead of being propagated. Returns ``None``.
    """
    try:
        client.delete_collection(name=collection_name)
    except Exception as err:
        print(f"Error deleting collection '{collection_name}': {err}")
    else:
        print(f"Collection '{collection_name}' deleted.")


In [None]:
# Build one Chroma collection per instruction category.
for entry in stored_texts:
    text = entry["instructions"]
    category = entry["category"]
    print(category)
    create_collection(text, category)

age


/root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|██████████| 79.3M/79.3M [00:02<00:00, 32.2MiB/s]


New data added to collection 'age'.
annual_income
New data added to collection 'annual_income'.


In [None]:
# One single-key dict {category: patient value} for every instruction
# category that the patient record has a value for.
metadata = [
    {entry["category"]: patientdata[entry["category"]]}
    for entry in stored_texts
    if entry["category"] in patientdata
]

metadata

[{'age': 66}, {'annual_income': '$300K'}]

In [None]:
# Query each category's collection with "<attribute> <value>" and show the LLM's answer.
for attribute in metadata:
    # Each entry holds exactly one key/value pair; take it.
    key, value = next(iter(attribute.items()))
    result = initialize_llm(f"{key} {value}", key)
    print(result)






    Start the email with 'Dear Mr./Ms. [Name]'.
    Use words like 'respectfully', 'kindly', 'wishing you good health'.
1. If annual_income < $250K, emphasize the option to use the case manager for help with making appointments/obtaining prescription.
    2. (No instruction related to annual_income of $300K)


In [None]:
def fetch_specific_instruction(query, collection_name, keyword):
    """Return the first retrieved sentence that contains ``keyword``.

    Retrieves documents for ``query`` from the Chroma collection
    ``collection_name``, splits each document on ``". "`` and returns the first
    piece containing ``keyword``.

    Args:
        query: Text used for the similarity search.
        collection_name: Name of the Chroma collection to search.
        keyword: Substring the instruction must contain. Non-string values
            (e.g. an int taken from the patient record) are converted with
            ``str``. ``None`` or an empty keyword matches nothing.

    Returns:
        The matching sentence, or ``"No relevant instruction found."``.
    """
    # `in` on a str needs a str operand; patient values may be ints.
    keyword = "" if keyword is None else str(keyword)

    vectordb = Chroma(
        embedding_function=embedding,
        persist_directory="./chroma",
        client=client,
        collection_name=collection_name
    )
    retriever = vectordb.as_retriever()

    results = retriever.invoke(query)

    if results:
        print(f"Results for query: {results}")

    # An empty keyword is contained in every string, so treat it as "no match"
    # rather than returning an arbitrary first sentence.
    if keyword:
        for result in results:
            for instruction in result.page_content.split(". "):
                if keyword in instruction:
                    return instruction
    return "No relevant instruction found."

# Look up, per patient attribute, the single instruction sentence that applies.
# NOTE(review): for "age" the keyword is fixed to the over-60 bracket and
# ignores the actual value; it only fits patients older than 60.
for attribute in metadata:
    key, value = next(iter(attribute.items()))
    query = f"Instruction for {key} is {value}"
    print(f"Query: {query}")

    # Keyword that the returned instruction sentence must contain.
    if key == "age":
        keyword = "patients aged over 60"
    else:
        keyword = value

    specific_instruction = fetch_specific_instruction(query, key, keyword)
    print(f"Specific Instruction: {specific_instruction}")




Query: Instruction for age is 66
Results for query: [Document(metadata={}, page_content="For patients under 18, use a reassuring tone. Start the email with 'Dear Guardian'. Use words like 'little one', 'concerned about their health', 'take care'. For patients aged 18-40, use a more casual tone. Start the email with the patient's name, e.g., 'Hi Martin'. You may say things like 'Hope you're doing well', 'take care', 'feel free to ask'. Make it friendly and warm. For patients aged over 60, use a formal tone. For patients aged over 60, Start the email with 'Dear Mr./Ms. [Name]'. Use"), Document(metadata={}, page_content="Start the email with 'Dear Mr./Ms. [Name]'. Use words like 'respectfully', 'kindly', 'wishing you good health'.")]
Specific Instruction: For patients aged over 60, use a formal tone
Query: Instruction for annual_income is $300K
Results for query: [Document(metadata={}, page_content='If annual_income < $250K, emphasize the option to use the case manager for help with makin

In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableMap


def llm_chat_chain(query: str, chroma_collection):
    """Answer ``query`` with an LLM, using text retrieved from a Chroma collection.

    Args:
        query: The question to ask, e.g. ``"age is 65"``.
        chroma_collection: Name of the Chroma collection to retrieve context from.

    Returns:
        The model's response as a string (also printed).
    """
    vectordb = Chroma(client=client,
                      embedding_function=embedding,
                      collection_name=chroma_collection,
                      )
    llm = HuggingFaceEndpoint(repo_id="meta-llama/Llama-3.2-1B",
                              temperature=0.1)

    template = """Answer the question based only on the following context.Only retrieve instruction. Do not generate email.
        {context}

        Question: {question}
        """
    prompt = ChatPromptTemplate.from_template(template)
    output_parser = StrOutputParser()
    retriever = vectordb.as_retriever(search_kwargs={"k": 5})

    def _format_context(inputs):
        # Put the retrieved text into the prompt, not the repr of the
        # Document objects (which would include "Document(metadata=...").
        docs = retriever.invoke(inputs["question"])
        return "\n\n".join(doc.page_content for doc in docs)

    chain = RunnableMap({
        "context": _format_context,
        "question": lambda x: x["question"],
    }) | prompt | llm | output_parser
    response = chain.invoke({"question": query})
    print("Response : \n" + response)
    return response


# Demo: ask the "age" collection which instruction applies to a 65-year-old.
result = llm_chat_chain(query="age is 65", chroma_collection="age")




Response : 

