In [69]:
# load the key from .env file
from dotenv import load_dotenv
load_dotenv()

import asyncio
from fastapi import FastAPI, Query

from langchain.llms import OpenAI, HuggingFaceHub
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceBgeEmbeddings, CacheBackedEmbeddings
from langchain.chains import LLMChain, HypotheticalDocumentEmbedder
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores.pgvector import PGVector, DistanceStrategy
from langchain.storage import LocalFileStore
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import TextLoader
import langchain
import os
from langchain.vectorstores import FAISS

# langchain.debug = True

from langchain.chains import RetrievalQA
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter

from langchain_core.runnables import RunnablePassthrough
from typing import List
from operator import itemgetter
import sys
import json
from langserve import add_routes



from langchain.schema import StrOutputParser
output_parser = StrOutputParser()
from langchain.document_transformers import LongContextReorder
reordering = LongContextReorder()
sys.path.append("../..")
from do_not_share import CONNECTION_STRING # getting the connection string from to the postgres database

In [2]:
DOC_SPACE_DIR_ = './faiss_doc_space'
# DOC_SPACE_DIR_ = '/home/dosisiddhesh/LANGCHAIN_EXP/dummy_faiss_doc_space_OpenAI_GTE'
########################################################################################################################
# Global Variables
llm_query = None
llm_hyde = None
embeddings = None
chain = None
db = None
retriever = None

# #-----------------------------------------------------------------------------------------------------------------------
# app = FastAPI(
#     title="LangChain Server",
#     version="1.0",
#     description="A simple API server using LangChain's Runnable interfaces",
# )

In [16]:
def load_model_n_embedding(hyde_llm_name: str, query_llm_name: str, embedding_name: str, temp_hyde: float, temp_query: float, openai_emb_name= None):
    global llm_query
    global llm_hyde
    global embeddings
    global retriever
    global db
    try:
        if embedding_name == "openai-gpt":
            print("f", openai_emb_name)
            embeddings = OpenAIEmbeddings(model=openai_emb_name) # getting the embedding model with dim 384
        else:
            print("Loading HuggingFaceEmbeddings with model name: ", embedding_name)
            embeddings = HuggingFaceEmbeddings(model_name=embedding_name)
    except Exception as e: 
        print("Error in embedding load: ",e)
        embeddings = None
        return   
    if hyde_llm_name == "openai-gpt":
        llm_hyde = OpenAI(temperature=temp_hyde)
    else:
        llm_hyde = HuggingFaceHub(repo_id=hyde_llm_name, model_kwargs={"temperature":temp_hyde})
    if query_llm_name == "openai-gpt":
        llm_query= ChatOpenAI(model="gpt-3.5-turbo", temperature=temp_query)
    else:
        llm_query = HuggingFaceHub(repo_id=query_llm_name,model_kwargs={"temperature":temp_query})        
    hyde_embedding_gte = HypotheticalDocumentEmbedder.from_llm(llm_hyde, embeddings, prompt_key="web_search")
    print("HyDe Embedding created")

    embeddings = hyde_embedding_gte

    # store = LocalFileStore("./cache_gte_pubmed/")
    # cached_hyde_embedding_gte = CacheBackedEmbeddings.from_bytes_store(
    #     hyde_embedding_gte, store, 
    # )
    try:
        db = PGVector(
            connection_string=CONNECTION_STRING,
            embedding_function=embeddings,
            collection_name="my_collection",
            distance_strategy=DistanceStrategy.COSINE,
        ) 
    except Exception as e:
        print("Error in db creation: ",e)
        
    print("DB created/Loaded")
    retriever = db.as_retriever(search_kwargs={'k':3})
    # chain = load_qa_chain( llm=llm_query, chain_type="stuff") #why stuff?



In [17]:
load_model_n_embedding(
    hyde_llm_name= "openai-gpt", 
    query_llm_name="openai-gpt", 
    embedding_name="openai-gpt", 
    temp_hyde=0.5, 
    temp_query=0.1, 
    openai_emb_name= "text-embedding-ada-002")


f text-embedding-ada-002
HyDe Embedding created
DB created/Loaded


In [21]:
relavent_docs = await retriever.aget_relevant_documents("what are the symptoms of pneumonia?")

In [22]:
reordered_docs= reordering.transform_documents(relavent_docs)

In [46]:
# template = """Follow the Previous chat history: {history_qna}
#     Answer the question based only on the following context:
#     {context}
#     The Question is: {question}
#     """

template = """

Information Context:
{context}

Based only on the information provided above, without speculating or adding new details, answer the following question:

Question:
{question}
"""

query_template = """
Context:
{history_qna}

Rewrite or reframe the following query in the context of the conversation history above:

Query:
{question}
"""

query_prompt = PromptTemplate.from_template(query_template)

chain1 = (
    {
        "history_qna": itemgetter("history_qna"),
        "question": itemgetter("question"),
    }
    | query_prompt
    | llm_query
    | output_parser
)

print(chain1.invoke({"question": "What is procedure of taking this drug",
                "history_qna": "Human: I am having loose motion;\
                                AI: You can take ORS. It will help you to recover from dehydration."}))

global reordered_docs
prompt = ChatPromptTemplate.from_template(template)
def format_docs(docs):
    reordered_docs = reordering.transform_documents(docs)
    return "\n\n".join([d.page_content for d in reordered_docs])

chain = (
    {   
        # "history_qna": itemgetter("history_qna"),
        "question": chain1,
        "context": retriever | format_docs, 
    }
    | prompt
    | llm_query
    | output_parser
)

origianl_answer = chain.invoke({"question": "What is procedure of taking this drug", 
              "history_qna": "Human: I am having loose motion;\
                              AI: You can take ORS. It will help you to recover from dehydration."})

How should I take ORS to recover from dehydration?


- temp1 = '''Identify any instances of ambiguity, vagueness, or incomplete information. 
Then, generate questions to address each identified uncertainty and 
improve the clarity of the original answer.
'''
- temp2 = '''Look for speculative statements or instances where the information is not grounded in concrete details.
 Formulate questions to ask the model for more precise information and to reduce speculation.'''

- temp3 = '''Check for any discrepancies or contradictions within the response. 
Generate questions to resolve these inconsistencies and ensure a coherent and accurate explanation.
'''

##################### 4
- temp4 = ''' Identify and categorize any uncertainty concepts present in the response. 
Examples include ambiguity, vagueness, incomplete information, speculation, etc.

Based on the identified uncertainties, 
generate questions to ask the model for clarification or additional details. 
Return question phrases to ask the language model to improve the clarity and reduce uncertainty in the original answer.
'''

##################### 5
- temp5 = '''
Consider the context and identify any instances of ambiguity. 
Generate questions to seek clarity and ensure that the response is precise and unambiguous.
'''

- temp6 = '''Identify any aspects of the question that the response did not address or left unclear. 
Generate questions to inquire about these unanswered aspects and fill in the missing details.
'''
##################### 7
- temp7 = '''Identify any areas where the information lacks precision or specificity. 
Formulate questions to ask the model for more detailed and specific information to enhance
the accuracy of the original answer.
'''

- temp8 = '''Identify any statements that introduce hypothetical scenarios or possibilities.
Generate questions to explore these hypotheticals and seek clarification on the likelihood or
conditions associated with them.
'''


| for now let us go with the 4th template


In [65]:
origianl_answer

'To recover from dehydration, you should take ORS (oral rehydration solution) as directed. The recommended dosage of ORS depends on age. For adults, 2 liters of oral rehydration fluid should be given in the first 24 hours, followed by unrestricted normal fluids with 200 mL of rehydration solution per loose stool or vomit. For children, 30-50 mL/kg of ORS should be given over 3-4 hours. It is best to sip the solution every 5-10 minutes rather than drinking it in large quantities less frequently. Additionally, it is important to continue monitoring blood glucose levels, especially for diabetic patients.'

In [73]:
from langchain.output_parsers import CommaSeparatedListOutputParser
output_parser2 = CommaSeparatedListOutputParser()
format_instructions = output_parser2.get_format_instructions()


In [80]:
# Foramt Instructions: {format_instructions}
template_uncertainity = """
Given the following generated answer:

Answer:
{answer}

Identify and categorize any uncertainty concepts present in the response. Examples include ambiguity, vagueness, incomplete information, speculation, etc.
Based on the identified uncertainties, generate questions to ask the model for clarification or additional details. Return question phrases to ask the language model to improve the clarity and reduce uncertainty in the original answer.
"""
template_uncertainity = """
Given the following generated answer:

Answer:
{answer}

Identify and categorize any uncertainty concepts present in the response. Uncertainty can be defined by ambiguity, vagueness, incomplete information, speculation, etc.
Based on the identified uncertainties, generate questions to ask the model for clarification or additional details. 
"Return ONLY" the question phrases to ask the language model to improve the clarity and reduce uncertainty in the original answer.
"""
prompt_uncertainty = PromptTemplate.from_template(
    template= template_uncertainity,
    # partial_variables={'format_instructions': format_instructions}
    )

chain_uncertainty = (
    {
        "answer": RunnablePassthrough(),
    }
    | prompt_uncertainty
    | llm_query
    | output_parser
)

uncertainity_answer = chain_uncertainty.invoke({"answer": origianl_answer})
uncertainity_answer

'1. What is the recommended dosage of ORS for adults?\n2. How much rehydration solution should be given per loose stool or vomit for adults?\n3. What is the recommended dosage of ORS for children?\n4. How should the ORS solution be consumed - in large quantities less frequently or sipped every 5-10 minutes?\n5. Are there any specific instructions for diabetic patients regarding ORS consumption?\n6. Are there any specific instructions for monitoring blood glucose levels during dehydration recovery?'

In [81]:
uncertainity_answers = uncertainity_answer.split("\n")
uncertainity_answers

['1. What is the recommended dosage of ORS for adults?',
 '2. How much rehydration solution should be given per loose stool or vomit for adults?',
 '3. What is the recommended dosage of ORS for children?',
 '4. How should the ORS solution be consumed - in large quantities less frequently or sipped every 5-10 minutes?',
 '5. Are there any specific instructions for diabetic patients regarding ORS consumption?',
 '6. Are there any specific instructions for monitoring blood glucose levels during dehydration recovery?']

### RunnableParallel to find the answers parallelly


In [93]:
from langchain_core.runnables import RunnableParallel
#******************************************************** Directly asking the question to the model ******************************************************
# chain1 = (
#     {
#         "question_uncertainty": RunnablePassthrough(),
#     }
#     | ChatPromptTemplate.from_template("Please clarify more about following {question_uncertainty}. Return question answer pairs in the following format: Question: Answer:")
#     | llm_query 
#     | output_parser
# )

# question_input = [{"question_uncertainty": q} for q in uncertainity_answers]


# answer_uncertainty = chain1.batch(question_input)

# answer_uncertainty

#******************************************************** Using RAG (Retreiver) to get the answer ******************************************************

template_uncertainity_rag = '''
Information Context:
{context}

Based only on the information provided above, without speculating or adding new details, answer the following question.

Question:
{question}
Return question answer pairs in the following format: Question: Answer:
'''

prompt_uncertainty_rag = PromptTemplate.from_template(template= template_uncertainity_rag)

chain_uncertainty_answer = (
    {   
        "question": chain1,
        "context": retriever | format_docs, 
    }
    | prompt_uncertainty_rag
    | llm_query
    | output_parser
)
question_input = [{"question_uncertainty": q} for q in uncertainity_answers]
answer_uncertainty = chain_uncertainty_answer.batch(question_input)
answer_uncertainty

['Question: What is the recommended dosage of ORS for adults?\nAnswer: The recommended dosage of ORS for adults is 200-400 mL after each loose stool or vomiting episode.',
 'Question: How much rehydration solution should be given per loose stool or vomit for adults?\nAnswer: The recommended amount of rehydration solution to be given per loose stool or vomit for adults is approximately 200-400 ml (6-13 ounces) per episode.',
 'Question: What is ORS?\nAnswer: ORS stands for Oral Rehydration Solution, which is a special drink used to treat dehydration caused by diarrhea or vomiting.\nQuestion: Why is ORS recommended for children?\nAnswer: ORS is recommended for children because it helps replace the fluids and electrolytes lost during diarrhea or vomiting, preventing dehydration.\nQuestion: How does ORS work?\nAnswer: ORS works by providing the body with a balanced amount of water, salts, and sugar. This helps the body absorb fluids more effectively and restore the electrolyte balance.\nQu

In [94]:
template_final = """
Given the original base answer and the answers for the uncertainty questions, generate a final answer that is more complete and clear than the original answer.
Original Answer: 
{original_answer}
Uncertainty Answers:
{uncertainty_answers}
Final Answer:
"""

prompt_final = PromptTemplate.from_template(template_final)

chain_final = (
    {
        "original_answer": RunnablePassthrough(),
        "uncertainty_answers": RunnablePassthrough(),
    }
    | prompt_final
    | llm_query
    | output_parser
)

final_answer = chain_final.invoke({"original_answer": origianl_answer, "uncertainty_answers": answer_uncertainty})

final_answer

'To recover from dehydration, it is recommended to take ORS (oral rehydration solution) as directed. The dosage of ORS depends on age. For adults, it is recommended to consume 2 liters of oral rehydration fluid in the first 24 hours, followed by unrestricted normal fluids with 200 mL of rehydration solution per loose stool or vomit. For children, the recommended dosage is 30-50 mL/kg of ORS over 3-4 hours. It is best to sip the solution every 5-10 minutes rather than drinking it in large quantities less frequently. ORS is a special drink used to treat dehydration caused by diarrhea or vomiting. It works by providing the body with a balanced amount of water, salts, and sugar, helping the body absorb fluids more effectively and restore the electrolyte balance. ORS is safe for children and is specifically formulated to meet their needs. It is important to administer ORS to children in small, frequent sips using a spoon, cup, or oral syringe. While ORS generally does not have significant s

In [95]:
origianl_answer

'To recover from dehydration, you should take ORS (oral rehydration solution) as directed. The recommended dosage of ORS depends on age. For adults, 2 liters of oral rehydration fluid should be given in the first 24 hours, followed by unrestricted normal fluids with 200 mL of rehydration solution per loose stool or vomit. For children, 30-50 mL/kg of ORS should be given over 3-4 hours. It is best to sip the solution every 5-10 minutes rather than drinking it in large quantities less frequently. Additionally, it is important to continue monitoring blood glucose levels, especially for diabetic patients.'

# Only for practice purpose

## check the results from pubmed database 

It is giving only the abstract title and not the full text

eg: 
```[Document(page_content='# ArticleTitle\nStrategies for the prevention and management of coronavirus disease 2019.\n AbstractText\n\n AuthorList\nGuan, Wei-Jie, Chen, Rong-Chang, Zhong, Nan-Shan\n ArticleId\n13993003.00597-2020\n PubMedPubDate\n2020-3-8'),
 Document(page_content='# ArticleTitle\nPrevention is the best treatment.\n AbstractText\n\n AuthorList\nSchneider, H S\n ArticleId\nMissing ArticleId\n PubMedPubDate\n1968-8-1'),
 Document(page_content='# ArticleTitle\nPrevention and Control of Coronavirus Disease 2019: Where Do We Go From Here?\n AbstractText\n\n AuthorList\nNeuzil, Kathleen M\n ArticleId\n6520891\n PubMedPubDate\n2022-1-20')]
 ```

In [3]:
# #-----------------------------------------------------------------------------------------------------------------------
# app = FastAPI(
#     title="LangChain Server",
#     version="1.0",
#     description="A simple API server using LangChain's Runnable interfaces",
# )

In [4]:
def load_model_n_embedding(hyde_llm_name: str, query_llm_name: str, embedding_name: str, temp_hyde: float, temp_query: float, openai_emb_name= None):
    global llm_query
    global llm_hyde
    global embeddings
    global retriever
    global db
    try:
        if embedding_name == "openai-gpt":
            embeddings = OpenAIEmbeddings(openai_emb_name) # getting the embedding model with dim 384
        else:
            print("Loading HuggingFaceEmbeddings with model name: ", embedding_name)
            embeddings = HuggingFaceEmbeddings(model_name=embedding_name)
    except Exception as e: 
        print("Error in embedding load: ",e)
        embeddings = None
        return   
    if hyde_llm_name == "openai-gpt":
        llm_hyde = OpenAI(temperature=temp_hyde)
    else:
        llm_hyde = HuggingFaceHub(repo_id=hyde_llm_name, model_kwargs={"temperature":temp_hyde})
    if query_llm_name == "openai-gpt":
        llm_query= ChatOpenAI(model="gpt-3.5-turbo", temperature=temp_query)
    else:
        llm_query = HuggingFaceHub(repo_id=query_llm_name,model_kwargs={"temperature":temp_query})        
    hyde_embedding_gte = HypotheticalDocumentEmbedder.from_llm(llm_hyde, embeddings, prompt_key="web_search")
    print("HyDe Embedding created")

    embeddings = hyde_embedding_gte

    # store = LocalFileStore("./cache_gte_pubmed/")
    # cached_hyde_embedding_gte = CacheBackedEmbeddings.from_bytes_store(
    #     hyde_embedding_gte, store, 
    # )
    try:
        db = PGVector(
            connection_string=CONNECTION_STRING_2,
            embedding_function=embeddings,
            collection_name="pubmed",
            distance_strategy=DistanceStrategy.COSINE,
        ) 
    except Exception as e:
        print("Error in db creation: ",e)
        
    print("DB created/Loaded")
    retriever = db.as_retriever(search_kwargs={'k':3})
    # chain = load_qa_chain( llm=llm_query, chain_type="stuff") #why stuff?


In [5]:
load_model_n_embedding("openai-gpt", "openai-gpt", "thenlper/gte-small", 0.9, 0.1, 
                    #    openai_emb_name = 'text-embedding-ada-002'
                       )


Loading HuggingFaceEmbeddings with model name:  thenlper/gte-small


  from .autonotebook import tqdm as notebook_tqdm


HyDe Embedding created
DB created/Loaded


In [6]:

# #-----------------------------------------------------------------------------------------------------------------------

query = "What is the best treatment for COVID-19?"
history_qna = []

docs = retriever.get_relevant_documents(query)
docs

[Document(page_content='# ArticleTitle\nStrategies for the prevention and management of coronavirus disease 2019.\n AbstractText\n\n AuthorList\nGuan, Wei-Jie, Chen, Rong-Chang, Zhong, Nan-Shan\n ArticleId\n13993003.00597-2020\n PubMedPubDate\n2020-3-8'),
 Document(page_content='# ArticleTitle\nPrevention is the best treatment.\n AbstractText\n\n AuthorList\nSchneider, H S\n ArticleId\nMissing ArticleId\n PubMedPubDate\n1968-8-1'),
 Document(page_content='# ArticleTitle\nPrevention and Control of Coronavirus Disease 2019: Where Do We Go From Here?\n AbstractText\n\n AuthorList\nNeuzil, Kathleen M\n ArticleId\n6520891\n PubMedPubDate\n2022-1-20')]

In [3]:
embeddingx = HuggingFaceEmbeddings(model_name="thenlper/gte-small")
dbx = PGVector(
            connection_string=CONNECTION_STRING_2,
            embedding_function=embeddingx,
            collection_name="pubmed",
            distance_strategy=DistanceStrategy.COSINE,
        )
retrieverx = dbx.as_retriever(search_kwargs={'k':3})


  from .autonotebook import tqdm as notebook_tqdm


NameError: name 'query' is not defined

In [6]:
query = "What is the best treatment for pneumonia?"

docsx = retrieverx.get_relevant_documents(query)
docsx

[Document(page_content='# ArticleTitle\nTreatment of pneumonia.\n AbstractText\n\n AuthorList\nGREGOIRE, F\n ArticleId\nPMC1937668\n PubMedPubDate\n1960-1-16'),
 Document(page_content='# ArticleTitle\n[Treatment of pneumonia].\n AbstractText\n\n AuthorList\nGREGOIRE, F\n ArticleId\nMissing ArticleId\n PubMedPubDate\n1960-1-1'),
 Document(page_content='# ArticleTitle\nTreatment of pneumonia.\n AbstractText\n\n AuthorList\nADAMS, J M\n ArticleId\nMissing ArticleId\n PubMedPubDate\n1951-3-1')]

In [8]:
dbx.similarity_search(query, k=3)

[Document(page_content='# ArticleTitle\nTreatment of pneumonia.\n AbstractText\n\n AuthorList\nGREGOIRE, F\n ArticleId\nPMC1937668\n PubMedPubDate\n1960-1-16'),
 Document(page_content='# ArticleTitle\n[Treatment of pneumonia].\n AbstractText\n\n AuthorList\nGREGOIRE, F\n ArticleId\nMissing ArticleId\n PubMedPubDate\n1960-1-1'),
 Document(page_content='# ArticleTitle\nTreatment of pneumonia.\n AbstractText\n\n AuthorList\nADAMS, J M\n ArticleId\nMissing ArticleId\n PubMedPubDate\n1951-3-1')]