In [1]:
import os

from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatAnthropic
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores.qdrant import Qdrant

from getpass import getpass

In [2]:
os.environ['ANTHROPIC_API_KEY'] = getpass("Enter Anthropic key:")
os.environ['QDRANT_API_KEY'] = getpass("Enter Qdrant API key:")
#qdrant_url = 'https://58de2381-e750-4aed-8eb2-7b08d8faf30b.us-east4-0.gcp.cloud.qdrant.io:6333'
qdrant_url = 'https://05eacc30-67bb-4870-ad13-9ab539b30239.us-east4-0.gcp.cloud.qdrant.io:6333'
os.environ['SENTENCE_TRANSFORMERS_HOME'] = '/mnt/data/MedRAG-JA/model_cache/'

Enter Anthropic key: ········
Enter Qdrant API key: ········


In [3]:
# Load the data
loader = CSVLoader(file_path='/mnt/code/data/disease_components.csv',source_column="link")
data = loader.load()

In [4]:
metadatas = []
texts = []
for row in data:
  metadatas.append(row.metadata)
  texts.append(row.page_content)
print(len(metadatas),len(texts))

1183 1183


In [5]:
prompt_template = """Use the following pieces of context to answer the question enclosed within  3 backticks at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
Please provide an answer which is factually correct and based on the information retrieved from the vector store.
Please also mention any quotes supporting the answer if any present in the context supplied within two double quotes "" .

{context}

QUESTION:```{question}```
ANSWER:
"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context","question"])
#
chain_type_kwargs = {"prompt": PROMPT}

In [6]:
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-small-en",
                                      model_kwargs=model_kwargs,
                                      encode_kwargs=encode_kwargs
                                     )

doc_store = Qdrant.from_texts(texts,
                          metadatas=metadatas,
                          embedding=embeddings,
                          url=qdrant_url,
                          api_key=os.environ['QDRANT_API_KEY'],
                          collection_name=f"medical_qa_search")

rag_llm = ChatAnthropic(temperature=0,
                        anthropic_api_key=os.environ["ANTHROPIC_API_KEY"])

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.2k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/134M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [7]:
qa_chain = RetrievalQA.from_chain_type(llm=rag_llm,
                                       chain_type="stuff",
                                       chain_type_kwargs={"prompt": PROMPT},
                                       retriever=doc_store.as_retriever(search_kwargs={"k": 5}),
                                       return_source_documents=True
                                      )

In [8]:
user_question = input("Please provide the symptoms here :")
result = qa_chain(user_question)

Please provide the symptoms here : What is the cause of tendonitis?


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [9]:
result['result']

' Based on the information provided, the main cause of tendinitis is repetitive or intense strain on the tendon over time, especially from repetitive motions involved in certain jobs, hobbies, or sports activities. Some key evidence from the context:\n\nFrom the Tendinitis overview:\n"Overuse or strain on a joint can irritate tendons and result in tendinitis."\n\nFrom the Tendinitis causes:  \n"Although tendinitis can be caused by a sudden injury, the condition is much more likely to stem from the repetition of a particular movement over time. Most people develop tendinitis because their jobs or hobbies involve repetitive motions, which put stress on the tendons."\n\nFrom the Achilles Tendinitis causes:\n"Achilles tendinitis is caused by repetitive or intense strain on the Achilles tendon, the band of tissue that connects your calf muscles to your heel bone."\n\nSo in summary, the main cause is repetitive strain on the tendon over time, especially from repetitive motions.'

In [10]:
result['source_documents'][0].page_content

'name: Tendinitis\nlink: https://www.mayoclinic.org/diseases-conditions/tendinitis/symptoms-causes/syc-20378243\nSymptoms: [\'Signs and symptoms of tendinitis tend to occur at the point where a tendon attaches to a bone and typically include:\', \'Most cases of tendinitis respond to self-care measures. See your doctor if your signs and symptoms persist and interfere with your daily activities for more than a few days.\', \'\', \'\', \'\', \'Pain often described as a dull ache, especially when moving the affected limb or joint\', \'Tenderness\', \'Mild swelling\']\nOverview: [\'\', \'Tendons are thick fibrous cords that attach muscles to bone. Overuse or strain on a joint can irritate tendons and result in tendinitis.\', \'\', \'Tendinitis is inflammation or irritation of a tendon — the thick fibrous cords that attach muscle to bone. The condition causes pain and tenderness just outside a joint.\', "While tendinitis can occur in any of your tendons, it\'s most common around your shoulde