In [1]:
from dotenv import load_dotenv

# Read a .env file (if one is found) into the process environment so that the
# os.getenv() lookups in later cells can see those settings.
load_dotenv()

True

In [4]:
from llama_index.core import SimpleDirectoryReader

import os

# Location of the source PDF. Set the DRUG_INTERACTIONS_PDF environment variable
# to run this notebook on another machine; the fallback is the original author's
# local path, kept so the existing setup keeps working unchanged.
DRUG_INTERACTIONS_PDF = os.getenv(
    "DRUG_INTERACTIONS_PDF",
    "/Users/yelopez/git/heimat/medivise-hackathon/data/drug-interactions-book.pdf",
)

# Parse the PDF. Each loaded document carries a "page_label" entry in its
# metadata, which later cells use to filter pages.
reader = SimpleDirectoryReader(input_files=[DRUG_INTERACTIONS_PDF])
pages = reader.load_data()

In [5]:
# Convert every loaded page into its LangChain document equivalent, then
# display how many there are.
documents = list(map(lambda loaded_page: loaded_page.to_langchain_format(), pages))
len(documents)

1473

In [6]:

# Drop pages whose page_label is not made up solely of digits, so the label
# can be parsed with int() in the next step.
filtered_documents = list(
    filter(lambda candidate: candidate.metadata["page_label"].isdigit(), documents)
)
len(filtered_documents)

1464

In [7]:

# Keep only pages labelled 11 through 1300. The lower bound skips the table of
# contents; NOTE(review): the upper bound presumably cuts off back matter such
# as the index - confirm against the PDF.
PAGE_LABEL_LOWER_EXCLUSIVE = 10
PAGE_LABEL_UPPER_EXCLUSIVE = 1301
raw_docs = [
    page
    for page in filtered_documents
    if PAGE_LABEL_LOWER_EXCLUSIVE < int(page.metadata["page_label"]) < PAGE_LABEL_UPPER_EXCLUSIVE
]


In [8]:
from langchain_text_splitters import CharacterTextSplitter

# Split the selected pages into overlapping chunks, breaking on newlines.
# NOTE: this rebinds `documents` - it previously held the per-page list and
# from here on holds the chunk list.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(raw_docs)

# Pages in vs. chunks out.
print(f"{len(raw_docs)} {len(documents)}")

1290 13757


In [9]:
# Peek at the first chunk to sanity-check the split.
documents[0]

Document(page_content='General considerations 11\nTo aid collection of data in this area, health professionals should routine-\nly ask patients about their use of herbal medicines and supplements, and\nreport any unexpected responses to treatment. \nAn additional problem in interpreting these interactions, is that the inter-\nacting constituent of the herb is usually not known and is therefore not\nstandardised for. It could vary widely between different products, and\nbatches of the same product.\nSt John’s wort\nAn increasing number of reports have implicated St John’s wort ( Hyperi-\ncum perforatum ) in drug interactions. Evidence has shown that the herb\ncan induce the cytochrome P450 isoenzyme CYP3A4, and can also induce\n‘P-glycoprotein’, (p.8). Hence St John’s wort decreases the levels of\n‘ciclosporin’, (p.1037) and ‘digoxin’, (p.927), respectively. Other less cer-\ntain evidence suggests that CYP2E1 and CYP1A2 may also be induced.\nSt John’s wort has serotonergic properties, a

In [10]:
from langchain_community.embeddings import DatabricksEmbeddings


# Embedding model, served from a Databricks endpoint.
EMBEDDING_ENDPOINT = "databricks-bge-large-en"
embeddings = DatabricksEmbeddings(endpoint=EMBEDDING_ENDPOINT)

# Smoke test: embed one sentence and show the vector length
# (1024 in the recorded output).
query_result = embeddings.embed_query("This is a test document.")
len(query_result)


* 'schema_extra' has been renamed to 'json_schema_extra'


1024

In [11]:
import os

# Connection settings are read from the environment. os.getenv() returns None
# for an unset variable, and that None would only cause trouble later
# (MongoClient(None) falls back to its default host; DB_NAME + "." raises an
# unrelated TypeError), so check up front and name exactly what is missing.
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("MONGO_DB")
COLLECTION_NAME = os.getenv("MONGO_COLLECTION_NAME")
ATLAS_VECTOR_SEARCH_INDEX_NAME = os.getenv("ATLAS_VECTOR_SEARCH_INDEX_NAME")

_missing_env_vars = [
    env_name
    for env_name, env_value in (
        ("MONGO_URI", MONGO_URI),
        ("MONGO_DB", DB_NAME),
        ("MONGO_COLLECTION_NAME", COLLECTION_NAME),
        ("ATLAS_VECTOR_SEARCH_INDEX_NAME", ATLAS_VECTOR_SEARCH_INDEX_NAME),
    )
    if not env_value  # treats an empty string the same as unset
]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )


In [12]:
from pymongo import MongoClient

# Create the MongoDB client from the URI read from the environment above.
client = MongoClient(MONGO_URI)

In [14]:
# Inspect the first two chunks before they are embedded and stored.
documents[0:2]

[Document(page_content='General considerations 11\nTo aid collection of data in this area, health professionals should routine-\nly ask patients about their use of herbal medicines and supplements, and\nreport any unexpected responses to treatment. \nAn additional problem in interpreting these interactions, is that the inter-\nacting constituent of the herb is usually not known and is therefore not\nstandardised for. It could vary widely between different products, and\nbatches of the same product.\nSt John’s wort\nAn increasing number of reports have implicated St John’s wort ( Hyperi-\ncum perforatum ) in drug interactions. Evidence has shown that the herb\ncan induce the cytochrome P450 isoenzyme CYP3A4, and can also induce\n‘P-glycoprotein’, (p.8). Hence St John’s wort decreases the levels of\n‘ciclosporin’, (p.1037) and ‘digoxin’, (p.927), respectively. Other less cer-\ntain evidence suggests that CYP2E1 and CYP1A2 may also be induced.\nSt John’s wort has serotonergic properties, 

In [16]:
from langchain.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch

MONGODB_COLLECTION = client[DB_NAME][COLLECTION_NAME]

# Embedding and inserting every chunk is expensive and, more importantly, not
# idempotent: from_documents() always inserts, so re-running this cell would
# store every chunk a second time and similarity search would start returning
# duplicates. Ingest only while the collection is still empty; otherwise just
# attach to what is already stored.
if MONGODB_COLLECTION.count_documents({}, limit=1) == 0:
    vector_store = MongoDBAtlasVectorSearch.from_documents(
        documents,
        embeddings,
        collection=MONGODB_COLLECTION,
        index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    )
else:
    # NOTE(review): assumes a non-empty collection already holds these chunks.
    # Empty the collection first if a fresh ingest is wanted.
    vector_store = MongoDBAtlasVectorSearch(
        MONGODB_COLLECTION,
        embeddings,
        index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    )

In [17]:
# Quick retrieval check against the freshly built store. As the recorded output
# shows, each hit's metadata includes the stored '_id' and the full 'embedding'
# vector, which makes the raw result very verbose.
vector_store.similarity_search("paracetamol")


[Document(page_content='alcohol daily in men and 40 g daily in women).\n23 \nIn a randomised, placebo-controlled study, there was no difference in\nmeasures of hepatotoxicity (mean AST levels, mean INR) between 102 al-\ncoholic patients who received paracetamol 1 g four times daily for 2 days,\nand 99 alcoholic patients who received placebo. In this study, patients hadentered an alcohol detoxification centre, and were given paracetamol im-Alcohol + Orlistat\nAlcohol + Paracetamol (Acetaminophen)', metadata={'_id': ObjectId('663788137ba472d5b7fab4a1'), 'embedding': [0.01251983642578125, 0.00811004638671875, 0.002056121826171875, 0.023529052734375, -0.052093505859375, -0.0372314453125, 0.007320404052734375, 0.01702880859375, -0.031707763671875, 0.0177001953125, 0.039581298828125, -0.05462646484375, 0.01168060302734375, -0.028228759765625, 0.00902557373046875, 0.013763427734375, -0.01293182373046875, -0.056396484375, 0.002124786376953125, 0.0026683807373046875, 0.0102691650390625, 0.03753

In [18]:
from langchain_community.chat_models import ChatDatabricks
# Chat model used for answer generation, served from a Databricks endpoint.
llm = ChatDatabricks(endpoint="databricks-dbrx-instruct")


In [19]:
# Smoke test: confirm the chat endpoint answers before wiring it into a chain.
llm.invoke("hello!")

AIMessage(content="Hello! How can I assist you today? I'm here to help with any questions or tasks you have. Just let me know what you need.", response_metadata={'prompt_tokens': 226, 'completion_tokens': 30, 'total_tokens': 256}, id='run-c508a444-64c5-4efc-bd43-3692781f90c5-0')

In [20]:
from langchain.vectorstores.mongodb_atlas import MongoDBAtlasVectorSearch
from langchain_openai import OpenAIEmbeddings


# Attach to the already-populated Atlas collection by connection string, using
# the same embedding model that was used at ingest time. This rebinds
# `vector_store`, which was previously created from the documents.
mongo_namespace = ".".join((DB_NAME, COLLECTION_NAME))  # "<database>.<collection>"
vector_store = MongoDBAtlasVectorSearch.from_connection_string(
    MONGO_URI,
    mongo_namespace,
    embeddings,
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
)

# Expose the store through the retriever interface used by the chains below.
retriever = vector_store.as_retriever()

In [23]:
def remove_metadata(documents):
    """Reduce retrieved documents to their raw text.

    Args:
        documents: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        list: The ``page_content`` of each document, in input order.
    """
    contents = []
    for item in documents:
        contents.append(item.page_content)
    return contents

In [28]:
import json
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser


# Prompt for the medication-compatibility question. It has two placeholders:
# {context} (the retrieved text) and {medications} (the user's query). The
# prompt asks the model for a JSON object with the keys "can_mix" and
# "explanation".
template = """Use the following pieces of context to answer the question at the end. 
The following is information related to medications:
{context}
Based on the context information reply if I can mix the following medications: {medications}

Consider the information provided and tell me if there is evidence that explains why I can't mix the medicines. If there is no information to answer the question reply with "No information available".
Can I mix the medications? Yes or No. If yes, provide a brief explanation for a patient. 
If no, provide a brief explanation for a patient about why the medications can't be mixed.
Return a JSON with the following structure using the keys "can_mix", "explanation":
"""
rag_prompt = PromptTemplate.from_template(template)

# Chain wiring: the input string is sent to the retriever (whose documents are
# reduced to plain text by remove_metadata) to fill {context}, and is passed
# through unchanged to fill {medications}. The filled prompt goes to the LLM
# and StrOutputParser returns the reply as a string.
# NOTE(review): the result is the raw model text, not parsed JSON; the `json`
# import in this cell is not used by the chain.
rag_chain = (
    {"context": retriever | remove_metadata, "medications": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [29]:
# Run the RAG chain on one medication pair and print the model's raw reply.
response_rag = rag_chain.invoke("Warfarin and ibuprofen")
print(response_rag)


{
"can_mix": "No",
"explanation": "The information provided indicates that there is a potential interaction between Warfarin and ibuprofen. Combining these medications may increase the risk of bleeding due to the effect of ibuprofen on blood clotting. It is important to consult with your healthcare provider before mixing these medications."
}


In [30]:
# Render the chain's runnable graph as ASCII to verify the wiring.
rag_chain.get_graph().print_ascii()

              +------------------------------------+         
              | Parallel<context,medications>Input |         
              +------------------------------------+         
                      ****               ****                
                   ***                       ***             
                 **                             ***          
  +----------------------+                         **        
  | VectorStoreRetriever |                          *        
  +----------------------+                          *        
              *                                     *        
              *                                     *        
              *                                     *        
+-------------------------+                  +-------------+ 
| Lambda(remove_metadata) |                  | Passthrough | 
+-------------------------+                  +-------------+ 
                      ****               ****                
        

In [26]:
from langchain.chains import RetrievalQA

# Baseline for comparison: LangChain's stock RetrievalQA chain over the same
# LLM and retriever, with no custom prompt supplied.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)


In [27]:
# Ask the baseline chain about a medication pair; the result is a dict with
# 'query' and 'result' keys (see the recorded output).
qa_chain.invoke("Warfarin and acetaminophen?")

{'query': 'Warfarin and acetaminophen?',
 'result': 'I cannot find any information about the interaction between warfarin and acetaminophen in the provided text.'}