In [181]:
import os
import credentials

# Expose the OpenAI API key as an environment variable. The key is read from the
# imported `credentials` module, so it is not hard-coded in this notebook.
os.environ["OPENAI_API_KEY"] = credentials.llm_api_key

## Load PDFs from PhD

In [182]:
from langchain.document_loaders import PyPDFLoader

# Load paper 1 (E Widman et al 2015, Phys. Med. Biol. 60 3151). Source:
# https://scholar.google.com/scholar?hl=en&as_sdt=0%2C14&q=erik+widman&btnG=#:~:text=Shear%20wave%20elastography%20plaque%20characterization%20with%20mechanical%20testing%20validation%3A%20a%20phantom%20study
# The PDF path is relative, so the file must be in the notebook's working directory.
loader = PyPDFLoader("Widman_2015_Phys._Med._Biol._60_3151.pdf")
paper_1 = loader.load()

In [183]:
# Inspect the first loaded page to sanity-check the extracted text.
page = paper_1[0]
print(page.page_content)

Physics in Medicine & Biology
      
PAPER • OPEN ACCESS
Shear wave elastography plaque characterization
with mechanical testing validation: a phantom
study
To cite this article: E Widman et al 2015 Phys. Med. Biol.  60 3151 
 
View the article online  for updates and enhancements. You may also like 
Atherosclerotic carotid bifurcation 
phantoms with stenotic soft inclusions for 
ultrasound flow and vessel wall 
elastography imaging 
Boris Chayer, Marcel van den Hoven, 
Marie-Hélène Roy Cardinal et al. -
Arterial waveguide model for shear wave 
elastography: implementation and  in vitro 
validation 
Ali Vaziri Astaneh, Matthew W Urban, 
Wilkins Aquino et al. -
Safety of arterial shear wave 
elastography– ex–vivo assessment of 
induced strain and strain rates 
Tim Nordenfur, Kenneth Caidahl, Dmitry 
Grishenkov et al. -
 
This content was downloaded from IP address 73.72.56.6 on 10/07/2023 at 20:44


In [184]:
# Load paper 2 from https://www.umbjournal.org/article/S0301-5629(16)30107-7/fulltext
loader = PyPDFLoader("PIIS0301562916301077.pdf")
paper_2 = loader.load()

# Load paper 3.
# NOTE(review): the source URL is not recorded here; the file name suggests
# PII S0301-5629(15)00522-0 — TODO confirm and add the link.
loader = PyPDFLoader("PIIS0301562915005220.pdf")
paper_3 = loader.load()

# Load paper 4.
# FIXME(review): this opens the same file as paper 3 ("PIIS0301562915005220.pdf"),
# so paper_3 and paper_4 hold identical content and every chunk of that PDF is
# indexed twice. Presumably a copy-paste slip — confirm which PDF was intended.
loader = PyPDFLoader("PIIS0301562915005220.pdf")
paper_4 = loader.load()

## Split the data (Chunking)

In [185]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter

In [186]:
# Recursive splitter (recommended for general text). It is given a prioritised
# list of separators: paragraph break, line break, space, then the empty string.
r_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_size=2000,
    chunk_overlap=200,
)

# Plain character splitter that breaks on single spaces only, with no overlap.
c_splitter = CharacterTextSplitter(separator=' ', chunk_size=450, chunk_overlap=0)

In [187]:
 # Split the pages of paper 1 into chunks with the recursive splitter.
 # NOTE(review): this cell carries a stray one-space indent; it ran in the notebook,
 # but would be an IndentationError in a plain .py export — consider removing it.
 paper_1_chunks = r_splitter.split_documents(paper_1)

In [188]:
# Compare the number of loaded documents with the number of chunks after splitting.
org_l, new_l = len(paper_1), len(paper_1_chunks)
print(
    f"Original # of documents {org_l}.\nNew # of documents {new_l}."
)

Original # of documents 25.
New # of documents 55.


In [189]:
# Manually review a chunk (repeat with other indices) to make sure the split text looks good.
paper_1_chunks[4]

Document(page_content='In current clinical practice, ultrasound duplex scanning is typically the first level of screen-\ning for atherosclerotic plaques (Saba et al 2012). The degree of stenosis typically has been \nconsidered the parameter of choice to determine the therapeutic approach (Barnett et al 1998), \nbut several investigations (Naghavi et al 2003a, 2003b, Schwarz et al 2013) have demon-\nstrated that the degree of luminal stenosis is only an indirect indicator of the atherosclerotic process and that direct assessment of the plaque structure and composition may be key to predict the development of future cerebrovascular ischemic events. Moreover, it is critical to characterize plaques to determine the most suitable treatment (endarterectomy, angioplasty, or medication) for the patient.\nCurrently, clinical non-invasive ultrasound-based methods for plaque characterization are \nlimited to visual assessment of plaque morphology, hypoechoic area, and echo reflection in the plaqu

Let's do the same chunking on the other papers.

In [190]:
# Chunk the remaining papers with the same recursive splitter, in order 2, 3, 4.
paper_2_chunks, paper_3_chunks, paper_4_chunks = (
    r_splitter.split_documents(paper) for paper in (paper_2, paper_3, paper_4)
)

Combine the chunks from all four papers into one large list.

In [191]:
# Flatten the four per-paper chunk lists into a single list, preserving paper order.
chunks = [
    *paper_1_chunks,
    *paper_2_chunks,
    *paper_3_chunks,
    *paper_4_chunks,
]
len(chunks)

169

In [192]:
# Alternative way to combine the chunks: grow one list by extending it with
# each paper's chunks in turn.
chunks_2 = []
for paper_chunks in (paper_1_chunks, paper_2_chunks, paper_3_chunks, paper_4_chunks):
    chunks_2.extend(paper_chunks)

len(chunks_2)

169

## Convert Chunks to Embeddings

In [193]:
from langchain.embeddings.openai import OpenAIEmbeddings
# Embedding model used to vectorise the chunks, constructed with library defaults.
# Presumably picks up OPENAI_API_KEY from the environment — confirm against the LangChain docs.
embedding = OpenAIEmbeddings()

In [194]:
from langchain.vectorstores import Chroma

In [195]:
# Directory handed to Chroma for its database files. The path is relative to the
# current working directory and contains spaces, so quote it in any shell command.
persist_directory = 'documents/Machine Learning Projects/Langchain/chat_with_data/chroma/'

In [196]:
# Remove old database files, if any, so the vector store is rebuilt from scratch.
# The earlier `!rm -rf` passed the path unquoted; because the path contains spaces
# the shell split it into three separate arguments, so the real directory was never
# removed and unrelated paths could have been deleted. shutil.rmtree receives the
# path as one string and works on every platform; ignore_errors=True keeps the
# "do nothing if it does not exist" behaviour of `rm -f`.
import shutil

shutil.rmtree(persist_directory, ignore_errors=True)

In [197]:
# Embed every chunk and build the Chroma vector store, using persist_directory
# as its storage location. chunks_2 is used here; `chunks` was built from the
# same four per-paper lists.
vectordb = Chroma.from_documents(
    documents=chunks_2,
    embedding=embedding,
    persist_directory=persist_directory
)

In [198]:
# Sanity check: the number of stored entries should equal len(chunks_2).
# NOTE: _collection is an underscore-prefixed (private) attribute and may change between library versions.
print(vectordb._collection.count())

169


#### Try querying the data

In [222]:
# Question to ask of the papers. Alternative questions are kept commented out;
# swap the active line to try one of them.
#question = "How do I mechanically measure the shear modulus?"
#question = "What's the cause of plaque formation in the arteries?"
question = "How can Shear wave elastography be used to characterize arterial plaque?"

In [223]:
# Retrieve the 5 stored chunks most similar to the question.
docs = vectordb.similarity_search(question, k = 5)

In [224]:
# Show the text of the second returned result (index 1).
print(docs[1].page_content)

Physics in Medicine & Biology
      
PAPER • OPEN ACCESS
Shear wave elastography plaque characterization
with mechanical testing validation: a phantom
study
To cite this article: E Widman et al 2015 Phys. Med. Biol.  60 3151 
 
View the article online  for updates and enhancements. You may also like 
Atherosclerotic carotid bifurcation 
phantoms with stenotic soft inclusions for 
ultrasound flow and vessel wall 
elastography imaging 
Boris Chayer, Marcel van den Hoven, 
Marie-Hélène Roy Cardinal et al. -
Arterial waveguide model for shear wave 
elastography: implementation and  in vitro 
validation 
Ali Vaziri Astaneh, Matthew W Urban, 
Wilkins Aquino et al. -
Safety of arterial shear wave 
elastography– ex–vivo assessment of 
induced strain and strain rates 
Tim Nordenfur, Kenneth Caidahl, Dmitry 
Grishenkov et al. -
 
This content was downloaded from IP address 73.72.56.6 on 10/07/2023 at 20:44


In [225]:
# Same search, but each hit comes back paired with a relevance score: the result
# is a (Document, score) tuple rather than a bare Document, so the score is NOT
# stored in the document metadata. Use the score to keep only the most relevant chunks.
# Note: this rebinds `docs`; from here on docs[i] is a tuple, so use docs[i][0].page_content.
docs = vectordb.similarity_search_with_relevance_scores(question, k=5)

In [226]:
# Print the fifth result (index 4) of the scored search.
print(docs[4])

(Document(page_content='It has been suggested that assessment of the mechanical\nproperties of plaques to determine plaque vulnerabilitywould be a better measure than visual assessment of the\ndegree of lumen stenosis. Thus, accurate quantitative\ntechniques for characterization of mechanical properties\nof plaques are needed.\nMany attempts have been made to use ultrasound-\nbased methods to characterize plaque composition, such\nas gray-scale median ( Kanber et al. 2013 ), contrast-\nenhanced ultrasound imaging ( Muller et al. 2014 ), strain\nimaging by speckle tracking ( Widman et al. 2015a ), ther-\nmal strain imaging ( Mahmoud et al. 2013 ), intravascular\nultrasound elastography ( Zhang et al. 2011 ) and acoustic\nradiation force impulse (ARFI) imaging ( Allen et al.\n2011; Czernuszewicz et al. 2015 ), but these techniques\nhave various technical limitations (high variability) or\nsuffer from conﬂicting studies of effectiveness and are\nnot part of clinical practice. Shear wave e

## Query documents with LLM

In [227]:
from langchain.chains import RetrievalQA

# Load the chat LLM that will answer questions over the retrieved chunks.
from langchain.chat_models import ChatOpenAI
llm_name = "gpt-3.5-turbo"
# temperature=0 — presumably chosen to keep the answers as repeatable as possible.
llm = ChatOpenAI(model_name=llm_name, temperature=0)

In [228]:
# Build a question-answering chain that uses the vector store as its retriever
# (retriever created with default settings).
qa_chain = RetrievalQA.from_chain_type(llm, retriever=vectordb.as_retriever())

In [229]:
# Run the chain on the question defined in the querying section.
result = qa_chain({"query": question})

In [230]:
# The generated answer is stored under the "result" key.
result["result"]

'Shear wave elastography (SWE) is an emerging imaging modality that uses the shear modulus of tissue as the contrast mechanism in images. SWE uses ultrasound radiation force to non-invasively generate shear waves in the tissue and ultrasonic tracking methods measure the shear wave propagation speed, which is directly proportional to the material properties. In the case of arterial plaque, SWE can be used to measure the shear modulus of the plaque, which provides information about its stiffness. This can help in characterizing the plaque and determining its vulnerability. By comparing the shear modulus of the plaque to that of the surrounding tissue, SWE can provide quantitative information about the plaque composition and help in selecting the most suitable treatment for patients with carotid plaques.'

# Chatbot
See class 6 of the LangChain "Chat with Your Data" course for the chatbot UI implementation and for adding memory to the LLM.