In [24]:
# Fetch the OpenAI API key through the project's `helper` module and keep it
# in OPENAI_API_KEY.
# NOTE(review): OPENAI_API_KEY is not referenced again in the visible cells --
# presumably the OpenAI clients created later obtain the key some other way
# (e.g. from the environment); confirm.
from helper import get_openai_api_key

OPENAI_API_KEY = get_openai_api_key()

In [25]:
# Many of the modules used below rely on async code. nest_asyncio.apply() is
# called once, up front, so that code stays compatible with the Jupyter notebook.
import nest_asyncio

nest_asyncio.apply()

In [26]:
from llama_index.core import SimpleDirectoryReader

# Read the single source PDF; the nodes and indexes built later all derive
# from `documents`.
documents = SimpleDirectoryReader(
    input_files=["EDS.pdf"],
).load_data()

In [27]:
from llama_index.core.node_parser import SentenceSplitter

# Break the loaded documents into nodes using a chunk size of 1024.
# NOTE(review): chunk_size is presumably a token budget per chunk rather than
# a guarantee of evenly sized chunks -- confirm against the splitter docs.
splitter = SentenceSplitter(
    chunk_size=1024,
)
nodes = splitter.get_nodes_from_documents(documents)

In [28]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# LlamaIndex is a library for applying LLMs on top of private or
# domain-specific data. Register the models it should use on the shared
# Settings object: OpenAI's gpt-3.5-turbo as the LLM, and a separate OpenAI
# model for generating embeddings.
Settings.llm = OpenAI(
    model="gpt-3.5-turbo",
)
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
)

In [29]:
from llama_index.core import SummaryIndex, VectorStoreIndex

# An index is a set of metadata over the data. Two indexes are built from the
# same `nodes` list.

# Summary index: returns all nodes in the index.
summary_index = SummaryIndex(nodes)

# Vector index: indexes nodes via text embeddings and returns the most
# similar nodes.
vector_index = VectorStoreIndex(nodes)

In [30]:
# Turn each index into a query engine, i.e. a query interface over the data it
# stores. The query engine is the backend component; the query tools that wrap
# it are created separately.
summary_query_engine = summary_index.as_query_engine(use_async=True, response_mode="tree_summarize")
vector_query_engine = vector_index.as_query_engine()  # no extra options

In [31]:
from llama_index.core.tools import QueryEngineTool

# Wrap each backend query engine in a query tool, the user-facing piece used
# to interact with the engine. Each tool carries a description of what its
# engine is useful for.
summary_tool = QueryEngineTool.from_defaults(
    description="Useful for summarization questions related to document",
    query_engine=summary_query_engine,
)
vector_tool = QueryEngineTool.from_defaults(
    description="Useful for retrieving specific context from the document.",
    query_engine=vector_query_engine,
)

In [32]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

# Router over the two tools: an LLM-based single selector picks which tool's
# query engine handles each query. With verbose=True the choice shows up in the
# output as a "Selecting query engine N: ..." line.
query_engine = RouterQueryEngine(
    query_engine_tools=[summary_tool, vector_tool],
    selector=LLMSingleSelector.from_defaults(),
    verbose=True,
)

In [33]:
# Broad summarization question sent through the router.
# print() already renders the response through str().
response = query_engine.query(
    "What is the summary of the document?",
)
print(response)

[1;3;38;5;200mSelecting query engine 0: Useful for summarization questions related to MetaGPT.
[0mThe document outlines a randomized controlled trial protocol focusing on patients with hypermobility spectrum disorder (HSD) or hypermobile Ehlers-Danlos syndrome (hEDS) who have long-lasting shoulder complaints. The study aims to compare the effectiveness of a 16-week progressive heavy shoulder strengthening program (HEAVY) with low-load training (LIGHT) on self-reported shoulder symptoms, function, and quality of life. The trial involves 100 patients with HSD/hEDS and shoulder complaints, with primary outcomes measured using the Western Ontario Shoulder Instability Index (WOSI). The study is designed as a superiority, parallel group, randomized controlled trial with blinded outcome assessors and participants. The primary objective is to assess the difference in self-reported shoulder-related symptoms, function, and quality of life between the HEAVY and LIGHT groups over 16 weeks. The d

In [34]:
# Report how many source nodes are attached to the most recent response.
print(len(response.source_nodes))

37


In [35]:
# Narrow, fact-seeking question sent through the router.
# NOTE(review): the recorded answer reads like a list of exclusions (e.g.
# "connective tissue diseases (excluding hEDS)") rather than signs of EDS --
# verify the retrieved context before trusting this output.
response = query_engine.query("How do I tell if a patient has EDS?")
print(response)

[1;3;38;5;200mSelecting query engine 1: Useful for retrieving specific context from the MetaGPT paper..
[0mLook for clinical suspicion of referred pain from the cervical spine, diagnosis of systemic inflammatory rheumatic diseases, connective tissue diseases (excluding hEDS), Marfans, Stickler's or Loeys Dietz syndromes, and/or neurological diseases. Also, consider if the patient has had shoulder surgery within the past year, received a steroid injection in the affected shoulder in the previous 3 months, or has been pregnant or given birth within the past year or planning to get pregnant during the study period due to increased levels of relaxin.


In [36]:
# Build the router query engine through the helper in the utils file instead
# of the cell-by-cell construction used earlier in this notebook. This rebinds
# `query_engine`, replacing the router created inline.
# NOTE(review): the recorded selector logs mention "MetaGPT", while the tool
# descriptions written in this notebook say "document" -- the saved outputs may
# be stale, or the helper's descriptions may differ; re-run and confirm.
from utils import get_router_query_engine

query_engine = get_router_query_engine("EDS.pdf")

In [37]:
# Ask the same question again, this time using whichever engine `query_engine`
# is currently bound to (the utils helper's, when the cells run in order).
response = query_engine.query(
    "How do I tell if a patient has EDS?",
)
print(response)

[1;3;38;5;200mSelecting query engine 1: Useful for retrieving specific context from the MetaGPT paper..
[0mA patient may be suspected of having EDS if they exhibit symptoms such as systemic inflammatory rheumatic diseases, connective tissue diseases (excluding hEDS), or if they have been diagnosed with Marfans, Stickler's, or Loeys Dietz syndromes. Additionally, a history of shoulder surgery within the past year or a steroid injection in the affected shoulder in the previous 3 months could also indicate EDS.
