In [1]:
##############################################################
## This code is for academic and educational purposes only. ##
## Event: Global Summit 2024 Maryland USA                   ##
## InterSystems Corporation 2024 (C)                        ##
## Date: June 9th 2024                                      ##
##############################################################

##### We are going to use llama index that allows us to load and store data from file and put it into iris
from llama_index import download_loader
from llama_index import SimpleDirectoryReader, StorageContext, ServiceContext
from llama_index.readers.json import JSONReader
from llama_index.indices.vector_store import VectorStoreIndex
from llama_iris import IRISVectorStore

from dotenv import load_dotenv
load_dotenv(override=True)

import os

##### Let's load our dataset
reader = JSONReader(is_jsonl=True)
documents = reader.load_data('./data/healthcare/augmented_notes_100.jsonl')


ModuleNotFoundError: No module named 'dotenv'

In [16]:
##### Let's see the first 5 documents
documents[:5]

##### We have already reduced these documents (in Step 0) to just the text and first 100 documents

[Document(id_='7253f537-bf60-46a4-be60-bd96c7844088', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='"note": "A a sixteen year-old girl, presented to our Outpatient department with the complaints of discomfort in the neck and lower back as well as restriction of body movements. She was not able to maintain an erect posture and would tend to fall on either side while standing up from a sitting position. She would keep her head turned to the right and upwards due to the sustained contraction of the neck muscles. There was a sideways bending of the back in the lumbar region. To counter the abnormal positioning of the back and neck, she would keep her limbs in a specific position to allow her body weight to be supported. Due to the restrictions with the body movements at the neck and in the lumbar region, she would require assistance in standing and walking. She would require her parents to help her with daily chores, in

In [19]:
##### Configuring IRIS
# Setup our demo connectivity
username = 'demo'
password = 'demo' 
hostname = os.getenv('IRIS_HOSTNAME', 'localhost')
port = '65066' 
namespace = 'USER'
CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}"
#####

In [None]:
##### Here, we connect the dataset into the IRISVectorStore helper
vector_store = IRISVectorStore.from_params(
    connection_string=CONNECTION_STRING,
    table_name="augmented_notes",
    embed_dim=1536,  # openai embedding dimension
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

In [None]:
##### Finally, We can connect into the iris instance and save our data in a vectorized format
## TODO: explain how embeddings work and why we're using them
##### Below, we setup how we are going to index the vectorized data (using an embeddings model)
index = VectorStoreIndex.from_documents(
    documents,                              ##### These are our clinical notes we loaded up
    storage_context=storage_context,        ##### This is our connection to the vector store
    show_progress=True,                     ##### Let's see the progress as it happens
)

##### To interact with our embeddings, we take the query engine from our documents
query_engine = index.as_query_engine()      ##### The "as_query_engine" is a llama_index directive which lets 
                                            ##### us search and retrieve based on vector similarity

In [20]:
##### Now, let's use this against our vector store!

response = query_engine.query("36 year old patient with a history of pain")
import textwrap
print(textwrap.fill(str(response), 100))

NameError: name 'query_engine' is not defined