We want to use `OpenAIEmbeddings` so we have to get the OpenAI API Key.

In [8]:
import getpass
import os
from dotenv import load_dotenv

load_dotenv(override=True)

if not os.environ.get("OPENAI_API_KEY"): 
    #os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
    pass

# os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")  
# os.environ["OPENAI_API_KEY"]



In [11]:
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader, JSONLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.fastembed import FastEmbedEmbeddings

from langchain_iris import IRISVector


In [14]:
# loader = TextLoader("../data/state_of_the_union.txt", encoding='utf-8')
# Windows only install: 
# ! pip install https://jeffreyknockel.com/jq/jq-1.4.0-cp311-cp311-win_amd64.whl
# Other platforms
# ! pip install jq
#

loader = JSONLoader(
    file_path='./data/healthcare/augmented_notes_100.jsonl',
    jq_schema='.note',
    json_lines=True # TODO: tell audience what json lines are
)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()
# embeddings = FastEmbedEmbeddings()


[notice] A new release of pip available: 22.3 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip
  warn_deprecated(


Collecting jq==1.4.0
  Downloading https://jeffreyknockel.com/jq/jq-1.4.0-cp311-cp311-win_amd64.whl (348 kB)
     -------------------------------------- 348.2/348.2 kB 1.4 MB/s eta 0:00:00
Installing collected packages: jq
Successfully installed jq-1.4.0


In [20]:
username = 'demo'
password = 'demo' 
hostname = os.getenv('IRIS_HOSTNAME', 'localhost')
port = '51729' # '1972'
namespace = 'USER'
CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}"

In [21]:
# print(os.environ.get("OPENAI_API_KEY"))
print(CONNECTION_STRING)


iris://demo:demo@localhost:51729/USER


In [22]:
COLLECTION_NAME = "augmented_notes"

db = IRISVector.from_documents(
    embedding=embeddings,
    documents=docs,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
)

In [7]:
# If reconnecting to the database, use this:

# db = IRISVector(
#     embedding_function=embeddings,
#     dimension=1536,
#     collection_name=COLLECTION_NAME,
#     connection_string=CONNECTION_STRING,
# )

In [23]:
# To add documents to existing vector store:

db.add_documents(documents)

['0c2dcf5d-0894-11ef-be38-3448ed843086',
 '0c2dcf5e-0894-11ef-aee6-3448ed843086',
 '0c2dcf5f-0894-11ef-8e84-3448ed843086',
 '0c2dcf60-0894-11ef-a90d-3448ed843086',
 '0c2dcf61-0894-11ef-9a32-3448ed843086',
 '0c2dcf62-0894-11ef-a17a-3448ed843086',
 '0c2dcf63-0894-11ef-988a-3448ed843086',
 '0c2dcf64-0894-11ef-913d-3448ed843086',
 '0c2dcf65-0894-11ef-9f09-3448ed843086',
 '0c2dcf66-0894-11ef-b210-3448ed843086',
 '0c2dcf67-0894-11ef-870f-3448ed843086',
 '0c2dcf68-0894-11ef-9065-3448ed843086',
 '0c2dcf69-0894-11ef-850c-3448ed843086',
 '0c2dcf6a-0894-11ef-93e1-3448ed843086',
 '0c2dcf6b-0894-11ef-afa7-3448ed843086',
 '0c2dcf6c-0894-11ef-b735-3448ed843086',
 '0c2dcf6d-0894-11ef-b81e-3448ed843086',
 '0c2dcf6e-0894-11ef-be5f-3448ed843086',
 '0c2dcf6f-0894-11ef-83cf-3448ed843086',
 '0c2dcf70-0894-11ef-8703-3448ed843086',
 '0c2dcf71-0894-11ef-a2c4-3448ed843086',
 '0c2dcf72-0894-11ef-8e11-3448ed843086',
 '0c2dcf73-0894-11ef-9da0-3448ed843086',
 '0c2dcf74-0894-11ef-abea-3448ed843086',
 '0c2dcf75-0894-

In [30]:
print(f"Number of docs in vector store: {len(db.get()['ids'])}")

Number of docs in vector store: 114


In [24]:
query = "19 year old patient"
docs_with_score = db.similarity_search_with_score(query)

In [25]:
for doc, score in docs_with_score:
    print("-" * 80)
    print("Score: ", score)
    print(doc.page_content)
    print("-" * 80)

--------------------------------------------------------------------------------
Score:  0.192119326953393
A previously healthy 73-year-old Caucasian female presents to the clinic with a history of progressive fatigue and dyspnea on exertion over the past couple of months. She denied angina, palpitations, syncope or any other associated symptoms and did not have any cardiovascular disease risk factors. On examination, she was stable and in no distress. Her blood pressure was 148/66 mmHg with a regular pulse of 48 beats per minute. She had no clinical evidence of heart failure on cardiovascular examination but was found to have an S4 on auscultation and cannon A waves on assessment of her jugular venous pressure (JVP). Her exam was otherwise unremarkable. Twelve-lead electrocardiogram during the clinic visit revealed complete heart block with a junctional escape rhythm at 49 beats per minute with right bundle branch block morphology; her 12-lead electrocardiogram 3 months earlier also r

In [33]:
db.add_documents([Document(page_content="foo")])
docs_with_score = db.similarity_search_with_score("foo")
docs_with_score[0]

(Document(page_content='foo'), 0.0)

In [13]:
docs_with_score

[(Document(page_content='foo'), 0.0),
 (Document(page_content='Up to eight state-of-the-art factories in one place. 10,000 new good-paying jobs. \n\nSome of the most sophisticated manufacturing in the world to make computer chips the size of a fingertip that power the world and our everyday lives. \n\nSmartphones. The Internet. Technology we have yet to invent. \n\nBut that’s just the beginning.', metadata={'source': '../data/state_of_the_union.txt'}),
  0.232740817564837),
 (Document(page_content='Powered by people I’ve met like JoJo Burgess, from generations of union steelworkers from Pittsburgh, who’s here with us tonight. \n\nAs Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \n\nIt’s time. \n\nBut with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills.', metadata={'source': '../data/state_of_the_union.txt'}),
  0.235385724551686),
 (Document(page_content='My administration 

In [14]:
retriever = db.as_retriever()
print(retriever)

tags=['IRISVector'] vectorstore=<langchain_iris.vectorstores.IRISVector object at 0x127f685b0>
