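This notebook uses `OpenAIEmbeddings`, so an OpenAI API key must be available in the `OPENAI_API_KEY` environment variable (for example, set in a local `.env` file, which the first cell loads).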

In [1]:
import getpass
import os
from dotenv import load_dotenv

# Load variables from a local .env file. override=True lets values from .env
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# The embeddings used later need OPENAI_API_KEY. If neither the environment
# nor .env supplied one, ask for it interactively instead of silently
# continuing and failing later. getpass keeps the typed key out of the
# notebook output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")



In [2]:
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader, JSONLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.fastembed import FastEmbedEmbeddings

from langchain_iris import IRISVector


In [3]:
# Alternative input (plain text file instead of the JSON Lines notes):
# loader = TextLoader("../data/state_of_the_union.txt", encoding='utf-8')
#
# The `jq` package is presumably what JSONLoader uses to evaluate `jq_schema`:
#   Windows only install:
#   ! pip install https://jeffreyknockel.com/jq/jq-1.4.0-cp311-cp311-win_amd64.whl
#   Other platforms:
#   ! pip install jq

# Splitter settings: target chunk size 1000 with an overlap of 100 between
# neighbouring chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# JSON Lines (.jsonl) is a text format with one complete JSON value per line,
# so every line of the file is parsed as its own record. The jq expression
# '.note' selects the `note` field of each record as the document text.
loader = JSONLoader(
    file_path='./data/healthcare/augmented_notes_1000.jsonl',
    json_lines=True,
    jq_schema='.note',
)

# `documents` holds the records as loaded; `docs` holds them after splitting.
documents = loader.load()
docs = text_splitter.split_documents(documents)

# Embedding model used for both indexing and querying.
embeddings = OpenAIEmbeddings()
# embeddings = FastEmbedEmbeddings()

  warn_deprecated(


In [4]:
# Connection settings for the IRIS instance. Every value can be overridden
# through an environment variable; the defaults are the values this notebook
# previously hardcoded, so behaviour is unchanged when nothing is set.
username = os.getenv('IRIS_USERNAME', 'demo')
password = os.getenv('IRIS_PASSWORD', 'demo')
hostname = os.getenv('IRIS_HOSTNAME', 'localhost')
port = os.getenv('IRIS_PORT', '51729')  # the original noted '1972' as an alternative port
namespace = os.getenv('IRIS_NAMESPACE', 'USER')

# Connection URL of the form iris://user:password@host:port/namespace.
# NOTE(review): the password is interpolated verbatim; if it can contain
# URL-reserved characters such as '@' or '/', it probably needs URL-encoding.
CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}"

In [5]:
# Show the connection target for a quick sanity check. The password is masked
# so the credential does not end up in the saved notebook output.
print(CONNECTION_STRING.replace(f":{password}@", ":***@"))


iris://demo:demo@localhost:51729/USER


In [6]:
# Name of the collection that holds the embedded notes.
COLLECTION_NAME = "augmented_notes"

# Create the vector store from the split chunks (`docs`), using `embeddings`
# as the embedding model and the IRIS instance behind CONNECTION_STRING.
db = IRISVector.from_documents(
    documents=docs,
    embedding=embeddings,
    connection_string=CONNECTION_STRING,
    collection_name=COLLECTION_NAME,
)

In [7]:
# To reconnect to a collection that already exists in the database (instead of creating it from documents), use this:

# db = IRISVector(
#     embedding_function=embeddings,
#     dimension=1536,
#     collection_name=COLLECTION_NAME,
#     connection_string=CONNECTION_STRING,
# )

In [8]:
# To add documents to existing vector store:
# NOTE(review): `documents` is the unsplit loader output, whereas the store
# was created from the split `docs`. Running this stores the same notes a
# second time at whole-note granularity; confirm that is intended.
# The call's return value (shown as a list of id strings in the recorded
# output) is displayed because it is the last expression of the cell.

db.add_documents(documents)

['5cab937f-089e-11ef-b538-3448ed843086',
 '5cab9380-089e-11ef-957d-3448ed843086',
 '5cab9381-089e-11ef-80f4-3448ed843086',
 '5cab9382-089e-11ef-a4c7-3448ed843086',
 '5cab9383-089e-11ef-8f71-3448ed843086',
 '5cab9384-089e-11ef-a5d8-3448ed843086',
 '5cab9385-089e-11ef-a5af-3448ed843086',
 '5cab9386-089e-11ef-9dc6-3448ed843086',
 '5cab9387-089e-11ef-88c5-3448ed843086',
 '5cab9388-089e-11ef-9d5c-3448ed843086',
 '5cab9389-089e-11ef-9c5c-3448ed843086',
 '5cab938a-089e-11ef-83de-3448ed843086',
 '5cab938b-089e-11ef-a477-3448ed843086',
 '5cab938c-089e-11ef-a3c8-3448ed843086',
 '5cab938d-089e-11ef-90ec-3448ed843086',
 '5cab938e-089e-11ef-a2a5-3448ed843086',
 '5cab938f-089e-11ef-adbb-3448ed843086',
 '5cab9390-089e-11ef-8cc9-3448ed843086',
 '5cab9391-089e-11ef-9406-3448ed843086',
 '5cab9392-089e-11ef-9db1-3448ed843086',
 '5cab9393-089e-11ef-863f-3448ed843086',
 '5cab9394-089e-11ef-a38e-3448ed843086',
 '5cab9395-089e-11ef-9551-3448ed843086',
 '5cab9396-089e-11ef-ae70-3448ed843086',
 '5cab9397-089e-

In [9]:
# Count the entries currently in the store by the ids that db.get() reports.
stored_ids = db.get()['ids']
print(f"Number of docs in vector store: {len(stored_ids)}")

Number of docs in vector store: 2200


In [10]:
# Free-text query to look up in the vector store.
query = "19 year old patient"
# Results are kept as (document, score) pairs for display in the next cell.
docs_with_score = db.similarity_search_with_score(query)

In [11]:
# Print each hit framed by two 80-character rules: the score, then the text.
# NOTE(review): an exact-text match later in this notebook is reported with a
# score of 0.0, so lower values appear to mean a closer match; confirm.
rule = "-" * 80
for doc, score in docs_with_score:
    print(rule)
    print("Score: ", score)
    print(doc.page_content, rule, sep="\n")

--------------------------------------------------------------------------------
Score:  0.178323928204704
The patient is a 30-year-old pregnant woman, gravida 1 para 0, 170 cm and weighted 82 kg at 18 weeks’ gestation. Her initial NIPT result showed an unexpected 5 Mb deletion and 9 Mb duplication on the short arm of chromosome 18. Because of the rare discovery, the patient was then referred to us for genetic counseling sessions and further genetic tests were issued with the complete consent of her parents to investigate if the pregnant woman, her biological parents and the fetus were healthy. After cytogenetic and molecular examinations, a rare de novo 18p terminal deletion with inverted duplication was identified in the pregnant woman, but her parents and the fetus were normal.
The course of her pregnancy was uneventful with the exception of hypothyroidism at 7 weeks’ gestation and treated with Euthyrox from then on. Despite an uneventful family history, the patient had a healthy ap

In [12]:
# Insert a one-off marker document, then search for its exact text.
# NOTE(review): this writes to the store on every run, and it rebinds
# `docs_with_score`, replacing the results of the earlier query.
db.add_documents([Document(page_content="foo")])
docs_with_score = db.similarity_search_with_score("foo")
# Display only the first (document, score) pair.
docs_with_score[0]

(Document(page_content='foo'), 0.0)

In [13]:
# Display every (document, score) pair from the most recent search.
docs_with_score

[(Document(page_content='foo'), 0.0),
 (Document(page_content='A 45 year old male patient who was run over by a train resulting in a right leg amputation at the level of the knee and a crush injury of the left foot. He was brought to our hospital about 2 h after the accident. The right lower limb had a severe comminution and bone loss at the knee joint, with the loss of skin and soft- tissue and crushing of muscle above and below the knee [Figures and ]. The left forefoot was completely degloved and all the toes were crushed and degloved as well [Figures and ].\nThe right lower limb was deemed not replantable as the knee joint was severely damaged and not salvageable, In addition, debridement of crushed and devitalized tissues would result in a 15-20 cm shortening and a limb that was at least 15 cm short with fused knee joint would not be functionally useful and primary insertion of prosthetic knee joint was not considered to be feasible by the attending orthopaedic surgeon.\nFocus was

In [14]:
# Obtain a retriever object from the vector store and print its repr.
retriever = db.as_retriever()
print(retriever)

tags=['IRISVector'] vectorstore=<langchain_iris.vectorstores.IRISVector object at 0x0000024253943E50>
