In [26]:
# %%capture
# !pip install langchain-iris

# !pip install testcontainers-iris \
#                 fastembed \
#                 openai \
#                 tiktoken \
#                 python-dotenv


We want to use `OpenAIEmbeddings`, so we need to provide an OpenAI API key.

In [7]:
import getpass
import os
from dotenv import load_dotenv

# Read settings from a local .env file; override=True lets values from that
# file replace variables already present in the process environment.
load_dotenv(override=True)

# If the key is still missing (or empty), ask for it interactively without
# echoing it to the notebook output.
api_key_present = bool(os.environ.get("OPENAI_API_KEY"))
if not api_key_present:
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")


In [8]:
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.fastembed import FastEmbedEmbeddings

from langchain_iris import IRISVector


In [18]:
# Load the speech transcript. The encoding is pinned explicitly because the
# text contains non-ASCII punctuation (curly quotes and apostrophes); without
# it the loader relies on the platform's default encoding, which is not UTF-8
# on every system.
# NOTE(review): assumes state_of_the_union.txt is stored as UTF-8 — confirm.
loader = TextLoader("state_of_the_union.txt", encoding="utf-8")
documents = loader.load()

# Break the transcript into smaller chunks (chunk_size=400, chunk_overlap=20)
# so that each chunk is embedded and retrieved on its own.
text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=20)
docs = text_splitter.split_documents(documents)

# Embedding model used both for indexing and for querying. The OpenAI variant
# needs OPENAI_API_KEY in the environment; the FastEmbed line below is kept as
# an alternative backend.
embeddings = OpenAIEmbeddings()
# embeddings = FastEmbedEmbeddings()

In [10]:
import time
import os
from testcontainers.iris import IRISContainer

# Alternative settings tried previously:
# license_key = os.path.abspath(os.path.expanduser("~/iris.key"))
# image = 'containers.intersystems.com/intersystems/iris:2023.3'
# image = 'iris:2024.1'

# The image tag and license key location are machine-specific, so both can be
# overridden through environment variables (IRIS_IMAGE, IRIS_LICENSE_KEY).
# The defaults are the values this notebook was originally run with.
image = os.environ.get("IRIS_IMAGE", 'localhost/intersystems/iris-arm64:2024.1.0.233.0')
license_key = os.environ.get("IRIS_LICENSE_KEY", "/Users/aryanput/irisvector.key")

# Start a throwaway IRIS instance with a "demo" user and namespace.
container = IRISContainer(image, username="demo", password="demo", namespace="demo", license_key=license_key)
container.with_exposed_ports(1972, 52773)
container.start()

# URL for the running container; the host port is assigned when it starts.
CONNECTION_STRING = container.get_connection_url()

# NOTE(review): short extra pause after start() — presumably to let the
# instance settle; confirm whether it is still required.
time.sleep(1)
print('Started')


Pulling image localhost/intersystems/iris-arm64:2024.1.0.233.0
Container started: d09bdae224e0
Waiting to be ready...


Started


In [11]:
# Show the connection URL currently in use. Note that the URL embeds the
# username and password, so the printed output contains credentials.
print(CONNECTION_STRING)

iris://demo:demo@localhost:64358/demo


In [25]:
from urllib.parse import quote


def build_connection_string(username, password, hostname, port, namespace):
    """Assemble an ``iris://`` connection URL from its parts.

    The username and password are percent-encoded so that reserved characters
    inside the credentials (for example ``@``, ``:`` or ``/``) cannot be
    mistaken for URL delimiters. Credentials made only of unreserved
    characters are left unchanged.

    Args:
        username: Account name used to log in.
        password: Password for that account.
        hostname: Host name or address of the IRIS server.
        port: Port of the IRIS server.
        namespace: Namespace placed in the path part of the URL.

    Returns:
        The connection URL as a string.
    """
    safe_user = quote(str(username), safe="")
    safe_password = quote(str(password), safe="")
    return f"iris://{safe_user}:{safe_password}@{hostname}:{port}/{namespace}"


# Connection settings for an already-running IRIS instance.
# NOTE(review): credentials are hard-coded here; consider reading them from
# the environment before sharing this notebook.
args = {'hostname':'localhost', 'port':8881,
    'namespace':'USER', 'username':'SUPERUSER', 'password':'SYS2'
}
username = args['username']
password = args['password']
hostname = args['hostname']
port = args['port']
namespace = args['namespace']

# This reassigns CONNECTION_STRING, so cells executed after this one talk to
# the instance described by `args` rather than to any URL assigned earlier.
CONNECTION_STRING = build_connection_string(username, password, hostname, port, namespace)
print(CONNECTION_STRING)

iris://SUPERUSER:SYS2@localhost:8881/USER


In [26]:
# Name under which the chunks are stored in the vector store.
COLLECTION_NAME = "state_of_the_union_test"

# Build the vector store from the split documents, using `embeddings` to
# vectorise them and CONNECTION_STRING to reach the database.
store_options = {
    "documents": docs,
    "embedding": embeddings,
    "collection_name": COLLECTION_NAME,
    "connection_string": CONNECTION_STRING,
}
db = IRISVector.from_documents(**store_options)

In [20]:
# Free-text question to look up in the indexed speech.
query = "Joint patrols to catch traffickers"
# Each result is a (document, score) pair; the next cell unpacks them.
# NOTE(review): lower scores appear to mean a closer match — confirm the metric.
docs_with_score = db.similarity_search_with_score(query)

In [21]:
# Print every hit framed by an 80-character rule: score first, then the text.
separator = "-" * 80
for doc, score in docs_with_score:
    print(separator)
    print("Score: ", score)
    print(doc.page_content)
    print(separator)

--------------------------------------------------------------------------------
Score:  0.17935702033124812
And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. 

We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling.  

We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers.
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Score:  0.20608255896079208
So let’s not abandon our streets. Or choose between safety and equal justice. 

Let’s come together to protect our communities, restore trust, and hold law enforcement accountable. 

That’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers.
-----------------------------------------------------------------------

In [22]:
# Insert one extra, trivially small document into the existing store.
# Note: running this cell again adds another copy of it.
foo_document = Document(page_content="foo")
db.add_documents([foo_document])

# Search for the same text and display the best-ranked (document, score) pair.
docs_with_score = db.similarity_search_with_score("foo")
docs_with_score[0]

(Document(page_content='foo'), 6.440451282507809e-06)

In [23]:
# Display the complete list of (document, score) results from the last search.
docs_with_score

[(Document(page_content='foo'), 6.440451282507809e-06),
 (Document(page_content='Up to eight state-of-the-art factories in one place. 10,000 new good-paying jobs. \n\nSome of the most sophisticated manufacturing in the world to make computer chips the size of a fingertip that power the world and our everyday lives. \n\nSmartphones. The Internet. Technology we have yet to invent. \n\nBut that’s just the beginning.', metadata={'source': 'state_of_the_union.txt'}),
  0.23160923327669314),
 (Document(page_content='Powered by people I’ve met like JoJo Burgess, from generations of union steelworkers from Pittsburgh, who’s here with us tonight. \n\nAs Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \n\nIt’s time. \n\nBut with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills.', metadata={'source': 'state_of_the_union.txt'}),
  0.23470916306863654),
 (Document(page_content='My administr

In [24]:
# Obtain a retriever from the vector store and print its representation.
retriever = db.as_retriever()
print(retriever)

tags=['IRISVector'] vectorstore=<langchain_iris.vectorstores.IRISVector object at 0x127822d10>


In [37]:
# Shut down the IRIS test container created earlier in this notebook.
container.stop()