Skip to content

Commit

Permalink
native vector support
Browse files Browse the repository at this point in the history
  • Loading branch information
daimor committed Feb 15, 2024
1 parent 1f84271 commit 8da81d9
Show file tree
Hide file tree
Showing 7 changed files with 314 additions and 122 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ jobs:
fail-fast: false
matrix:
image:
- intersystemsdc/iris-community:2023.3-zpm
- intersystemsdc/iris-community:latest
- intersystemsdc/iris-community:preview
- intersystemsdc/iris-community:2024.1-preview
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
Expand Down
62 changes: 30 additions & 32 deletions demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -42,7 +42,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -86,14 +86,14 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Document ID: 5dfc733a-202e-47c0-8103-522adcd436d3\n"
"Document ID: 16699f26-27d0-41f9-ab68-af750a4490c1\n"
]
}
],
Expand All @@ -104,39 +104,38 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024.1-preview: Pulling from intersystems/iris-community\n",
"Digest: sha256:8d764a2b86f95180f7a34ef1f8a49e22892b0b22d48444d2c867718ef55da48c\n",
"Status: Image is up to date for containers.intersystems.com/intersystems/iris-community:2024.1-preview\n",
"containers.intersystems.com/intersystems/iris-community:2024.1-preview\n",
"2024.1-preview: Pulling from intersystemsdc/iris-community\n",
"Digest: sha256:5ffc1adeaac16d945d308cdefa03263c61bf8aafa52d43551a57ca3e5286db33\n",
"Status: Image is up to date for intersystemsdc/iris-community:2024.1-preview\n",
"docker.io/intersystemsdc/iris-community:2024.1-preview\n",
"\u001b[1m\n",
"What's Next?\n",
"\u001b[0m 1. Sign in to your Docker account → \u001b[36mdocker login\u001b[0m\n",
" 2. View a summary of image vulnerabilities and recommendations → \u001b[36mdocker scout quickview containers.intersystems.com/intersystems/iris-community:2024.1-preview\u001b[0m\n"
"\u001b[0m View a summary of image vulnerabilities and recommendations → \u001b[36mdocker scout quickview intersystemsdc/iris-community:2024.1-preview\u001b[0m\n"
]
}
],
"source": [
"!docker pull containers.intersystems.com/intersystems/iris-community:2024.1-preview"
"!docker pull intersystemsdc/iris-community:2024.1-preview"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Pulling image containers.intersystems.com/intersystems/iris-community:2024.1-preview\n",
"Container started: 082db6725893\n",
"Pulling image intersystemsdc/iris-community:2024.1-preview\n",
"Container started: cbb4abdb9a4f\n",
"Waiting to be ready...\n",
"Waiting to be ready...\n"
]
Expand All @@ -145,8 +144,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Started iris://demo:demo@localhost:51024/demo\n",
"Portal: http://localhost:51023/csp/sys/UtilHome.csp\n"
"Started iris://demo:demo@localhost:49990/demo\n",
"Portal: http://localhost:49991/csp/sys/UtilHome.csp\n"
]
}
],
Expand All @@ -161,7 +160,7 @@
"\n",
"# Community Edition\n",
"license_key = None\n",
"image = 'containers.intersystems.com/intersystems/iris-community:2024.1-preview'\n",
"image = 'intersystemsdc/iris-community:2024.1-preview'\n",
"\n",
"container = IRISContainer(image, username=\"demo\", password=\"demo\", namespace=\"demo\", license_key=license_key)\n",
"container.with_exposed_ports(1972, 52773)\n",
Expand All @@ -175,13 +174,13 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d21253f0bd684cbd9441bb5502c22cbb",
"model_id": "45f54f42d6874a6cbfc3a9e71d28894a",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -195,7 +194,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f1028a137a364b28b41dd478726a5c7e",
"model_id": "cab67cf72ef54950993729756c04d3c2",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -230,7 +229,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -239,7 +238,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand All @@ -248,9 +247,9 @@
"text": [
"The author worked on writing essays and programming. They started by writing short stories before\n",
"college and later began programming on an IBM 1401 computer. They then transitioned to working with\n",
"microcomputers, specifically a TRS-80, where they wrote simple games, a rocket prediction program,\n",
"and a word processor. In college, the author initially planned to study philosophy but switched to\n",
"studying AI due to their interest in intelligent computers.\n"
"microcomputers and wrote simple games, a rocket prediction program, and a word processor. In\n",
"college, the author initially planned to study philosophy but switched to studying AI due to their\n",
"interest in intelligent computers.\n"
]
}
],
Expand All @@ -261,18 +260,17 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"In the mid 1980s, AI (Artificial Intelligence) was in the air and there were two things that\n",
"influenced the author's interest in working on it. One was a novel by Heinlein called \"The Moon is a\n",
"Harsh Mistress\" which featured an intelligent computer called Mike, and the other was a PBS\n",
"documentary that showed Terry Winograd using SHRDLU. These experiences made the author believe that\n",
"intelligent computers like Mike would be a reality in the near future.\n"
"In the mid 1980s, AI (Artificial Intelligence) was in the air and it was during this time that the\n",
"author, Paul Graham, became interested in working on it. He was inspired by a novel called \"The Moon\n",
"is a Harsh Mistress\" by Heinlein, which featured an intelligent computer called Mike, and a PBS\n",
"documentary that showed Terry Winograd using SHRDLU.\n"
]
}
],
Expand Down
7 changes: 3 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
version: "3"
services:
iris:
image: intersystemsdc/iris-community:2023.3-zpm
image: intersystemsdc/iris-community:2024.1-preview
ports:
- 6272:1972
- 6273:52773
environment:
- IRISUSERNAME=test
- IRISPASSWORD=test
- IRISNAMESPACE=TEST
working_dir: /home/irisowner/langchain-iris
working_dir: /home/irisowner/llama-iris
volumes:
- ./init.sh:/docker-entrypoint-initdb.d/init.sh
- ./:/home/irisowner/langchain-iris
- ./:/home/irisowner/llama-iris
45 changes: 30 additions & 15 deletions llama_iris/vectorstore.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
)
from llama_index.vector_stores.utils import metadata_dict_to_node, node_to_metadata_dict


_logger = logging.getLogger(__name__)


Expand All @@ -18,13 +19,15 @@ def get_data_model(
index_name: str,
schema_name: str,
embed_dim: int = 1536,
native_vector: bool = False,
) -> Any:
"""
This part creates a dynamic sqlalchemy model with a new table.
"""
from sqlalchemy import Column
from sqlalchemy.types import BIGINT, TEXT, VARCHAR
from sqlalchemy_iris import IRISListBuild
from sqlalchemy_iris import IRISVector

tablename = "data_%s" % index_name
class_name = "Data%s" % index_name
Expand All @@ -36,7 +39,7 @@ class AbstractData(base): # type: ignore
metadata_ = Column(TEXT)
node_id = Column(VARCHAR(200))
partition_id = Column(VARCHAR(200))
embedding = Column(IRISListBuild(embed_dim)) # type: ignore
embedding = Column(IRISVector(embed_dim) if native_vector else IRISListBuild(embed_dim)) # type: ignore

return type(
class_name,
Expand All @@ -63,6 +66,7 @@ class IRISVectorStore(BasePydanticVectorStore):
_engine: Any = PrivateAttr()
_session: Any = PrivateAttr()
_is_initialized: bool = PrivateAttr(default=False)
_native_vector: bool = PrivateAttr(default=False)

def __init__(
self,
Expand All @@ -76,17 +80,6 @@ def __init__(
table_name = table_name.lower()
schema_name = schema_name.lower()

from sqlalchemy.orm import declarative_base

# sqlalchemy model
self._base = declarative_base()
self._table_class = get_data_model(
self._base,
table_name,
schema_name,
embed_dim=embed_dim,
)

super().__init__(
connection_string=connection_string,
table_name=table_name,
Expand Down Expand Up @@ -148,12 +141,28 @@ def _connect(self) -> Any:

self._engine = create_engine(self.connection_string, echo=self.debug)
self._session = sessionmaker(self._engine)
with self._engine.connect() as conn:
self._native_vector = conn.dialect.supports_vectors

from sqlalchemy.orm import declarative_base

# sqlalchemy model
self._base = declarative_base()
self._table_class = get_data_model(
self._base,
self.table_name,
self.schema_name,
embed_dim=self.embed_dim,
native_vector=self._native_vector,
)

def _create_tables_if_not_exists(self) -> None:
with self._session() as session, session.begin():
self._base.metadata.create_all(session.connection())

def _create_vector_functions(self) -> None:
if self._native_vector:
return
try:
from sqlalchemy import text

Expand Down Expand Up @@ -293,9 +302,15 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul
self._table_class.node_id,
self._table_class.text,
self._table_class.metadata_.label("metadata"),
self._table_class.embedding.func(
"llamaindex_cosine_distance", query_embedding
).label("distance"),
(
self._table_class.embedding.cosine(query_embedding).label(
"distance"
)
if self._native_vector
else self._table_class.embedding.func(
"llamaindex_cosine_distance", query_embedding
).label("distance")
),
)
.limit(query.similarity_top_k)
.order_by(text("distance asc"))
Expand Down

0 comments on commit 8da81d9

Please sign in to comment.