In [1]:
%pip install cmem-cmempy llama-index python-dotenv llama-index-vector-stores-postgres

Note: you may need to restart the kernel to use updated packages.


In [2]:
%load_ext dotenv
%dotenv
%reload_ext dotenv

In [3]:
import os

import openai

openai.api_key = os.environ["OPENAI_API_KEY"]

In [4]:
!docker pull ankane/pgvector
!docker run -d -e POSTGRES_USER=eccenca -e POSTGRES_PASSWORD=eccenca -e POSTGRES_DB=eccenca --name pgvector-eccenca -p 5432:5432 ankane/pgvector

Using default tag: latest
latest: Pulling from ankane/pgvector
Digest: sha256:956744bd14e9cbdf639c61c2a2a7c7c2c48a9c8cdd42f7de4ac034f4e96b90f8
Status: Image is up to date for ankane/pgvector:latest
docker.io/ankane/pgvector:latest
docker: Error response from daemon: Conflict. The container name "/pgvector-eccenca" is already in use by container "b6c885e9ba8fed6a52c3cfd6369cb2ba3df7c519423b50b647811974d1367bbb". You have to remove (or rename) that container to be able to reuse that name.

Run 'docker run --help' for more information


In [5]:
from IPython.display import Markdown

from llama_index_cmem.executor.cmem_sparql_executor import CMEMSPARQLExecutor
from llama_index_cmem.readers.sparql_reader import SPARQLReader


sparql_reader = SPARQLReader(executor=CMEMSPARQLExecutor())
documents = sparql_reader.load_graph_triples_with_labels(graph="http://ld.company.org/prod-inst/")
display(Markdown(f"### Documents loaded: __{len(documents)}__ "))

### Documents loaded: __11426__ 

In [6]:
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.vector_stores.postgres import PGVectorStore

vector_store = PGVectorStore.from_params(
    database="eccenca",
    host="localhost",
    password="eccenca",
    port="5432",
    user="eccenca",
    table_name="prod-inst",
    embed_dim=1536,  # openai embedding dimension
    hnsw_kwargs={
        "hnsw_m": 16,
        "hnsw_ef_construction": 64,
        "hnsw_ef_search": 40,
        "hnsw_dist_method": "vector_cosine_ops",
    },
)

storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, show_progress=True
)
query_engine = index.as_query_engine()

  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes: 100%|██████████| 11426/11426 [00:01<00:00, 6340.76it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:29<00:00, 70.57it/s]
PG Setup: Error creating HNSW index: (psycopg2.errors.SyntaxError) syntax error at or near "-"
LINE 1: CREATE INDEX IF NOT EXISTS data_prod-inst_embedding_idx ON p...
                                            ^

[SQL: CREATE INDEX IF NOT EXISTS data_prod-inst_embedding_idx ON public.data_prod-inst USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64)]
(Background on this error at: https://sqlalche.me/e/20/f405)
Generating embeddings: 100%|██████████| 2048/2048 [00:27<00:00, 74.11it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:28<00:00, 72.42it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:27<00:00, 74.25it/s]
Generating embeddings: 100%|██████████| 2048/2048 [00:24<00:00, 84.94it/s]
Generating embeddings: 100%|██████████| 1186/1186 [00:20<00:00, 

In [7]:
question = "What information do you have about Liese Adam? Give me as many details as possible."
answer = query_engine.query(question)
display(Markdown(f"## Question: _{question}_"))
display(Markdown(f"### Answer:\n\n{answer}"))

## Question: _What information do you have about Liese Adam? Give me as many details as possible._

### Answer:

Liese Adam is a member of the Marketing department. Her areas of expertise include Crystal and Sensor.