# Chat Engine with CMEMRetriever (streaming)

To chat over CMEM data with streamed responses, use a `CondensePlusContextChatEngine` together with `CMEMRetriever` and `CMEMGraphStore`.

In [None]:
# Install the cmem-cmempy and llama-index packages (%pip targets the kernel's environment).
# NOTE(review): later cells import `llama_index_cmem` and `llama_index.llms.openai`;
# confirm that both are provided by the packages installed here.
%pip install cmem-cmempy llama-index

Set the environment variables if necessary.

In [None]:
from os import environ

# Connection and credential settings read from the process environment.
# Fill in the empty values you need. Entries left empty are skipped, so running
# this cell unchanged does not overwrite variables that are already configured
# (e.g. an OPENAI_API_KEY exported in the shell) with empty strings.
cmem_environment = {
    "CMEM_BASE_URI": "",
    "OAUTH_GRANT_TYPE": "password",
    "OAUTH_USER": "admin",
    "OAUTH_PASSWORD": "",
    "OAUTH_CLIENT_ID": "cmemc",
    "OPENAI_API_KEY": "",
}

# Non-empty values are applied (and override existing ones); placeholders are ignored.
environ.update({name: value for name, value in cmem_environment.items() if value})

Set up the LLM

In [1]:
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

# Name of the OpenAI model requested for this notebook.
model = "gpt-4o-mini"

# Build the LLM once; keep it in `llm` for the later cells and store it on
# `Settings.llm` as well.
Settings.llm = llm = OpenAI(model=model)

Set up all components.

In [2]:
from llama_index.core import get_response_synthesizer
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer

from llama_index_cmem.graph_stores.cmem import CMEMGraphStore
from llama_index_cmem.retrievers.cmem.cmem_retriever import CMEMRetriever

# Graph IRIs handed to the retriever below.
ontology_graph = "http://ld.company.org/prod-vocab/"
context_graph = "http://ld.company.org/prod-inst/"

# Building blocks that do not depend on each other.
# NOTE(review): `response_synthesizer` is not referenced by any other statement
# in this cell; confirm whether it is still needed.
response_synthesizer = get_response_synthesizer()
graph_store = CMEMGraphStore()
memory = ChatMemoryBuffer.from_defaults()

# Retriever wired to the graph store, both graphs and the `llm` from the LLM cell.
retriever = CMEMRetriever(
    graph_store=graph_store,
    ontology_graph=ontology_graph,
    context_graph=context_graph,
    llm=llm,
)

# Chat engine combining the retriever, the LLM and the chat memory.
chat_engine = CondensePlusContextChatEngine(retriever=retriever, llm=llm, memory=memory)

Define a natural language query, run it through the chat engine, and stream the natural language response to the output token by token.

In [None]:
query = "List all hardware with price. Limit the results to 20 items."

# Echo the question, then send it to the chat engine in streaming mode.
print("Query:", query, sep="\n")
response = chat_engine.stream_chat(query)

# Print each piece as it is yielded by `response_gen`, without adding newlines
# between pieces.
print("Final response:\n")
for chunk in response.response_gen:
    print(chunk, end="")