12 changes: 12 additions & 0 deletions README.md
@@ -150,6 +150,18 @@ the Node service locally, you can do so by following these steps:
docker run -p 6333:6333 -p 6334:6334 -v $(pwd)/databases/qdrant_storage:/qdrant/storage:z qdrant/qdrant
```

#### Modifying the UI in CML

This is an unsupported workflow, but it is possible to modify the UI code in CML:

- Start a CML Session from a CML Project that has the RAG Studio AMP installed.
- Open the terminal in the CML Session and navigate to the `ui` directory.
- Run `source ~/.bashrc` to ensure the Node environment variables are loaded.
- Install PNPM using `npm install -g pnpm`. Docs on PNPM can be found here: https://pnpm.io/installation#using-npm
- Run `pnpm install` to install the dependencies.
- Make your changes to the UI code in the `ui` directory.
- Run `pnpm build` to build the new UI bundle.

## The Fine Print

IMPORTANT: Please read the following before proceeding. This AMP includes or otherwise depends on certain third party software packages. Information about such third party software packages is made available in the notice file associated with this AMP. By configuring and launching this AMP, you will cause such third party software packages to be downloaded and installed into your environment, in some instances, from third parties' websites. For each third party software package, please see the notice file and the applicable websites for more information, including the applicable license terms. If you do not wish to download and install the third party software packages, do not configure, launch or otherwise use this AMP. By configuring, launching or otherwise using the AMP, you acknowledge the foregoing statement and agree that Cloudera is not responsible or liable in any way for the third party software packages.
6 changes: 4 additions & 2 deletions llm-service/app/ai/indexing/summary_indexer.py
@@ -75,7 +75,7 @@
from ..vector_stores.vector_store_factory import VectorStoreFactory
from ...config import settings
from ...services.metadata_apis import data_sources_metadata_api
from ...services.models.providers import CAIIModelProvider
from ...services.models.providers import CAIIModelProvider, AzureModelProvider, OpenAiModelProvider

logger = logging.getLogger(__name__)

@@ -132,9 +132,11 @@ def __index_configuration(
embed_summaries: bool = True,
) -> Dict[str, Any]:
prompt_helper: Optional[PromptHelper] = None
# if we're using CAII, let's be conservative, and use a small context window to account for mistral's small context
# if we're using CAII, let's be conservative and use a small context window to account for mistral's small context
if CAIIModelProvider.is_enabled():
prompt_helper = PromptHelper(context_window=3000)
if AzureModelProvider.is_enabled() or OpenAiModelProvider.is_enabled():
prompt_helper = PromptHelper(context_window=min(llm.metadata.context_window, 10000))
return {
"llm": llm,
"response_synthesizer": get_response_synthesizer(
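For illustration, here is a minimal sketch of the provider-dependent context-window selection this hunk introduces, pulled out into a standalone helper. The helper name and its boolean arguments are hypothetical; in the actual code the checks are `CAIIModelProvider.is_enabled()`, `AzureModelProvider.is_enabled()`, and `OpenAiModelProvider.is_enabled()`, and the 3000/10000 limits come straight from the diff.

```python
# Illustrative sketch only: mirrors the provider-specific PromptHelper selection above.
from typing import Optional

from llama_index.core import PromptHelper
from llama_index.core.llms import LLM


def choose_prompt_helper(llm: LLM, caii: bool, azure_or_openai: bool) -> Optional[PromptHelper]:
    """Pick a conservative context window depending on the model provider."""
    prompt_helper: Optional[PromptHelper] = None
    if caii:
        # CAII may be backed by Mistral, which has a small context window.
        prompt_helper = PromptHelper(context_window=3000)
    if azure_or_openai:
        # Cap at 10k tokens, but never exceed what the model itself reports.
        prompt_helper = PromptHelper(context_window=min(llm.metadata.context_window, 10000))
    # Other providers keep the default prompt helper (None).
    return prompt_helper
```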
5 changes: 3 additions & 2 deletions llm-service/app/routers/index/sessions/__init__.py
@@ -302,8 +302,9 @@ def generate_stream() -> Generator[str, None, None]:
yield f"data: {event_json}\n\n"
first_message = False
response_id = response.additional_kwargs["response_id"]
json_delta = json.dumps({"text": response.delta})
yield f"data: {json_delta}\n\n"
if response.delta:
json_delta = json.dumps({"text": response.delta})
yield f"data: {json_delta}\n\n"

if not cancel_event.is_set() and response_id:
done = ChatEvent(type="done", name="chat_done", timestamp=time.time())
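As a hedged illustration of why this guard matters, the sketch below shows the server-sent-events framing used here: without the `if response.delta:` check, empty deltas would still be serialized and emitted as no-op `data:` frames. The generator name and inputs are hypothetical, not part of this module.

```python
# Illustrative sketch: forward only non-empty text deltas as SSE "data:" frames.
import json
from typing import Generator, Iterable


def sse_frames(deltas: Iterable[str]) -> Generator[str, None, None]:
    """Serialize streamed text deltas as server-sent events, skipping empty chunks."""
    for delta in deltas:
        if delta:  # mirrors the `if response.delta:` guard added in this hunk
            yield f"data: {json.dumps({'text': delta})}\n\n"
```

For example, `list(sse_frames(["Hel", "", "lo"]))` produces two frames and silently drops the empty chunk.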
97 changes: 97 additions & 0 deletions llm-service/app/services/query/agents/non_streamer_bedrock_converse.py
@@ -0,0 +1,97 @@
from typing import (
Sequence,
Optional,
Union,
Any,
List,
AsyncGenerator,
)

from llama_index.core.base.llms.types import (
ChatMessage,
ChatResponse,
)
from llama_index.core.tools import BaseTool
from llama_index.llms.bedrock_converse import BedrockConverse


class FakeStreamBedrockConverse(BedrockConverse):
"""
A class that inherits from BedrockConverse but overrides its astream_chat_with_tools function.
This class is used to create a non-streaming version of the BedrockConverse.
"""

def __init__(self, *args: Any, **kwargs: Any) -> None:
"""
Initialize the FakeStreamBedrockConverse class.
"""
super().__init__(*args, **kwargs)

async def astream_chat_with_tools(
self,
tools: Sequence["BaseTool"],
user_msg: Optional[Union[str, ChatMessage]] = None,
chat_history: Optional[List[ChatMessage]] = None,
verbose: bool = False,
allow_parallel_tool_calls: bool = False,
tool_required: bool = False,
**kwargs: Any,
) -> AsyncGenerator[ChatResponse, None]:
# This method is overridden to provide a non-streaming version of the chat with tools.
# Here we yield a single ChatResponse object instead of streaming multiple responses.
async def _fake_stream() -> AsyncGenerator[ChatResponse, None]:
response = await self.achat_with_tools(
tools=tools,
user_msg=user_msg,
chat_history=chat_history,
verbose=verbose,
allow_parallel_tool_calls=allow_parallel_tool_calls,
tool_required=tool_required,
**kwargs,
)
yield response

return _fake_stream()

@classmethod
def from_bedrock_converse(
cls, bedrock_converse: BedrockConverse
) -> "FakeStreamBedrockConverse":
"""
Create a FakeStreamBedrockConverse object from a BedrockConverse object.

Args:
bedrock_converse: A BedrockConverse object

Returns:
A FakeStreamBedrockConverse object with the same public attributes as the input BedrockConverse
"""
# Create a new instance of FakeStreamBedrockConverse with only the public parameters
# Let the parent class handle initialization of private attributes
return cls(
model=bedrock_converse.model,
temperature=bedrock_converse.temperature,
max_tokens=bedrock_converse.max_tokens,
additional_kwargs=bedrock_converse.additional_kwargs,
callback_manager=bedrock_converse.callback_manager,
system_prompt=bedrock_converse.system_prompt,
messages_to_prompt=bedrock_converse.messages_to_prompt,
completion_to_prompt=bedrock_converse.completion_to_prompt,
pydantic_program_mode=bedrock_converse.pydantic_program_mode,
output_parser=bedrock_converse.output_parser,
profile_name=bedrock_converse.profile_name,
aws_access_key_id=bedrock_converse.aws_access_key_id,
aws_secret_access_key=bedrock_converse.aws_secret_access_key,
aws_session_token=bedrock_converse.aws_session_token,
region_name=bedrock_converse.region_name,
api_version=bedrock_converse.api_version,
use_ssl=bedrock_converse.use_ssl,
verify=bedrock_converse.verify,
endpoint_url=bedrock_converse.endpoint_url,
timeout=bedrock_converse.timeout,
max_retries=bedrock_converse.max_retries,
guardrail_identifier=bedrock_converse.guardrail_identifier,
guardrail_version=bedrock_converse.guardrail_version,
application_inference_profile_arn=bedrock_converse.application_inference_profile_arn,
trace=bedrock_converse.trace,
)
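A hedged usage sketch for the new wrapper, assuming valid AWS credentials and a Bedrock model that supports tool calling via the Converse API; the model id below is only an example and is not taken from this PR.

```python
# Hypothetical usage: wrap a BedrockConverse so tool calling "streams" one complete response.
import asyncio

from llama_index.llms.bedrock_converse import BedrockConverse

from app.services.query.agents.non_streamer_bedrock_converse import (
    FakeStreamBedrockConverse,
)


async def main() -> None:
    llm = BedrockConverse(model="meta.llama3-1-70b-instruct-v1:0")  # example model id
    non_streaming_llm = FakeStreamBedrockConverse.from_bedrock_converse(llm)

    # astream_chat_with_tools returns an async generator that yields exactly one ChatResponse.
    stream = await non_streaming_llm.astream_chat_with_tools(
        tools=[], user_msg="What is RAG Studio?"
    )
    async for response in stream:
        print(response.message.content)


if __name__ == "__main__":
    asyncio.run(main())
```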
95 changes: 68 additions & 27 deletions llm-service/app/services/query/agents/tool_calling_querier.py
@@ -57,6 +57,8 @@
from llama_index.core.schema import NodeWithScore
from llama_index.core.tools import BaseTool
from llama_index.core.workflow import StopEvent
from llama_index.llms.bedrock_converse import BedrockConverse
from llama_index.llms.bedrock_converse.utils import get_model_name

from app.ai.indexing.summary_indexer import SummaryIndexer
from app.services.metadata_apis.session_metadata_api import Session
@@ -65,6 +67,9 @@
from app.services.query.agents.agent_tools.retriever import (
build_retriever_tool,
)
from app.services.query.agents.non_streamer_bedrock_converse import (
FakeStreamBedrockConverse,
)
from app.services.query.chat_engine import (
FlexibleContextChatEngine,
)
@@ -84,6 +89,16 @@
"mistral.mixtral-8x7b-instruct-v0:1",
}

BEDROCK_STREAMING_TOOL_MODELS = {
"anthropic.claude-3-5-sonnet-20241022-v2:0",
"anthropic.claude-3-7-sonnet-20250219-v1:0",
"anthropic.claude-sonnet-4-20250514-v1:0",
"anthropic.claude-opus-4-20250514-v1:0",
"amazon.nova-pro-v1:0",
"cohere.command-r-plus-v1:0",
"cohere.command-r-v1:0",
}


def should_use_retrieval(
data_source_ids: list[int],
@@ -101,27 +116,53 @@ def should_use_retrieval(


DEFAULT_AGENT_PROMPT = """\
Today's date is {date} and the current time is {time}. \
### DATE AND TIME
Today's date is {date} and the current time is {time}. This date and time \
is considered the current date and time for all responses. \

### ROLE DESCRIPTION
You are an expert agent that can answer questions with the help of tools. \
Go through the tools available and use them appropriately to answer the \
user's question. If you do not know the answer to a question, you \
truthfully say you do not know. As the agent, you will provide an \
answer based solely on the provided sources with citations to the \
paragraphs.

Note for in-line citations:
* Use the citations from the chat history as is.
* Use links provided by the tools if needed to answer the question and cite them in-line \
You will use the date and time provided above to refine the user's query \
and provide the best possible answer. \

### BEST PRACTICES
You will follow these best practices when answering questions:
1. Refine the user's query using the date and time provided above \
if necessary, or if the user has not provided enough information \
to answer the question.
2. Go through the tools available.
3. Approach the question step by step, using the tools \
available to you to gather information when necessary.
4. Once you have the information you need, provide a final answer \
to the user, citing the sources you used (if available).
5. If you do not know the answer to a question, or cannot find the \
information you need with the provided sources or tools, truthfully \
say you do not know, explain how you arrived at the response and what \
information (links, if any) you used, and ask for clarification or more information.

### OUTPUT FORMAT
As the agent, you will provide an answer based solely on the provided \
sources with citations (if available). Only return the answer with \
citations (if used) to the user. If you cannot answer the question with the \
provided sources or tools, you will return a message saying you \
cannot answer the question and ask the user to provide more \
information or clarify the question. \

### CITATION FORMAT
You will use the following format to cite sources in your response:
* Use the citations from the chat history as is.
* Use links provided by tool results if needed to answer the question and cite them in-line \
in the given format: the link should be in markdown format. For example: \
Refer to the example in [example.com](https://example.com). Do not make up links that are not \
present.
* Cite from node_ids in the given format: the node_id \
present.
* Cite from tool results with node_ids in the given format: the node_id \
should be in an html anchor tag (<a href>) with an html 'class' of 'rag_citation'. \
Do not use filenames as citations. Only node ids should be used. \
For example: <a class="rag_citation" href="2" ></a>. Do not make up node ids that are not present
in the context.
* All citations should be either in-line citations or markdown links.
* All citations should be either in-line citations or markdown links.

For example:

@@ -380,29 +421,29 @@ def gen() -> Generator[ChatResponse, None, None]:
def build_function_agent(
enhanced_query: str, llm: FunctionCallingLLM, tools: list[BaseTool]
) -> tuple[FunctionAgent, str]:
formatted_prompt = DEFAULT_AGENT_PROMPT.format(
date=datetime.datetime.now().strftime("%A, %B %d, %Y"),
time=datetime.datetime.now().strftime("%H:%M:%S %p"),
)
callable_tools = cast(list[BaseTool | Callable[[], Any]], tools)
if llm.metadata.model_name in NON_SYSTEM_MESSAGE_MODELS:
agent = FunctionAgent(
tools=cast(list[BaseTool | Callable[[], Any]], tools),
llm=llm,
)
agent = FunctionAgent(tools=callable_tools, llm=llm)
enhanced_query = (
"ROLE DESCRIPTION =========================================\n"
+ DEFAULT_AGENT_PROMPT.format(
date=datetime.datetime.now().strftime("%Y-%m-%d"),
time=datetime.datetime.now().strftime("%H:%M:%S"),
)
+ formatted_prompt
+ "=========================================================\n"
"USER QUERY ==============================================\n"
+ enhanced_query
)
else:
if (
isinstance(llm, BedrockConverse)
and get_model_name(llm.metadata.model_name)
not in BEDROCK_STREAMING_TOOL_MODELS
):
llm = FakeStreamBedrockConverse.from_bedrock_converse(llm)
agent = FunctionAgent(
tools=cast(list[BaseTool | Callable[[], Any]], tools),
llm=llm,
system_prompt=DEFAULT_AGENT_PROMPT.format(
date=datetime.datetime.now().strftime("%Y-%m-%d"),
time=datetime.datetime.now().strftime("%H:%M:%S"),
),
tools=callable_tools, llm=llm, system_prompt=formatted_prompt
)

return agent, enhanced_query
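In condensed form, the fallback added in this hunk can be read as the sketch below: a Bedrock model whose normalized name is outside the streaming-tool-call allowlist has its LLM swapped for the non-streaming wrapper before the agent is built. The helper name is hypothetical; the calls mirror the diff.

```python
# Illustrative sketch of the streaming-capability fallback added above.
from llama_index.core.llms.function_calling import FunctionCallingLLM
from llama_index.llms.bedrock_converse import BedrockConverse
from llama_index.llms.bedrock_converse.utils import get_model_name

from app.services.query.agents.non_streamer_bedrock_converse import (
    FakeStreamBedrockConverse,
)


def maybe_disable_streaming(
    llm: FunctionCallingLLM, streaming_tool_models: set[str]
) -> FunctionCallingLLM:
    """Swap in FakeStreamBedrockConverse when a Bedrock model cannot stream tool calls."""
    if (
        isinstance(llm, BedrockConverse)
        and get_model_name(llm.metadata.model_name) not in streaming_tool_models
    ):
        # The wrapper yields the full response as a one-element "stream".
        return FakeStreamBedrockConverse.from_bedrock_converse(llm)
    return llm
```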
28 changes: 24 additions & 4 deletions llm-service/app/services/query/querier.py
@@ -37,7 +37,10 @@
from llama_index.core.llms import LLM
from llama_index.core.llms.function_calling import FunctionCallingLLM
from llama_index.core.schema import NodeWithScore
from llama_index.llms.bedrock_converse.utils import BEDROCK_FUNCTION_CALLING_MODELS
from llama_index.llms.bedrock_converse.utils import (
BEDROCK_FUNCTION_CALLING_MODELS,
get_model_name,
)
from llama_index.llms.openai.utils import (
is_function_calling_model,
ALL_AVAILABLE_MODELS,
@@ -77,6 +80,15 @@

logger = logging.getLogger(__name__)

LLAMA_3_2_NON_FUNCTION_CALLING_MODELS = {
"meta.llama3-2-1b-instruct-v1:0",
"meta.llama3-2-3b-instruct-v1:0",
}

MODIFIED_BEDROCK_FUNCTION_CALLING_MODELS = tuple(
set(BEDROCK_FUNCTION_CALLING_MODELS) - LLAMA_3_2_NON_FUNCTION_CALLING_MODELS
)


def streaming_query(
chat_engine: Optional[FlexibleContextChatEngine],
@@ -126,12 +138,20 @@ def streaming_query(
return chat_response


# LlamaIndex's list of function-calling models appears out of date,
# so we have a modified version
def is_bedrock_function_calling_model_v2(model_name: str) -> bool:
return get_model_name(model_name) in MODIFIED_BEDROCK_FUNCTION_CALLING_MODELS


def check_for_tool_calling_support(llm: LLM) -> None:
if BedrockModelProvider.is_enabled() and not llm.metadata.is_function_calling_model:
if BedrockModelProvider.is_enabled() and not is_bedrock_function_calling_model_v2(
llm.metadata.model_name
):
raise HTTPException(
status_code=422,
detail=f"Tool calling is enabled, but the model {llm.metadata.model_name} does not support tool calling. "
f"The following models support tool calling: {', '.join(list(BEDROCK_FUNCTION_CALLING_MODELS))}.",
detail=f"Tool calling is enabled, but the model {get_model_name(llm.metadata.model_name)} does not support tool calling. "
f"The following models support tool calling: {', '.join(list(MODIFIED_BEDROCK_FUNCTION_CALLING_MODELS))}.",
)
if (
OpenAiModelProvider.is_enabled() or AzureModelProvider.is_enabled()
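A short, hedged example of how the trimmed allowlist behaves; the helper mirrors `is_bedrock_function_calling_model_v2` above, and the final check assumes `get_model_name` leaves a plain model id unchanged.

```python
# Illustrative check against the trimmed Bedrock function-calling allowlist.
from llama_index.llms.bedrock_converse.utils import (
    BEDROCK_FUNCTION_CALLING_MODELS,
    get_model_name,
)

# Llama 3.2 1B/3B are on LlamaIndex's list but are excluded by this PR.
EXCLUDED = {
    "meta.llama3-2-1b-instruct-v1:0",
    "meta.llama3-2-3b-instruct-v1:0",
}
ALLOWED = tuple(set(BEDROCK_FUNCTION_CALLING_MODELS) - EXCLUDED)


def supports_tool_calling(model_name: str) -> bool:
    """True when the normalized Bedrock model id is in the trimmed allowlist."""
    return get_model_name(model_name) in ALLOWED


# A Llama 3.2 1B session would now be rejected with a 422 by check_for_tool_calling_support.
print(supports_tool_calling("meta.llama3-2-1b-instruct-v1:0"))  # expected: False
```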
12 changes: 10 additions & 2 deletions ui/src/pages/RagChatTab/Settings/ChatSettingsModal.tsx
@@ -52,7 +52,7 @@ import {
import { useGetLlmModels, useGetRerankingModels } from "src/api/modelsApi.ts";
import { transformModelOptions } from "src/utils/modelUtils.ts";
import { ResponseChunksRange } from "pages/RagChatTab/Settings/ResponseChunksSlider.tsx";
import { useContext } from "react";
import { useContext, useEffect } from "react";
import { RagChatContext } from "pages/RagChatTab/State/RagChatContext.tsx";
import {
UpdateSessionRequest,
@@ -77,8 +77,8 @@ const ChatSettingsModal = ({
const { data: rerankingModels } = useGetRerankingModels();
const {
dataSourcesQuery: { dataSources },
activeSession,
} = useContext(RagChatContext);
const { activeSession } = useContext(RagChatContext);
const [form] = Form.useForm<Omit<CreateSessionType, "id">>();
const queryClient = useQueryClient();
const updateSession = useUpdateSessionMutation({
Expand All @@ -99,6 +99,14 @@ const ChatSettingsModal = ({
return null;
}

useEffect(() => {
if (activeSession.name) {
form.setFieldsValue({
name: activeSession.name,
});
}
}, [activeSession.name, form.setFieldsValue]);

const handleUpdateSession = () => {
form
.validateFields()