15 changes: 13 additions & 2 deletions .env.example
@@ -5,8 +5,15 @@
# Create your config: cp scripts/user-config/application-user.yml.example scripts/user-config/application-<yourname>.yml
GUIDE_PROFILE=user

# OpenAI API key (required for embeddings and chat)
OPENAI_API_KEY=sk-proj-your-key-here
# --- LLM Provider API Keys ---
# For local/MCP use: set at least one key so the server has an LLM available.
# For hub/web deployment: leave all keys unset — users bring their own (BYOK)
# via Settings → Integrations.
# When multiple keys are set, the server auto-detects the provider in this order:
# OPENAI_API_KEY=sk-proj-your-key-here
# ANTHROPIC_API_KEY=sk-ant-your-key-here
# MISTRAL_API_KEY=your-key-here
# DEEPSEEK_API_KEY=your-key-here

# Neo4j (optional — defaults shown)
# NEO4J_USERNAME=neo4j
@@ -15,3 +22,7 @@ OPENAI_API_KEY=sk-proj-your-key-here

# Discord bot token (optional — only needed for Discord integration)
# DISCORD_TOKEN=your-discord-token

# Encryption key for BYOK API keys cached client-side.
# Generate with: openssl rand -base64 32
EMBABEL_KEY_SECRET=
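The `EMBABEL_KEY_SECRET` setup suggested in the example file can be scripted; a minimal sketch, assuming `openssl` is installed and a `.env` file in the current directory:

```shell
# Generate a 32-byte AES key, as the .env.example comment suggests
KEY=$(openssl rand -base64 32)

# base64 of 32 raw bytes is always 44 characters
echo "Key length: ${#KEY}"

# Append it to the local .env file read by docker compose
echo "EMBABEL_KEY_SECRET=$KEY" >> .env
```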
20 changes: 16 additions & 4 deletions README.md
@@ -17,6 +17,12 @@
Guide exposes resources relating to the Embabel Agent Framework, such
as documentation, relevant blogs and other content, and up-to-the-minute API information.

<p align="center">
<img src="guide-demo.png" alt="Guide Demo" width="700">
</p>

[![The Voice, The Word, and The Wheel](https://img.youtube.com/vi/hY6ZFMIJdd4/maxresdefault.jpg)](https://www.youtube.com/watch?v=hY6ZFMIJdd4)

This is exposed in two ways:

- Via a chat server (WebSocket/STOMP) for custom front-ends
@@ -437,9 +443,11 @@ Docker Compose supports environment variable overrides. You can set them inline
- **`NEO4J_VERSION` / `NEO4J_USERNAME` / `NEO4J_PASSWORD`**: Neo4j settings (optional)
- **`DISCORD_TOKEN`**: optional, to enable the Discord bot

#### OpenAI API key
#### LLM API key

For local/MCP use, the `guide` container needs at least one LLM provider key. Supported providers (in auto-detection order): `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `MISTRAL_API_KEY`, `DEEPSEEK_API_KEY`.

The `guide` container needs `OPENAI_API_KEY`. You can:
For hub/web deployments, no server-side key is needed — users bring their own via **Settings → Integrations**.

1. **Create a `.env` file** next to `compose.yaml`:

@@ -479,7 +487,11 @@ docker compose --profile java down --remove-orphans
| `NEO4J_HTTP_PORT` | `7474` | Neo4j HTTP port |
| `NEO4J_BOLT_PORT` | `7687` | Neo4j Bolt port |
| `NEO4J_HTTPS_PORT` | `7473` | Neo4j HTTPS port |
| `OPENAI_API_KEY` | (required) | OpenAI API key |
| `OPENAI_API_KEY` | (optional) | OpenAI API key (or any one provider key below) |
| `ANTHROPIC_API_KEY`| (optional) | Anthropic API key |
| `MISTRAL_API_KEY` | (optional) | Mistral API key |
| `DEEPSEEK_API_KEY` | (optional) | DeepSeek API key |
| `EMBABEL_KEY_SECRET`| (recommended) | AES key for BYOK key encryption (`openssl rand -base64 32`) |
| `DISCORD_TOKEN` | (optional) | Discord bot token |

Example:
@@ -494,7 +506,7 @@ NEO4J_PASSWORD=mysecretpassword OPENAI_API_KEY=sk-... GUIDE_PORT=1338 docker com

Tests require the following:

1. **OpenAI API Key**: Set `OPENAI_API_KEY` in your environment before running tests:
1. **LLM API Key**: Set at least one provider key in your environment before running tests:

```bash
export OPENAI_API_KEY=sk-your-key-here
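Per the README change above, any one of the supported provider keys satisfies the test prerequisite; a sketch of the alternatives (key values are placeholders, not real keys):

```shell
# Set exactly one of these before running tests (values are placeholders)
export OPENAI_API_KEY=sk-proj-your-key-here
# or: export ANTHROPIC_API_KEY=sk-ant-your-key-here
# or: export MISTRAL_API_KEY=your-key-here
# or: export DEEPSEEK_API_KEY=your-key-here

# Confirm at least one key is present in the environment
echo "${OPENAI_API_KEY:+an LLM provider key is set}"
```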
Binary file added guide-demo.png
27 changes: 26 additions & 1 deletion pom.xml
@@ -19,6 +19,7 @@
<properties>
<java.version>21</java.version>
<embabel-agent.version>0.3.5-SNAPSHOT</embabel-agent.version>
<spring-ai.version>1.1.1</spring-ai.version>
<kotlin.version>2.2.0</kotlin.version>
</properties>

@@ -56,11 +57,35 @@
<version>0.2.0-SNAPSHOT</version>
</dependency>

<!-- Spring AI client libraries for BYOK — OpenAI (no starter, which requires API key at startup) -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-openai</artifactId>
<version>${spring-ai.version}</version>
</dependency>

<!-- Local ONNX embedding model -->
<dependency>
<groupId>com.embabel.agent</groupId>
<artifactId>embabel-agent-starter-openai</artifactId>
<artifactId>embabel-agent-starter-onnx</artifactId>
<version>${embabel-agent.version}</version>
</dependency>

<!-- Spring AI client libraries for BYOK (no embabel starters — those require API keys at startup) -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-anthropic</artifactId>
<version>${spring-ai.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-mistral-ai</artifactId>
<version>${spring-ai.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-deepseek</artifactId>
<version>${spring-ai.version}</version>
</dependency>

<dependency>
13 changes: 3 additions & 10 deletions src/main/java/com/embabel/guide/rag/RagConfiguration.java
@@ -21,33 +21,26 @@
import com.embabel.agent.rag.neo.drivine.DrivineStore;
import com.embabel.agent.rag.neo.drivine.NeoRagServiceProperties;
import com.embabel.common.ai.model.EmbeddingService;
import com.embabel.common.ai.model.ModelProvider;
import com.embabel.common.ai.model.ModelSelectionCriteria;
import com.embabel.guide.GuideProperties;
import org.drivine.manager.PersistenceManager;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import org.springframework.transaction.PlatformTransactionManager;

/**
* Configuration for RAG (Retrieval Augmented Generation) components.
* Creates the DrivineStore and related beans for Neo4j-based RAG operations.
*
* The EmbeddingService is provided by the ONNX embeddings auto-configuration
* (embabel-agent-embeddings-onnx) and injected directly into DrivineStore.
*/
@Configuration
@EnableConfigurationProperties(NeoRagServiceProperties.class)
class RagConfiguration {

@Bean
@Primary
@ConditionalOnMissingBean
EmbeddingService embeddingService(ModelProvider modelProvider) {
return modelProvider.getEmbeddingService(ModelSelectionCriteria.getPlatformDefault());
}

@Bean
ChunkTransformer chunkTransformer() {
return ChunkTransformer.NO_OP;
67 changes: 51 additions & 16 deletions src/main/kotlin/com/embabel/guide/ChatActions.kt
@@ -30,6 +30,9 @@ import com.embabel.guide.narrator.NarratorAgent
import com.embabel.guide.rag.DataManager
import com.embabel.guide.util.truncate
import com.embabel.hub.PersonaService
import com.embabel.hub.integrations.SetupRequiredChatModel
import com.embabel.hub.integrations.LlmRole
import com.embabel.hub.integrations.UserLlmResolver
import org.slf4j.LoggerFactory
import java.time.Instant
import java.util.UUID
@@ -49,6 +52,7 @@ class ChatActions(
private val chatService: ChatService,
private val personaService: PersonaService,
private val commandExecutor: CommandExecutor,
private val userLlmResolver: UserLlmResolver,
) {

private val logger = LoggerFactory.getLogger(ChatActions::class.java)
@@ -70,6 +74,10 @@
logger.error("Cannot respond: guideUser is null for context user {}", context.user())
return
}
if (!userLlmResolver.hasLlm(guideUser.id)) {
sendResponse(AssistantMessage(SetupRequiredChatModel.SETUP_MESSAGE), conversation, context)
return
}
try {
val snapshot = messages.toList()
val lastMsg = snapshot.lastOrNull()
@@ -86,7 +94,7 @@
if (snapshot.size > 1) {
try {
val userContent = (snapshot.last() as? UserMessage)?.content ?: ""
val check = classifyMessage(userContent, conversation, context, templateModel)
val check = classifyMessage(userContent, conversation, context, guideUser, templateModel)
category = check.category
quickResponse = check.response
logger.info("[CLASSIFY RESULT] input='{}' category={}",
@@ -122,7 +130,7 @@
logger.info("[COMMAND] Falling through to RAG for: {}", commandResult.ragRequest)
// Replace the user message with the extracted rag request for the RAG pipeline
val ragMessage = AssistantMessage(
buildRendering(context)
buildRendering(context, guideUser)
.respondWithSystemPrompt(conversation, templateModel)
.content
)
@@ -141,15 +149,15 @@
}
}

val assistantMessage = buildRendering(context)
val assistantMessage = buildRendering(context, guideUser)
.respondWithSystemPrompt(conversation, templateModel)
logger.info("[TRACE] LLM response: '{}'",
assistantMessage.content.truncate(100))
computeAndCacheNarration(assistantMessage, conversation, guideUser, context)
sendResponse(assistantMessage, conversation, context)
} catch (e: Exception) {
logger.error("LLM call failed for user {}: {}", context.user(), e.message, e)
sendErrorResponse(conversation, context)
sendErrorResponse(conversation, context, e)
}
}

@@ -163,8 +171,12 @@
logger.error("Cannot respond to trigger: guideUser is null")
return
}
if (!userLlmResolver.hasLlm(guideUser.id)) {
logger.info("[TRIGGER] Silently aborting trigger for user {} — no LLM configured", guideUser.id)
return
}
try {
val assistantMessage = buildRendering(context)
val assistantMessage = buildRendering(context, guideUser)
.respondWithTrigger(conversation, trigger.prompt, buildTemplateModel(guideUser, conversation))
computeAndCacheNarration(assistantMessage, conversation, guideUser, context)
sendResponse(assistantMessage, conversation, context)
@@ -192,10 +204,8 @@
else -> throw RuntimeException("Unknown user type: $user")
}

private fun buildRendering(context: ActionContext): PromptRunner.Rendering {
return context
.ai()
.withLlm(guideProperties.chatLlm)
private fun buildRendering(context: ActionContext, guideUser: GuideUser): PromptRunner.Rendering {
return userLlmResolver.resolve(context, guideUser.id, LlmRole.CHAT)
.withId("chat_response")
.withReferences(dataManager.referencesForUser(context.user()))
.withToolGroups(guideProperties.toolGroups)
@@ -249,7 +259,7 @@
}
try {
val persona = guideUser.core.persona ?: guideProperties.defaultPersona
val narration = narratorAgent.narrate(assistantMessage.content, persona, context)
val narration = narratorAgent.narrate(assistantMessage.content, persona, context, guideUser.id)
logger.info("[NARRATION] Narration complete for conversation {}: {} chars", conversationId, narration.text.length)
narrationCache.put(conversationId, narration.text)
} catch (e: Exception) {
@@ -274,13 +284,39 @@
context.sendMessage(assistantMessage)
}

private fun sendErrorResponse(conversation: Conversation, context: ActionContext) {
private fun sendErrorResponse(conversation: Conversation, context: ActionContext, cause: Exception? = null) {
val detail = cause?.let { userFacingErrorDetail(it) } ?: ""
val errorMessage = AssistantMessage(
"I'm sorry, I'm having trouble connecting to the AI service right now. Please try again in a moment."
"I'm sorry, I'm having trouble connecting to the AI service right now. $detail".trimEnd() +
"\n\nPlease try again in a moment."
)
sendResponse(errorMessage, conversation, context)
}

companion object {
/**
* Extracts a user-friendly error detail from an LLM exception.
* Safe to show — no internal details, just actionable info.
*/
fun userFacingErrorDetail(e: Exception): String {
val msg = e.message ?: return ""
return when {
msg.contains("401") || msg.contains("unauthorized", ignoreCase = true) ->
"Your API key appears to be invalid or expired. Please check your key in Settings."
msg.contains("402") || msg.contains("billing", ignoreCase = true) ||
msg.contains("quota", ignoreCase = true) || msg.contains("insufficient", ignoreCase = true) ->
"Your API account may have run out of credits or exceeded its quota. Please check your billing."
msg.contains("429") || msg.contains("rate", ignoreCase = true) ->
"The AI provider is rate-limiting requests. Please wait a moment."
msg.contains("404") || msg.contains("not_found", ignoreCase = true) ->
"The configured AI model could not be found. Please check your settings."
msg.contains("500") || msg.contains("502") || msg.contains("503") ->
"The AI provider is experiencing an outage."
else -> ""
}
}
}

/**
* Pass 1: Classify the latest user message into CONVERSATIONAL, COMMAND, or INFORMATIONAL using nano.
* If conversational, includes a quick response to avoid the full RAG pipeline.
@@ -289,6 +325,7 @@
userMessage: String,
conversation: Conversation,
context: ActionContext,
guideUser: GuideUser,
templateModel: Map<String, Any>,
): CategoryCheck {
val messages = conversation.messages
@@ -307,8 +344,7 @@
put("userMessage", userMessage)
put("personaNames", personaNames)
}
return context.ai()
.withLlm(guideProperties.classifierLlm)
return userLlmResolver.resolve(context, guideUser.id, LlmRole.CLASSIFIER)
.rendering("classifier")
.createObject(CategoryCheck::class.java, model)
}
@@ -332,8 +368,7 @@
putAll(templateModel)
put("userMessage", userMessage)
}
return context.ai()
.withLlm(guideProperties.classifierLlm)
return userLlmResolver.resolve(context, guideUser.id, LlmRole.CLASSIFIER)
.withToolObject(tools)
.rendering("command_executor")
.createObject(CommandResult::class.java, model)
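The `userFacingErrorDetail` heuristic added to `ChatActions` above maps raw provider error messages onto safe, actionable text. A condensed standalone sketch in Java (class and method names here are illustrative, not the project's actual API):

```java
// Hypothetical mirror of the Kotlin userFacingErrorDetail heuristic:
// match common HTTP status codes / keywords in an exception message
// and return a user-safe hint, or "" when nothing recognizable matches.
public class ErrorDetail {

    static String userFacingErrorDetail(Exception e) {
        String msg = e.getMessage();
        if (msg == null) return "";
        String lower = msg.toLowerCase();
        if (msg.contains("401") || lower.contains("unauthorized"))
            return "Your API key appears to be invalid or expired. Please check your key in Settings.";
        if (msg.contains("402") || lower.contains("billing")
                || lower.contains("quota") || lower.contains("insufficient"))
            return "Your API account may have run out of credits or exceeded its quota.";
        if (msg.contains("429") || lower.contains("rate"))
            return "The AI provider is rate-limiting requests. Please wait a moment.";
        if (msg.contains("500") || msg.contains("502") || msg.contains("503"))
            return "The AI provider is experiencing an outage.";
        return "";
    }

    public static void main(String[] args) {
        System.out.println(userFacingErrorDetail(new RuntimeException("HTTP 401 Unauthorized")));
    }
}
```

Note the design choice the PR makes: unmatched messages map to an empty string, so raw provider errors are never echoed to the user.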
9 changes: 3 additions & 6 deletions src/main/kotlin/com/embabel/guide/GuideProperties.kt
@@ -1,8 +1,8 @@
package com.embabel.guide

import com.embabel.agent.rag.ingestion.ContentChunker
import com.embabel.common.ai.model.LlmOptions
import com.embabel.common.util.StringTransformer
import com.embabel.hub.integrations.LlmProvider
import jakarta.validation.constraints.NotBlank
import org.springframework.boot.context.properties.ConfigurationProperties
import org.springframework.boot.context.properties.NestedConfigurationProperty
@@ -15,8 +15,7 @@ import java.nio.file.Path
*
* @param reloadContentOnStartup whether to reload RAG content on startup
* @param defaultPersona name of the default persona to use
* @param chatLlm LLM options for RAG chat (beefy model)
* @param classifierLlm LLM options for message classification (lightweight model)
* @param defaultProvider which LLM provider to use for server-side defaults; auto-detected from env vars if not set
* @param projectsPath path to projects root: absolute, or relative to the process working directory (user.dir)
* @param chunkerConfig chunker configuration for RAG ingestion
* @param referencesFile YML files containing LLM references such as GitHub repositories and classpath info
@@ -30,8 +29,7 @@ data class GuideProperties(
val reloadContentOnStartup: Boolean,
@field:NotBlank(message = "defaultPersona must not be blank")
val defaultPersona: String,
val chatLlm: LlmOptions,
val classifierLlm: LlmOptions,
val defaultProvider: LlmProvider? = null,
@field:NotBlank(message = "projectsPath must not be blank")
val projectsPath: String,
@NestedConfigurationProperty val chunkerConfig: ContentChunker.Config?,
@@ -43,7 +41,6 @@
val toolPrefix: String,
val directories: List<String>?,
val toolGroups: Set<String>,
val narratorLlm: LlmOptions,
) {

fun toolNamingStrategy(): StringTransformer = StringTransformer { name -> toolPrefix + name }
3 changes: 2 additions & 1 deletion src/main/kotlin/com/embabel/guide/domain/GuideUserData.kt
@@ -18,7 +18,8 @@ data class GuideUserData(
override var username: String = displayName,
override var email: String? = null,
var persona: String? = null,
var customPrompt: String? = null
var customPrompt: String? = null,
var welcomed: Boolean = false,
) : HasGuideUserData, StoredUser {

override fun guideUserData(): GuideUserData = this