Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .lastmerge
Original file line number Diff line number Diff line change
@@ -1 +1 @@
c063458ecc3d606766f04cf203b11b08de672cc8
58cf64d2c55107c6e86902f75808ff400b8a0eb7
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
reference-impl-sync workflow and deal with the subsequent
PR.
-->
<readonly-copilot-sdk-ref-impl-version-from-lastmerge-file-updated-by-reference-impl-sync>^1.0.41-0</readonly-copilot-sdk-ref-impl-version-from-lastmerge-file-updated-by-reference-impl-sync>
<readonly-copilot-sdk-ref-impl-version-from-lastmerge-file-updated-by-reference-impl-sync>^1.0.41-1</readonly-copilot-sdk-ref-impl-version-from-lastmerge-file-updated-by-reference-impl-sync>

</properties>

Expand Down
56 changes: 28 additions & 28 deletions scripts/codegen/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion scripts/codegen/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"generate:java": "tsx java.ts"
},
"dependencies": {
"@github/copilot": "^1.0.41-0",
"@github/copilot": "^1.0.41",
"json-schema": "^0.4.0",
"tsx": "^4.20.6"
}
Expand Down
21 changes: 20 additions & 1 deletion src/main/java/com/github/copilot/sdk/CopilotClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,9 @@ private CompletableFuture<Connection> startCore() {
}

private Connection startCoreBody() {
Process process = null;
try {
JsonRpcClient rpc;
Process process = null;

if (optionsHost != null && optionsPort != null) {
// External server (TCP)
Expand All @@ -215,6 +215,11 @@ private Connection startCoreBody() {
LOG.info("Copilot client connected");
return connection;
} catch (Exception e) {
// Clean up process if startup failed partway through
if (process != null) {
cleanupCliProcess(process);
}

String stderr = serverManager.getStderrOutput();
if (!stderr.isEmpty()) {
throw new CompletionException(new IOException(
Expand All @@ -224,6 +229,20 @@ private Connection startCoreBody() {
}
}

/**
 * Best-effort cleanup of a CLI child process after a failed startup.
 * <p>
 * Forcibly destroys the process if it is still alive and waits up to
 * {@code FORCE_KILL_TIMEOUT_SECONDS} for it to exit. All failures are
 * logged and swallowed: this runs on an error path and must never mask
 * the original startup exception.
 *
 * @param process
 *            the CLI process to terminate; must not be {@code null}
 */
private static void cleanupCliProcess(Process process) {
    try {
        if (process.isAlive()) {
            process.destroyForcibly();
            // waitFor returns false when the process is still alive after the
            // timeout; surface that so a lingering CLI process is diagnosable.
            boolean exited = process.waitFor(FORCE_KILL_TIMEOUT_SECONDS, TimeUnit.SECONDS);
            if (!exited) {
                LOG.log(Level.WARNING,
                        "CLI process did not terminate within {0}s after destroyForcibly",
                        FORCE_KILL_TIMEOUT_SECONDS);
            }
        }
    } catch (InterruptedException ie) {
        // Restore the interrupt flag so callers can observe the interruption.
        Thread.currentThread().interrupt();
        LOG.log(Level.FINE, "Interrupted while cleaning up CLI process", ie);
    } catch (Exception ex) {
        LOG.log(Level.FINE, "Error cleaning up CLI process during failed startup", ex);
    }
}

private static final int MIN_PROTOCOL_VERSION = 2;
private static final int METHOD_NOT_FOUND_ERROR_CODE = -32601;

Expand Down
124 changes: 124 additions & 0 deletions src/main/java/com/github/copilot/sdk/json/ProviderConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ public class ProviderConfig {
@JsonProperty("headers")
private Map<String, String> headers;

@JsonProperty("modelId")
private String modelId;

@JsonProperty("wireModel")
private String wireModel;

@JsonProperty("maxPromptTokens")
private Integer maxInputTokens;

@JsonProperty("maxOutputTokens")
private Integer maxOutputTokens;

/**
* Gets the provider type.
*
Expand Down Expand Up @@ -225,4 +237,116 @@ public ProviderConfig setHeaders(Map<String, String> headers) {
this.headers = headers;
return this;
}

/**
 * Returns the well-known model name that the runtime uses to resolve agent
 * configuration (tools, prompts, reasoning behavior) and default token
 * limits.
 * <p>
 * When {@link #getWireModel()} is unset, this value doubles as the wire
 * model sent to the provider. Falls back to
 * {@link SessionConfig#getModel()}.
 *
 * @return the model ID, or {@code null} if not set
 */
public String getModelId() {
    return this.modelId;
}

/**
 * Sets the well-known model name that the runtime uses to resolve agent
 * configuration (tools, prompts, reasoning behavior) and default token
 * limits.
 * <p>
 * When {@link #setWireModel(String)} is unset, this value doubles as the
 * wire model sent to the provider. Falls back to
 * {@link SessionConfig#getModel()}.
 *
 * @param modelId
 *            the model ID
 * @return this config for method chaining
 */
public ProviderConfig setModelId(String modelId) {
    this.modelId = modelId;
    return this;
}

/**
 * Returns the model name that is sent to the provider API for inference.
 * <p>
 * Useful when the provider-side model name (e.g. an Azure deployment name
 * or a custom fine-tune name) differs from {@link #getModelId()}. Resolution
 * order when unset: {@link #getModelId()}, then
 * {@link SessionConfig#getModel()}.
 *
 * @return the wire model name, or {@code null} if not set
 */
public String getWireModel() {
    return this.wireModel;
}

/**
 * Sets the model name that is sent to the provider API for inference.
 * <p>
 * Useful when the provider-side model name (e.g. an Azure deployment name
 * or a custom fine-tune name) differs from {@link #getModelId()}. Resolution
 * order when unset: {@link #getModelId()}, then
 * {@link SessionConfig#getModel()}.
 *
 * @param wireModel
 *            the wire model name
 * @return this config for method chaining
 */
public ProviderConfig setWireModel(String wireModel) {
    this.wireModel = wireModel;
    return this;
}

/**
 * Returns the override for the resolved model's default maximum prompt
 * tokens (serialized on the wire as {@code maxPromptTokens}).
 * <p>
 * Before sending a request, the runtime triggers conversation compaction
 * whenever the prompt (system message, history, tool definitions, user
 * message) would exceed this limit.
 *
 * @return the max input tokens, or {@code null} if not set
 */
public Integer getMaxInputTokens() {
    return this.maxInputTokens;
}

/**
 * Sets the override for the resolved model's default maximum prompt tokens
 * (serialized on the wire as {@code maxPromptTokens}).
 * <p>
 * Before sending a request, the runtime triggers conversation compaction
 * whenever the prompt (system message, history, tool definitions, user
 * message) would exceed this limit.
 *
 * @param maxInputTokens
 *            the max input tokens
 * @return this config for method chaining
 */
public ProviderConfig setMaxInputTokens(Integer maxInputTokens) {
    this.maxInputTokens = maxInputTokens;
    return this;
}

/**
 * Returns the override for the resolved model's default maximum output
 * tokens.
 * <p>
 * When this limit is reached, the model stops generating and the response
 * is truncated.
 *
 * @return the max output tokens, or {@code null} if not set
 */
public Integer getMaxOutputTokens() {
    return this.maxOutputTokens;
}

/**
 * Sets the override for the resolved model's default maximum output
 * tokens.
 * <p>
 * When this limit is reached, the model stops generating and the response
 * is truncated.
 *
 * @param maxOutputTokens
 *            the max output tokens
 * @return this config for method chaining
 */
public ProviderConfig setMaxOutputTokens(Integer maxOutputTokens) {
    this.maxOutputTokens = maxOutputTokens;
    return this;
}
}
33 changes: 26 additions & 7 deletions src/site/markdown/advanced.md
Original file line number Diff line number Diff line change
Expand Up @@ -421,17 +421,36 @@ foundry service status

When using BYOK, be aware of these limitations:

#### Identity Limitations
#### Model and Token Limit Overrides

BYOK authentication uses **static credentials only**. The following identity providers are NOT supported:
You can override the model name and token limits used by the provider:

- ❌ **Microsoft Entra ID (Azure AD)** - No support for Entra managed identities or service principals
- ❌ **Third-party identity providers** - No OIDC, SAML, or other federated identity
- ❌ **Managed identities** - Azure Managed Identity is not supported
```java
var session = client.createSession(
new SessionConfig().setOnPermissionRequest(PermissionHandler.APPROVE_ALL)
.setProvider(new ProviderConfig()
.setType("openai")
.setBaseUrl("https://api.openai.com/v1")
.setApiKey("sk-...")
.setModelId("gpt-4o") // Runtime model for config lookup
.setWireModel("my-finetune-v3") // Actual model name sent to provider API
.setMaxInputTokens(100_000) // Override max prompt tokens
.setMaxOutputTokens(4096)) // Override max output tokens
).get();
```

You must use an API key or static bearer token that you manage yourself.
| Property | Description |
|---|---|
| `modelId` | Well-known model name for runtime config lookup (tools, prompts, reasoning). Also used as wire model when `wireModel` is not set. Falls back to `SessionConfig.model`. |
| `wireModel` | Model name sent to the provider API. Use when the provider's model name (e.g. Azure deployment name or fine-tune) differs from `modelId`. Falls back to `modelId`, then `SessionConfig.model`. |
| `maxInputTokens` | Override max prompt tokens. The runtime compacts the conversation before the prompt would exceed this limit. |
| `maxOutputTokens` | Override max output tokens. The model stops generating when this limit is hit. |

**Why not Entra ID?** While Entra ID does issue bearer tokens, these tokens are short-lived (typically 1 hour) and require automatic refresh via the Azure Identity SDK. The `bearerToken` option only accepts a static string—there is no callback mechanism for the SDK to request fresh tokens. For long-running workloads requiring Entra authentication, you would need to implement your own token refresh logic and create new sessions with updated tokens.
#### Identity Limitations

BYOK authentication uses **static credentials only**.

You must use an API key or static bearer token that you manage yourself.

---

Expand Down
Loading
Loading