@@ -52,6 +52,27 @@ function collectToolOutputs(events: WorkspaceChatMessage[], toolName: string): s
5252 . join ( "\n" ) ;
5353}
5454
/**
 * Helper to calculate tool execution duration from captured events.
 *
 * Looks up the first captured event whose payload has `type === "tool-call-start"`
 * and the first whose payload has `type === "tool-call-end"` for the given tool
 * name, and returns the difference between their `timestamp` fields.
 *
 * @param env - Test environment exposing the captured `sentEvents` list.
 * @param toolName - Tool name to match against each payload's `toolName` field.
 * @returns `end.timestamp - start.timestamp` (presumably milliseconds — confirm
 *   against whatever records `timestamp`), or `-1` when either event is missing.
 */
function getToolDuration(
  env: { sentEvents: Array<{ channel: string; data: unknown; timestamp: number }> },
  toolName: string
): number {
  // Shared lookup: first captured event of the given type for this tool.
  const findToolEvent = (eventType: string) =>
    env.sentEvents.find((e) => {
      // `data` is `unknown`; skip null/undefined payloads instead of throwing
      // a TypeError when reading properties off them.
      if (e.data == null) {
        return false;
      }
      const msg = e.data as { type?: unknown; toolName?: unknown };
      return msg.type === eventType && msg.toolName === toolName;
    });

  const startEvent = findToolEvent("tool-call-start");
  const endEvent = findToolEvent("tool-call-end");

  // -1 is the "could not measure" sentinel callers check for.
  if (startEvent === undefined || endEvent === undefined) {
    return -1;
  }
  return endEvent.timestamp - startEvent.timestamp;
}
75+
5576// Skip all tests if TEST_INTEGRATION is not set
5677const describeIntegration = shouldRunIntegrationTests ( ) ? describe : describe . skip ;
5778
@@ -259,16 +280,17 @@ describeIntegration("Runtime Bash Execution", () => {
259280 // Test command that pipes file through stdin-reading command (grep)
260281 // This would hang forever if stdin.close() was used instead of stdin.abort()
261282 // Regression test for: https://github.com/coder/mux/issues/503
262- const startTime = Date . now ( ) ;
263283 const events = await sendMessageAndWait (
264284 env ,
265285 workspaceId ,
266286 "Run bash: cat /tmp/test.json | grep test" ,
267287 HAIKU_MODEL ,
268288 BASH_ONLY ,
269- 10000 // 10s timeout - should complete in ~4s per API call
289+ 30000 // Relaxed timeout for CI stability (was 10s)
270290 ) ;
271- const duration = Date . now ( ) - startTime ;
291+
292+ // Calculate actual tool execution duration
293+ const toolDuration = getToolDuration ( env , "bash" ) ;
272294
273295 // Extract response text
274296 const responseText = extractTextFromEvents ( events ) ;
@@ -279,10 +301,9 @@ describeIntegration("Runtime Bash Execution", () => {
279301 expect ( bashOutput ) . toContain ( '"test": "data"' ) ;
280302
281303 // Verify command completed quickly (not hanging until timeout)
282- // With tokenizer preloading, both local and SSH complete in ~8s total
283- // Actual hangs would hit bash tool's 180s timeout
304+ expect ( toolDuration ) . toBeGreaterThan ( 0 ) ;
284305 const maxDuration = 10000 ;
285- expect ( duration ) . toBeLessThan ( maxDuration ) ;
306+ expect ( toolDuration ) . toBeLessThan ( maxDuration ) ;
286307
287308 // Verify bash tool was called
288309 const toolCallStarts = events . filter ( ( e : any ) => e . type === "tool-call-start" ) ;
@@ -337,16 +358,17 @@ describeIntegration("Runtime Bash Execution", () => {
337358
338359 // Test grep | head pattern - this historically hangs over SSH
339360 // This is a regression test for the bash hang issue
340- const startTime = Date . now ( ) ;
341361 const events = await sendMessageAndWait (
342362 env ,
343363 workspaceId ,
344364 'Run bash: grep -n "terminal bench" testfile.txt | head -n 200' ,
345365 HAIKU_MODEL ,
346366 BASH_ONLY ,
347- 15000 // 15s timeout - should complete quickly
367+ 30000 // Relaxed timeout for CI stability (was 15s)
348368 ) ;
349- const duration = Date . now ( ) - startTime ;
369+
370+ // Calculate actual tool execution duration
371+ const toolDuration = getToolDuration ( env , "bash" ) ;
350372
351373 // Extract response text
352374 const responseText = extractTextFromEvents ( events ) ;
@@ -356,8 +378,9 @@ describeIntegration("Runtime Bash Execution", () => {
356378
357379 // Verify command completed quickly (not hanging until timeout)
358380 // SSH runtime should complete in <10s even with high latency
381+ expect ( toolDuration ) . toBeGreaterThan ( 0 ) ;
359382 const maxDuration = 15000 ;
360- expect ( duration ) . toBeLessThan ( maxDuration ) ;
383+ expect ( toolDuration ) . toBeLessThan ( maxDuration ) ;
361384
362385 // Verify bash tool was called
363386 const toolCallStarts = events . filter ( ( e : any ) => e . type === "tool-call-start" ) ;
0 commit comments