Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ jobs:
extra_nix_config: |
experimental-features = nix-command flakes

# Install uv via its official installer script; the Python formatting
# targets in fmt.mk run ruff through uvx.
- name: Install uv
  run: curl -LsSf https://astral.sh/uv/install.sh | sh

# Make $HOME/.local/bin visible to every later step (presumably where the
# installer places uv/uvx - confirm against the installer's output).
# $GITHUB_PATH is quoted so the redirect target is never word-split.
- name: Add uv to PATH
  run: echo "$HOME/.local/bin" >> "$GITHUB_PATH"

- name: Run static checks
  run: make -j3 static-check

Expand Down
67 changes: 67 additions & 0 deletions .github/workflows/terminal-bench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Manually triggered workflow that runs the Terminal-Bench benchmark through
# `make benchmark-terminal` and uploads whatever results it leaves behind.
name: Terminal-Bench

on:
  workflow_dispatch:
    inputs:
      dataset:
        description: 'Terminal-Bench dataset to use'
        required: false
        default: 'terminal-bench-core==0.1.1'
        type: string
      concurrency:
        description: 'Number of concurrent tasks (--n-concurrent)'
        required: false
        default: '4'
        type: string
      livestream:
        description: 'Enable livestream mode'
        required: false
        default: true
        type: boolean
      extra_args:
        description: 'Additional arguments to pass to terminal-bench'
        required: false
        type: string

jobs:
  benchmark:
    name: Run Terminal-Bench
    # Use the larger Depot runner only when the repository owner is 'coder';
    # any other owner falls back to the standard hosted runner.
    runs-on: ${{ github.repository_owner == 'coder' && 'depot-ubuntu-22.04-16' || 'ubuntu-latest' }}
    timeout-minutes: 180 # 3 hours - terminal-bench can take a long time
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # Required for git describe to find tags

      - uses: ./.github/actions/setup-cmux

      - name: Install uv
        run: curl -LsSf https://astral.sh/uv/install.sh | sh

      # Quote $GITHUB_PATH so the redirect target is never word-split.
      - name: Add uv to PATH
        run: echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Generate version file
        run: ./scripts/generate-version.sh

      - name: Run Terminal-Bench
        run: make benchmark-terminal
        env:
          TB_DATASET: ${{ inputs.dataset }}
          TB_CONCURRENCY: ${{ inputs.concurrency }}
          # Boolean input becomes '1' when enabled and an empty string otherwise.
          TB_LIVESTREAM: ${{ inputs.livestream && '1' || '' }}
          TB_ARGS: ${{ inputs.extra_args }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

      # always() keeps the upload running even when the benchmark step fails.
      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: terminal-bench-results
          path: |
            terminal-bench-results/
            *.json
          if-no-files-found: warn

Empty file added benchmarks/__init__.py
Empty file.
23 changes: 20 additions & 3 deletions fmt.mk
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,25 @@
# This file contains all code formatting logic.
# Included by the main Makefile.

# Every fmt-* entry point is a command, not a file, so all are declared phony.
# A single declaration is kept: the shorter list that preceded it was a strict
# subset of this one.
.PHONY: fmt fmt-check fmt-prettier fmt-prettier-check fmt-shell fmt-shell-check fmt-nix fmt-nix-check fmt-python fmt-python-check

# Formatter inputs, defined once here and referenced by the fmt-* targets.
SHELL_SCRIPTS := scripts
PYTHON_DIRS := benchmarks
PRETTIER_PATTERNS := 'src/**/*.{ts,tsx,json}' 'tests/**/*.ts' 'docs/**/*.md' 'package.json' 'tsconfig*.json' 'README.md'

# Prettier is always launched through `bun x` so the version pinned in
# package.json is the one that runs (reproducible formatting).
PRETTIER := bun x prettier

# Optional tools: each variable holds whatever `command -v` prints for the
# tool, and is empty when the tool is not found.
UVX := $(shell command -v uvx 2>/dev/null)
NIX := $(shell command -v nix 2>/dev/null)
SHFMT := $(shell command -v shfmt 2>/dev/null)

# Run every formatter (Prettier, shell, Python, Nix), then report success.
# One rule header only: the shorter prerequisite list that preceded it was a
# subset of this one.
fmt: fmt-prettier fmt-shell fmt-python fmt-nix
	@echo "==> All formatting complete!"

# Run every formatting check (Prettier, shell, Python, Nix), then report
# success. One rule header only: the shorter prerequisite list that preceded
# it was a subset of this one.
fmt-check: fmt-prettier-check fmt-shell-check fmt-python-check fmt-nix-check
	@echo "==> All formatting checks passed!"

fmt-prettier:
Expand Down Expand Up @@ -48,6 +50,21 @@ else
@shfmt -i 2 -ci -bn -d $(SHELL_SCRIPTS)
endif

# Helper target to check for uvx.
# UVX is resolved when the makefile is parsed: if it is empty the recipe below
# prints install instructions and fails; otherwise the target has no recipe
# and succeeds immediately.
# Declared phony because it is a command, not a file - a stray file named
# .check-uvx must never be able to satisfy it.
.PHONY: .check-uvx
.check-uvx:
ifeq ($(UVX),)
	@echo "Error: uvx not found. Install with: curl -LsSf https://astral.sh/uv/install.sh | sh"
	@exit 1
endif

# Reformat everything under $(PYTHON_DIRS) with ruff, launched through uvx.
# The .check-uvx prerequisite aborts with install instructions if uvx is absent.
fmt-python: .check-uvx
	@echo "Formatting Python files..."
	@uvx ruff format $(PYTHON_DIRS)

# Run `ruff format --check` (via uvx) over $(PYTHON_DIRS).
# The .check-uvx prerequisite aborts with install instructions if uvx is absent.
fmt-python-check: .check-uvx
	@echo "Checking Python formatting..."
	@uvx ruff format --check $(PYTHON_DIRS)

fmt-nix:
ifeq ($(NIX),)
@echo "Nix not found; skipping Nix formatting"
Expand Down