Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{
"name": "yasha",
"build": {
"dockerfile": "../Dockerfile.dev",
"dockerfile": "../Dockerfile",
"target": "dev",
"context": ".."
},
"features": {
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ jobs:
uses: docker/build-push-action@v6
with:
context: .
file: Dockerfile.prod
file: Dockerfile
target: prod
push: true
build-args: |
CUDA_VERSION=13.0.2
Expand Down
59 changes: 26 additions & 33 deletions Dockerfile.prod → Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,6 @@ WORKDIR /yasha
ADD ./pyproject.toml pyproject.toml
ADD ./README.md README.md
ADD ./uv.lock uv.lock
ADD ./start.py start.py
ADD ./yasha yasha
ADD ./config config
ADD ./plugins plugins

ENV UV_PROJECT_ENVIRONMENT=/.venv
Expand All @@ -63,36 +60,32 @@ RUN uv python install ${PYTHON_VERSION}

ENV PATH="$UV_PROJECT_ENVIRONMENT/bin:$PATH"

# ---------------------------------------------------------------------------
# Development target
# ---------------------------------------------------------------------------
FROM base AS dev

RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --locked --no-install-project --extra dev

ADD ./scripts/start_ray.sh /yasha/scripts/start_ray.sh
RUN chmod +x /yasha/scripts/start_ray.sh

CMD ["/bin/bash"]

# ---------------------------------------------------------------------------
# Production target
# ---------------------------------------------------------------------------
FROM base AS prod

ADD ./start.py start.py
ADD ./yasha yasha
ADD ./config config
ADD ./scripts scripts

RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --locked --no-install-project

WORKDIR /
COPY <<EOF start.sh
#!/bin/bash

EXTRAS=""
if [ -n "\${YASHA_PLUGINS}" ]; then
for plugin in \$(echo "\${YASHA_PLUGINS}" | tr ',' ' '); do
EXTRAS="\$EXTRAS --extra \$plugin"
done
fi
uv sync --project /yasha --locked \$EXTRAS

if [ "\${YASHA_USE_EXISTING_RAY_CLUSTER}" != "true" ]; then
METRICS_FLAG=""
if [ "\${YASHA_METRICS}" = "true" ]; then
METRICS_FLAG="--metrics-export-port=\${RAY_METRICS_EXPORT_PORT}"
fi
cd /yasha && ray start --head --port=\${RAY_REDIS_PORT} --dashboard-host=0.0.0.0 --num-cpus=\${RAY_HEAD_CPU_NUM} --num-gpus=\${RAY_HEAD_GPU_NUM} --disable-usage-stats \${METRICS_FLAG}
if ! ray status --address=\${RAY_CLUSTER_ADDRESS}:\${RAY_REDIS_PORT}; then
echo "ray cluster failed to start"
exit 1
fi
fi
cd /yasha && uv run start.py

EOF
RUN chmod +x start.sh

WORKDIR /
CMD ["uv", "run", "--active", "bash", "start.sh"]
RUN chmod +x /yasha/scripts/start_ray.sh /yasha/scripts/start.sh

CMD ["uv", "run", "--active", "bash", "/yasha/scripts/start.sh"]
99 changes: 0 additions & 99 deletions Dockerfile.dev

This file was deleted.

6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Each model runs as an isolated Ray Serve deployment with its own lifecycle, heal
- **Plugin system** — opt-in TTS backends installed as isolated uv workspace packages
- **Multi-GPU support** — assign models to specific GPUs by index or named Ray resource, with full tensor parallelism support
- **Client disconnect detection** — cancels in-flight inference when the client disconnects, freeing GPU resources immediately
- **Prometheus metrics & Grafana dashboard** — built-in observability with custom `yasha:*` metrics, vLLM engine stats, and Ray cluster metrics on a single scrape endpoint; pre-built Grafana dashboard included
- **Ray dashboard** — monitor deployments, resources, and request logs

## Supported OpenAI Endpoints
Expand Down Expand Up @@ -80,7 +81,7 @@ docker run --rm --shm-size=8g --gpus all \
-e YASHA_PLUGINS=kokoro \
-v ./models.yaml:/yasha/config/models.yaml \
-v ./models-cache:/yasha/.cache/models \
-p 8265:8265 -p 8000:8000 ghcr.io/alez007/yasha:latest
-p 8265:8265 -p 8000:8000 -p 8079:8079 ghcr.io/alez007/yasha:latest
```

Try it out:
Expand All @@ -95,6 +96,7 @@ curl http://localhost:8000/v1/chat/completions \
```

- API: `http://localhost:8000`
- Prometheus metrics: `http://localhost:8079`
- Ray dashboard: `http://localhost:8265`

Example configs are included for 8 GB, 16 GB, 24 GB, and 2×16 GB GPU setups.
Expand Down Expand Up @@ -129,7 +131,7 @@ For a full guide on writing your own plugin, see [Plugin Development](docs/plugi

## Monitoring

Yasha exposes Prometheus metrics (Ray cluster, Ray Serve, vLLM, and custom `yasha:*` metrics) through a single port. Enable with `YASHA_METRICS=true` and scrape port 8079. A pre-built Grafana dashboard is included. See [Monitoring](docs/monitoring.md) for setup details.
Yasha exposes Prometheus metrics (Ray cluster, Ray Serve, vLLM, and custom `yasha:*` metrics) through a single scrape endpoint on port 8079. Metrics are **enabled by default** — set `YASHA_METRICS=false` to disable. A pre-built Grafana dashboard is included. See [Monitoring](docs/monitoring.md) for setup details.

## Future Work

Expand Down
4 changes: 2 additions & 2 deletions docs/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ The recommended way to develop Yasha is with VS Code Dev Containers. The configu
> **Why the extra steps?** The Dev Container overrides the image's default `CMD` (which normally runs `start.sh` to sync deps and start Ray). Inside a Dev Container you need to run these steps manually.

The Dev Container automatically:
- Builds the dev image from `Dockerfile.dev`
- Builds the dev image from `Dockerfile` (target: `dev`)
- Bind-mounts the repo to `/yasha` for live editing
- Forwards ports `8000` (API) and `8265` (Ray Dashboard)
- Installs extensions: Ruff, Python, Pyright, and Claude Code
Expand Down Expand Up @@ -76,7 +76,7 @@ If you prefer not to use Dev Containers, you can build and run the dev image dir
### Building the dev image

```bash
docker build -t yasha_dev -f Dockerfile.dev .
docker build -t yasha_dev --target dev .
```

### Running with live source mounting
Expand Down
16 changes: 16 additions & 0 deletions scripts/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Container entrypoint: sync Python dependencies (plus any plugin extras),
# start a local Ray head node unless an existing cluster is used, then run
# start.py from /yasha.
#
# Environment:
#   YASHA_PLUGINS                   Comma-separated plugin names; each one is
#                                   passed to `uv sync` as `--extra <name>`.
#   YASHA_USE_EXISTING_RAY_CLUSTER  When exactly "true", no local Ray head is
#                                   started.
#   YASHA_METRICS                   Forwarded to start_ray.sh as
#                                   --enable-metrics; unset/empty means "true".
#   RAY_HEAD_CPU_NUM                Forwarded to start_ray.sh as --num-cpus.
#   RAY_HEAD_GPU_NUM                Forwarded to start_ray.sh as --num-gpus.
set -e

# Build the `--extra <plugin>` arguments as an array so that each name stays a
# single word and is never glob-expanded.
extras=()
if [ -n "${YASHA_PLUGINS}" ]; then
  # Split on commas (and spaces, so "a, b" works too).
  IFS=', ' read -r -a plugins <<< "${YASHA_PLUGINS}"
  for plugin in "${plugins[@]}"; do
    # Skip empty fields produced by input such as "a,,b".
    [ -n "${plugin}" ] || continue
    extras+=(--extra "${plugin}")
  done
fi
uv sync --project /yasha --locked "${extras[@]}"

if [ "${YASHA_USE_EXISTING_RAY_CLUSTER}" != "true" ]; then
  # YASHA_METRICS must be forwarded explicitly: start_ray.sh enables metrics
  # by default, so without this the variable could never turn them off.
  /yasha/scripts/start_ray.sh \
    --num-cpus "${RAY_HEAD_CPU_NUM}" \
    --num-gpus "${RAY_HEAD_GPU_NUM}" \
    --enable-metrics "${YASHA_METRICS:-true}"
fi

cd /yasha && uv run start.py
38 changes: 38 additions & 0 deletions scripts/start_ray.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/bin/bash
set -e

# Print the command-line synopsis to stderr and terminate with status 1.
# Called for unknown options and for missing required arguments.
usage() {
  echo "Usage: start_ray.sh --num-cpus <n> --num-gpus <n> [--enable-metrics <true|false>]" >&2
  exit 1
}

# Metrics export stays on unless --enable-metrics is given a value other than
# "true".
ENABLE_METRICS="true"

# Parse long options; every recognised option takes exactly one value.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --num-cpus | --num-gpus | --enable-metrics)
      # A trailing option with no value would make `shift 2` fail and, under
      # `set -e`, abort without any message; show the usage text instead.
      [[ $# -ge 2 ]] || usage
      case "$1" in
        --num-cpus) NUM_CPUS="$2" ;;
        --num-gpus) NUM_GPUS="$2" ;;
        --enable-metrics) ENABLE_METRICS="$2" ;;
      esac
      shift 2
      ;;
    *)
      usage
      ;;
  esac
done

# Both resource counts are mandatory.
if [[ -z "${NUM_CPUS}" || -z "${NUM_GPUS}" ]]; then
  usage
fi

# Assemble the `ray start` arguments as an array so the optional metrics flag
# can be appended without relying on unquoted word splitting.
ray_args=(
  --head
  --dashboard-host=0.0.0.0
  "--num-cpus=${NUM_CPUS}"
  "--num-gpus=${NUM_GPUS}"
  --disable-usage-stats
)
if [[ "${ENABLE_METRICS}" == "true" ]]; then
  # The export port comes from RAY_METRICS_EXPORT_PORT, falling back to 8079.
  ray_args+=("--metrics-export-port=${RAY_METRICS_EXPORT_PORT:-8079}")
fi

ray start "${ray_args[@]}"

# Confirm the head node is reachable before handing control back to the caller.
if ! ray status; then
  echo "ray cluster failed to start" >&2
  exit 1
fi
Loading