From e3d9a5fe69bfda9bc0e6cbd71991aa6d833909b6 Mon Sep 17 00:00:00 2001 From: Jeremy Eder Date: Tue, 18 Nov 2025 14:39:47 -0500 Subject: [PATCH 1/2] feat: Add Open WebUI + LiteLLM deployment (Phase 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a Kubernetes-native deployment of Open WebUI with LiteLLM proxy for chatting with Claude models. This Phase 1 implementation provides a quick, dev-friendly deployment to Kind cluster with minimal configuration. Components: - Base manifests (namespace, deployments, services, PVC, RBAC) - LiteLLM proxy configured for Claude Sonnet 4.5, 3.7, and Haiku 3.5 - Open WebUI frontend with persistent storage - Phase 1 overlay for Kind deployment with nginx-ingress - Comprehensive documentation (README, Phase 1 guide, Phase 2 plan) - Makefile for deployment automation Architecture: - Namespace: openwebui (isolated from ACP) - Ingress: vteam.local/chat (reuses Kind cluster from e2e) - Auth: Disabled in Phase 1 (dev/testing only) - Storage: 500Mi PVC for chat history - Images: ghcr.io/berriai/litellm, ghcr.io/open-webui/open-webui Phase 2 (planned): - OAuth authentication via oauth2-proxy - Long-running Claude Code service for Amber integration - Production hardening (secrets, RBAC, monitoring) - OpenShift compatibility (Routes, SCC compliance) Deployment: ```bash cd components/open-webui-llm # Edit overlays/phase1-kind/secrets.yaml with API key make phase1-deploy # Access: http://vteam.local:8080/chat (Podman) or /chat (Docker) ``` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- components/open-webui-llm/.gitignore | 18 + components/open-webui-llm/Makefile | 93 ++++ components/open-webui-llm/README.md | 217 +++++++++ .../open-webui-llm/base/kustomization.yaml | 25 + .../base/litellm/configmap.yaml | 34 ++ .../base/litellm/deployment.yaml | 75 +++ .../open-webui-llm/base/litellm/service.yaml | 17 + components/open-webui-llm/base/namespace.yaml | 7 + .../base/open-webui/deployment.yaml | 65 +++ .../open-webui-llm/base/open-webui/pvc.yaml | 15 + .../base/open-webui/service.yaml | 17 + components/open-webui-llm/base/rbac.yaml | 17 + components/open-webui-llm/docs/PHASE1.md | 359 +++++++++++++++ components/open-webui-llm/docs/PHASE2.md | 428 ++++++++++++++++++ .../overlays/phase1-kind/.env.example | 16 + .../overlays/phase1-kind/.gitignore | 6 + .../overlays/phase1-kind/ingress.yaml | 24 + .../overlays/phase1-kind/kustomization.yaml | 24 + .../overlays/phase1-kind/pvc-patch.yaml | 9 + .../overlays/phase1-kind/secrets.yaml | 22 + 20 files changed, 1488 insertions(+) create mode 100644 components/open-webui-llm/.gitignore create mode 100644 components/open-webui-llm/Makefile create mode 100644 components/open-webui-llm/README.md create mode 100644 components/open-webui-llm/base/kustomization.yaml create mode 100644 components/open-webui-llm/base/litellm/configmap.yaml create mode 100644 components/open-webui-llm/base/litellm/deployment.yaml create mode 100644 components/open-webui-llm/base/litellm/service.yaml create mode 100644 components/open-webui-llm/base/namespace.yaml create mode 100644 components/open-webui-llm/base/open-webui/deployment.yaml create mode 100644 components/open-webui-llm/base/open-webui/pvc.yaml create mode 100644 components/open-webui-llm/base/open-webui/service.yaml create mode 100644 components/open-webui-llm/base/rbac.yaml create mode 100644 components/open-webui-llm/docs/PHASE1.md create mode 100644 components/open-webui-llm/docs/PHASE2.md create mode 100644 components/open-webui-llm/overlays/phase1-kind/.env.example create mode 100644 components/open-webui-llm/overlays/phase1-kind/.gitignore create mode 100644 components/open-webui-llm/overlays/phase1-kind/ingress.yaml create mode 100644 components/open-webui-llm/overlays/phase1-kind/kustomization.yaml create mode 100644 components/open-webui-llm/overlays/phase1-kind/pvc-patch.yaml create mode 100644 components/open-webui-llm/overlays/phase1-kind/secrets.yaml diff --git a/components/open-webui-llm/.gitignore b/components/open-webui-llm/.gitignore new file mode 100644 index 00000000..453014b6 --- /dev/null +++ b/components/open-webui-llm/.gitignore @@ -0,0 +1,18 @@ +# Environment files with secrets +.env +*.env +!.env.example + +# Backup files +*.bak +backup*.tar.gz + +# IDE files +.vscode/ +.idea/ +*.swp +*.swo + +# OS files +.DS_Store +Thumbs.db diff --git a/components/open-webui-llm/Makefile b/components/open-webui-llm/Makefile new file mode 100644 index 00000000..71025565 --- /dev/null +++ b/components/open-webui-llm/Makefile @@ -0,0 +1,93 @@ +.PHONY: help phase1-deploy phase1-clean phase1-logs phase1-test phase1-status + +KUSTOMIZE := kubectl kustomize +KUBECTL := kubectl +NAMESPACE := openwebui + +help: + @echo "Open WebUI + LiteLLM Deployment" + @echo "================================" + @echo "" + @echo "Phase 1 (Kind) Commands:" + @echo " make phase1-deploy - Deploy Open WebUI + LiteLLM to Kind cluster" + @echo " make phase1-status - Check deployment status" + @echo " make phase1-logs - View Open WebUI logs" + @echo " make phase1-test - Run health checks" + @echo " make phase1-clean - Remove deployment" + @echo "" + @echo "Prerequisites:" + @echo " 1. Kind cluster running (run: make -C ../../e2e setup-kind)" + @echo " 2. API key configured (edit: overlays/phase1-kind/secrets.yaml)" + @echo "" + @echo "Access:" + @echo " Docker: http://vteam.local/chat" + @echo " Podman: http://vteam.local:8080/chat" + +phase1-deploy: + @echo "Deploying Phase 1 (Kind)..." + @if ! $(KUBECTL) get namespace $(NAMESPACE) >/dev/null 2>&1; then \ + echo "Creating namespace $(NAMESPACE)..."; \ + fi + $(KUSTOMIZE) overlays/phase1-kind | $(KUBECTL) apply -f - + @echo "" + @echo "Waiting for deployments to be ready..." + $(KUBECTL) wait --for=condition=available --timeout=300s \ + deployment/litellm deployment/openwebui -n $(NAMESPACE) || true + @echo "" + @make phase1-status + @echo "" + @echo "✅ Phase 1 deployed!" + @echo "" + @echo "Access Open WebUI:" + @echo " Docker: http://vteam.local/chat" + @echo " Podman: http://vteam.local:8080/chat" + +phase1-clean: + @echo "Removing Phase 1 deployment..." + $(KUSTOMIZE) overlays/phase1-kind | $(KUBECTL) delete -f - --ignore-not-found=true + @echo "✅ Phase 1 cleaned up" + +phase1-logs: + @echo "Open WebUI logs (Ctrl+C to exit):" + $(KUBECTL) logs -f -l app=openwebui -n $(NAMESPACE) --tail=50 + +phase1-logs-litellm: + @echo "LiteLLM logs (Ctrl+C to exit):" + $(KUBECTL) logs -f -l app=litellm -n $(NAMESPACE) --tail=50 + +phase1-test: + @echo "Testing LiteLLM health..." + @$(KUBECTL) exec -n $(NAMESPACE) deployment/openwebui -- \ + curl -s http://litellm-service:4000/health || echo "❌ LiteLLM health check failed" + @echo "" + @echo "Testing Open WebUI connectivity to LiteLLM..." + @$(KUBECTL) exec -n $(NAMESPACE) deployment/openwebui -- \ + curl -s http://litellm-service:4000/v1/models -H "Authorization: Bearer sk-litellm-dev-master-key" || echo "❌ Model list failed" + @echo "" + @echo "✅ Health checks complete" + +phase1-status: + @echo "Pod Status:" + @$(KUBECTL) get pods -n $(NAMESPACE) + @echo "" + @echo "Services:" + @$(KUBECTL) get svc -n $(NAMESPACE) + @echo "" + @echo "Ingress:" + @$(KUBECTL) get ingress -n $(NAMESPACE) || true + @echo "" + @echo "PVC:" + @$(KUBECTL) get pvc -n $(NAMESPACE) + +phase1-shell-webui: + @echo "Opening shell in Open WebUI pod..." + $(KUBECTL) exec -it -n $(NAMESPACE) deployment/openwebui -- /bin/sh + +phase1-shell-litellm: + @echo "Opening shell in LiteLLM pod..." + $(KUBECTL) exec -it -n $(NAMESPACE) deployment/litellm -- /bin/sh + +phase1-port-forward: + @echo "Port forwarding Open WebUI to localhost:8080..." + @echo "Access at: http://localhost:8080" + $(KUBECTL) port-forward -n $(NAMESPACE) svc/openwebui-service 8080:8080 diff --git a/components/open-webui-llm/README.md b/components/open-webui-llm/README.md new file mode 100644 index 00000000..1de8e9d0 --- /dev/null +++ b/components/open-webui-llm/README.md @@ -0,0 +1,217 @@ +# Open WebUI + LiteLLM Deployment + +A phased deployment of Open WebUI with LiteLLM proxy for chatting with Claude models, designed to work with the Ambient Code Platform's Kind cluster. + +## Architecture + +- **Phase 1**: Open WebUI → LiteLLM → Anthropic Claude API (simple proxy, no auth) +- **Phase 2** (Future): Long-running Claude service for Amber agent integration + +## Quick Start (Phase 1) + +### Prerequisites + +1. **Kind cluster running** with nginx-ingress: + ```bash + cd ../../e2e + ./scripts/setup-kind.sh + # Or if using Podman: CONTAINER_ENGINE=podman ./scripts/setup-kind.sh + ``` + +2. **Anthropic API key**: Get yours from [console.anthropic.com](https://console.anthropic.com) + +### Deploy + +1. **Configure API key**: + ```bash + cd overlays/phase1-kind + + # Edit secrets.yaml and replace sk-ant-YOUR-KEY-HERE with your actual key + # Or use sed: + sed -i.bak 's/sk-ant-YOUR-KEY-HERE/sk-ant-api01-YOUR-ACTUAL-KEY/g' secrets.yaml + ``` + +2. **Deploy to Kind**: + ```bash + cd ../.. # Back to components/open-webui-llm/ + make phase1-deploy + ``` + +3. **Wait for pods** (automatic, but you can check): + ```bash + make phase1-status + ``` + +4. **Access Open WebUI**: + - **Docker**: http://vteam.local/chat + - **Podman**: http://vteam.local:8080/chat + +### Usage + +1. Open the URL in your browser +2. No login required (Phase 1 has auth disabled) +3. Select a model from the dropdown: + - `claude-sonnet-4-5` (recommended) + - `claude-sonnet-3-7` + - `claude-haiku-3-5` +4. Start chatting! + +## Management Commands + +```bash +# View logs +make phase1-logs # Open WebUI logs +make phase1-logs-litellm # LiteLLM logs + +# Check status +make phase1-status # All resources + +# Run health checks +make phase1-test # Verify LiteLLM and Open WebUI connectivity + +# Clean up +make phase1-clean # Remove all resources +``` + +## Troubleshooting + +### Pods not starting + +```bash +# Check pod status +kubectl get pods -n openwebui + +# View pod logs +kubectl logs -n openwebui deployment/openwebui +kubectl logs -n openwebui deployment/litellm + +# Describe pod for events +kubectl describe pod -n openwebui -l app=openwebui +``` + +### LiteLLM errors + +**"No API key provided"**: +- Check secrets.yaml has your actual Anthropic API key +- Verify secret was created: `kubectl get secret litellm-secrets -n openwebui -o yaml` + +**"Model not found"**: +- Check LiteLLM config: `kubectl get cm litellm-config -n openwebui -o yaml` +- Verify model names match Anthropic's API + +### Ingress not working + +**Docker** (ports 80/443): +```bash +# Verify vteam.local resolves to 127.0.0.1 +grep vteam.local /etc/hosts + +# Test ingress +curl http://vteam.local/chat +``` + +**Podman** (ports 8080/8443): +```bash +# Use port 8080 +curl http://vteam.local:8080/chat +``` + +**Fallback - Port forwarding**: +```bash +# Access via localhost instead +make phase1-port-forward +# Then open: http://localhost:8080 +``` + +### PVC not binding + +```bash +# Check PVC status +kubectl get pvc -n openwebui + +# If pending, check storage class +kubectl get sc + +# Kind should have 'standard' storage class by default +``` + +## Component Structure + +``` +. +├── base/ # Shared base manifests +│ ├── namespace.yaml +│ ├── rbac.yaml # ServiceAccounts +│ ├── litellm/ # LiteLLM proxy +│ │ ├── deployment.yaml +│ │ ├── service.yaml +│ │ └── configmap.yaml # Model routing +│ ├── open-webui/ # Web UI +│ │ ├── deployment.yaml +│ │ ├── service.yaml +│ │ └── pvc.yaml # Persistent storage +│ └── kustomization.yaml +│ +├── overlays/ +│ ├── phase1-kind/ # Phase 1: Simple deployment +│ │ ├── kustomization.yaml +│ │ ├── secrets.yaml # API keys (edit this!) +│ │ ├── ingress.yaml # Nginx ingress +│ │ └── pvc-patch.yaml # Reduced storage for Kind +│ │ +│ └── phase2-production/ # Phase 2: Future (OAuth, Claude service) +│ └── (planned) +│ +├── docs/ +│ ├── PHASE1.md # Detailed Phase 1 guide +│ └── PHASE2.md # Phase 2 migration plan +│ +├── Makefile # Deployment automation +└── README.md # This file +``` + +## Data Flow + +``` +User Browser → vteam.local/chat → Nginx Ingress → Open WebUI Service + → Open WebUI Pod → LiteLLM Service → LiteLLM Pod → Anthropic API +``` + +## Phase 2 (Future) + +Phase 2 will add: +- **Authentication**: OAuth2 proxy for production use +- **Claude Service**: Long-running Claude Code sessions +- **Amber Integration**: Direct integration with Amber agent +- **Production deployment**: OpenShift Routes, proper RBAC + +See `docs/PHASE2.md` for migration plan (coming soon). + +## Files You May Need to Edit + +- **`overlays/phase1-kind/secrets.yaml`**: Add your Anthropic API key here (required) +- **`base/litellm/configmap.yaml`**: Add more models or adjust LiteLLM settings +- **`base/open-webui/deployment.yaml`**: Change resource limits or add environment variables + +## Clean Up + +```bash +# Remove deployment but keep namespace +make phase1-clean + +# Remove namespace too +kubectl delete namespace openwebui +``` + +## Next Steps + +1. Try chatting with different Claude models +2. Explore Open WebUI settings (http://vteam.local/chat/settings) +3. Review LiteLLM logs to see API calls: `make phase1-logs-litellm` +4. Plan for Phase 2 migration (see `docs/PHASE2.md`) + +## Support + +- **Documentation**: See `docs/` directory +- **Issues**: Create an issue in the main repository +- **Logs**: Always check logs first: `make phase1-logs` diff --git a/components/open-webui-llm/base/kustomization.yaml b/components/open-webui-llm/base/kustomization.yaml new file mode 100644 index 00000000..cf2d6fa0 --- /dev/null +++ b/components/open-webui-llm/base/kustomization.yaml @@ -0,0 +1,25 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +metadata: + name: openwebui-base + +namespace: openwebui + +# Common resources across all environments +resources: +- namespace.yaml +- rbac.yaml +- litellm/deployment.yaml +- litellm/service.yaml +- litellm/configmap.yaml +- open-webui/deployment.yaml +- open-webui/service.yaml +- open-webui/pvc.yaml + +# Default images (can be overridden by overlays) +images: +- name: ghcr.io/berriai/litellm + newTag: main-latest +- name: ghcr.io/open-webui/open-webui + newTag: main diff --git a/components/open-webui-llm/base/litellm/configmap.yaml b/components/open-webui-llm/base/litellm/configmap.yaml new file mode 100644 index 00000000..6b82bb84 --- /dev/null +++ b/components/open-webui-llm/base/litellm/configmap.yaml @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: litellm-config + labels: + app: litellm + app.kubernetes.io/name: litellm + app.kubernetes.io/component: proxy +data: + config.yaml: | + model_list: + - model_name: claude-sonnet-4-5 + litellm_params: + model: anthropic/claude-sonnet-4-5-20250929 + api_key: os.environ/ANTHROPIC_API_KEY + + - model_name: claude-sonnet-3-7 + litellm_params: + model: anthropic/claude-3-7-sonnet-latest + api_key: os.environ/ANTHROPIC_API_KEY + + - model_name: claude-haiku-3-5 + litellm_params: + model: anthropic/claude-3-5-haiku-20241022 + api_key: os.environ/ANTHROPIC_API_KEY + + litellm_settings: + drop_params: true + success_callback: [] + failure_callback: [] + set_verbose: false + + general_settings: + master_key: os.environ/LITELLM_MASTER_KEY diff --git a/components/open-webui-llm/base/litellm/deployment.yaml b/components/open-webui-llm/base/litellm/deployment.yaml new file mode 100644 index 00000000..240f862b --- /dev/null +++ b/components/open-webui-llm/base/litellm/deployment.yaml @@ -0,0 +1,75 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: litellm + labels: + app: litellm + app.kubernetes.io/name: litellm + app.kubernetes.io/component: proxy +spec: + replicas: 1 + selector: + matchLabels: + app: litellm + template: + metadata: + labels: + app: litellm + spec: + serviceAccountName: litellm + containers: + - name: litellm + image: ghcr.io/berriai/litellm:main-latest + imagePullPolicy: Always + ports: + - containerPort: 4000 + name: http + env: + - name: ANTHROPIC_API_KEY + valueFrom: + secretKeyRef: + name: litellm-secrets + key: ANTHROPIC_API_KEY + optional: true + - name: LITELLM_MASTER_KEY + valueFrom: + secretKeyRef: + name: litellm-secrets + key: LITELLM_MASTER_KEY + optional: true + - name: PORT + value: "4000" + args: + - "--config" + - "/app/config/config.yaml" + - "--port" + - "4000" + - "--host" + - "0.0.0.0" + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + volumeMounts: + - name: config + mountPath: /app/config + readOnly: true + volumes: + - name: config + configMap: + name: litellm-config diff --git a/components/open-webui-llm/base/litellm/service.yaml b/components/open-webui-llm/base/litellm/service.yaml new file mode 100644 index 00000000..243851a5 --- /dev/null +++ b/components/open-webui-llm/base/litellm/service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + name: litellm-service + labels: + app: litellm + app.kubernetes.io/name: litellm + app.kubernetes.io/component: proxy +spec: + selector: + app: litellm + ports: + - port: 4000 + targetPort: http + protocol: TCP + name: http + type: ClusterIP diff --git a/components/open-webui-llm/base/namespace.yaml b/components/open-webui-llm/base/namespace.yaml new file mode 100644 index 00000000..8bef366b --- /dev/null +++ b/components/open-webui-llm/base/namespace.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: openwebui + labels: + app.kubernetes.io/name: openwebui + app.kubernetes.io/managed-by: kustomize diff --git a/components/open-webui-llm/base/open-webui/deployment.yaml b/components/open-webui-llm/base/open-webui/deployment.yaml new file mode 100644 index 00000000..8ed8a0b4 --- /dev/null +++ b/components/open-webui-llm/base/open-webui/deployment.yaml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: openwebui + labels: + app: openwebui + app.kubernetes.io/name: openwebui + app.kubernetes.io/component: frontend +spec: + replicas: 1 + selector: + matchLabels: + app: openwebui + template: + metadata: + labels: + app: openwebui + spec: + serviceAccountName: openwebui + containers: + - name: openwebui + image: ghcr.io/open-webui/open-webui:main + imagePullPolicy: Always + ports: + - containerPort: 8080 + name: http + env: + - name: OPENAI_API_BASE_URL + value: "http://litellm-service:4000/v1" + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: openwebui-secrets + key: OPENAI_API_KEY + optional: true + - name: WEBUI_AUTH + value: "false" + - name: DATA_DIR + value: "/app/backend/data" + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + periodSeconds: 5 + volumeMounts: + - name: data + mountPath: /app/backend/data + volumes: + - name: data + persistentVolumeClaim: + claimName: openwebui-data-pvc diff --git a/components/open-webui-llm/base/open-webui/pvc.yaml b/components/open-webui-llm/base/open-webui/pvc.yaml new file mode 100644 index 00000000..d7ab98f9 --- /dev/null +++ b/components/open-webui-llm/base/open-webui/pvc.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: openwebui-data-pvc + labels: + app: openwebui + app.kubernetes.io/name: openwebui + app.kubernetes.io/component: frontend +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi + # storageClassName will be set by overlays (standard for Kind, gp3 for prod) diff --git a/components/open-webui-llm/base/open-webui/service.yaml b/components/open-webui-llm/base/open-webui/service.yaml new file mode 100644 index 00000000..f9db4ed7 --- /dev/null +++ b/components/open-webui-llm/base/open-webui/service.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: Service +metadata: + name: openwebui-service + labels: + app: openwebui + app.kubernetes.io/name: openwebui + app.kubernetes.io/component: frontend +spec: + selector: + app: openwebui + ports: + - port: 8080 + targetPort: http + protocol: TCP + name: http + type: ClusterIP diff --git a/components/open-webui-llm/base/rbac.yaml b/components/open-webui-llm/base/rbac.yaml new file mode 100644 index 00000000..38e643e1 --- /dev/null +++ b/components/open-webui-llm/base/rbac.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: litellm + labels: + app: litellm + app.kubernetes.io/name: litellm + app.kubernetes.io/component: proxy +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: openwebui + labels: + app: openwebui + app.kubernetes.io/name: openwebui + app.kubernetes.io/component: frontend diff --git a/components/open-webui-llm/docs/PHASE1.md b/components/open-webui-llm/docs/PHASE1.md new file mode 100644 index 00000000..a876685d --- /dev/null +++ b/components/open-webui-llm/docs/PHASE1.md @@ -0,0 +1,359 @@ +# Phase 1: Quick Deployment Guide + +This guide covers Phase 1 deployment of Open WebUI + LiteLLM on Kind cluster with minimal configuration. + +## What You Get + +- ✅ Web-based chat interface (Open WebUI) +- ✅ Access to Claude models via LiteLLM proxy +- ✅ No authentication (dev/testing only) +- ✅ Persistent storage for chat history +- ✅ Nginx ingress routing + +## What's Not Included (Phase 2) + +- ❌ OAuth authentication +- ❌ Long-running Claude Code sessions +- ❌ Amber agent integration +- ❌ Production hardening + +## Prerequisites + +### Required + +1. **Kind cluster with nginx-ingress**: + ```bash + cd ../../e2e + ./scripts/setup-kind.sh + ``` + +2. **Anthropic API key**: Sign up at [console.anthropic.com](https://console.anthropic.com) + +3. **kubectl and kustomize**: + ```bash + # Check versions + kubectl version --client + kustomize version + ``` + +### Optional + +- **Podman**: If using rootless Podman, ports will be 8080/8443 instead of 80/443 + +## Installation Steps + +### 1. Configure API Key + +Edit the secrets file: + +```bash +cd components/open-webui-llm/overlays/phase1-kind +vi secrets.yaml +``` + +Replace `sk-ant-YOUR-KEY-HERE` with your actual Anthropic API key: + +```yaml +stringData: + ANTHROPIC_API_KEY: "sk-ant-api01-xxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +``` + +**Security Note**: This file is excluded from git via `.gitignore`. Never commit actual API keys. + +### 2. Deploy + +```bash +cd ../.. # Back to components/open-webui-llm/ +make phase1-deploy +``` + +**Expected output**: +``` +Deploying Phase 1 (Kind)... +Creating namespace openwebui... +namespace/openwebui created +serviceaccount/litellm created +... +deployment.apps/litellm condition met +deployment.apps/openwebui condition met +✅ Phase 1 deployed! +``` + +### 3. Verify Deployment + +```bash +make phase1-status +``` + +**Expected output**: +``` +Pod Status: +NAME READY STATUS RESTARTS AGE +litellm-xxxxx 1/1 Running 0 1m +openwebui-xxxxx 1/1 Running 0 1m + +Services: +NAME TYPE CLUSTER-IP PORT(S) +litellm-service ClusterIP 10.96.xxx.xxx 4000/TCP +openwebui-service ClusterIP 10.96.xxx.xxx 8080/TCP +``` + +All pods should show `Running` and `1/1 Ready`. + +### 4. Access Web UI + +**Docker users**: +``` +http://vteam.local/chat +``` + +**Podman users**: +``` +http://vteam.local:8080/chat +``` + +**First visit**: +1. No login required +2. You'll see the Open WebUI interface +3. Model selector will show Claude models + +## Usage + +### Select a Model + +Click the model dropdown (top of chat): +- **claude-sonnet-4-5**: Recommended (balanced speed/quality) +- **claude-sonnet-3-7**: Previous version (still excellent) +- **claude-haiku-3-5**: Fastest, good for simple tasks + +### Start Chatting + +Type a message and press Enter. Examples: + +``` +"Hello! Can you explain how Kubernetes Ingress works?" + +"Write a Python function to reverse a string" + +"Explain the difference between microservices and monoliths" +``` + +### View Chat History + +- Conversations are saved automatically +- Click the history icon (left sidebar) to see past chats +- Storage is in PVC (persists across pod restarts) + +## Configuration + +### Add More Models + +Edit `base/litellm/configmap.yaml`: + +```yaml +model_list: + # Add OpenAI models + - model_name: gpt-4 + litellm_params: + model: openai/gpt-4 + api_key: os.environ/OPENAI_API_KEY +``` + +Then update `overlays/phase1-kind/secrets.yaml` to add `OPENAI_API_KEY`. + +Redeploy: +```bash +make phase1-deploy +``` + +### Adjust Resource Limits + +Edit `base/open-webui/deployment.yaml` or `base/litellm/deployment.yaml`: + +```yaml +resources: + requests: + cpu: 500m # Increase if needed + memory: 1Gi + limits: + cpu: 2000m + memory: 2Gi +``` + +### Change Storage Size + +Edit `overlays/phase1-kind/pvc-patch.yaml`: + +```yaml +resources: + requests: + storage: 1Gi # Increase for more chat history +``` + +## Troubleshooting + +### Issue: Pods stuck in Pending + +**Symptom**: +```bash +kubectl get pods -n openwebui +NAME READY STATUS RESTARTS AGE +openwebui-xxxxx 0/1 Pending 0 5m +``` + +**Solution**: +```bash +# Check events +kubectl describe pod -n openwebui openwebui-xxxxx + +# Common causes: +# 1. PVC not binding - check storage class exists +kubectl get sc + +# 2. Resource constraints - check node resources +kubectl top nodes +``` + +### Issue: LiteLLM returns 401 Unauthorized + +**Symptom**: Chat messages fail with "API key invalid" + +**Solution**: +```bash +# Verify secret exists +kubectl get secret litellm-secrets -n openwebui + +# Check secret value (base64 encoded) +kubectl get secret litellm-secrets -n openwebui -o jsonpath='{.data.ANTHROPIC_API_KEY}' | base64 -d +# Should show: sk-ant-api01-... + +# If wrong, update secrets.yaml and redeploy +make phase1-deploy +``` + +### Issue: Ingress returns 404 + +**Symptom**: `curl http://vteam.local/chat` returns 404 + +**Solution**: +```bash +# Check ingress exists +kubectl get ingress -n openwebui + +# Check ingress-nginx logs +kubectl logs -n ingress-nginx deployment/ingress-nginx-controller + +# Verify vteam.local in /etc/hosts +grep vteam.local /etc/hosts +# Should show: 127.0.0.1 vteam.local + +# If using Podman, try port 8080 +curl http://vteam.local:8080/chat +``` + +### Issue: Open WebUI loads but can't connect to LiteLLM + +**Symptom**: UI loads, but sending messages fails + +**Solution**: +```bash +# Test LiteLLM from Open WebUI pod +kubectl exec -n openwebui deployment/openwebui -- \ + curl http://litellm-service:4000/health + +# Should return: {"status": "healthy"} + +# If fails, check LiteLLM logs +kubectl logs -n openwebui deployment/litellm +``` + +### Issue: Chat messages timeout + +**Symptom**: Messages take >60s and fail + +**Solution**: +```bash +# Check LiteLLM logs for errors +kubectl logs -n openwebui deployment/litellm -f + +# Test Anthropic API directly from LiteLLM pod +kubectl exec -n openwebui deployment/litellm -- \ + curl -s https://api.anthropic.com/v1/messages \ + -H "x-api-key: $ANTHROPIC_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -d '{"model":"claude-3-haiku-20240307","max_tokens":10,"messages":[{"role":"user","content":"test"}]}' + +# If this works, issue is with LiteLLM config or Open WebUI connection +``` + +## Advanced Usage + +### Port Forwarding (Alternative Access) + +If ingress is not working: + +```bash +make phase1-port-forward +# Access at: http://localhost:8080 +``` + +### Shell Access + +```bash +# Open shell in Open WebUI pod +make phase1-shell-webui + +# Open shell in LiteLLM pod +make phase1-shell-litellm +``` + +### View Real-time Logs + +```bash +# Terminal 1: Open WebUI logs +make phase1-logs + +# Terminal 2: LiteLLM logs +make phase1-logs-litellm +``` + +## Clean Up + +### Remove Deployment (Keep Namespace) + +```bash +make phase1-clean +``` + +### Remove Everything (Including Namespace) + +```bash +make phase1-clean +kubectl delete namespace openwebui +``` + +### Reset and Redeploy + +```bash +# Full reset +make phase1-clean +make phase1-deploy +``` + +## Next Steps + +1. **Test different models**: Try claude-haiku-3-5 for speed +2. **Explore Open WebUI**: Settings → Models, System Prompts, etc. +3. **Monitor resources**: `kubectl top pods -n openwebui` +4. **Plan Phase 2**: See `PHASE2.md` for OAuth and Claude service + +## Security Notes for Phase 1 + +**⚠️ Phase 1 is for development/testing only:** + +- No authentication (anyone with network access can use UI) +- API keys in Kubernetes Secrets (base64, not encrypted at rest) +- No network policies (pods can access any external service) +- No resource quotas (can consume unlimited cluster resources) + +**Do NOT use Phase 1 in production**. Migrate to Phase 2 for production deployment. diff --git a/components/open-webui-llm/docs/PHASE2.md b/components/open-webui-llm/docs/PHASE2.md new file mode 100644 index 00000000..f74f23af --- /dev/null +++ b/components/open-webui-llm/docs/PHASE2.md @@ -0,0 +1,428 @@ +# Phase 2: Production Migration Plan + +This document outlines the migration from Phase 1 (simple dev deployment) to Phase 2 (production-ready with OAuth and Claude service). + +## Status: PLANNED (Not Yet Implemented) + +Phase 2 is documented but not yet built. This serves as a design spec and migration guide for future implementation. + +## What Phase 2 Adds + +### Security & Authentication +- ✅ OAuth2 proxy with OpenShift OAuth or generic OIDC +- ✅ User authentication required for UI access +- ✅ API key rotation support +- ✅ Network policies (restrict egress) + +### Claude Integration +- ✅ Long-running Claude Code service +- ✅ Multi-session management (one per user) +- ✅ Tool execution (code, bash, file operations) +- ✅ Amber agent persona integration + +### Production Hardening +- ✅ Kubernetes Secrets (replace ConfigMaps) +- ✅ Resource quotas and limits +- ✅ High availability (multiple replicas) +- ✅ Monitoring and observability + +### OpenShift Compatibility +- ✅ OpenShift Routes (instead of Ingress) +- ✅ SecurityContextConstraints compliance +- ✅ Service accounts with proper RBAC + +## Architecture Changes + +### Phase 1 Flow +``` +User → Ingress → Open WebUI → LiteLLM → Anthropic API +``` + +### Phase 2 Flow +``` +User → Route/Ingress → OAuth Proxy → Open WebUI → LiteLLM → + ├→ Anthropic API (direct models) + └→ Claude Service → Anthropic API (Amber sessions) +``` + +## Migration Strategy + +### Option A: In-Place Migration (Recommended) + +Upgrade existing Phase 1 deployment with minimal downtime. + +**Steps**: +1. Backup Open WebUI data (PVC snapshot or export) +2. Create Phase 2 secrets (OAuth, Claude service) +3. Apply Phase 2 overlay (patches existing deployments) +4. Update DNS/Ingress (route to OAuth proxy) +5. Test authentication and Claude service +6. Rollback if issues (revert to Phase 1 overlay) + +**Downtime**: ~5-10 minutes (during OAuth proxy deployment) + +**Pros**: +- Preserves chat history and user data +- Faster migration (no new cluster setup) +- Easier rollback + +**Cons**: +- Risk of breaking existing deployment +- Harder to test before migration + +### Option B: Parallel Deployment (Safer) + +Deploy Phase 2 to new namespace, test, then cutover. + +**Steps**: +1. Deploy Phase 2 to `openwebui-prod` namespace +2. Export Phase 1 data (chat history, settings) +3. Import data into Phase 2 +4. Test Phase 2 thoroughly +5. Update DNS to point to Phase 2 +6. Deprecate Phase 1 after validation period + +**Downtime**: None (parallel systems) + +**Pros**: +- No risk to Phase 1 +- Full testing before cutover +- Easy rollback (just revert DNS) + +**Cons**: +- Requires double resources temporarily +- More complex data migration +- Manual cutover process + +## Implementation Checklist + +### Prerequisites + +- [ ] Decide OAuth provider (OpenShift OAuth or generic OIDC) +- [ ] Obtain OAuth client credentials +- [ ] Allocate cluster resources (2x current for parallel deployment) +- [ ] Plan downtime window (if in-place migration) + +### Step 1: Create Phase 2 Directory Structure + +```bash +cd components/open-webui-llm + +mkdir -p overlays/phase2-production/{claude-service,secrets} +``` + +### Step 2: Build Claude Service + +**Files to create**: +- `overlays/phase2-production/claude-service/deployment.yaml` +- `overlays/phase2-production/claude-service/service.yaml` +- `overlays/phase2-production/claude-service/configmap.yaml` + +**Claude Service Features**: +- FastAPI server with `/v1/chat/completions` endpoint +- Session management (create, resume, list) +- Streaming responses via SSE +- Tool execution (bash, code, file ops) +- Integration with Amber persona from `agents/amber.md` + +**Example implementation** (pseudocode): +```python +# claude-service/main.py +from fastapi import FastAPI +from anthropic import Anthropic + +app = FastAPI() +client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"]) + +@app.post("/v1/chat/completions") +async def chat(request: ChatRequest): + # Create or resume Claude session + session = get_or_create_session(request.user_id) + + # Stream response + async for chunk in client.messages.stream(...): + yield format_openai_chunk(chunk) +``` + +### Step 3: Configure OAuth Proxy + +**File**: `overlays/phase2-production/oauth-deployment-patch.yaml` + +**Adds sidecar to Open WebUI**: +```yaml +containers: +- name: oauth-proxy + image: quay.io/oauth2-proxy/oauth2-proxy:latest + args: + - --provider=oidc + - --upstream=http://localhost:8080 + - --cookie-secret=$(COOKIE_SECRET) + ... +``` + +**OAuth Provider Options**: + +1. **OpenShift OAuth** (recommended for OpenShift): + ```yaml + --provider=openshift + --login-url=https://oauth-openshift.apps.cluster/oauth/authorize + ``` + +2. **Generic OIDC** (Google, Okta, etc.): + ```yaml + --provider=oidc + --oidc-issuer-url=https://accounts.google.com + --client-id=... + --client-secret=... + ``` + +### Step 4: Update LiteLLM Configuration + +**File**: `base/litellm/configmap.yaml` (add Claude service route) + +```yaml +model_list: + # Existing direct routes + - model_name: claude-sonnet-4-5 + litellm_params: + model: anthropic/claude-sonnet-4-5-20250929 + api_key: os.environ/ANTHROPIC_API_KEY + + # NEW: Claude service route + - model_name: claude-amber-session + litellm_params: + model: openai/gpt-3.5-turbo # Proxy format + api_base: http://claude-service:8001/v1 + api_key: internal-token +``` + +### Step 5: Create Secrets + +**File**: `overlays/phase2-production/secrets/secrets.yaml` + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: oauth-config +type: Opaque +stringData: + cookie-secret: "" + client-id: "openwebui-client" + client-secret: "" +--- +apiVersion: v1 +kind: Secret +metadata: + name: claude-service-secrets +type: Opaque +stringData: + ANTHROPIC_API_KEY: "sk-ant-..." + CLAUDE_SERVICE_KEY: "internal-token" +``` + +**Generate secrets**: +```bash +# Cookie secret (32 bytes) +openssl rand -base64 32 + +# OAuth client secret (if not provided by provider) +openssl rand -hex 32 +``` + +### Step 6: Create Phase 2 Kustomization + +**File**: `overlays/phase2-production/kustomization.yaml` + +```yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: openwebui # Or openwebui-prod for parallel deployment + +resources: +- ../../base +- claude-service/deployment.yaml +- claude-service/service.yaml +- claude-service/configmap.yaml +- ingress.yaml # Or route.yaml for OpenShift +- secrets/secrets.yaml + +patches: +- path: oauth-deployment-patch.yaml + target: + kind: Deployment + name: openwebui +- path: oauth-service-patch.yaml + target: + kind: Service + name: openwebui-service +``` + +### Step 7: Test Phase 2 Locally + +**Build Claude service image**: +```bash +cd docker/claude-service +podman build -t localhost/claude-service:dev . +``` + +**Deploy to Kind** (for testing): +```bash +cd ../../overlays/phase2-production +kustomize build . | kubectl apply -f - +``` + +**Test checklist**: +- [ ] OAuth login redirects correctly +- [ ] Can access Open WebUI after auth +- [ ] Claude direct models still work +- [ ] Claude service endpoint is reachable +- [ ] Amber session model appears in dropdown +- [ ] Can create long-running session +- [ ] Session persists across pod restarts + +### Step 8: Production Deployment + +**For OpenShift**: +```bash +# Create OAuthClient (requires cluster-admin) +oc apply -f oauth-client.yaml + +# Deploy Phase 2 +cd components/open-webui-llm +kubectl apply -k overlays/phase2-production + +# Update Route hostname (if needed) +oc patch route openwebui -n openwebui -p '{"spec":{"host":"openwebui.apps.cluster.example.com"}}' +``` + +**For Kind** (with Ingress): +```bash +kubectl apply -k overlays/phase2-production + +# Update /etc/hosts +echo "127.0.0.1 openwebui.local" | sudo tee -a /etc/hosts +``` + +### Step 9: Data Migration + +**Export Phase 1 data**: +```bash +# Backup PVC +kubectl exec -n openwebui deployment/openwebui -- \ + tar czf /tmp/backup.tar.gz /app/backend/data + +kubectl cp openwebui/openwebui-xxxxx:/tmp/backup.tar.gz ./backup.tar.gz +``` + +**Import to Phase 2** (if using new namespace): +```bash +kubectl cp ./backup.tar.gz openwebui-prod/openwebui-xxxxx:/tmp/backup.tar.gz + +kubectl exec -n openwebui-prod deployment/openwebui -- \ + tar xzf /tmp/backup.tar.gz -C / +``` + +### Step 10: Validation + +**Smoke tests**: +```bash +# Test OAuth flow +curl -I https://openwebui.apps.cluster.example.com +# Should redirect to OAuth provider + +# Test LiteLLM health +kubectl exec -n openwebui deployment/litellm -- curl localhost:4000/health + +# Test Claude service health +kubectl exec -n openwebui deployment/claude-service -- curl localhost:8001/health + +# Test end-to-end (requires valid OAuth token) +curl https://openwebui.apps.cluster.example.com/api/chat \ + -H "Authorization: Bearer $TOKEN" \ + -d '{"model":"claude-amber-session", "messages":[...]}' +``` + +## Rollback Plan + +### If Phase 2 Fails (In-Place Migration) + +```bash +# Immediate rollback to Phase 1 +kubectl apply -k overlays/phase1-kind + +# Verify Phase 1 is working +make phase1-test + +# Restore data if needed +kubectl cp ./backup.tar.gz openwebui/openwebui-xxxxx:/tmp/backup.tar.gz +kubectl exec -n openwebui deployment/openwebui -- \ + tar xzf /tmp/backup.tar.gz -C / +``` + +### If Phase 2 Fails (Parallel Deployment) + +```bash +# Revert DNS to Phase 1 +# (No kubectl changes needed, just update DNS/Route) + +# Delete Phase 2 namespace +kubectl delete namespace openwebui-prod +``` + +## Technical Debt to Address + +### Immediate (Before Production) + +1. **External Secrets Operator**: Move from Kubernetes Secrets to Vault/AWS Secrets Manager +2. **Network Policies**: Restrict egress to only Anthropic API +3. **Resource Quotas**: Enforce limits per namespace +4. **Monitoring**: Add Prometheus metrics, Grafana dashboards + +### Future Enhancements + +5. **Multi-tenancy**: Namespace-per-team or namespace-per-user +6. **Rate Limiting**: Per-user API call limits +7. **Cost Tracking**: Track Anthropic API usage per user/team +8. **Audit Logging**: Log all chat sessions for compliance +9. **High Availability**: Multiple replicas with pod disruption budgets +10. **Auto-scaling**: HPA based on request volume + +## Timeline Estimate + +**Assuming one developer, part-time**: + +- **Week 1-2**: Build Claude service (API, session management, tool execution) +- **Week 3**: OAuth integration and testing +- **Week 4**: Phase 2 overlay and Kustomize patches +- **Week 5**: Testing and documentation +- **Week 6**: Production deployment and validation + +**Total**: 6-8 weeks (30-40 hours) + +## Questions to Resolve + +Before implementing Phase 2, decide: + +1. **OAuth Provider**: OpenShift OAuth or generic OIDC? +2. **Session Storage**: PostgreSQL or file-based (PVC)? +3. **Claude Service Language**: Python (FastAPI) or Go? +4. **Deployment Strategy**: In-place or parallel? +5. **Namespace**: Reuse `openwebui` or create `openwebui-prod`? + +## Next Steps + +1. Review this plan with team +2. Create GitHub issues for each step +3. Build Claude service POC +4. Test OAuth locally +5. Create Phase 2 overlay structure +6. Schedule production deployment window + +## References + +- [OAuth2 Proxy Documentation](https://oauth2-proxy.github.io/oauth2-proxy/) +- [OpenShift OAuth](https://docs.openshift.com/container-platform/4.14/authentication/configuring-oauth-clients.html) +- [Claude API Documentation](https://docs.anthropic.com/claude/reference/getting-started-with-the-api) +- [Kustomize Overlays](https://kubernetes.io/docs/tasks/manage-kubernetes-objects/kustomization/) diff --git a/components/open-webui-llm/overlays/phase1-kind/.env.example b/components/open-webui-llm/overlays/phase1-kind/.env.example new file mode 100644 index 00000000..71205df0 --- /dev/null +++ b/components/open-webui-llm/overlays/phase1-kind/.env.example @@ -0,0 +1,16 @@ +# Phase 1 (Kind) Environment Variables +# Copy this file to .env and fill in your actual values + +# Required: Your Anthropic API key (get from console.anthropic.com) +ANTHROPIC_API_KEY=sk-ant-xxxxx + +# LiteLLM master key for API authentication +# This is used by Open WebUI to authenticate with LiteLLM +LITELLM_MASTER_KEY=sk-litellm-dev-master-key + +# Optional: OpenAI API key (if you want to use OpenAI models) +# OPENAI_API_KEY=sk-xxxxx + +# Open WebUI settings +WEBUI_AUTH=false # No authentication in Phase 1 (dev only) +OPENAI_API_BASE_URL=http://litellm-service:4000/v1 diff --git a/components/open-webui-llm/overlays/phase1-kind/.gitignore b/components/open-webui-llm/overlays/phase1-kind/.gitignore new file mode 100644 index 00000000..7800a7fb --- /dev/null +++ b/components/open-webui-llm/overlays/phase1-kind/.gitignore @@ -0,0 +1,6 @@ +# Exclude environment files with secrets +.env + +# Exclude any backup files +*.bak +backup*.tar.gz diff --git a/components/open-webui-llm/overlays/phase1-kind/ingress.yaml b/components/open-webui-llm/overlays/phase1-kind/ingress.yaml new file mode 100644 index 00000000..b0936f19 --- /dev/null +++ b/components/open-webui-llm/overlays/phase1-kind/ingress.yaml @@ -0,0 +1,24 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: openwebui-ingress + namespace: openwebui + labels: + app: openwebui + annotations: + nginx.ingress.kubernetes.io/rewrite-target: /$2 + nginx.ingress.kubernetes.io/proxy-body-size: "100m" + nginx.ingress.kubernetes.io/proxy-read-timeout: "600" +spec: + ingressClassName: nginx + rules: + - host: vteam.local + http: + paths: + - path: /chat(/|$)(.*) + pathType: ImplementationSpecific + backend: + service: + name: openwebui-service + port: + name: http diff --git a/components/open-webui-llm/overlays/phase1-kind/kustomization.yaml b/components/open-webui-llm/overlays/phase1-kind/kustomization.yaml new file mode 100644 index 00000000..be977f04 --- /dev/null +++ b/components/open-webui-llm/overlays/phase1-kind/kustomization.yaml @@ -0,0 +1,24 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: openwebui + +# Resources (base + phase1-specific) +resources: +- ../../base +- secrets.yaml +- ingress.yaml + +# Patches for Phase 1 environment +patches: +- path: pvc-patch.yaml + target: + kind: PersistentVolumeClaim + name: openwebui-data-pvc + +# Use specific image tags for reproducibility in Phase 1 +images: +- name: ghcr.io/berriai/litellm + newTag: main-latest +- name: ghcr.io/open-webui/open-webui + newTag: main diff --git a/components/open-webui-llm/overlays/phase1-kind/pvc-patch.yaml b/components/open-webui-llm/overlays/phase1-kind/pvc-patch.yaml new file mode 100644 index 00000000..5ff0b68f --- /dev/null +++ b/components/open-webui-llm/overlays/phase1-kind/pvc-patch.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: openwebui-data-pvc +spec: + storageClassName: standard # Kind default storage class + resources: + requests: + storage: 500Mi # Reduced for Kind/e2e testing diff --git a/components/open-webui-llm/overlays/phase1-kind/secrets.yaml b/components/open-webui-llm/overlays/phase1-kind/secrets.yaml new file mode 100644 index 00000000..e3a9cfee --- /dev/null +++ b/components/open-webui-llm/overlays/phase1-kind/secrets.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: Secret +metadata: + name: litellm-secrets + namespace: openwebui + labels: + app: litellm +type: Opaque +stringData: + ANTHROPIC_API_KEY: "sk-ant-YOUR-KEY-HERE" # Replace with actual key + LITELLM_MASTER_KEY: "sk-litellm-dev-master-key" # Simple dev key +--- +apiVersion: v1 +kind: Secret +metadata: + name: openwebui-secrets + namespace: openwebui + labels: + app: openwebui +type: Opaque +stringData: + OPENAI_API_KEY: "sk-litellm-dev-master-key" # Same as LiteLLM master key From ad4140f151df21d54630fb54bfbbd945e8c230f7 Mon Sep 17 00:00:00 2001 From: Jeremy Eder Date: Tue, 18 Nov 2025 16:56:38 -0500 Subject: [PATCH 2/2] fix: increase LiteLLM memory limits and fix health probes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Increase memory limit from 512Mi to 2Gi to prevent OOMKilled crashes - Increase CPU limit from 500m to 1000m for better performance - Update health probe paths to LiteLLM-specific endpoints: - /health/liveliness for liveness probe - /health/readiness for readiness probe - Increase resource requests for stability Fixes LiteLLM pod crash loop due to insufficient memory allocation. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../open-webui-llm/base/litellm/deployment.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/components/open-webui-llm/base/litellm/deployment.yaml b/components/open-webui-llm/base/litellm/deployment.yaml index 240f862b..4e2970de 100644 --- a/components/open-webui-llm/base/litellm/deployment.yaml +++ b/components/open-webui-llm/base/litellm/deployment.yaml @@ -48,20 +48,20 @@ spec: - "0.0.0.0" resources: requests: - cpu: 100m - memory: 256Mi - limits: - cpu: 500m + cpu: 200m memory: 512Mi + limits: + cpu: 1000m + memory: 2Gi livenessProbe: httpGet: - path: /health + path: /health/liveliness port: http initialDelaySeconds: 30 periodSeconds: 10 readinessProbe: httpGet: - path: /health + path: /health/readiness port: http initialDelaySeconds: 5 periodSeconds: 5