# SPARC-P PubApps Deployment Notebook

This notebook is the runnable version of Step 4 for deploying SPARC-P to UF RC PubApps.

## Resource Profiles
- **HiPerGator (parallel jobs)**: 4 GPUs, 16 CPU cores
- **PubApps (serving)**: 1x L4 GPU (24GB), 2 CPU cores, 16GB RAM

## Before You Run
- You are on your PubApps VM via SSH
- You have your project account (`SPARCP`)
- Trained models are available from HiPerGator at `/blue/jasondeanarnold/SPARCP/trained_models`
- Podman + systemd user services are available

In [None]:
# 1. Configuration
import os
import subprocess
import textwrap
from pathlib import Path

# Project account and the paths derived from it under /pubapps.
PROJECT = os.getenv("SPARC_PUBAPPS_PROJECT", "SPARCP")
PUBAPPS_ROOT = Path(f"/pubapps/{PROJECT}")
MODEL_DIR = PUBAPPS_ROOT.joinpath("models")
CONDA_ENV = PUBAPPS_ROOT.joinpath("conda_envs", "sparc_backend")
BACKEND_DIR = PUBAPPS_ROOT.joinpath("backend")
RIVA_MODEL_DIR = PUBAPPS_ROOT.joinpath("riva_models")

# Source of the trained models and the PubApps host used in the rsync target.
HIPERGATOR_SOURCE_MODELS = "/blue/jasondeanarnold/SPARCP/trained_models"
PUBAPPS_HOST = os.getenv("SPARC_PUBAPPS_HOST", "pubapps-vm.rc.ufl.edu")

# Resource constraints
HPG_MAX_GPUS, HPG_MAX_CORES = 4, 16
PUBAPPS_GPU = "L4 (24GB)"
PUBAPPS_CORES, PUBAPPS_RAM_GB = 2, 16
UVICORN_WORKERS = 1  # tuned for 2 CPU cores and 16GB RAM

# Echo the effective configuration so a misconfigured env var is visible up front.
print(
    "\n".join(
        (
            f"Project: {PROJECT}",
            f"PubApps root: {PUBAPPS_ROOT}",
            f"Conda env: {CONDA_ENV}",
            f"Backend dir: {BACKEND_DIR}",
            f"HiPerGator resources: {HPG_MAX_GPUS} GPUs, {HPG_MAX_CORES} cores",
            f"PubApps resources: {PUBAPPS_GPU}, {PUBAPPS_CORES} cores, {PUBAPPS_RAM_GB}GB RAM",
        )
    )
)

In [None]:
# 2. Command runner (safe by default)
EXECUTE = False  # Set True to actually run shell commands

def run(cmd: str, check: bool = True) -> "subprocess.CompletedProcess | None":
    """Echo a shell command and, when ``EXECUTE`` is true, run it via ``bash -lc``.

    Args:
        cmd: Shell command line, passed to bash as a single string.
        check: When true, a non-zero exit status raises ``RuntimeError``.

    Returns:
        ``None`` in dry-run mode (``EXECUTE`` is false); otherwise the
        ``subprocess.CompletedProcess`` with captured text stdout/stderr.

    Raises:
        RuntimeError: The command exited non-zero and ``check`` is true.
    """
    print(f"$ {cmd}")
    if not EXECUTE:
        print("(dry-run) command not executed\n")
        return None
    result = subprocess.run(["bash", "-lc", cmd], capture_output=True, text=True)
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr)
    if check and result.returncode != 0:
        # Include the exit status: stderr is often empty and the command text
        # alone does not say how the command failed.
        raise RuntimeError(f"Command failed (exit code {result.returncode}): {cmd}")
    print()
    return result

## 3. Transfer Models from HiPerGator
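The cell below only prints the `rsync` command; it does not transfer anything. Copy the printed command and run it on HiPerGator, or on a hop host that can reach both systems.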

In [None]:
# 3.1 Render model sync command
# This cell only prints the command; nothing is transferred here.
# The parts are joined with an explicit backslash-newline so the printed
# command keeps its shell line continuations. In a regular (non-raw)
# triple-quoted string a trailing backslash swallows the newline instead,
# which collapses the whole command onto a single line.
rsync_cmd = " \\\n  ".join(
    [
        "rsync -avz --progress",
        f"{HIPERGATOR_SOURCE_MODELS}/",
        f"{PROJECT}@{PUBAPPS_HOST}:{MODEL_DIR}/",
    ]
)
print(rsync_cmd)

## 4. PubApps Environment Setup

In [None]:
# 4.1 Create required directories
# A single `mkdir -p` call covers every directory the later steps write into.
required_dirs = (PUBAPPS_ROOT, MODEL_DIR, BACKEND_DIR, RIVA_MODEL_DIR, PUBAPPS_ROOT / 'conda_envs')
run("mkdir -p " + " ".join(str(directory) for directory in required_dirs))

In [None]:
# 4.2 Create backend conda environment
# Informational only: report whether conda is available without aborting the cell.
run("conda --version", check=False)
# `&&` rather than `;`: if the cd fails, conda must not run from whatever
# directory the login shell started in and pick up (or miss) a different
# environment_backend.yml.
run(f"cd {PUBAPPS_ROOT} && conda env create -f environment_backend.yml -p {CONDA_ENV}")
# Smoke-test the new environment by importing the core backend packages.
run(f"conda run -p {CONDA_ENV} python -c 'import fastapi,langgraph,torch; print(\"backend env ok\")'")

## 5. Deploy Riva with Podman + Quadlet

In [None]:
# 5.1 Write quadlet service for Riva
quadlet_dir = Path.home().joinpath('.config', 'containers', 'systemd')
quadlet_dir.mkdir(parents=True, exist_ok=True)
quadlet_file = quadlet_dir.joinpath('riva-server.container')

# One list entry per line of the unit file; empty strings are the blank
# separator lines between sections. Only the Volume line is parameterized.
quadlet_content = "\n".join([
    "[Unit]",
    "Description=SPARC-P Riva Speech Server",
    "After=network-online.target",
    "",
    "[Container]",
    "Image=nvcr.io/nvidia/riva/riva-speech:2.16.0-server",
    "ContainerName=riva-server",
    "AddDevice=/dev/nvidia0",
    "AddDevice=/dev/nvidiactl",
    "AddDevice=/dev/nvidia-uvm",
    f"Volume={RIVA_MODEL_DIR}:/data:Z",
    "PublishPort=50051:50051",
    "Environment=NVIDIA_VISIBLE_DEVICES=all",
    "Exec=/opt/riva/bin/riva_server --riva_model_repo=/data/models",
    "",
    "[Service]",
    "Restart=always",
    "TimeoutStartSec=300",
    "",
    "[Install]",
    "WantedBy=default.target",
])

# The file is written regardless of EXECUTE; only run() honors the dry-run flag.
quadlet_file.write_text(quadlet_content)
print(f"Wrote {quadlet_file}")
print(quadlet_content)

In [None]:
# 5.2 Pull image and enable Riva service
riva_image = "nvcr.io/nvidia/riva/riva-speech:2.16.0-server"
riva_unit = "riva-server"

run(f"podman pull {riva_image}")
# Reload so the user manager picks up unit file changes before enabling.
run("systemctl --user daemon-reload")
run(f"systemctl --user enable --now {riva_unit}")
# Status is informational; a non-zero exit here should not abort the cell.
run(f"systemctl --user status {riva_unit} --no-pager", check=False)

## 6. Create FastAPI Backend + Systemd Service

In [None]:
# 6.1 Write backend main.py (integration-ready)
#
# The backend source is kept in a raw, NON-f string and the two deployment
# paths are substituted explicitly afterwards. The generated code is full of
# literal braces (dict literals, a dict comprehension, its own f-string), which
# an f-string template would try to evaluate as replacement fields and fail on
# before anything is written. The raw prefix also keeps the `\n` in the prompt
# as an escape sequence in the generated file instead of a real line break.
#
# NOTE(review): the generated service wraps the same `base_model` object in
#   three PeftModel.from_pretrained calls - confirm the adapters do not
#   interfere with each other (PEFT documents load_adapter/set_adapter for
#   serving several adapters on one base model).
# NOTE(review): CORS is wide open (allow_origins=["*"] together with
#   allow_credentials=True) - tighten before exposing the service.
# NOTE(review): confirm the chosen base model fits the serving GPU.
main_py = BACKEND_DIR / 'main.py'
main_template = textwrap.dedent(r"""
import base64
import os
from typing import Optional

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import riva.client
import firebase_admin
from firebase_admin import credentials, firestore

MODEL_BASE_PATH = "__MODEL_DIR__"
RIVA_SERVER = "localhost:50051"
FIREBASE_CREDS = "__PUBAPPS_ROOT__/config/firebase-credentials.json"

if not firebase_admin._apps:
    cred = credentials.Certificate(FIREBASE_CREDS)
    firebase_admin.initialize_app(cred)
db = firestore.client()

app = FastAPI(title="SPARC-P Multi-Agent Backend", version="1.0.0")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

caregiver_model = coach_model = supervisor_model = tokenizer = None

@app.on_event("startup")
async def load_models():
    global caregiver_model, coach_model, supervisor_model, tokenizer
    base_model_name = "gpt-oss-120b"
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    base_model = AutoModelForCausalLM.from_pretrained(base_model_name, load_in_4bit=True, device_map="auto")
    caregiver_model = PeftModel.from_pretrained(base_model, os.path.join(MODEL_BASE_PATH, "CaregiverAgent"))
    coach_model = PeftModel.from_pretrained(base_model, os.path.join(MODEL_BASE_PATH, "C-LEAR_CoachAgent"))
    supervisor_model = PeftModel.from_pretrained(base_model, os.path.join(MODEL_BASE_PATH, "SupervisorAgent"))

class ChatRequest(BaseModel):
    session_id: str
    user_message: str
    audio_data: Optional[str] = None

class ChatResponse(BaseModel):
    response_text: str
    audio_url: Optional[str] = None
    emotion: str
    animation_cues: dict
    coach_feedback: Optional[dict] = None

@app.get("/health")
async def health_check():
    try:
        auth = riva.client.Auth(uri=RIVA_SERVER)
        riva.client.ASRService(auth)
        riva_ok = True
    except Exception:
        riva_ok = False
    return {"status": "healthy", "riva_connected": riva_ok}

@app.post("/v1/chat", response_model=ChatResponse)
async def process_chat(request: ChatRequest):
    try:
        session_ref = db.collection("sessions").document(request.session_id)
        session_state = session_ref.get().to_dict() or {}

        blocked = ["politics", "election", "gambling", "crypto", "finance advice"]
        lower = request.user_message.lower()
        if any(t in lower for t in blocked):
            return ChatResponse(
                response_text="I can only discuss HPV vaccination and clinical communication practice.",
                emotion="neutral",
                animation_cues={"gesture": "idle"},
                coach_feedback={"safe": False, "reason": "off_topic"}
            )

        mode = session_state.get("mode", "caregiver")
        active = coach_model if mode == "coach" else supervisor_model if mode == "supervisor" else caregiver_model

        prompt = f"[SESSION: {request.session_id}] User: {request.user_message}\nAssistant:"
        model_inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
        model_inputs = {k: v.to(active.device) for k, v in model_inputs.items()}

        with torch.inference_mode():
            output = active.generate(**model_inputs, max_new_tokens=180, do_sample=True, temperature=0.7, top_p=0.9, pad_token_id=tokenizer.eos_token_id)
        decoded = tokenizer.decode(output[0], skip_special_tokens=True)
        response_text = decoded.split("Assistant:")[-1].strip() or "I'm here to help with HPV vaccine communication practice."

        audio_url = None
        try:
            auth = riva.client.Auth(uri=RIVA_SERVER)
            tts = riva.client.SpeechSynthesisService(auth)
            tts_resp = tts.synthesize(response_text, voice_name="English-US.Female-1")
            audio_url = "data:audio/wav;base64," + base64.b64encode(tts_resp.audio).decode("utf-8")
        except Exception:
            pass

        session_state["last_user_message"] = request.user_message
        session_state["last_response"] = response_text
        session_state["mode"] = mode
        session_ref.set(session_state, merge=True)

        return ChatResponse(
            response_text=response_text,
            audio_url=audio_url,
            emotion="supportive",
            animation_cues={"gesture": "speaking", "intensity": "low"},
            coach_feedback={"safe": True}
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
""").strip()

# Fill in the only two deployment-specific values in the template.
main_content = (
    main_template
    .replace("__MODEL_DIR__", str(MODEL_DIR))
    .replace("__PUBAPPS_ROOT__", str(PUBAPPS_ROOT))
)

BACKEND_DIR.mkdir(parents=True, exist_ok=True)
# Explicit encoding so the generated source does not depend on the VM's locale.
main_py.write_text(main_content, encoding="utf-8")
print(f"Wrote {main_py}")

In [None]:
# 6.2 Create systemd user service for FastAPI
systemd_dir = Path.home().joinpath('.config', 'systemd', 'user')
systemd_dir.mkdir(parents=True, exist_ok=True)
service_file = systemd_dir.joinpath('sparc-backend.service')

# One list entry per line of the unit file; empty strings are the blank
# separator lines between sections.
service_content = "\n".join([
    "[Unit]",
    "Description=SPARC-P FastAPI Backend",
    "After=network.target riva-server.service",
    "Requires=riva-server.service",
    "",
    "[Service]",
    "Type=simple",
    f"Environment=PATH={CONDA_ENV}/bin:/usr/bin",
    "Environment=PYTHONUNBUFFERED=1",
    f"WorkingDirectory={BACKEND_DIR}",
    f"ExecStart={CONDA_ENV}/bin/uvicorn main:app --host 0.0.0.0 --port 8000 --workers {UVICORN_WORKERS}",
    "Restart=always",
    "RestartSec=10",
    "",
    "[Install]",
    "WantedBy=default.target",
])

# The file is written regardless of EXECUTE; only run() honors the dry-run flag.
service_file.write_text(service_content)
print(f"Wrote {service_file}")
print(service_content)
print(f"Configured uvicorn workers: {UVICORN_WORKERS}")

In [None]:
# 6.3 Enable backend service
backend_unit = "sparc-backend"

# Reload so the user manager sees the unit file before it is enabled.
run("systemctl --user daemon-reload")
run(f"systemctl --user enable --now {backend_unit}")
# Status is informational; a non-zero exit here should not abort the cell.
run(f"systemctl --user status {backend_unit} --no-pager", check=False)

## 7. Validation Checks
Set `EXECUTE = True` before running these checks.

In [None]:
# 7.1 Health and service checks
# Every check runs with check=False so one failing probe does not hide the rest.
health_url = "http://localhost:8000/health"
journal_tail = "journalctl --user -u {unit} -n 50 --no-pager"

run(f"curl -s {health_url}", check=False)
run(journal_tail.format(unit="riva-server"), check=False)
run(journal_tail.format(unit="sparc-backend"), check=False)
run("ls -lh " + str(MODEL_DIR), check=False)