diff --git a/packages/core/src/images/analyzer.test.ts b/packages/core/src/images/analyzer.test.ts new file mode 100644 index 000000000..3e27a2730 --- /dev/null +++ b/packages/core/src/images/analyzer.test.ts @@ -0,0 +1,182 @@ +import { describe, it, expect } from "vitest"; +import { defaultTreatmentForRole, normalizeAnalysis } from "./analyzer.js"; + +// Pure-helper tests only — no Gemini calls. The analyzer's actual API +// surface is exercised end-to-end by the studio routes; these focus on +// the resilience layer that protects callers from a model that emits +// out-of-range or mistyped values. + +describe("normalizeAnalysis", () => { + it("passes through a fully valid response unchanged", () => { + const out = normalizeAnalysis({ + role: "hero", + vibe: "gritty desert noir", + suggestedTreatment: "editorial-bleed", + retentionStrengthAtAttachment: 9, + rationale: "Dominant silhouette against open sky reads instantly.", + }); + expect(out).toEqual({ + role: "hero", + vibe: "gritty desert noir", + suggestedTreatment: "editorial-bleed", + retentionStrengthAtAttachment: 9, + rationale: "Dominant silhouette against open sky reads instantly.", + }); + }); + + it("lowercases an upper-case role", () => { + const out = normalizeAnalysis({ + role: "HERO", + vibe: "x", + suggestedTreatment: "editorial-bleed", + retentionStrengthAtAttachment: 5, + rationale: "y", + }); + expect(out.role).toBe("hero"); + }); + + it("falls back to 'subject' when role is unrecognized", () => { + const out = normalizeAnalysis({ + role: "champion", + vibe: "x", + suggestedTreatment: "duotone-bg", + retentionStrengthAtAttachment: 5, + rationale: "y", + }); + expect(out.role).toBe("subject"); + }); + + it("falls back to 'subject' when role is missing", () => { + const out = normalizeAnalysis({ + vibe: "x", + suggestedTreatment: "duotone-bg", + retentionStrengthAtAttachment: 5, + rationale: "y", + }); + expect(out.role).toBe("subject"); + }); + + it("clamps retention score below the floor up to 
1", () => { + const out = normalizeAnalysis({ + role: "subject", + vibe: "x", + suggestedTreatment: "duotone-bg", + retentionStrengthAtAttachment: -3, + rationale: "y", + }); + expect(out.retentionStrengthAtAttachment).toBe(1); + }); + + it("clamps retention score above the ceiling down to 10", () => { + const out = normalizeAnalysis({ + role: "subject", + vibe: "x", + suggestedTreatment: "duotone-bg", + retentionStrengthAtAttachment: 42, + rationale: "y", + }); + expect(out.retentionStrengthAtAttachment).toBe(10); + }); + + it("rounds fractional scores", () => { + const out = normalizeAnalysis({ + role: "subject", + vibe: "x", + suggestedTreatment: "duotone-bg", + retentionStrengthAtAttachment: 7.4, + rationale: "y", + }); + expect(out.retentionStrengthAtAttachment).toBe(7); + }); + + it("defaults retention score to 5 when missing or non-numeric", () => { + expect( + normalizeAnalysis({ + role: "subject", + vibe: "x", + suggestedTreatment: "duotone-bg", + rationale: "y", + }).retentionStrengthAtAttachment, + ).toBe(5); + expect( + normalizeAnalysis({ + role: "subject", + vibe: "x", + suggestedTreatment: "duotone-bg", + retentionStrengthAtAttachment: Number.NaN, + rationale: "y", + }).retentionStrengthAtAttachment, + ).toBe(5); + }); + + it("returns null suggestedTreatment when value is unknown", () => { + const out = normalizeAnalysis({ + role: "subject", + vibe: "x", + suggestedTreatment: "made-up-treatment", + retentionStrengthAtAttachment: 5, + rationale: "y", + }); + expect(out.suggestedTreatment).toBeNull(); + }); + + it("returns null suggestedTreatment when value is null", () => { + const out = normalizeAnalysis({ + role: "subject", + vibe: "x", + suggestedTreatment: null, + retentionStrengthAtAttachment: 5, + rationale: "y", + }); + expect(out.suggestedTreatment).toBeNull(); + }); + + it("trims and clips vibe to 60 chars", () => { + const long = " ".repeat(2) + "a".repeat(120); + const out = normalizeAnalysis({ + role: "subject", + vibe: long, + 
suggestedTreatment: "duotone-bg", + retentionStrengthAtAttachment: 5, + rationale: "y", + }); + expect(out.vibe.length).toBe(60); + expect(out.vibe.startsWith("a")).toBe(true); + }); + + it("trims and clips rationale to 240 chars", () => { + const out = normalizeAnalysis({ + role: "subject", + vibe: "x", + suggestedTreatment: "duotone-bg", + retentionStrengthAtAttachment: 5, + rationale: "x".repeat(500), + }); + expect(out.rationale.length).toBe(240); + }); + + it("returns empty strings rather than throwing on missing string fields", () => { + const out = normalizeAnalysis({ + role: "subject", + suggestedTreatment: "duotone-bg", + retentionStrengthAtAttachment: 5, + }); + expect(out.vibe).toBe(""); + expect(out.rationale).toBe(""); + }); +}); + +describe("defaultTreatmentForRole", () => { + it("maps hero to editorial-bleed", () => { + expect(defaultTreatmentForRole("hero")).toBe("editorial-bleed"); + }); + it("maps subject to editorial-bleed", () => { + expect(defaultTreatmentForRole("subject")).toBe("editorial-bleed"); + }); + it("maps atmosphere to duotone-bg", () => { + expect(defaultTreatmentForRole("atmosphere")).toBe("duotone-bg"); + }); + it("maps graphic to duotone-bg", () => { + expect(defaultTreatmentForRole("graphic")).toBe("duotone-bg"); + }); +}); diff --git a/packages/core/src/images/analyzer.ts b/packages/core/src/images/analyzer.ts new file mode 100644 index 000000000..d44d5a1c5 --- /dev/null +++ b/packages/core/src/images/analyzer.ts @@ -0,0 +1,231 @@ +/** + * Gemini image analyzer — runs once per ingested image and tags it with + * structured creative metadata that the visual director then consumes as a + * soft prior. + * + * Why this matters: the visual director currently sees only what the user + * (or the manifest defaults) provided — role is null on every fresh upload, + * description is empty, tags are empty. With nothing to go on, the LLM + * either picks treatments at random or always defaults to editorial-bleed. 
+ * A 1-shot Gemini Flash analysis gives every uploaded image a baseline + * role + suggested treatment + vibe before the user does any manual + * tagging, so the very first render already has intentional visual logic. + * + * The analyzer is pure with respect to the project — it doesn't touch + * disk; the studio route writes the patched fields into the manifest. + * + * analyzeImage(apiKey, fileBytes, mimeType) → AnalyzedImage + * + * Inline-data path (base64) is preferred over the Files API for analyzer + * runs because the input is a single thumbnail-sized webp (<1MB) and the + * caller wants the whole flow to take 1-2 seconds, not 10-30 (Files API + * needs a separate upload + poll-until-ACTIVE). + */ + +import { generateStructured, GeminiError, DEFAULT_GEMINI_MODEL } from "../gemini/client.js"; +import type { ToolFunctionDeclaration } from "../gemini/client.js"; +import { TREATMENT_IDS, type TreatmentId } from "../script/templates/image-scene.js"; +import type { ImageRole } from "./manifest.js"; + +const VALID_ROLES: ReadonlyArray = ["hero", "subject", "atmosphere", "graphic"]; + +export interface AnalyzedImage { + /** Inferred role classification. Soft suggestion — user can override. */ + role: ImageRole; + /** Mood phrase, ≤60 chars. Used for design-brief threading and tag suggestions. */ + vibe: string; + /** + * Recommended treatment for image-scene template. The visual director + * uses this as a soft prior; not a hard binding. null = the analyzer + * thinks this image is best left as backdrop / atmosphere with no + * specific image-scene treatment. + */ + suggestedTreatment: TreatmentId | null; + /** + * 1-10 score: how strong is this image as a retention driver if used in + * a hook scene? High = striking subject, strong contrast, clear focal + * point. Low = busy, low-contrast, ambiguous subject. Soft signal only. + */ + retentionStrengthAtAttachment: number; + /** One-sentence analyst rationale shown in the studio image card. 
*/ + rationale: string; +} + +interface AnalyzeImageToolInput { + role?: string; + vibe?: string; + suggestedTreatment?: string | null; + retentionStrengthAtAttachment?: number; + rationale?: string; +} + +const ANALYZER_TOOL: ToolFunctionDeclaration = { + name: "image_analysis", + description: + "Classify a project image's creative role, vibe, and suggested image-scene treatment, plus a 1-10 retention-strength estimate.", + parameters: { + type: "object", + properties: { + role: { + type: "string", + enum: [...VALID_ROLES], + description: + "hero=full-bleed dominant subject; subject=detail/supporting/callout; atmosphere=pure mood backdrop; graphic=abstract/iconographic accent.", + }, + vibe: { + type: "string", + description: + "Single mood phrase ≤60 chars (e.g. 'gritty desert noir', 'clean editorial blue', 'dreamy gradient').", + }, + suggestedTreatment: { + type: "string", + enum: [...TREATMENT_IDS], + description: + "editorial-bleed for hero/subject with strong silhouette; duotone-bg for atmosphere/graphic backdrops; type-mask-fill for high-contrast big-reveal moments. Pick the single best fit.", + }, + retentionStrengthAtAttachment: { + type: "integer", + minimum: 1, + maximum: 10, + description: + "1-10. How strong is this image as a hook visual? 9-10 = striking, instantly readable subject. 5-6 = decent supporting shot. 1-3 = busy, low-contrast, weak focal point.", + }, + rationale: { + type: "string", + description: "One sentence: why this role + treatment + score for this image.", + }, + }, + required: ["role", "vibe", "suggestedTreatment", "retentionStrengthAtAttachment", "rationale"], + }, +}; + +const SYSTEM_PROMPT = + "You are a senior video director auditioning an image for a short-form retention-optimized video. " + + "You are looking at one image. Answer in the function call. " + + "Be honest about retention strength — busy, ambiguous, or low-contrast images deserve scores of 3-5, " + + "not 8. 
The user benefits more from accurate signal than from flattery. " + + "For role: prefer 'subject' over 'hero' unless the image truly has a single dominant subject filling " + + "most of the frame. For treatment: pick the SINGLE best fit; the visual director will decide whether " + + "to honor it."; + +const USER_PROMPT = + "Analyze the attached image and call the image_analysis tool with your assessment."; + +export interface AnalyzeImageResult { + analyzed: AnalyzedImage; + usage: { + promptTokens: number; + outputTokens: number; + }; +} + +/** + * Run Gemini Flash on an image and return structured analysis. Throws + * GeminiError on API failure; callers should treat that as a soft signal + * (mark analysis as failed in the manifest, fall back to user-typed + * metadata) rather than blocking the upload. + */ +export async function analyzeImage( + apiKey: string, + fileBytes: Uint8Array | Buffer, + mimeType: string, + opts: { model?: string } = {}, +): Promise { + if (!fileBytes || fileBytes.byteLength === 0) { + throw new GeminiError("analyzeImage: fileBytes is empty"); + } + if (!mimeType || !mimeType.startsWith("image/")) { + throw new GeminiError(`analyzeImage: unsupported mimeType "${mimeType}"`); + } + // Buffer.toString("base64") works for both Buffer and Uint8Array via the + // Buffer.from coercion — keeps the call site simple for browser-fetch + // typed arrays AND node fs.readFileSync buffers. + const base64 = + fileBytes instanceof Buffer + ? fileBytes.toString("base64") + : Buffer.from(fileBytes).toString("base64"); + + const { result, usage } = await generateStructured(apiKey, { + model: opts.model ?? DEFAULT_GEMINI_MODEL, + parts: [{ inlineData: { mimeType, data: base64 } }, { text: USER_PROMPT }], + systemInstruction: SYSTEM_PROMPT, + tool: ANALYZER_TOOL, + temperature: 0.3, + maxOutputTokens: 1024, + }); + + return { + analyzed: normalizeAnalysis(result), + usage: { + promptTokens: usage.promptTokenCount ?? 
/**
 * Defensive normalization of the model's tool input. Pure — exported so
 * tests can verify clamping/coercion without invoking Gemini.
 *
 * Each field is coerced independently, so one malformed value never
 * discards the others:
 *   - role: lowercased; anything outside VALID_ROLES becomes "subject".
 *   - suggestedTreatment: lowercased; anything outside TREATMENT_IDS
 *     (including null / missing) becomes null.
 *   - vibe / rationale: trimmed, then clipped to 60 / 240 UTF-16 code
 *     units; non-strings become "".
 *   - retentionStrengthAtAttachment: rounded and clamped to 1..10;
 *     missing, non-numeric or non-finite values become 5.
 *
 * A `raw` that is not an object at all (null, undefined, a primitive) is
 * treated as an empty payload and yields the all-defaults result instead
 * of throwing on property access.
 *
 * @param raw Tool-call arguments as emitted by the model (untrusted).
 * @returns A fully populated AnalyzedImage; never throws.
 */
export function normalizeAnalysis(raw: AnalyzeImageToolInput): AnalyzedImage {
  // The static type says "object", but this value comes straight from a
  // model response, so guard the runtime shape before reading fields.
  const input: AnalyzeImageToolInput = typeof raw === "object" && raw !== null ? raw : {};
  return {
    role: coerceRole(input.role),
    vibe: clipText(input.vibe, 60),
    suggestedTreatment: coerceTreatment(input.suggestedTreatment),
    retentionStrengthAtAttachment: clampScore(input.retentionStrengthAtAttachment),
    rationale: clipText(input.rationale, 240),
  };
}

/**
 * Trim `value` and clip it to at most `maxLength` UTF-16 code units.
 * Non-string input yields "". If the cut would land between the two halves
 * of a surrogate pair, the dangling high surrogate is dropped so the result
 * never ends in half a character.
 */
function clipText(value: unknown, maxLength: number): string {
  if (typeof value !== "string") return "";
  const trimmed = value.trim();
  if (trimmed.length <= maxLength) return trimmed;
  const clipped = trimmed.slice(0, maxLength);
  const lastUnit = clipped.charCodeAt(clipped.length - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsOnHighSurrogate ? clipped.slice(0, -1) : clipped;
}

/** Case-insensitive match against VALID_ROLES; falls back to "subject". */
function coerceRole(value: unknown): ImageRole {
  if (typeof value === "string") {
    const lower = value.toLowerCase();
    if ((VALID_ROLES as readonly string[]).includes(lower)) {
      return lower as ImageRole;
    }
  }
  // Default to subject — safest middle ground. Subject can underlay or
  // foreground; misclassifying as hero would over-promote a weak image.
  return "subject";
}

/**
 * Case-insensitive match against TREATMENT_IDS. Anything else — null,
 * undefined, a non-string, or an unknown id — yields null.
 */
function coerceTreatment(value: unknown): TreatmentId | null {
  if (typeof value === "string") {
    const lower = value.toLowerCase();
    if ((TREATMENT_IDS as readonly string[]).includes(lower)) {
      return lower as TreatmentId;
    }
  }
  return null;
}

/**
 * Round to the nearest integer and clamp into 1..10. Values that are not
 * finite numbers (missing, NaN, ±Infinity, strings) map to the neutral 5.
 */
function clampScore(value: unknown): number {
  if (typeof value !== "number" || !Number.isFinite(value)) return 5;
  return Math.max(1, Math.min(10, Math.round(value)));
}

/**
 * Map a role to the analyzer's preferred default treatment. Pure helper.
 *
 * hero / subject → "editorial-bleed"; atmosphere / graphic → "duotone-bg".
 * A value outside the ImageRole union (only possible through an unchecked
 * cast) returns null.
 *
 * Note: normalizeAnalysis does not apply this mapping itself — an unknown
 * or missing suggestedTreatment stays null there — so a caller that wants
 * a role-based fallback has to call this explicitly.
 */
export function defaultTreatmentForRole(role: ImageRole): TreatmentId | null {
  switch (role) {
    case "hero":
    case "subject":
      return "editorial-bleed";
    case "atmosphere":
    case "graphic":
      return "duotone-bg";
    default:
      return null;
  }
}
+ * `analysisError` carries the human-readable reason. + * undefined — Analysis was never attempted (e.g. uploaded before the + * feature shipped, or no GEMINI_API_KEY at upload time). + */ +export type ImageAnalysisStatus = "pending" | "complete" | "failed"; + export interface ImageFocal { /** Normalized x coordinate of the focal point, 0..1 (left to right). */ x: number; @@ -49,6 +63,35 @@ export interface ImageEntry { focalPoint: ImageFocal; /** ISO timestamp of import. */ importedAt: string; + + // ── Optional Gemini analyzer fields ────────────────────────────────────── + // Filled in by `packages/core/src/images/analyzer.ts` post-upload. Soft + // priors only — the user's role/description always take precedence and + // these are best-effort. Absent on entries created before the analyzer + // feature shipped. + + /** Mood phrase ≤60 chars, e.g. "gritty desert noir". */ + vibe?: string; + /** + * Treatment id from packages/core/src/script/templates/image-scene.ts + * the analyzer thinks this image works best with. Visual director reads + * this as a soft prior; user / script context can still override. + */ + suggestedTreatment?: string | null; + /** + * 1-10 retention-strength estimate as a hook visual. 9-10 = striking, + * instantly readable subject; 5-6 = decent supporting shot; 1-3 = + * busy / low-contrast / weak focal point. Soft signal only. + */ + retentionStrengthAtAttachment?: number; + /** Lifecycle of the optional Gemini analysis run. See ImageAnalysisStatus. */ + analysisStatus?: ImageAnalysisStatus; + /** Human-readable failure reason when analysisStatus === "failed". */ + analysisError?: string; + /** ISO timestamp of last successful analysis. */ + analyzedAt?: string; + /** One-line analyst rationale shown in the studio image card on hover. 
*/ + analysisRationale?: string; } export interface ImageManifest { diff --git a/packages/core/src/script/visualDirector.ts b/packages/core/src/script/visualDirector.ts index 0e1aa08c8..4ee7c746f 100644 --- a/packages/core/src/script/visualDirector.ts +++ b/packages/core/src/script/visualDirector.ts @@ -253,7 +253,9 @@ function buildSystem(opts: VisualDirectorOptions): string { `5. **Intensity arc**: open soft, escalate to climax, settle for the close. type-mask-fill ` + ` belongs at the climax or a major act break, not scene 1 and not the outro.\n` + `6. **Hook scenes**: scenes flagged as hooks deserve hero/subject imagery in editorial-bleed ` + - ` or type-mask-fill. Atmosphere is too quiet for a hook.\n` + + ` or type-mask-fill. Atmosphere is too quiet for a hook. **Prefer images with retention ` + + ` strength ≥7** (when the analyzer prior is shown) for hook slots — the visually strongest ` + + ` asset earns the most-watched scene.\n` + `7. **Scene template hint**: scenes already using chart-scene, hook-statreveal, or quote ` + ` templates often work better with no image (imageId: null) so the typography sings — ` + ` override only when an atmosphere image makes the act cohere.\n` + @@ -263,7 +265,11 @@ function buildSystem(opts: VisualDirectorOptions): string { ` so the assembler keeps the planner's chosen template instead of routing through ` + ` image-scene. The image will be rendered using the template's own visual language.\n` + `9. **editorial-serif** scenes are typography-only by design — pass imageId: null and ` + - ` treatment: null. Don't try to attach an image; the breath scene needs negative space.`, + ` treatment: null. Don't try to attach an image; the breath scene needs negative space.\n` + + `10. **Analyzer priors are advisory**. When a catalog entry has "(analyzer prior)" lines, ` + + ` treat them as a starting point but override freely when scene context demands it. 
The ` + + ` same hero image should still appear in ≥3 different treatments across the video — ` + + ` don't lock yourself into the analyzer's single suggestion.`, ); if (opts.themeContext?.name) { @@ -292,14 +298,29 @@ function buildImageCatalog(manifest: ImageManifest): string { .map((img) => { const desc = img.description.trim() || "(no description supplied — use the file id as the cue)"; - return [ + const lines = [ `### ${img.id}`, `role: ${img.role ?? "(unset)"} · ${img.width}×${img.height} (${img.aspect.toFixed(2)})`, `focal: x=${img.focalPoint.x.toFixed(2)} y=${img.focalPoint.y.toFixed(2)}`, `dominant: ${img.dominantColor} · palette: ${img.palette.join(" ")}`, `tags: ${img.tags.length > 0 ? img.tags.join(", ") : "(none)"}`, `description: ${desc}`, - ].join("\n"); + ]; + // Surface analyzer priors when present. These are *soft* — the + // director can override based on script context (e.g. analyzer says + // duotone-bg but the scene wants a hook so we use editorial-bleed + // anyway). Mark them clearly as "(analyzer prior)" so the model + // knows they're advisory. 
+ if (img.vibe?.trim()) { + lines.push(`vibe (analyzer prior): ${img.vibe.trim()}`); + } + if (img.suggestedTreatment) { + lines.push(`suggested treatment (analyzer prior): ${img.suggestedTreatment}`); + } + if (typeof img.retentionStrengthAtAttachment === "number") { + lines.push(`retention strength (analyzer prior): ${img.retentionStrengthAtAttachment}/10`); + } + return lines.join("\n"); }) .join("\n\n"); } diff --git a/packages/core/src/studio-api/routes/images.ts b/packages/core/src/studio-api/routes/images.ts index ee2b0d82f..98e150150 100644 --- a/packages/core/src/studio-api/routes/images.ts +++ b/packages/core/src/studio-api/routes/images.ts @@ -4,6 +4,7 @@ import { join, basename, extname } from "node:path"; import { randomBytes } from "node:crypto"; import { tmpdir } from "node:os"; import { + analyzeImage, findById, ingestImage, readManifest, @@ -13,6 +14,8 @@ import { type ImageEntry, type ImageRole, } from "../../images/index.js"; +import { GeminiError, loadGeminiKey, DEFAULT_GEMINI_MODEL } from "../../gemini/index.js"; +import { CostLogger, loggerSink } from "../../telemetry/cost.js"; import { OpsLogger, opsFireAndForget } from "../../telemetry/ops.js"; import type { StudioApiAdapter } from "../types.js"; @@ -116,7 +119,17 @@ export function registerImagesRoutes(api: Hono, adapter: StudioApiAdapter): void wallMs: Date.now() - start, meta: { id: entry.id, replaced, originalName: file.name, originalBytes: file.size }, }); - return c.json({ ok: true, entry, replaced }); + + // Kick off Gemini analysis fire-and-forget. Mark the entry pending + // synchronously so the studio shows a spinner from the moment the + // upload response lands. If GEMINI_API_KEY is missing or the call + // fails, the entry's analysisStatus flips to "failed" with a reason. + const withPending = markAnalysisPending(project.dir, entry.id); + void runImageAnalysis(project.dir, withPending ?? 
entry).catch((err) => { + void ops.logError("images.analyze", err, { id: entry.id }); + }); + + return c.json({ ok: true, entry: withPending ?? entry, replaced }); } catch (err) { void ops.logError("images.upload", err, { originalName: file.name }); return c.json({ error: err instanceof Error ? err.message : String(err) }, 500); @@ -129,6 +142,26 @@ export function registerImagesRoutes(api: Hono, adapter: StudioApiAdapter): void } }); + // Manually re-run Gemini analysis for an existing image. Useful if the + // user added GEMINI_API_KEY after the original upload, or wants to + // refresh stale priors after editing a description. + api.post("/projects/:id/images/:imageId/analyze", async (c) => { + const project = await adapter.resolveProject(c.req.param("id")); + if (!project) return c.json({ error: "not found" }, 404); + const entry = findById(readManifest(project.dir), c.req.param("imageId")); + if (!entry) return c.json({ error: "image not found" }, 404); + + const pending = markAnalysisPending(project.dir, entry.id); + try { + const updated = await runImageAnalysis(project.dir, pending ?? entry); + return c.json({ ok: true, entry: updated }); + } catch (err) { + const reason = err instanceof Error ? err.message : String(err); + const failedEntry = markAnalysisFailed(project.dir, entry.id, reason); + return c.json({ ok: false, entry: failedEntry, error: reason }, 502); + } + }); + // Patch metadata (role / description / tags / focalPoint). Body is a // partial — only the keys present are updated. api.patch("/projects/:id/images/:imageId", async (c) => { @@ -223,3 +256,128 @@ export function registerImagesRoutes(api: Hono, adapter: StudioApiAdapter): void return c.json({ ok: true }); }); } + +// ── Analysis helpers ─────────────────────────────────────────────────────── +// +// These run AFTER an upload response has been sent, so failures here must +// never throw out of an HTTP handler. 
// The route owner already wraps the fire-and-forget call in `.catch(...)`
// for telemetry.

/**
 * Patch a single entry in the manifest with analyzer-owned fields.
 *
 * Read-modify-write: the manifest is re-read on every call and `patch` is
 * shallow-merged over the entry currently stored, so fields not named in
 * `patch` keep whatever value the manifest holds at write time.
 *
 * @returns The patched entry, or null if no entry with `imageId` exists
 *   any more (e.g. it was removed in the meantime — race with DELETE).
 */
function patchEntryAnalysis(
  projectDir: string,
  imageId: string,
  patch: Partial<ImageEntry>,
): ImageEntry | null {
  const manifest = readManifest(projectDir);
  const existing = findById(manifest, imageId);
  if (!existing) return null;
  const next: ImageEntry = { ...existing, ...patch };
  writeManifest(projectDir, upsertEntry(manifest, next));
  return next;
}

/** Flag the entry as "pending" and clear any previous failure reason. */
function markAnalysisPending(projectDir: string, imageId: string): ImageEntry | null {
  return patchEntryAnalysis(projectDir, imageId, {
    analysisStatus: "pending",
    analysisError: undefined,
  });
}

/** Flag the entry as "failed", storing at most 240 chars of `reason`. */
function markAnalysisFailed(
  projectDir: string,
  imageId: string,
  reason: string,
): ImageEntry | null {
  return patchEntryAnalysis(projectDir, imageId, {
    analysisStatus: "failed",
    analysisError: reason.slice(0, 240),
  });
}

/**
 * Read the image bytes off disk, call Gemini, write the result back to the
 * manifest. Logs cost + ops.
 *
 * Analysis failures do NOT reject. A missing API key, a missing or
 * unreadable file, and any error raised while analyzing are all recorded
 * on the entry as analysisStatus: "failed" (with analysisError set), and
 * the patched entry is returned. Callers that need to distinguish success
 * from failure must inspect `analysisStatus` on the returned entry rather
 * than rely on a rejection.
 * NOTE(review): the promise may still reject if recording the failure
 * itself throws (manifest read/write inside markAnalysisFailed) — confirm
 * whether callers should treat that as fatal.
 *
 * @param projectDir Project root; `entry.src` is resolved against it.
 * @param entry Snapshot of the manifest entry to analyze. Only `id` and
 *   `src` are trusted from it; the role is re-read at write time.
 * @returns The entry as patched in the manifest, or the `entry` snapshot
 *   passed in if the entry was removed from the manifest while the
 *   analysis was running.
 */
async function runImageAnalysis(projectDir: string, entry: ImageEntry): Promise<ImageEntry> {
  const apiKey = loadGeminiKey();
  if (!apiKey) {
    const failed = markAnalysisFailed(
      projectDir,
      entry.id,
      "GEMINI_API_KEY is not set — skipping analysis. Configure it in the studio Settings tab.",
    );
    return failed ?? entry;
  }

  const abs = join(projectDir, entry.src);
  if (!existsSync(abs)) {
    const failed = markAnalysisFailed(
      projectDir,
      entry.id,
      `Image file missing on disk at ${entry.src}`,
    );
    return failed ?? entry;
  }

  const ops = new OpsLogger(projectDir);
  const onCostEvent = loggerSink(new CostLogger(projectDir));
  try {
    // Read inside the try: a failed read (permissions, file removed after
    // the existsSync check) must flip the entry to "failed" instead of
    // leaving it on "pending" forever.
    const bytes = readFileSync(abs);
    const start = Date.now();
    // NOTE(review): mime type is hard-coded — presumably ingest always
    // stores webp; confirm against the ingest pipeline.
    const { analyzed, usage } = await analyzeImage(apiKey, bytes, "image/webp");
    onCostEvent(
      "images.analyze",
      {
        kind: "gemini",
        model: DEFAULT_GEMINI_MODEL,
        promptTokens: usage.promptTokens,
        outputTokens: usage.outputTokens,
      },
      Date.now() - start,
      { id: entry.id, role: analyzed.role, treatment: analyzed.suggestedTreatment },
    );
    // `entry` was captured before the await above. Re-read the manifest so
    // a role the user typed while the analysis was in flight is kept.
    const current = findById(readManifest(projectDir), entry.id);
    const updated = patchEntryAnalysis(projectDir, entry.id, {
      // Don't overwrite a user-typed role — analyzer is a soft prior, not
      // a source of truth. Same for description / tags.
      role: (current ?? entry).role ?? analyzed.role,
      vibe: analyzed.vibe,
      suggestedTreatment: analyzed.suggestedTreatment,
      retentionStrengthAtAttachment: analyzed.retentionStrengthAtAttachment,
      analysisRationale: analyzed.rationale,
      analysisStatus: "complete",
      analysisError: undefined,
      analyzedAt: new Date().toISOString(),
    });
    opsFireAndForget(ops, {
      op: "images.analyze",
      message: `${entry.id} → ${analyzed.role} · ${analyzed.suggestedTreatment ?? "(no treatment)"} · retention ${analyzed.retentionStrengthAtAttachment}/10`,
      wallMs: Date.now() - start,
      meta: {
        id: entry.id,
        role: analyzed.role,
        treatment: analyzed.suggestedTreatment,
        retention: analyzed.retentionStrengthAtAttachment,
      },
    });
    return updated ?? entry;
  } catch (err) {
    const reason =
      err instanceof GeminiError
        ? `Gemini error: ${err.message}`
        : err instanceof Error
          ? err.message
          : String(err);
    const failed = markAnalysisFailed(projectDir, entry.id, reason);
    void ops.logError("images.analyze", err, { id: entry.id });
    return failed ?? entry;
  }
}
(data.entry as ImageEntry) : i)), + } + : prev, + ); + } + if (!res.ok && data.error) { + // Don't blow away the manifest with a top-level error — the + // entry already shows "analysis failed" with the reason. + console.warn(`[images] analyze failed: ${data.error}`); + } + } catch (err) { + setLoadError(err instanceof Error ? err.message : String(err)); + } + }, + [projectId], + ); + const selected = manifest?.images.find((i) => i.id === selectedId) ?? null; return ( @@ -278,11 +332,46 @@ export const ImagesTab = memo(function ImagesTab({ projectId }: ImagesTabProps) no role )} + {img.analysisStatus === "pending" && ( + + analyzing… + + )} + {img.analysisStatus === "complete" && + typeof img.retentionStrengthAtAttachment === "number" && ( + = 7 + ? "text-emerald-400" + : img.retentionStrengthAtAttachment >= 4 + ? "text-amber-400" + : "text-rose-400" + }`} + title={ + img.analysisRationale ?? "Analyzer retention-strength estimate" + } + > + R{img.retentionStrengthAtAttachment} + + )} + {img.analysisStatus === "failed" && ( + + analysis failed + + )}
{img.width}×{img.height} · {fmtBytes(img.bytes)} ·{" "} {img.description.trim().length > 0 ? ( img.description + ) : img.vibe ? ( + {img.vibe} ) : ( no description )} @@ -296,6 +385,7 @@ export const ImagesTab = memo(function ImagesTab({ projectId }: ImagesTabProps) projectId={projectId} onPatch={(patch) => patchEntry(img.id, patch)} onRemove={() => removeEntry(img.id)} + onReanalyze={() => reanalyzeEntry(img.id)} /> )}
@@ -318,9 +408,10 @@ interface ImageEditorProps { patch: Partial>, ) => Promise; onRemove: () => Promise; + onReanalyze: () => Promise; } -function ImageEditor({ img, projectId, onPatch, onRemove }: ImageEditorProps) { +function ImageEditor({ img, projectId, onPatch, onRemove, onReanalyze }: ImageEditorProps) { const [description, setDescription] = useState(img.description); const [tagsRaw, setTagsRaw] = useState(img.tags.join(", ")); const previewRef = useRef(null); @@ -449,6 +540,11 @@ function ImageEditor({ img, projectId, onPatch, onRemove }: ImageEditorProps) { /> + {/* Analyzer panel (Gemini Flash priors). Appears once any analyzer + field is present OR when analysis is pending/failed; the rerun + button lets the user refresh after editing description/role. */} + + {/* Palette + remove */}
@@ -472,3 +568,86 @@ function ImageEditor({ img, projectId, onPatch, onRemove }: ImageEditorProps) {
); } + +interface AnalyzerPanelProps { + img: ImageEntry; + onReanalyze: () => Promise; +} + +function AnalyzerPanel({ img, onReanalyze }: AnalyzerPanelProps) { + const status = img.analysisStatus; + const score = img.retentionStrengthAtAttachment; + return ( +
+
+ + Gemini analysis + + {status === "pending" && ( + analyzing… + )} + {status === "complete" && ( + complete + )} + {status === "failed" && ( + failed + )} + +
+ {status === "failed" && img.analysisError && ( +
+ {img.analysisError} +
+ )} + {status === "complete" && ( +
+ {img.vibe && ( +
+ vibe: {img.vibe} +
+ )} + {img.suggestedTreatment && ( +
+ treatment prior:{" "} + {img.suggestedTreatment} +
+ )} + {typeof score === "number" && ( +
+ retention strength:{" "} + = 7 ? "text-emerald-400" : score >= 4 ? "text-amber-400" : "text-rose-400" + } + > + {score}/10 + +
+ )} + {img.analysisRationale && ( +
{img.analysisRationale}
+ )} +
+ )} + {!status && ( +
+ No analysis yet. Click ▷ to run. +
+ )} +
+ ); +}