diff --git a/.cursor/rules/simple.mdc b/.cursor/rules/simple.mdc index 69d0c6a28..1bd4f99bf 100644 --- a/.cursor/rules/simple.mdc +++ b/.cursor/rules/simple.mdc @@ -2,6 +2,10 @@ alwaysApply: true --- +# Formatting + +- Format using `dprint fmt` from the root. Do not use `cargo fmt`. + # Mutation - Never do manual state management for form/mutation. Things like setError is anti-pattern. use useForm(from tanstack-form) and useQuery/useMutation(from tanstack-query) for 99% cases. diff --git a/Cargo.lock b/Cargo.lock index 2486a2034..fd553bd87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -913,10 +913,12 @@ version = "0.1.0" dependencies = [ "bytes", "futures-util", + "hound", "kalosm-sound", "rodio", "rubato", "thiserror 2.0.17", + "vorbis_rs", ] [[package]] @@ -3720,9 +3722,9 @@ dependencies = [ [[package]] name = "deepgram" -version = "0.6.10" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e55d68eeaf55308feb5cb6e948d49efa8564d0536a7375d8a9e06acbb5773a" +checksum = "49bf11c4dc8fc1e7c94fc4198f82f64536fdb9eded7b5a076d9597d8b67e1fd1" dependencies = [ "anyhow", "bytes", @@ -10301,8 +10303,10 @@ dependencies = [ "futures-util", "language", "owhisper-interface", + "reqwest 0.12.24", "rodio", "serde_json", + "thiserror 2.0.17", "tokio", "tokio-stream", "tracing", @@ -14769,11 +14773,13 @@ dependencies = [ name = "tauri-plugin-misc" version = "0.1.0" dependencies = [ + "audio-utils", "buffer", "host", "indoc", "lazy_static", "regex", + "rodio", "specta", "specta-typescript", "sysinfo 0.37.2", @@ -14781,6 +14787,7 @@ dependencies = [ "tauri-plugin", "tauri-plugin-opener", "tauri-specta", + "thiserror 2.0.17", "vergen-gix", ] diff --git a/Cargo.toml b/Cargo.toml index f42410868..2a74bfdcc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -226,7 +226,7 @@ kalosm-model-types = { git = "https://github.com/floneum/floneum", rev = "52967a kalosm-sound = { git = "https://github.com/floneum/floneum", rev = "52967ae", default-features = false } kalosm-streams = { git = "https://github.com/floneum/floneum", rev = "52967ae" } -deepgram = { version = "0.6.8", default-features = false } +deepgram = { version = "0.7", default-features = false } libsql = "0.9.24" block2 = "0.6" diff --git a/apps/desktop/package.json b/apps/desktop/package.json index b7788c9fa..6329aa4ce 100644 --- a/apps/desktop/package.json +++ b/apps/desktop/package.json @@ -17,12 +17,12 @@ "dependencies": { "@ai-sdk/amazon-bedrock": "^3.0.51", "@ai-sdk/anthropic": "^2.0.41", - "@ai-sdk/azure": "^2.0.63", - "@ai-sdk/google": "^2.0.28", - "@ai-sdk/google-vertex": "^3.0.58", - "@ai-sdk/openai": "^2.0.62", + "@ai-sdk/azure": "^2.0.66", + "@ai-sdk/google": "^2.0.29", + "@ai-sdk/google-vertex": "^3.0.59", + "@ai-sdk/openai": "^2.0.64", "@ai-sdk/openai-compatible": "^1.0.26", - "@ai-sdk/react": "^2.0.87", + "@ai-sdk/react": "^2.0.89", "@electric-sql/client": "^1.1.3", "@floating-ui/react": "^0.27.16", "@huggingface/languages": "^1.0.0", @@ -43,16 +43,16 @@ "@hypr/utils": "workspace:^", "@iconify-icon/react": "^3.0.3", "@lobehub/icons": "^2.43.1", - "@openrouter/ai-sdk-provider": "^1.2.0", + "@openrouter/ai-sdk-provider": "^1.2.1", "@orama/highlight": "^0.1.9", "@orama/orama": "^3.1.16", "@orama/plugin-qps": "^3.1.16", "@sentry/react": "^8.55.0", - "@supabase/supabase-js": "^2.79.0", + "@supabase/supabase-js": "^2.80.0", "@t3-oss/env-core": "^0.13.8", "@tanstack/react-form": "^1.23.8", - "@tanstack/react-query": "^5.90.6", - "@tanstack/react-router": "^1.134.12", + "@tanstack/react-query": "^5.90.7", + 
"@tanstack/react-router": "^1.134.13", "@tanstack/react-virtual": "^3.13.12", "@tauri-apps/api": "^2.9.0", "@tauri-apps/plugin-autostart": "^2.5.1", @@ -68,12 +68,12 @@ "@wavesurfer/react": "^1.0.11", "@xstate/react": "^6.0.0", "@xstate/store": "^3.11.2", - "ai": "^5.0.87", + "ai": "^5.0.89", "chroma-js": "^3.1.2", "clsx": "^2.1.1", "date-fns": "^4.1.0", "dompurify": "^3.3.0", - "effect": "^3.19.0", + "effect": "^3.19.2", "json5": "^2.2.3", "lucide-react": "^0.544.0", "motion": "^11.18.2", @@ -93,8 +93,8 @@ }, "devDependencies": { "@faker-js/faker": "^10.1.0", - "@tanstack/react-router-devtools": "^1.134.12", - "@tanstack/router-plugin": "^1.134.12", + "@tanstack/react-router-devtools": "^1.134.13", + "@tanstack/router-plugin": "^1.134.14", "@tauri-apps/cli": "^2.9.3", "@types/chroma-js": "^3.1.2", "@types/json5": "^2.2.0", @@ -108,7 +108,7 @@ "tailwindcss": "^3.4.18", "tsx": "^4.20.6", "typescript": "~5.8.3", - "vite": "^7.1.12", + "vite": "^7.2.2", "vitest": "^3.2.4" } } diff --git a/apps/desktop/src/components/main/body/sessions/floating/generate.tsx b/apps/desktop/src/components/main/body/sessions/floating/generate.tsx deleted file mode 100644 index ece70406b..000000000 --- a/apps/desktop/src/components/main/body/sessions/floating/generate.tsx +++ /dev/null @@ -1,236 +0,0 @@ -import type { LanguageModel } from "ai"; -import { AlertCircleIcon, SparklesIcon } from "lucide-react"; -import { useCallback, useState } from "react"; - -import { commands as windowsCommands } from "@hypr/plugin-windows"; -import { cn } from "@hypr/utils"; -import { useAITaskTask } from "../../../../../hooks/useAITaskTask"; -import { useLanguageModel } from "../../../../../hooks/useLLMConnection"; -import * as main from "../../../../../store/tinybase/main"; -import { createTaskId } from "../../../../../store/zustand/ai-task/task-configs"; -import { ActionableTooltipContent, FloatingButton } from "./shared"; - -export function GenerateButton({ sessionId }: { sessionId: string }) { - const [showTemplates, setShowTemplates] = useState(false); - const { model, templates, isGenerating, isError, error, onRegenerate, hasContent } = useGenerateButton(sessionId); - - const handleConfigureModel = useCallback(() => { - windowsCommands.windowShow({ type: "settings" }) - .then(() => new Promise((resolve) => setTimeout(resolve, 1000))) - .then(() => - windowsCommands.windowEmitNavigate({ type: "settings" }, { - path: "/app/settings", - search: { tab: "intelligence" }, - }) - ); - }, []); - - if (isGenerating) { - return null; - } - - const { icon, text, tooltip } = getButtonConfig(isError, model, error, handleConfigureModel); - - return ( -
-
setShowTemplates(true)} - onMouseLeave={() => setShowTemplates(false)} - > -
-
- {Object.entries(templates).length > 0 - ? ( - Object.entries(templates).map(([templateId, template]) => ( - { - setShowTemplates(false); - onRegenerate(templateId); - }} - > - {template.title} - - )) - ) - : ( - { - setShowTemplates(false); - handleGoToTemplates(); - }} - > - Create templates - - )} -
- - - - { - setShowTemplates(false); - onRegenerate(null); - }} - > - - Auto - -
-
- -
- !isError && setShowTemplates(true)} - onMouseLeave={() => setShowTemplates(false)} - onClick={() => { - setShowTemplates(false); - onRegenerate(null); - }} - disabled={!model} - tooltip={tooltip} - error={isError} - subtle={hasContent && !showTemplates} - > - {text} - -
-
- ); -} - -function useGenerateButton(sessionId: string) { - const model = useLanguageModel(); - const taskId = createTaskId(sessionId, "enhance"); - - const enhancedMd = main.UI.useCell("sessions", sessionId, "enhanced_md", main.STORE_ID); - - const updateEnhancedMd = main.UI.useSetPartialRowCallback( - "sessions", - sessionId, - (input: string) => ({ enhanced_md: input }), - [], - main.STORE_ID, - ); - - const enhanceTask = useAITaskTask(taskId, "enhance", { - onSuccess: ({ text }) => { - if (text) { - updateEnhancedMd(text); - } - }, - }); - - const templates = main.UI.useResultTable(main.QUERIES.visibleTemplates, main.STORE_ID); - - const onRegenerate = useCallback(async (templateId: string | null) => { - if (!model) { - return; - } - - await enhanceTask.start({ - model, - args: { sessionId, templateId: templateId ?? undefined }, - }); - }, [model, enhanceTask.start, sessionId]); - - const hasContent = !!enhancedMd && enhancedMd.trim().length > 0; - - return { - model, - templates, - isGenerating: enhanceTask.isGenerating, - isError: enhanceTask.isError, - error: enhanceTask.error, - onRegenerate, - hasContent, - }; -} - -function getButtonConfig( - isError: boolean, - model: LanguageModel | null, - error: Error | undefined, - handleConfigureModel: () => void, -) { - const icon = isError ? : ; - const text = isError ? "Retry" : "Regenerate"; - - const tooltip = !model - ? { - side: "top" as const, - content: ( - - ), - } - : isError && error - ? { content: error.message, side: "top" as const } - : undefined; - - return { icon, text, tooltip }; -} - -function handleGoToTemplates() { - windowsCommands.windowShow({ type: "settings" }) - .then(() => new Promise((resolve) => setTimeout(resolve, 1000))) - .then(() => - windowsCommands.windowEmitNavigate({ type: "settings" }, { - path: "/app/settings", - search: { tab: "templates" }, - }) - ); -} - -function TemplateButton({ - children, - onClick, - className, -}: { - children: React.ReactNode; - onClick: () => void; - className?: string; -}) { - return ( - - ); -} - -function Divider() { - return ( -
-
- or -
-
- ); -} diff --git a/apps/desktop/src/components/main/body/sessions/floating/index.tsx b/apps/desktop/src/components/main/body/sessions/floating/index.tsx index cb2a039eb..0f7216865 100644 --- a/apps/desktop/src/components/main/body/sessions/floating/index.tsx +++ b/apps/desktop/src/components/main/body/sessions/floating/index.tsx @@ -9,26 +9,20 @@ export function FloatingActionButton({ tab }: { tab: Extract; - } - - if (!button) { + if (!(currentTab === "raw" && !hasTranscript)) { return null; } return ( - {button} + ); } function FloatingButtonContainer({ children }: { children: ReactNode }) { return ( -
+
{children}
); diff --git a/apps/desktop/src/components/main/body/sessions/floating/listen.tsx b/apps/desktop/src/components/main/body/sessions/floating/listen.tsx index a5a396e06..b3599eb35 100644 --- a/apps/desktop/src/components/main/body/sessions/floating/listen.tsx +++ b/apps/desktop/src/components/main/body/sessions/floating/listen.tsx @@ -1,20 +1,34 @@ +import { cn } from "@hypr/utils"; + import { Icon } from "@iconify-icon/react"; +import { useQueryClient } from "@tanstack/react-query"; +import { downloadDir } from "@tauri-apps/api/path"; +import { open as selectFile } from "@tauri-apps/plugin-dialog"; import { useMediaQuery } from "@uidotdev/usehooks"; -import { useCallback } from "react"; +import { Effect, pipe } from "effect"; +import { EllipsisVerticalIcon, FileTextIcon, UploadCloudIcon } from "lucide-react"; +import { useCallback, useState } from "react"; +import { commands as miscCommands } from "@hypr/plugin-misc"; import { commands as windowsCommands } from "@hypr/plugin-windows"; +import { Button } from "@hypr/ui/components/ui/button"; +import { Popover, PopoverContent, PopoverTrigger } from "@hypr/ui/components/ui/popover"; import { Spinner } from "@hypr/ui/components/ui/spinner"; +import { Tooltip, TooltipContent, TooltipTrigger } from "@hypr/ui/components/ui/tooltip"; import { useListener } from "../../../../../contexts/listener"; +import { fromResult } from "../../../../../effect"; +import { useRunBatch } from "../../../../../hooks/useRunBatch"; import { useStartListening } from "../../../../../hooks/useStartListening"; import * as main from "../../../../../store/tinybase/main"; import { type Tab } from "../../../../../store/zustand/tabs"; +import { commands as tauriCommands } from "../../../../../types/tauri.gen"; import { RecordingIcon, useListenButtonState } from "../shared"; import { ActionableTooltipContent, FloatingButton } from "./shared"; export function ListenButton({ tab }: { tab: Extract }) { const { shouldRender } = useListenButtonState(tab.id); const { loading, stop } = useListener((state) => ({ - loading: state.loading, + loading: state.live.loading, stop: state.stop, })); @@ -61,31 +75,34 @@ function BeforeMeeingButton({ tab }: { tab: Extract } } return ( - ); } -function StartButton({ +function ListenSplitButton({ icon, text, disabled, warningMessage, - onClick, + onPrimaryClick, + sessionId, }: { icon: React.ReactNode; text: string; disabled: boolean; warningMessage: string; - onClick: () => void; + onPrimaryClick: () => void; + sessionId: string; }) { const handleAction = useCallback(() => { - onClick(); + onPrimaryClick(); windowsCommands.windowShow({ type: "settings" }) .then(() => new Promise((resolve) => setTimeout(resolve, 1000))) .then(() => @@ -94,30 +111,227 @@ function StartButton({ search: { tab: "transcription" }, }) ); - }, [onClick]); + }, [onPrimaryClick]); return ( - - ), +
+
+ + ), + } + : undefined} + > + {text} + + +
+
+ ); +} + +type FileSelection = string | string[] | null; + +function OptionsMenu({ + sessionId, + disabled, + warningMessage, + onConfigure, +}: { + sessionId: string; + disabled: boolean; + warningMessage: string; + onConfigure?: () => void; +}) { + const [open, setOpen] = useState(false); + const runBatch = useRunBatch(sessionId); + const queryClient = useQueryClient(); + + const handleFilePath = useCallback( + (selection: FileSelection, kind: "audio" | "transcript") => { + if (!selection) { + return Effect.void; + } + + const path = Array.isArray(selection) ? selection[0] : selection; + + if (!path) { + return Effect.void; + } + + const normalizedPath = path.toLowerCase(); + + if (kind === "transcript") { + if (!normalizedPath.endsWith(".vtt") && !normalizedPath.endsWith(".srt")) { + return Effect.void; } - : undefined} + + return fromResult(tauriCommands.parseSubtitle(path)); + } + + if (!normalizedPath.endsWith(".wav") && !normalizedPath.endsWith(".mp3") && !normalizedPath.endsWith(".ogg")) { + return Effect.void; + } + + return pipe( + fromResult(miscCommands.audioImport(sessionId, path)), + Effect.tap(() => + Effect.sync(() => { + queryClient.invalidateQueries({ queryKey: ["audio", sessionId, "exist"] }); + queryClient.invalidateQueries({ queryKey: ["audio", sessionId, "url"] }); + }) + ), + Effect.flatMap((importedPath) => Effect.promise(() => runBatch(importedPath, { channels: 1 }))), + ); + }, + [queryClient, runBatch, sessionId], + ); + + const selectAndHandleFile = useCallback( + (options: { title: string; filters: { name: string; extensions: string[] }[] }, kind: "audio" | "transcript") => { + if (disabled) { + return; + } + + setOpen(false); + + const program = pipe( + Effect.promise(() => downloadDir()), + Effect.flatMap((defaultPath) => + Effect.promise(() => + selectFile({ + title: options.title, + multiple: false, + directory: false, + defaultPath, + filters: options.filters, + }) + ) + ), + Effect.flatMap((selection) => handleFilePath(selection, kind)), + ); + + Effect.runPromise(program); + }, + [disabled, handleFilePath, setOpen], + ); + + const handleUploadAudio = useCallback(() => { + if (disabled) { + return; + } + + selectAndHandleFile( + { + title: "Upload Audio", + filters: [{ name: "Audio", extensions: ["wav", "mp3", "ogg"] }], + }, + "audio", + ); + }, [disabled, selectAndHandleFile]); + + const handleUploadTranscript = useCallback(() => { + if (disabled) { + return; + } + + selectAndHandleFile( + { + title: "Upload Transcript", + filters: [{ name: "Transcript", extensions: ["vtt", "srt"] }], + }, + "transcript", + ); + }, [disabled, selectAndHandleFile]); + + const triggerButton = ( + + ); + + if (disabled && warningMessage) { + return ( + + + {triggerButton} + + + + + + ); + } + + if (disabled) { + return triggerButton; + } + + return ( + + + {triggerButton} + + +
+ + +
+
+
); } diff --git a/apps/desktop/src/components/main/body/sessions/floating/shared.tsx b/apps/desktop/src/components/main/body/sessions/floating/shared.tsx index 148025416..5ab35be1e 100644 --- a/apps/desktop/src/components/main/body/sessions/floating/shared.tsx +++ b/apps/desktop/src/components/main/body/sessions/floating/shared.tsx @@ -16,6 +16,7 @@ export function FloatingButton({ tooltip, error, subtle, + className, }: { icon?: ReactNode; children: ReactNode; @@ -25,6 +26,7 @@ export function FloatingButton({ disabled?: boolean; error?: boolean; subtle?: boolean; + className?: string; tooltip?: { content: ReactNode; side?: ComponentProps["side"]; @@ -40,6 +42,7 @@ export function FloatingButton({ error && "border-red-500", !error && "border-neutral-200 focus-within:border-stone-500", subtle && "opacity-40 hover:opacity-100", + className, ])} onClick={onClick} onMouseEnter={onMouseEnter} diff --git a/apps/desktop/src/components/main/body/sessions/index.tsx b/apps/desktop/src/components/main/body/sessions/index.tsx index d276c6e62..20cad7df5 100644 --- a/apps/desktop/src/components/main/body/sessions/index.tsx +++ b/apps/desktop/src/components/main/body/sessions/index.tsx @@ -25,8 +25,8 @@ export const TabItemNote: TabItem> = ( }, ) => { const title = main.UI.useCell("sessions", rowIdfromTab(tab), "title", main.STORE_ID); - const { status, sessionId } = useListener((state) => ({ status: state.status, sessionId: state.sessionId })); - const isActive = status !== "inactive" && sessionId === tab.id; + const sessionMode = useListener((state) => state.getSessionMode(tab.id)); + const isActive = sessionMode === "running_active" || sessionMode === "finalizing"; return ( > = ( }; export function TabContentNote({ tab }: { tab: Extract }) { - const listenerStatus = useListener((state) => state.status); + const listenerStatus = useListener((state) => state.live.status); const { data: audioUrl } = useQuery({ queryKey: ["audio", tab.id, "url"], queryFn: () => miscCommands.audioPath(tab.id), diff --git a/apps/desktop/src/components/main/body/sessions/note-input/header.tsx b/apps/desktop/src/components/main/body/sessions/note-input/header.tsx index 75f70cf52..d1988a44d 100644 --- a/apps/desktop/src/components/main/body/sessions/note-input/header.tsx +++ b/apps/desktop/src/components/main/body/sessions/note-input/header.tsx @@ -1,9 +1,10 @@ +import { cn } from "@hypr/utils"; + import { useCallback, useState } from "react"; import { commands as windowsCommands } from "@hypr/plugin-windows"; import { Popover, PopoverContent, PopoverTrigger } from "@hypr/ui/components/ui/popover"; import { Tooltip, TooltipContent, TooltipTrigger } from "@hypr/ui/components/ui/tooltip"; -import { cn } from "@hypr/utils"; import { AlertCircleIcon, RefreshCcwIcon, SparklesIcon } from "lucide-react"; import { useListener } from "../../../../../contexts/listener"; import { useAITaskTask } from "../../../../../hooks/useAITaskTask"; @@ -13,6 +14,7 @@ import { createTaskId } from "../../../../../store/zustand/ai-task/task-configs" import { type EditorView } from "../../../../../store/zustand/tabs/schema"; import { useHasTranscript } from "../shared"; import { EditingControls } from "./transcript/editing-controls"; +import { TranscriptionProgress } from "./transcript/progress"; function HeaderTab({ isActive, @@ -164,64 +166,67 @@ export function Header( editorTabs, currentTab, handleTabChange, + isInactive, isEditing, setIsEditing, - isInactive, }: { sessionId: string; editorTabs: EditorView[]; currentTab: EditorView; 
handleTabChange: (view: EditorView) => void; + isInactive: boolean; isEditing: boolean; setIsEditing: (isEditing: boolean) => void; - isInactive: boolean; }, ) { + const isBatchProcessing = useListener((state) => sessionId in state.batch); + if (editorTabs.length === 1 && editorTabs[0] === "raw") { return null; } - const showEditingControls = currentTab === "transcript" && isInactive; + const showProgress = currentTab === "transcript" && (isInactive || isBatchProcessing); + const showEditingControls = currentTab === "transcript" && isInactive && !isBatchProcessing; return ( -
-
- {editorTabs.map((view) => { - if (view === "enhanced") { +
+
+
+ {editorTabs.map((view) => { + if (view === "enhanced") { + return ( + handleTabChange(view)} + /> + ); + } + return ( - handleTabChange(view)} - /> + > + {labelForEditorView(view)} + ); - } - - return ( - handleTabChange(view)} - > - {labelForEditorView(view)} - - ); - })} + })} +
+ {showProgress && } + {showEditingControls && }
- {showEditingControls && }
); } export function useEditorTabs({ sessionId }: { sessionId: string }): EditorView[] { - const { status, sessionId: activeSessionId } = useListener((state) => ({ - status: state.status, - sessionId: state.sessionId, - })); + const sessionMode = useListener((state) => state.getSessionMode(sessionId)); const hasTranscript = useHasTranscript(sessionId); - if (status === "running_active" && activeSessionId === sessionId) { + if (sessionMode === "running_active" || sessionMode === "running_batch") { return ["raw", "transcript"]; } diff --git a/apps/desktop/src/components/main/body/sessions/note-input/index.tsx b/apps/desktop/src/components/main/body/sessions/note-input/index.tsx index 989fab256..e1a11d7d7 100644 --- a/apps/desktop/src/components/main/body/sessions/note-input/index.tsx +++ b/apps/desktop/src/components/main/body/sessions/note-input/index.tsx @@ -17,8 +17,8 @@ export function NoteInput({ tab }: { tab: Extract }) const editorTabs = useEditorTabs({ sessionId: tab.id }); const { updateSessionTabState } = useTabs(); const editorRef = useRef<{ editor: TiptapEditor | null }>(null); + const inactive = useListener((state) => state.live.status === "inactive"); const [isEditing, setIsEditing] = useState(false); - const inactive = useListener((state) => state.status === "inactive"); const sessionId = tab.id; useAutoEnhance(tab); @@ -50,9 +50,9 @@ export function NoteInput({ tab }: { tab: Extract }) editorTabs={editorTabs} currentTab={currentTab} handleTabChange={handleTabChange} + isInactive={inactive} isEditing={isEditing} setIsEditing={setIsEditing} - isInactive={inactive} />
diff --git a/apps/desktop/src/components/main/body/sessions/note-input/raw.tsx b/apps/desktop/src/components/main/body/sessions/note-input/raw.tsx index bc708bb47..c4e943bcd 100644 --- a/apps/desktop/src/components/main/body/sessions/note-input/raw.tsx +++ b/apps/desktop/src/components/main/body/sessions/note-input/raw.tsx @@ -1,14 +1,8 @@ -import { downloadDir } from "@tauri-apps/api/path"; -import { open as selectFile } from "@tauri-apps/plugin-dialog"; -import { useMediaQuery } from "@uidotdev/usehooks"; -import { Effect, pipe } from "effect"; -import { forwardRef, useCallback } from "react"; +import { forwardRef } from "react"; import NoteEditor, { type TiptapEditor } from "@hypr/tiptap/editor"; import type { PlaceholderFunction } from "@hypr/tiptap/shared"; -import { cn } from "@hypr/utils"; import * as main from "../../../../../store/tinybase/main"; -import { commands } from "../../../../../types/tauri.gen"; export const RawEditor = forwardRef<{ editor: TiptapEditor | null }, { sessionId: string }>( ({ sessionId }, ref) => { @@ -40,8 +34,6 @@ export const RawEditor = forwardRef<{ editor: TiptapEditor | null }, { sessionId }, ); -RawEditor.displayName = "RawEditor"; - const Placeholder: PlaceholderFunction = ({ node, pos }) => { if (node.type.name === "paragraph" && pos === 0) { return ; @@ -51,61 +43,11 @@ const Placeholder: PlaceholderFunction = ({ node, pos }) => { }; const PlaceHolderInner = () => { - const handleFileSelect = useCallback((e: React.MouseEvent) => { - e.preventDefault(); - - const program = pipe( - Effect.promise(() => downloadDir()), - Effect.flatMap((defaultPath) => - Effect.promise(() => - selectFile({ - title: "Upload Audio or Transcript", - multiple: false, - directory: false, - defaultPath, - filters: [ - { name: "Audio", extensions: ["wav", "mp3", "ogg"] }, - { name: "Transcript", extensions: ["vtt", "srt"] }, - ], - }) - ) - ), - Effect.flatMap((path) => { - if (!path) { - return Effect.void; - } - - if (path.endsWith(".vtt") || path.endsWith(".srt")) { - return pipe( - Effect.promise(() => commands.parseSubtitle(path)), - Effect.tap((subtitle) => Effect.sync(() => console.log(subtitle))), - ); - } - - return Effect.void; - }), - ); - - Effect.runPromise(program); - }, []); - - const isNarrow = useMediaQuery("(max-width: 768px)"); - return (
Take notes or press / for commands. -
- You can also upload/drop an - -
); }; diff --git a/apps/desktop/src/components/main/body/sessions/note-input/transcript/index.tsx b/apps/desktop/src/components/main/body/sessions/note-input/transcript/index.tsx index 7fa30dcdc..28a19cf1c 100644 --- a/apps/desktop/src/components/main/body/sessions/note-input/transcript/index.tsx +++ b/apps/desktop/src/components/main/body/sessions/note-input/transcript/index.tsx @@ -3,12 +3,10 @@ import { TranscriptViewer } from "./viewer"; export function Transcript({ sessionId, isEditing }: { sessionId: string; isEditing: boolean }) { return ( -
-
- {isEditing - ? - : } -
+
+ {isEditing + ? + : }
); } diff --git a/apps/desktop/src/components/main/body/sessions/note-input/transcript/progress.tsx b/apps/desktop/src/components/main/body/sessions/note-input/transcript/progress.tsx new file mode 100644 index 000000000..193b5faa3 --- /dev/null +++ b/apps/desktop/src/components/main/body/sessions/note-input/transcript/progress.tsx @@ -0,0 +1,38 @@ +import { useMemo } from "react"; + +import { Spinner } from "@hypr/ui/components/ui/spinner"; + +import { useListener } from "../../../../../../contexts/listener"; + +export function TranscriptionProgress({ sessionId }: { sessionId: string }) { + const { progress: progressRaw, mode } = useListener((state) => ({ + progress: state.batch[sessionId] ?? null, + mode: state.getSessionMode(sessionId), + })); + + const isRunning = mode === "running_batch"; + + const statusLabel = useMemo(() => { + if (!progressRaw) { + return "Preparing audio"; + } + + const percent = Math.round(progressRaw.percentage * 100); + return `${percent}%`; + }, [progressRaw]); + + if (!isRunning) { + return null; + } + + return ( +
+
+ + + Processing · {statusLabel} + +
+
+ ); +} diff --git a/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/index.tsx b/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/index.tsx index 60d5898f5..77e3cab0a 100644 --- a/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/index.tsx +++ b/apps/desktop/src/components/main/body/sessions/note-input/transcript/shared/index.tsx @@ -45,8 +45,9 @@ export function TranscriptContainer({ main.STORE_ID, ); - const currentActive = useListener((state) => state.status !== "inactive" && state.sessionId === sessionId); - const editable = useListener((state) => state.status === "inactive" && Object.keys(operations ?? {}).length > 0); + const sessionMode = useListener((state) => state.getSessionMode(sessionId)); + const currentActive = sessionMode === "running_active" || sessionMode === "finalizing"; + const editable = sessionMode === "inactive" && Object.keys(operations ?? {}).length > 0; const partialWords = useListener((state) => Object.values(state.partialWordsByChannel).flat()); const partialHints = useListener((state) => state.partialHints); diff --git a/apps/desktop/src/components/main/body/sessions/outer-header/folder.tsx b/apps/desktop/src/components/main/body/sessions/outer-header/folder.tsx index 32b788c1a..92287de62 100644 --- a/apps/desktop/src/components/main/body/sessions/outer-header/folder.tsx +++ b/apps/desktop/src/components/main/body/sessions/outer-header/folder.tsx @@ -29,7 +29,7 @@ export function FolderChain({ sessionId }: { sessionId: string }) { return ( - + {folderId && } {!folderId ? @@ -60,7 +60,12 @@ function RenderIfRootExist( renderCrumb={({ id, name }) => ( - diff --git a/apps/desktop/src/components/main/body/sessions/outer-header/listen.tsx b/apps/desktop/src/components/main/body/sessions/outer-header/listen.tsx index e03a9214f..eb649d387 100644 --- a/apps/desktop/src/components/main/body/sessions/outer-header/listen.tsx +++ b/apps/desktop/src/components/main/body/sessions/outer-header/listen.tsx @@ -85,14 +85,16 @@ function StartButton({ sessionId }: { sessionId: string }) { function InMeetingIndicator({ sessionId }: { sessionId: string }) { const [ref, hovered] = useHover(); - const { active, finalizing, stop, amplitude, muted } = useListener((state) => ({ - active: state.status !== "inactive" && state.sessionId === sessionId, - finalizing: state.status === "finalizing" && state.sessionId === sessionId, + const { mode, stop, amplitude, muted } = useListener((state) => ({ + mode: state.getSessionMode(sessionId), stop: state.stop, - amplitude: state.amplitude, - muted: state.muted, + amplitude: state.live.amplitude, + muted: state.live.muted, })); + const active = mode === "running_active" || mode === "finalizing"; + const finalizing = mode === "finalizing"; + if (!active) { return null; } diff --git a/apps/desktop/src/components/main/body/sessions/outer-header/overflow.tsx b/apps/desktop/src/components/main/body/sessions/outer-header/overflow.tsx index 1f3eaa946..9866c0aff 100644 --- a/apps/desktop/src/components/main/body/sessions/outer-header/overflow.tsx +++ b/apps/desktop/src/components/main/body/sessions/outer-header/overflow.tsx @@ -104,15 +104,20 @@ function ExportPDF() { } function Listening({ sessionId }: { sessionId: string }) { - const { stop, status, activeSessionId } = useListener((state) => ({ + const { mode, stop } = useListener((state) => ({ + mode: state.getSessionMode(sessionId), stop: state.stop, - status: state.status, - activeSessionId: state.sessionId, })); - const 
isListening = status !== "inactive" && activeSessionId === sessionId; + const isListening = mode === "running_active" || mode === "finalizing"; + const isFinalizing = mode === "finalizing"; + const isBatching = mode === "running_batch"; const startListening = useStartListening(sessionId); const handleToggleListening = () => { + if (isBatching) { + return; + } + if (isListening) { stop(); } else { @@ -121,9 +126,19 @@ function Listening({ sessionId }: { sessionId: string }) { }; return ( - + {isListening ? : } - {isListening ? "Stop listening" : "Start listening"} + + {isBatching + ? "Batch processing" + : isListening + ? "Stop listening" + : "Start listening"} + ); } diff --git a/apps/desktop/src/components/main/body/sessions/shared.tsx b/apps/desktop/src/components/main/body/sessions/shared.tsx index 565bcd590..3a371b8ff 100644 --- a/apps/desktop/src/components/main/body/sessions/shared.tsx +++ b/apps/desktop/src/components/main/body/sessions/shared.tsx @@ -22,7 +22,8 @@ export function useHasTranscript(sessionId: string): boolean { export function useCurrentNoteTab(tab: Extract): EditorView { const hasTranscript = useHasTranscript(tab.id); - const isListenerActive = useListener((state) => (state.status !== "inactive") && state.sessionId === tab.id); + const sessionMode = useListener((state) => state.getSessionMode(tab.id)); + const isListenerActive = sessionMode === "running_active" || sessionMode === "finalizing"; return useMemo( () => { @@ -56,7 +57,9 @@ export function RecordingIcon({ disabled }: { disabled?: boolean }) { } export function useListenButtonState(sessionId: string) { - const active = useListener((state) => state.status !== "inactive" && state.sessionId === sessionId); + const sessionMode = useListener((state) => state.getSessionMode(sessionId)); + const active = sessionMode === "running_active" || sessionMode === "finalizing"; + const batching = sessionMode === "running_batch"; const taskId = createTaskId(sessionId, "enhance"); const { status } = useAITaskTask(taskId, "enhance"); @@ -64,8 +67,12 @@ export function useListenButtonState(sessionId: string) { const sttConnection = useSTTConnection(); const shouldRender = !active && !generating; - const isDisabled = !sttConnection; - const warningMessage = !sttConnection ? "Transcription model not available." : ""; + const isDisabled = !sttConnection || batching; + const warningMessage = !sttConnection + ? "Transcription model not available." + : batching + ? "Batch transcription in progress." 
+ : ""; return { shouldRender, diff --git a/apps/desktop/src/routes/app/onboarding/calendar.tsx b/apps/desktop/src/components/onboarding/calendar.tsx similarity index 100% rename from apps/desktop/src/routes/app/onboarding/calendar.tsx rename to apps/desktop/src/components/onboarding/calendar.tsx diff --git a/apps/desktop/src/routes/app/onboarding/permissions.tsx b/apps/desktop/src/components/onboarding/permissions.tsx similarity index 94% rename from apps/desktop/src/routes/app/onboarding/permissions.tsx rename to apps/desktop/src/components/onboarding/permissions.tsx index 662fb8731..0decc9f7d 100644 --- a/apps/desktop/src/routes/app/onboarding/permissions.tsx +++ b/apps/desktop/src/components/onboarding/permissions.tsx @@ -2,8 +2,8 @@ import { EyeIcon, MicIcon, Volume2Icon } from "lucide-react"; import { Button } from "@hypr/ui/components/ui/button"; -import { PermissionRow } from "../../../components/shared/permission-row"; -import { usePermissions } from "../../../hooks/use-permissions"; +import { usePermissions } from "../../hooks/use-permissions"; +import { PermissionRow } from "../shared/permission-row"; import { OnboardingContainer, type OnboardingNext } from "./shared"; type PermissionsProps = { diff --git a/apps/desktop/src/routes/app/onboarding/shared.tsx b/apps/desktop/src/components/onboarding/shared.tsx similarity index 100% rename from apps/desktop/src/routes/app/onboarding/shared.tsx rename to apps/desktop/src/components/onboarding/shared.tsx diff --git a/apps/desktop/src/routes/app/onboarding/welcome.tsx b/apps/desktop/src/components/onboarding/welcome.tsx similarity index 100% rename from apps/desktop/src/routes/app/onboarding/welcome.tsx rename to apps/desktop/src/components/onboarding/welcome.tsx diff --git a/apps/desktop/src/components/settings/ai/stt/configure.tsx b/apps/desktop/src/components/settings/ai/stt/configure.tsx index 348cc886a..577775139 100644 --- a/apps/desktop/src/components/settings/ai/stt/configure.tsx +++ b/apps/desktop/src/components/settings/ai/stt/configure.tsx @@ -440,7 +440,7 @@ function useSafeSelectModel() { main.STORE_ID, ); - const active = useListener((state) => state.status !== "inactive"); + const active = useListener((state) => state.live.status !== "inactive"); const handler = useCallback((model: SupportedSttModel) => { if (active) { diff --git a/apps/desktop/src/components/settings/memory/custom-vocabulary.tsx b/apps/desktop/src/components/settings/memory/custom-vocabulary.tsx index 2bee07de8..f10bf308f 100644 --- a/apps/desktop/src/components/settings/memory/custom-vocabulary.tsx +++ b/apps/desktop/src/components/settings/memory/custom-vocabulary.tsx @@ -1,10 +1,11 @@ +import { useForm } from "@tanstack/react-form"; import { Check, MinusCircle, Pencil, Plus, X } from "lucide-react"; import { useMemo, useState } from "react"; import { Button } from "@hypr/ui/components/ui/button"; import { cn } from "@hypr/utils"; import * as main from "../../../store/tinybase/main"; -import { QUERIES, STORE_ID, UI } from "../../../store/tinybase/main"; +import { METRICS, QUERIES, STORE_ID, UI } from "../../../store/tinybase/main"; import { id } from "../../../utils"; interface VocabItem { @@ -58,9 +59,23 @@ function useVocabs() { export function CustomVocabularyView() { const vocabItems = useVocabs(); const mutations = useVocabMutations(); - const [searchValue, setSearchValue] = useState(""); const [editingId, setEditingId] = useState(null); - const [editValues, setEditValues] = useState>({}); + const [searchValue, setSearchValue] = useState(""); + 
const totalCustomVocabs = UI.useMetric(METRICS.totalCustomVocabs, STORE_ID) ?? 0; + + const form = useForm({ + defaultValues: { + search: "", + }, + onSubmit: ({ value }) => { + const text = value.search.trim(); + if (text) { + mutations.create(text); + form.reset(); + setSearchValue(""); + } + }, + }); const filteredItems = useMemo(() => { if (!searchValue.trim()) { @@ -74,81 +89,55 @@ export function CustomVocabularyView() { const exactMatch = allTexts.includes(searchValue.toLowerCase()); const showAddButton = searchValue.trim() && !exactMatch; - const handleAdd = () => { - const text = searchValue.trim(); - if (text && !exactMatch) { - mutations.create(text); - setSearchValue(""); - } - }; - - const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === "Enter" && showAddButton) { - e.preventDefault(); - handleAdd(); - } - }; - - const startEdit = (item: VocabItem) => { - setEditingId(item.rowId); - setEditValues({ ...editValues, [item.rowId]: item.text }); - }; - - const cancelEdit = (rowId: string) => { - setEditingId(null); - const { [rowId]: _, ...rest } = editValues; - setEditValues(rest); - }; - - const saveEdit = (rowId: string) => { - const newText = editValues[rowId]?.trim(); - if (!newText) { - return; - } - - const isDuplicate = vocabItems.some( - (item) => item.rowId !== rowId && item.text.toLowerCase() === newText.toLowerCase(), - ); - if (isDuplicate) { - return; - } - - mutations.update(rowId, newText); - setEditingId(null); - const { [rowId]: _, ...rest } = editValues; - setEditValues(rest); - }; - return (
-

Custom vocabulary

-

- Add jargons or industry/company-specific terms to improve transcription accuracy -

+
+
+

Custom vocabulary

+

+ Add jargon or industry/company-specific terms to improve transcription accuracy +

+
+ + {totalCustomVocabs} {totalCustomVocabs === 1 ? "term" : "terms"} + +
-
- setSearchValue(e.target.value)} - onKeyDown={handleKeyDown} - placeholder="Search or add custom vocabulary" - className="flex-1 text-sm text-neutral-900 placeholder:text-neutral-500 focus:outline-none bg-transparent" - /> +
{ + e.preventDefault(); + e.stopPropagation(); + form.handleSubmit(); + }} + className="flex items-center gap-2 px-4 py-3 border-b border-neutral-200" + > + + {(field) => ( + { + field.handleChange(e.target.value); + setSearchValue(e.target.value); + }} + placeholder="Search or add custom vocabulary" + className="flex-1 text-sm text-neutral-900 placeholder:text-neutral-500 focus:outline-none bg-transparent" + /> + )} + {showAddButton && ( )} -
+
{filteredItems.length === 0 @@ -162,12 +151,11 @@ export function CustomVocabularyView() { setEditValues({ ...editValues, [item.rowId]: value })} - onStartEdit={() => startEdit(item)} - onCancelEdit={() => cancelEdit(item.rowId)} - onSaveEdit={() => saveEdit(item.rowId)} + onStartEdit={() => setEditingId(item.rowId)} + onCancelEdit={() => setEditingId(null)} + onUpdate={mutations.update} onRemove={() => mutations.delete(item.rowId)} /> )) @@ -180,36 +168,60 @@ export function CustomVocabularyView() { interface VocabularyItemProps { item: VocabItem; + vocabItems: VocabItem[]; isEditing: boolean; - editValue: string; - onEditValueChange: (value: string) => void; onStartEdit: () => void; onCancelEdit: () => void; - onSaveEdit: () => void; + onUpdate: (rowId: string, text: string) => void; onRemove: () => void; } function VocabularyItem({ item, + vocabItems, isEditing, - editValue, - onEditValueChange, onStartEdit, onCancelEdit, - onSaveEdit, + onUpdate, onRemove, }: VocabularyItemProps) { const [hoveredItem, setHoveredItem] = useState(false); - const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === "Enter") { - e.preventDefault(); - onSaveEdit(); - } else if (e.key === "Escape") { - e.preventDefault(); - onCancelEdit(); - } - }; + const form = useForm({ + defaultValues: { + text: item.text, + }, + onSubmit: ({ value }) => { + const text = value.text.trim(); + if (text && text !== item.text) { + onUpdate(item.rowId, text); + onCancelEdit(); + } + }, + validators: { + onChange: ({ value }) => { + const text = value.text.trim(); + if (!text) { + return { + fields: { + text: "Vocabulary term cannot be empty", + }, + }; + } + const isDuplicate = vocabItems.some( + (v) => v.rowId !== item.rowId && v.text.toLowerCase() === text.toLowerCase(), + ); + if (isDuplicate) { + return { + fields: { + text: "This term already exists", + }, + }; + } + return undefined; + }, + }, + }); return (
{isEditing ? ( - onEditValueChange(e.target.value)} - onKeyDown={handleKeyDown} - className="flex-1 text-sm text-neutral-900 focus:outline-none bg-transparent" - autoFocus - /> + + {(field) => ( + field.handleChange(e.target.value)} + onKeyDown={(e) => { + if (e.key === "Enter") { + e.preventDefault(); + form.handleSubmit(); + } else if (e.key === "Escape") { + e.preventDefault(); + onCancelEdit(); + } + }} + className="flex-1 text-sm text-neutral-900 focus:outline-none bg-transparent" + autoFocus + /> + )} + ) : {item.text}}
{isEditing ? ( - <> - - - + [state.canSubmit]}> + {([canSubmit]) => ( + <> + + + + )} + ) : ( hoveredItem && ( diff --git a/apps/desktop/src/config/use-config.ts b/apps/desktop/src/config/use-config.ts index bd2ea6e62..3a456a075 100644 --- a/apps/desktop/src/config/use-config.ts +++ b/apps/desktop/src/config/use-config.ts @@ -42,7 +42,7 @@ export function useConfigValues(keys: readonly K[]): { [P i } export function useConfigSideEffects(keys?: ConfigKey[]) { - const active = useListener((state) => state.status === "running_active"); + const active = useListener((state) => state.live.status === "running_active"); const configsToWatch = keys ?? (Object.keys(CONFIG_REGISTRY) as ConfigKey[]); const allValues = main.UI.useValues(main.STORE_ID); diff --git a/apps/desktop/src/hooks/useAITaskTask.ts b/apps/desktop/src/hooks/useAITaskTask.ts index 2bb139412..14509923e 100644 --- a/apps/desktop/src/hooks/useAITaskTask.ts +++ b/apps/desktop/src/hooks/useAITaskTask.ts @@ -5,6 +5,7 @@ import { shallow } from "zustand/shallow"; import { useAITask } from "../contexts/ai-task"; import type { TaskArgsMap, TaskId, TaskType } from "../store/zustand/ai-task/task-configs"; import { getTaskState, type TaskState, type TaskStatus } from "../store/zustand/ai-task/tasks"; +import { useLatestRef } from "./useLatestRef"; type SuccessPayload = { text: string; @@ -94,11 +95,3 @@ export function useAITaskTask( reset: resetTask, }; } - -function useLatestRef(value: T | undefined) { - const ref = useRef(value); - useEffect(() => { - ref.current = value; - }, [value]); - return ref; -} diff --git a/apps/desktop/src/hooks/useAutoEnhance.ts b/apps/desktop/src/hooks/useAutoEnhance.ts index 3640245a4..1a8411d67 100644 --- a/apps/desktop/src/hooks/useAutoEnhance.ts +++ b/apps/desktop/src/hooks/useAutoEnhance.ts @@ -14,7 +14,7 @@ export function useAutoEnhance(tab: Extract) { const model = useLanguageModel(); const { updateSessionTabState } = useTabs(); - const listenerStatus = useListener((state) => state.status); + const listenerStatus = useListener((state) => state.live.status); const prevListenerStatus = usePrevious(listenerStatus); const transcriptIds = main.UI.useSliceRowIds( diff --git a/apps/desktop/src/hooks/useLatestRef.ts b/apps/desktop/src/hooks/useLatestRef.ts new file mode 100644 index 000000000..d86022e2d --- /dev/null +++ b/apps/desktop/src/hooks/useLatestRef.ts @@ -0,0 +1,11 @@ +import { useEffect, useRef } from "react"; + +export function useLatestRef(value: T) { + const ref = useRef(value); + + useEffect(() => { + ref.current = value; + }, [value]); + + return ref; +} diff --git a/apps/desktop/src/hooks/useRunBatch.ts b/apps/desktop/src/hooks/useRunBatch.ts new file mode 100644 index 000000000..fcc76e69b --- /dev/null +++ b/apps/desktop/src/hooks/useRunBatch.ts @@ -0,0 +1,148 @@ +import { useCallback } from "react"; + +import type { BatchParams } from "@hypr/plugin-listener"; + +import { useConfigValue } from "../config/use-config"; +import { useListener } from "../contexts/listener"; +import * as main from "../store/tinybase/main"; +import type { HandlePersistCallback } from "../store/zustand/listener/transcript"; +import { type Tab, useTabs } from "../store/zustand/tabs"; +import { id } from "../utils"; +import { useKeywords } from "./useKeywords"; +import { useSTTConnection } from "./useSTTConnection"; + +type RunOptions = { + handlePersist?: HandlePersistCallback; + channels?: number; + model?: string; + baseUrl?: string; + apiKey?: string; + keywords?: string[]; + languages?: string[]; +}; + +export const 
useRunBatch = (sessionId: string) => { + const store = main.UI.useStore(main.STORE_ID); + const { user_id } = main.UI.useValues(main.STORE_ID); + + const runBatch = useListener((state) => state.runBatch); + const sessionTab = useTabs((state) => { + const found = state.tabs.find( + (tab): tab is Extract => tab.type === "sessions" && tab.id === sessionId, + ); + return found ?? null; + }); + const updateSessionTabState = useTabs((state) => state.updateSessionTabState); + + const conn = useSTTConnection(); + const keywords = useKeywords(sessionId); + const languages = useConfigValue("spoken_languages"); + + return useCallback( + async (filePath: string, options?: RunOptions) => { + if (!store || !conn || !runBatch) { + console.error("no_batch_connection"); + return; + } + + const provider: BatchParams["provider"] | null = (() => { + if (conn.provider === "deepgram") { + return "deepgram"; + } + + if (conn.provider === "hyprnote" && conn.model.startsWith("am-")) { + return "am"; + } + + return null; + })(); + + if (!provider) { + console.error("unsupported_batch_provider", conn.provider); + return; + } + + if (sessionTab) { + updateSessionTabState(sessionTab, { editor: "transcript" }); + } + + const transcriptId = id(); + const createdAt = new Date().toISOString(); + + store.setRow("transcripts", transcriptId, { + session_id: sessionId, + user_id: user_id ?? "", + created_at: createdAt, + started_at: Date.now(), + }); + + const handlePersist: HandlePersistCallback | undefined = options?.handlePersist; + + const persist = handlePersist + ?? ((words, hints) => { + if (words.length === 0) { + return; + } + + const wordIds: string[] = []; + + words.forEach((word) => { + const wordId = id(); + + store.setRow("words", wordId, { + transcript_id: transcriptId, + text: word.text, + start_ms: word.start_ms, + end_ms: word.end_ms, + channel: word.channel, + user_id: user_id ?? "", + created_at: new Date().toISOString(), + }); + + wordIds.push(wordId); + }); + + hints.forEach((hint) => { + if (hint.data.type !== "provider_speaker_index") { + return; + } + + const wordId = wordIds[hint.wordIndex]; + const word = words[hint.wordIndex]; + + if (!wordId || !word) { + return; + } + + store.setRow("speaker_hints", id(), { + transcript_id: transcriptId, + word_id: wordId, + type: "provider_speaker_index", + value: JSON.stringify({ + provider: hint.data.provider ?? conn.provider, + channel: hint.data.channel ?? word.channel, + speaker_index: hint.data.speaker_index, + }), + user_id: user_id ?? "", + created_at: new Date().toISOString(), + }); + }); + }); + + const params: BatchParams = { + session_id: sessionId, + provider, + file_path: filePath, + model: options?.model ?? conn.model, + base_url: options?.baseUrl ?? conn.baseUrl, + api_key: options?.apiKey ?? conn.apiKey, + keywords: options?.keywords ?? keywords ?? [], + languages: options?.languages ?? languages ?? 
[], + channels: options?.channels, + }; + + await runBatch(params, { handlePersist: persist, sessionId }); + }, + [conn, keywords, languages, runBatch, sessionId, sessionTab, store, updateSessionTabState, user_id], + ); +}; diff --git a/apps/desktop/src/hooks/useSTTConnection.ts b/apps/desktop/src/hooks/useSTTConnection.ts index 954aa8fe9..16ccda96f 100644 --- a/apps/desktop/src/hooks/useSTTConnection.ts +++ b/apps/desktop/src/hooks/useSTTConnection.ts @@ -1,4 +1,5 @@ import { useQuery } from "@tanstack/react-query"; +import { useMemo } from "react"; import { commands as localSttCommands } from "@hypr/plugin-local-stt"; import { ProviderId } from "../components/settings/ai/stt/shared"; @@ -35,51 +36,56 @@ export const useSTTConnection = (): Connection | null => { return null; } - try { - const servers = await localSttCommands.getServers(); + const servers = await localSttCommands.getServers(); - if (servers.status !== "ok") { - return null; - } - - const isInternalModel = current_stt_model.startsWith("Quantized"); - const server = isInternalModel ? servers.data.internal : servers.data.external; + if (servers.status !== "ok") { + return null; + } - if (server?.health === "ready" && server.url) { - return { - provider: current_stt_provider!, - model: current_stt_model, - baseUrl: server.url, - apiKey: "", - }; - } + const isInternalModel = current_stt_model.startsWith("Quantized"); + const server = isInternalModel ? servers.data.internal : servers.data.external; - return null; - } catch { - return null; + if (server?.health === "ready" && server.url) { + return { + provider: current_stt_provider!, + model: current_stt_model, + baseUrl: server.url, + apiKey: "", + }; } + + return null; }, }); - if (!current_stt_provider || !current_stt_model) { - return null; - } - - if (isLocalModel) { - return localConnection ?? null; - } - const baseUrl = providerConfig?.base_url?.trim(); const apiKey = providerConfig?.api_key?.trim(); - if (!baseUrl || !apiKey) { - return null; - } - - return { - provider: current_stt_provider, - model: current_stt_model, + return useMemo(() => { + if (!current_stt_provider || !current_stt_model) { + return null; + } + + if (isLocalModel) { + return localConnection ?? 
null; + } + + if (!baseUrl || !apiKey) { + return null; + } + + return { + provider: current_stt_provider, + model: current_stt_model, + baseUrl, + apiKey, + }; + }, [ + current_stt_provider, + current_stt_model, + isLocalModel, + localConnection, baseUrl, apiKey, - }; + ]); }; diff --git a/apps/desktop/src/routes/app/onboarding/index.tsx b/apps/desktop/src/routes/app/onboarding/index.tsx index e29bd0202..2e119cf65 100644 --- a/apps/desktop/src/routes/app/onboarding/index.tsx +++ b/apps/desktop/src/routes/app/onboarding/index.tsx @@ -5,10 +5,10 @@ import { commands as windowsCommands } from "@hypr/plugin-windows"; import { createFileRoute, useNavigate } from "@tanstack/react-router"; import { z } from "zod"; -import { Calendars } from "./calendar"; -import { Permissions } from "./permissions"; -import type { OnboardingNext } from "./shared"; -import { Welcome } from "./welcome"; +import { Calendars } from "../../../components/onboarding/calendar"; +import { Permissions } from "../../../components/onboarding/permissions"; +import type { OnboardingNext } from "../../../components/onboarding/shared"; +import { Welcome } from "../../../components/onboarding/welcome"; const STEPS = ["welcome", "calendars", "permissions"] as const; diff --git a/apps/desktop/src/store/tinybase/main.ts b/apps/desktop/src/store/tinybase/main.ts index f40c93a21..3016837aa 100644 --- a/apps/desktop/src/store/tinybase/main.ts +++ b/apps/desktop/src/store/tinybase/main.ts @@ -434,6 +434,12 @@ export const StoreComponent = ({ persist = true }: { persist?: boolean }) => { "organizations", "sum", () => 1, + ) + .setMetricDefinition( + METRICS.totalCustomVocabs, + "memories", + "sum", + (getCell) => (getCell("type") === "vocab" ? 1 : 0), )); const checkpoints = useCreateCheckpoints(store, (store) => createCheckpoints(store)); @@ -469,6 +475,7 @@ export const QUERIES = { export const METRICS = { totalHumans: "totalHumans", totalOrganizations: "totalOrganizations", + totalCustomVocabs: "totalCustomVocabs", }; export const INDEXES = { diff --git a/apps/desktop/src/store/zustand/listener/batch.ts b/apps/desktop/src/store/zustand/listener/batch.ts new file mode 100644 index 000000000..80003f09b --- /dev/null +++ b/apps/desktop/src/store/zustand/listener/batch.ts @@ -0,0 +1,169 @@ +import type { StoreApi } from "zustand"; + +import type { BatchAlternatives, BatchResponse, StreamResponse } from "@hypr/plugin-listener"; +import type { RuntimeSpeakerHint, WordLike } from "../../../utils/segment"; + +import type { HandlePersistCallback } from "./transcript"; +import { fixSpacingForWords } from "./transcript"; + +export type BatchState = { + batch: Record< + string, + { + percentage: number; + isComplete?: boolean; + } + >; +}; + +export type BatchActions = { + handleBatchStarted: (sessionId: string) => void; + handleBatchResponse: (sessionId: string, response: BatchResponse) => void; + handleBatchResponseStreamed: (sessionId: string, response: StreamResponse, percentage: number) => void; + clearBatchSession: (sessionId: string) => void; +}; + +export const createBatchSlice = < + T extends BatchState & { + handlePersist?: HandlePersistCallback; + handleTranscriptResponse?: (response: StreamResponse) => void; + }, +>( + set: StoreApi["setState"], + get: StoreApi["getState"], +): BatchState & BatchActions => ({ + batch: {}, + + handleBatchStarted: (sessionId) => { + set((state) => ({ + ...state, + batch: { + ...state.batch, + [sessionId]: { percentage: 0, isComplete: false }, + }, + })); + }, + + handleBatchResponse: (sessionId, 
response) => { + const { handlePersist } = get(); + + const [words, hints] = transformBatch(response); + if (!words.length) { + return; + } + + handlePersist?.(words, hints); + + set((state) => { + if (!state.batch[sessionId]) { + return state; + } + + const { [sessionId]: _, ...rest } = state.batch; + return { + ...state, + batch: rest, + }; + }); + }, + + handleBatchResponseStreamed: (sessionId, response, percentage) => { + const { handleTranscriptResponse } = get(); + + handleTranscriptResponse?.(response); + + const isComplete = response.type === "Results" && response.from_finalize; + + set((state) => ({ + ...state, + batch: { + ...state.batch, + [sessionId]: { percentage, isComplete: isComplete || false }, + }, + })); + }, + + clearBatchSession: (sessionId) => { + set((state) => { + if (!(sessionId in state.batch)) { + return state; + } + + const { [sessionId]: _, ...rest } = state.batch; + return { + ...state, + batch: rest, + }; + }); + }, +}); + +function transformBatch( + response: BatchResponse, +): [WordLike[], RuntimeSpeakerHint[]] { + const allWords: WordLike[] = []; + const allHints: RuntimeSpeakerHint[] = []; + let wordOffset = 0; + + response.results.channels.forEach((channel, channelIndex) => { + const alternative = channel.alternatives[0]; + if (!alternative || !alternative.words || !alternative.words.length) { + return; + } + + const [words, hints] = transformAlternativeWords( + alternative.words, + alternative.transcript, + channelIndex, + ); + + hints.forEach((hint) => { + allHints.push({ + ...hint, + wordIndex: hint.wordIndex + wordOffset, + }); + }); + allWords.push(...words); + wordOffset += words.length; + }); + + return [allWords, allHints]; +} + +function transformAlternativeWords( + wordEntries: BatchAlternatives["words"], + transcript: string, + channelIndex: number, +): [WordLike[], RuntimeSpeakerHint[]] { + const words: WordLike[] = []; + const hints: RuntimeSpeakerHint[] = []; + + const textsWithSpacing = fixSpacingForWords( + (wordEntries ?? []).map((w) => w.punctuated_word ?? w.word), + transcript, + ); + + for (let i = 0; i < (wordEntries ?? []).length; i++) { + const word = (wordEntries ?? 
[])[i]; + const text = textsWithSpacing[i]; + + words.push({ + text, + start_ms: Math.round(word.start * 1000), + end_ms: Math.round(word.end * 1000), + channel: channelIndex, + }); + + if (typeof word.speaker === "number") { + hints.push({ + wordIndex: i, + data: { + type: "provider_speaker_index", + speaker_index: word.speaker, + }, + }); + } + } + + return [words, hints]; +} diff --git a/apps/desktop/src/store/zustand/listener/general.test.ts b/apps/desktop/src/store/zustand/listener/general.test.ts index b62cb64a2..86741dc7e 100644 --- a/apps/desktop/src/store/zustand/listener/general.test.ts +++ b/apps/desktop/src/store/zustand/listener/general.test.ts @@ -11,19 +11,104 @@ describe("General Listener Slice", () => { describe("Initial State", () => { test("initializes with correct default values", () => { const state = store.getState(); - expect(state.status).toBe("inactive"); - expect(state.loading).toBe(false); - expect(state.amplitude).toEqual({ mic: 0, speaker: 0 }); - expect(state.seconds).toBe(0); - expect(state.sessionEventUnlisten).toBeUndefined(); - expect(state.intervalId).toBeUndefined(); + expect(state.live.status).toBe("inactive"); + expect(state.live.loading).toBe(false); + expect(state.live.amplitude).toEqual({ mic: 0, speaker: 0 }); + expect(state.live.seconds).toBe(0); + expect(state.live.sessionEventUnlisten).toBeUndefined(); + expect(state.live.intervalId).toBeUndefined(); + expect(state.batch).toEqual({}); }); }); describe("Amplitude Updates", () => { test("amplitude state is initialized to zero", () => { const state = store.getState(); - expect(state.amplitude).toEqual({ mic: 0, speaker: 0 }); + expect(state.live.amplitude).toEqual({ mic: 0, speaker: 0 }); + }); + }); + + describe("Session Mode Helpers", () => { + test("getSessionMode defaults to inactive", () => { + const state = store.getState(); + expect(state.getSessionMode("session-123")).toBe("inactive"); + }); + + test("getSessionMode returns running_batch when session is in batch", () => { + const sessionId = "session-456"; + const { handleBatchResponseStreamed, getSessionMode } = store.getState(); + + const mockResponse = { + type: "Results" as const, + start: 0, + duration: 5, + is_final: false, + speech_final: false, + from_finalize: false, + channel: { + alternatives: [ + { + transcript: "test", + words: [], + confidence: 0.9, + }, + ], + }, + metadata: { + request_id: "test-request", + model_info: { + name: "test-model", + version: "1.0", + arch: "test-arch", + }, + model_uuid: "test-uuid", + }, + channel_index: [0], + }; + + handleBatchResponseStreamed(sessionId, mockResponse, 0.5); + expect(getSessionMode(sessionId)).toBe("running_batch"); + }); + }); + + describe("Batch State", () => { + test("handleBatchResponseStreamed tracks progress per session", () => { + const sessionId = "session-progress"; + const { handleBatchResponseStreamed, clearBatchSession } = store.getState(); + + const mockResponse = { + type: "Results" as const, + start: 0, + duration: 5, + is_final: false, + speech_final: false, + from_finalize: false, + channel: { + alternatives: [ + { + transcript: "test", + words: [], + confidence: 0.9, + }, + ], + }, + metadata: { + request_id: "test-request", + model_info: { + name: "test-model", + version: "1.0", + arch: "test-arch", + }, + model_uuid: "test-uuid", + }, + channel_index: [0], + }; + + handleBatchResponseStreamed(sessionId, mockResponse, 0.5); + expect(store.getState().batch[sessionId]).toEqual({ percentage: 0.5 }); + + clearBatchSession(sessionId); + 
expect(store.getState().batch[sessionId]).toBeUndefined(); }); }); diff --git a/apps/desktop/src/store/zustand/listener/general.ts b/apps/desktop/src/store/zustand/listener/general.ts index 91ed748e4..44b144dd5 100644 --- a/apps/desktop/src/store/zustand/listener/general.ts +++ b/apps/desktop/src/store/zustand/listener/general.ts @@ -3,6 +3,8 @@ import { create as mutate } from "mutative"; import type { StoreApi } from "zustand"; import { + type BatchParams, + type BatchResponse, commands as listenerCommands, events as listenerEvents, type SessionEvent, @@ -11,17 +13,23 @@ import { } from "@hypr/plugin-listener"; import { fromResult } from "../../../effect"; +import type { BatchActions, BatchState } from "./batch"; import type { HandlePersistCallback, TranscriptActions } from "./transcript"; +type LiveSessionStatus = Extract; +export type SessionMode = LiveSessionStatus | "running_batch"; + export type GeneralState = { - sessionEventUnlisten?: () => void; - loading: boolean; - status: Extract; - amplitude: { mic: number; speaker: number }; - seconds: number; - intervalId?: NodeJS.Timeout; - sessionId: string | null; - muted: boolean; + live: { + sessionEventUnlisten?: () => void; + loading: boolean; + status: LiveSessionStatus; + amplitude: { mic: number; speaker: number }; + seconds: number; + intervalId?: NodeJS.Timeout; + sessionId: string | null; + muted: boolean; + }; }; export type GeneralActions = { @@ -31,15 +39,22 @@ export type GeneralActions = { ) => void; stop: () => void; setMuted: (value: boolean) => void; + runBatch: ( + params: BatchParams, + options?: { handlePersist?: HandlePersistCallback; sessionId?: string }, + ) => Promise; + getSessionMode: (sessionId: string) => SessionMode; }; const initialState: GeneralState = { - status: "inactive", - loading: false, - amplitude: { mic: 0, speaker: 0 }, - seconds: 0, - sessionId: null, - muted: false, + live: { + status: "inactive", + loading: false, + amplitude: { mic: 0, speaker: 0 }, + seconds: 0, + sessionId: null, + muted: false, + }, }; const listenToSessionEvents = ( @@ -53,16 +68,31 @@ const listenToSessionEvents = ( const startSessionEffect = (params: SessionParams) => fromResult(listenerCommands.startSession(params)); const stopSessionEffect = () => fromResult(listenerCommands.stopSession()); -export const createGeneralSlice = ( +export const createGeneralSlice = < + T extends GeneralState & GeneralActions & TranscriptActions & BatchActions & BatchState, +>( set: StoreApi["setState"], get: StoreApi["getState"], ): GeneralState & GeneralActions => ({ ...initialState, start: (params: SessionParams, options) => { + const targetSessionId = params.session_id; + + if (!targetSessionId) { + console.error("[listener] 'start' requires a session_id"); + return; + } + + const currentMode = get().getSessionMode(targetSessionId); + if (currentMode === "running_batch") { + console.warn(`[listener] cannot start live session while batch processing session ${targetSessionId}`); + return; + } + set((state) => mutate(state, (draft) => { - draft.loading = true; - draft.sessionId = params.session_id ?? 
null; + draft.live.loading = true; + draft.live.sessionId = targetSessionId; }) ); @@ -74,7 +104,7 @@ export const createGeneralSlice = ( if (payload.type === "audioAmplitude") { set((state) => mutate(state, (draft) => { - draft.amplitude = { + draft.live.amplitude = { mic: payload.mic, speaker: payload.speaker, }; @@ -82,50 +112,50 @@ export const createGeneralSlice = ( ); } else if (payload.type === "running_active") { const currentState = get(); - if (currentState.intervalId) { - clearInterval(currentState.intervalId); + if (currentState.live.intervalId) { + clearInterval(currentState.live.intervalId); } const intervalId = setInterval(() => { set((s) => mutate(s, (d) => { - d.seconds += 1; + d.live.seconds += 1; }) ); }, 1000); set((state) => mutate(state, (draft) => { - draft.status = "running_active"; - draft.loading = false; - draft.seconds = 0; - draft.intervalId = intervalId; - draft.sessionId = currentState.sessionId ?? null; + draft.live.status = "running_active"; + draft.live.loading = false; + draft.live.seconds = 0; + draft.live.intervalId = intervalId; + draft.live.sessionId = targetSessionId; }) ); } else if (payload.type === "finalizing") { set((state) => mutate(state, (draft) => { - if (draft.intervalId) { - clearInterval(draft.intervalId); - draft.intervalId = undefined; + if (draft.live.intervalId) { + clearInterval(draft.live.intervalId); + draft.live.intervalId = undefined; } - draft.status = "finalizing"; - draft.loading = true; + draft.live.status = "finalizing"; + draft.live.loading = true; }) ); } else if (payload.type === "inactive") { const currentState = get(); - if (currentState.sessionEventUnlisten) { - currentState.sessionEventUnlisten(); + if (currentState.live.sessionEventUnlisten) { + currentState.live.sessionEventUnlisten(); } set((state) => mutate(state, (draft) => { - draft.status = "inactive"; - draft.loading = false; - draft.sessionId = null; - draft.sessionEventUnlisten = undefined; + draft.live.status = "inactive"; + draft.live.loading = false; + draft.live.sessionId = null; + draft.live.sessionEventUnlisten = undefined; }) ); @@ -133,6 +163,12 @@ export const createGeneralSlice = ( } else if (payload.type === "streamResponse") { const response = payload.response; get().handleTranscriptResponse(response as unknown as StreamResponse); + } else if (payload.type === "batchResponse") { + const response = payload.response; + get().handleBatchResponse( + targetSessionId, + response as unknown as BatchResponse, + ); } }; @@ -141,16 +177,16 @@ export const createGeneralSlice = ( set((state) => mutate(state, (draft) => { - draft.sessionEventUnlisten = unlisten; + draft.live.sessionEventUnlisten = unlisten; }) ); yield* startSessionEffect(params); set((state) => mutate(state, (draft) => { - draft.status = "running_active"; - draft.loading = false; - draft.sessionId = params.session_id ?? 
null; + draft.live.status = "running_active"; + draft.live.loading = false; + draft.live.sessionId = targetSessionId; }) ); }); @@ -159,7 +195,22 @@ export const createGeneralSlice = ( Exit.match(exit, { onFailure: (cause) => { console.error("Failed to start session:", cause); - set(initialState as Partial); + set((state) => + mutate(state, (draft) => { + if (draft.live.intervalId) { + clearInterval(draft.live.intervalId); + draft.live.intervalId = undefined; + } + + draft.live.sessionEventUnlisten = undefined; + draft.live.loading = false; + draft.live.status = "inactive"; + draft.live.amplitude = { mic: 0, speaker: 0 }; + draft.live.seconds = 0; + draft.live.sessionId = null; + draft.live.muted = initialState.live.muted; + }) + ); }, onSuccess: () => {}, }); @@ -176,7 +227,7 @@ export const createGeneralSlice = ( console.error("Failed to stop session:", cause); set((state) => mutate(state, (draft) => { - draft.loading = false; + draft.live.loading = false; }) ); }, @@ -187,9 +238,136 @@ export const createGeneralSlice = ( setMuted: (value) => { set((state) => mutate(state, (draft) => { - draft.muted = value; + draft.live.muted = value; listenerCommands.setMicMuted(value); }) ); }, + runBatch: async (params, options) => { + const sessionId = options?.sessionId; + + if (!sessionId) { + console.error("[listener] 'runBatch' requires a sessionId option"); + return; + } + + const mode = get().getSessionMode(sessionId); + if (mode === "running_active" || mode === "finalizing") { + console.warn(`[listener] cannot start batch processing while session ${sessionId} is live`); + return; + } + + if (mode === "running_batch") { + console.warn(`[listener] session ${sessionId} is already processing in batch mode`); + return; + } + + const shouldResetPersist = Boolean(options?.handlePersist); + + if (options?.handlePersist) { + get().setTranscriptPersist(options.handlePersist); + } + + get().clearBatchSession(sessionId); + + let unlisten: (() => void) | undefined; + + const cleanup = () => { + if (unlisten) { + unlisten(); + unlisten = undefined; + } + + if (shouldResetPersist) { + get().setTranscriptPersist(undefined); + } + + get().clearBatchSession(sessionId); + }; + + await new Promise((resolve, reject) => { + listenerEvents.sessionEvent + .listen(({ payload }) => { + if (payload.type === "batchStarted") { + get().handleBatchStarted(payload.session_id); + return; + } + + if (payload.type === "batchProgress") { + get().handleBatchResponseStreamed( + sessionId, + payload.response, + payload.percentage, + ); + + const batchState = get().batch[sessionId]; + if (batchState?.isComplete) { + cleanup(); + resolve(); + } + return; + } + + if (payload.type === "batchFailed") { + cleanup(); + reject(payload.error); + return; + } + + if (payload.type !== "batchResponse") { + return; + } + + try { + get().handleBatchResponse(sessionId, payload.response); + cleanup(); + resolve(); + } catch (error) { + console.error("[runBatch] error handling batch response", error); + cleanup(); + reject(error); + } + }) + .then((fn) => { + unlisten = fn; + + listenerCommands + .runBatch(params) + .then((result) => { + if (result.status === "error") { + console.error(result.error); + cleanup(); + reject(result.error); + } + }) + .catch((error) => { + console.error(error); + cleanup(); + reject(error); + }); + }) + .catch((error) => { + console.error(error); + cleanup(); + reject(error); + }); + }); + }, + getSessionMode: (sessionId) => { + if (!sessionId) { + return "inactive"; + } + + const state = get(); + + if 
(state.live.sessionId === sessionId) { + return state.live.status; + } + + if (state.batch[sessionId]) { + return "running_batch"; + } + + return "inactive"; + }, }); diff --git a/apps/desktop/src/store/zustand/listener/index.ts b/apps/desktop/src/store/zustand/listener/index.ts index f916a8147..ba71a00d3 100644 --- a/apps/desktop/src/store/zustand/listener/index.ts +++ b/apps/desktop/src/store/zustand/listener/index.ts @@ -1,17 +1,20 @@ import { createStore } from "zustand"; -import { createGeneralSlice, type GeneralActions, type GeneralState } from "./general"; +import { type BatchActions, type BatchState, createBatchSlice } from "./batch"; +import { createGeneralSlice, type GeneralActions, type GeneralState, type SessionMode } from "./general"; import { createTranscriptSlice, type TranscriptActions, type TranscriptState } from "./transcript"; -type State = GeneralState & TranscriptState; -type Actions = GeneralActions & TranscriptActions; +type State = GeneralState & TranscriptState & BatchState; +type Actions = GeneralActions & TranscriptActions & BatchActions; type Store = State & Actions; export type ListenerStore = ReturnType; +export type { SessionMode }; export const createListenerStore = () => { return createStore((set, get) => ({ ...createGeneralSlice(set, get), ...createTranscriptSlice(set, get), + ...createBatchSlice(set, get), })); }; diff --git a/apps/desktop/src/store/zustand/listener/transcript.ts b/apps/desktop/src/store/zustand/listener/transcript.ts index 09b36a324..90ff0343c 100644 --- a/apps/desktop/src/store/zustand/listener/transcript.ts +++ b/apps/desktop/src/store/zustand/listener/transcript.ts @@ -1,7 +1,7 @@ import { create as mutate } from "mutative"; import type { StoreApi } from "zustand"; -import type { Alternatives, StreamResponse } from "@hypr/plugin-listener"; +import type { StreamAlternatives, StreamResponse } from "@hypr/plugin-listener"; import type { RuntimeSpeakerHint, WordLike } from "../../../utils/segment"; type WordsByChannel = Record; @@ -9,6 +9,7 @@ type WordsByChannel = Record; export type HandlePersistCallback = (words: WordLike[], hints: RuntimeSpeakerHint[]) => void; export type TranscriptState = { + finalWordsMaxEndMsByChannel: Record; partialWordsByChannel: WordsByChannel; partialHints: RuntimeSpeakerHint[]; handlePersist?: HandlePersistCallback; @@ -21,6 +22,7 @@ export type TranscriptActions = { }; const initialState: TranscriptState = { + finalWordsMaxEndMsByChannel: {}, partialWordsByChannel: {}, partialHints: [], handlePersist: undefined, @@ -35,13 +37,33 @@ export const createTranscriptSlice = { - const { partialWordsByChannel, partialHints, handlePersist } = get(); - + const { + partialWordsByChannel, + partialHints, + handlePersist, + finalWordsMaxEndMsByChannel, + } = get(); + + const lastPersistedEndMs = finalWordsMaxEndMsByChannel[channelIndex] ?? 0; const lastEndMs = getLastEndMs(words); - const remaining = (partialWordsByChannel[channelIndex] ?? []) + + const firstNewWordIndex = words.findIndex((word) => word.end_ms > lastPersistedEndMs); + if (firstNewWordIndex === -1) { + return; + } + + const newWords = words.slice(firstNewWordIndex); + const newHints = hints + .filter((hint) => hint.wordIndex >= firstNewWordIndex) + .map((hint) => ({ + ...hint, + wordIndex: hint.wordIndex - firstNewWordIndex, + })); + + const remainingPartialWords = (partialWordsByChannel[channelIndex] ?? 
[]) .filter((word) => word.start_ms > lastEndMs); - const remainingHints = partialHints.filter((hint) => { + const remainingPartialHints = partialHints.filter((hint) => { const partialWords = partialWordsByChannel[channelIndex] ?? []; const word = partialWords[hint.wordIndex]; return word && word.start_ms > lastEndMs; @@ -49,12 +71,13 @@ export const createTranscriptSlice = mutate(state, (draft) => { - draft.partialWordsByChannel[channelIndex] = remaining; - draft.partialHints = remainingHints; + draft.partialWordsByChannel[channelIndex] = remainingPartialWords; + draft.partialHints = remainingPartialHints; + draft.finalWordsMaxEndMsByChannel[channelIndex] = lastEndMs; }) ); - handlePersist?.(words, hints); + handlePersist?.(newWords, newHints); }; const handlePartialWords = ( @@ -139,6 +162,7 @@ export const createTranscriptSlice = { draft.partialWordsByChannel = {}; draft.partialHints = []; + draft.finalWordsMaxEndMsByChannel = {}; draft.handlePersist = undefined; }) ); @@ -150,23 +174,19 @@ const getLastEndMs = (words: WordLike[]): number => words[words.length - 1]?.end const getFirstStartMs = (words: WordLike[]): number => words[0]?.start_ms ?? 0; function transformWords( - alternative: Alternatives, + alternative: StreamAlternatives, channelIndex: number, ): [WordLike[], RuntimeSpeakerHint[]] { const words: WordLike[] = []; const hints: RuntimeSpeakerHint[] = []; const textsWithSpacing = fixSpacingForWords( - (alternative.words ?? []).map((w) => w.punctuated_word ?? w.word), + alternative.words.map((w) => w.punctuated_word ?? w.word), alternative.transcript, ); for (let i = 0; i < alternative.words.length; i++) { - const word = alternative.words?.[i]; - if (!word) { - continue; - } - + const word = alternative.words[i]; const text = textsWithSpacing[i]; words.push({ diff --git a/apps/web/package.json b/apps/web/package.json index 9fbb45222..ca44ff62d 100644 --- a/apps/web/package.json +++ b/apps/web/package.json @@ -14,39 +14,39 @@ "@hypr/utils": "workspace:*", "@iconify-icon/react": "^3.0.3", "@mux/mux-player-react": "^3.8.0", - "@nangohq/frontend": "^0.69.7", - "@nangohq/node": "^0.69.7", - "@netlify/vite-plugin-tanstack-start": "^1.1.9", - "@posthog/react": "^1.3.0", - "@sentry/tanstackstart-react": "^10.22.0", + "@nangohq/frontend": "^0.69.9", + "@nangohq/node": "^0.69.9", + "@netlify/vite-plugin-tanstack-start": "^1.1.11", + "@posthog/react": "^1.4.0", + "@sentry/tanstackstart-react": "^10.23.0", "@stripe/stripe-js": "^8.3.0", "@supabase/ssr": "^0.7.0", - "@supabase/supabase-js": "^2.79.0", + "@supabase/supabase-js": "^2.80.0", "@t3-oss/env-core": "^0.13.8", - "@tailwindcss/vite": "^4.1.16", + "@tailwindcss/vite": "^4.1.17", "@tanstack/react-devtools": "^0.7.11", "@tanstack/react-form": "^1.23.8", - "@tanstack/react-query": "^5.90.6", + "@tanstack/react-query": "^5.90.7", "@tanstack/react-query-devtools": "^5.90.2", - "@tanstack/react-router": "^1.134.12", - "@tanstack/react-router-devtools": "^1.134.12", - "@tanstack/react-router-ssr-query": "^1.134.12", - "@tanstack/react-start": "^1.134.12", - "@tanstack/router-plugin": "^1.134.12", + "@tanstack/react-router": "^1.134.13", + "@tanstack/react-router-devtools": "^1.134.13", + "@tanstack/react-router-ssr-query": "^1.134.13", + "@tanstack/react-start": "^1.134.14", + "@tanstack/router-plugin": "^1.134.14", "@unpic/react": "^1.0.1", "drizzle-orm": "^0.44.7", "exa-js": "^1.10.2", "lucide-react": "^0.544.0", "postgres": "^3.4.7", - "posthog-js": "^1.285.1", + "posthog-js": "^1.289.0", "react": "^19.2.0", "react-dom": "^19.2.0", 
"rehype-autolink-headings": "^7.1.0", "rehype-slug": "^6.0.0", "remark-gfm": "^4.0.1", - "stripe": "^19.2.1", + "stripe": "^19.3.0", "tailwind-scrollbar-hide": "^4.0.0", - "tailwindcss": "^4.1.16", + "tailwindcss": "^4.1.17", "unpic": "^4.1.3", "vite-tsconfig-paths": "^5.1.4", "zod": "^4.1.12" @@ -66,7 +66,7 @@ "jsdom": "^27.1.0", "netlify": "^23.10.0", "typescript": "^5.9.3", - "vite": "^7.1.12", + "vite": "^7.2.2", "web-vitals": "^5.1.0" } } diff --git a/crates/audio-utils/Cargo.toml b/crates/audio-utils/Cargo.toml index fd9712a39..f062c9b52 100644 --- a/crates/audio-utils/Cargo.toml +++ b/crates/audio-utils/Cargo.toml @@ -9,5 +9,7 @@ futures-util = { workspace = true } kalosm-sound = { workspace = true, default-features = false } thiserror = { workspace = true } +hound = { workspace = true } rodio = { workspace = true } rubato = "0.16.2" +vorbis_rs = { workspace = true } diff --git a/crates/audio-utils/src/error.rs b/crates/audio-utils/src/error.rs index 2c67791fe..44a6b2671 100644 --- a/crates/audio-utils/src/error.rs +++ b/crates/audio-utils/src/error.rs @@ -6,4 +6,22 @@ pub enum Error { ResamplerConstructionError(#[from] rubato::ResamplerConstructionError), #[error(transparent)] DecoderError(#[from] rodio::decoder::DecoderError), + #[error(transparent)] + Io(#[from] std::io::Error), + #[error(transparent)] + Hound(#[from] hound::Error), + #[error(transparent)] + Vorbis(#[from] vorbis_rs::VorbisError), + #[error("vorbis channel count mismatch (expected {expected}, actual {actual})")] + ChannelCountMismatch { expected: u8, actual: u8 }, + #[error("vorbis channel data length mismatch for channel {channel}")] + ChannelDataLengthMismatch { channel: usize }, + #[error("unsupported channel count {count}")] + UnsupportedChannelCount { count: u16 }, + #[error("invalid sample rate {0}")] + InvalidSampleRate(u32), + #[error("vorbis channel data is empty")] + EmptyChannelSet, + #[error("too many channels: {count}")] + TooManyChannels { count: usize }, } diff --git a/crates/audio-utils/src/lib.rs b/crates/audio-utils/src/lib.rs index 9bc4cc6ce..557c48a4a 100644 --- a/crates/audio-utils/src/lib.rs +++ b/crates/audio-utils/src/lib.rs @@ -4,6 +4,10 @@ use kalosm_sound::AsyncSource; mod error; pub use error::*; +mod vorbis; +pub use vorbis::*; + +pub use rodio::Source; const I16_SCALE: f32 = 32768.0; @@ -72,9 +76,8 @@ pub fn bytes_to_f32_samples(data: &[u8]) -> Vec { pub fn source_from_path( path: impl AsRef, ) -> Result>, crate::Error> { - let decoder = rodio::Decoder::new(std::io::BufReader::new( - std::fs::File::open(path.as_ref()).unwrap(), - ))?; + let file = std::fs::File::open(path.as_ref())?; + let decoder = rodio::Decoder::new(std::io::BufReader::new(file))?; Ok(decoder) } @@ -128,3 +131,37 @@ where Ok(output) } + +#[derive(Debug)] +pub struct ChunkedAudio { + pub chunks: Vec, + pub sample_count: usize, +} + +pub fn chunk_audio_file( + path: impl AsRef, + sample_rate: u32, + chunk_size: usize, +) -> Result { + let source = source_from_path(path)?; + let samples = resample_audio(source, sample_rate)?; + + if samples.is_empty() { + return Ok(ChunkedAudio { + chunks: Vec::new(), + sample_count: 0, + }); + } + + let chunk_size = chunk_size.max(1); + let sample_count = samples.len(); + let chunks = samples + .chunks(chunk_size) + .map(|chunk| f32_to_i16_bytes(chunk.iter().copied())) + .collect(); + + Ok(ChunkedAudio { + chunks, + sample_count, + }) +} diff --git a/crates/audio-utils/src/vorbis.rs b/crates/audio-utils/src/vorbis.rs new file mode 100644 index 000000000..dcaa5a2e7 --- /dev/null +++ 
b/crates/audio-utils/src/vorbis.rs @@ -0,0 +1,201 @@ +use std::fs::File; +use std::io::BufReader; +use std::num::{NonZeroU32, NonZeroU8}; +use std::path::Path; + +use hound::{SampleFormat, WavReader, WavSpec, WavWriter}; +use vorbis_rs::{VorbisBitrateManagementStrategy, VorbisDecoder, VorbisEncoderBuilder}; + +use crate::Error; + +pub const DEFAULT_VORBIS_QUALITY: f32 = 0.7; +pub const DEFAULT_VORBIS_BLOCK_SIZE: usize = 4096; + +#[derive(Clone, Copy, Debug)] +pub struct VorbisEncodeSettings { + pub quality: f32, + pub block_size: usize, +} + +impl Default for VorbisEncodeSettings { + fn default() -> Self { + Self { + quality: DEFAULT_VORBIS_QUALITY, + block_size: DEFAULT_VORBIS_BLOCK_SIZE, + } + } +} + +pub fn encode_vorbis_from_channels( + channels: &[&[f32]], + sample_rate: NonZeroU32, + settings: VorbisEncodeSettings, +) -> Result<Vec<u8>, Error> { + let channel_count = channels.len(); + if channel_count == 0 { + return Err(Error::EmptyChannelSet); + } + + let channel_count_u8 = u8::try_from(channel_count).map_err(|_| Error::TooManyChannels { + count: channel_count, + })?; + let channel_count = NonZeroU8::new(channel_count_u8).ok_or(Error::EmptyChannelSet)?; + + let reference_len = channels[0].len(); + for (index, channel) in channels.iter().enumerate() { + if channel.len() != reference_len { + return Err(Error::ChannelDataLengthMismatch { channel: index }); + } + } + + let mut ogg_buffer = Vec::new(); + let mut encoder = VorbisEncoderBuilder::new(sample_rate, channel_count, &mut ogg_buffer)? + .bitrate_management_strategy(VorbisBitrateManagementStrategy::QualityVbr { + target_quality: settings.quality, + }) + .build()?; + + let block_size = settings.block_size.max(1); + let mut offsets = vec![0usize; channels.len()]; + + loop { + let mut slices: Vec<&[f32]> = Vec::with_capacity(channels.len()); + let mut has_samples = false; + + for (index, channel) in channels.iter().enumerate() { + let start = offsets[index]; + if start >= channel.len() { + slices.push(&[]); + continue; + } + + let end = (start + block_size).min(channel.len()); + if end > start { + has_samples = true; + } + + slices.push(&channel[start..end]); + offsets[index] = end; + } + + if !has_samples { + break; + } + + encoder.encode_audio_block(&slices)?; + } + + encoder.finish()?; + Ok(ogg_buffer) +} + +pub fn encode_vorbis_from_interleaved( + samples: &[f32], + channel_count: NonZeroU8, + sample_rate: NonZeroU32, + settings: VorbisEncodeSettings, +) -> Result<Vec<u8>, Error> { + let channels = deinterleave(samples, channel_count.get() as usize); + let channel_refs: Vec<&[f32]> = channels.iter().map(Vec::as_slice).collect(); + encode_vorbis_from_channels(&channel_refs, sample_rate, settings) +} + +pub fn encode_vorbis_mono( + samples: &[f32], + sample_rate: NonZeroU32, + settings: VorbisEncodeSettings, +) -> Result<Vec<u8>, Error> { + encode_vorbis_from_channels(&[samples], sample_rate, settings) +} + +pub fn decode_vorbis_to_wav_file( + ogg_path: impl AsRef<Path>, + wav_path: impl AsRef<Path>, +) -> Result<(), Error> { + let ogg_reader = BufReader::new(File::open(ogg_path)?); + let mut decoder = VorbisDecoder::new(ogg_reader)?; + + let wav_spec = WavSpec { + channels: decoder.channels().get() as u16, + sample_rate: decoder.sampling_frequency().get(), + bits_per_sample: 32, + sample_format: SampleFormat::Float, + }; + + let mut writer = WavWriter::create(wav_path, wav_spec)?; + + while let Some(block) = decoder.decode_audio_block()? { + let samples = block.samples(); + if samples.is_empty() { + continue; + } + + let frame_count = samples[0].len(); + for (index, channel) in samples.iter().enumerate() { + if channel.len() != frame_count { + return Err(Error::ChannelDataLengthMismatch { channel: index }); + } + } + + for frame in 0..frame_count { + for channel in samples.iter() { + writer.write_sample(channel[frame])?; + } + } + } + + writer.flush()?; + writer.finalize()?; + Ok(()) +} + +pub fn encode_wav_to_vorbis_file( + wav_path: impl AsRef<Path>, + ogg_path: impl AsRef<Path>, + settings: VorbisEncodeSettings, +) -> Result<(), Error> { + let mut reader = WavReader::open(wav_path)?; + let spec = reader.spec(); + + let sample_rate = + NonZeroU32::new(spec.sample_rate).ok_or(Error::InvalidSampleRate(spec.sample_rate))?; + let channel_count_u8 = + u8::try_from(spec.channels).map_err(|_| Error::UnsupportedChannelCount { + count: spec.channels, + })?; + let channel_count = NonZeroU8::new(channel_count_u8).ok_or(Error::UnsupportedChannelCount { + count: spec.channels, + })?; + + let samples: Vec<f32> = reader.samples::<f32>().collect::<Result<_, _>>()?; + let encoded = encode_vorbis_from_interleaved(&samples, channel_count, sample_rate, settings)?; + std::fs::write(ogg_path, encoded)?; + + Ok(()) +} + +pub fn mix_down_to_mono(samples: &[f32], channels: NonZeroU8) -> Vec<f32> { + let channel_count = channels.get() as usize; + if channel_count <= 1 { + return samples.to_vec(); + } + + let mut mono = Vec::with_capacity(samples.len() / channel_count); + for frame in samples.chunks(channel_count) { + let sum: f32 = frame.iter().copied().sum(); + mono.push(sum / frame.len() as f32); + } + mono +} + +fn deinterleave(samples: &[f32], channels: usize) -> Vec<Vec<f32>> { + if channels <= 1 { + return vec![samples.to_vec()]; + } + + let mut output = vec![Vec::with_capacity(samples.len() / channels + 1); channels]; + for (index, sample) in samples.iter().enumerate() { + output[index % channels].push(*sample); + } + output +} diff --git a/crates/transcribe-moonshine/src/service/streaming.rs b/crates/transcribe-moonshine/src/service/streaming.rs index c7c70c09b..179ae6718 100644 --- a/crates/transcribe-moonshine/src/service/streaming.rs +++ b/crates/transcribe-moonshine/src/service/streaming.rs @@ -21,7 +21,8 @@ use hypr_moonshine::MoonshineOnnxModel; use hypr_vad::VadExt; use owhisper_config::MoonshineModelSize; -use owhisper_interface::{Alternatives, Channel, ListenParams, Metadata, StreamResponse, Word}; +use owhisper_interface::stream::{Alternatives, Channel, Metadata, StreamResponse, Word}; +use owhisper_interface::ListenParams; #[derive(Clone)] pub struct TranscribeService { diff --git a/crates/transcribe-whisper-local/src/service/streaming.rs b/crates/transcribe-whisper-local/src/service/streaming.rs index 441f71c11..f6fe6bdba 100644 --- a/crates/transcribe-whisper-local/src/service/streaming.rs +++ b/crates/transcribe-whisper-local/src/service/streaming.rs @@ -19,7 +19,8 @@ use tower::Service; use hypr_vad::VadExt; use hypr_ws_utils::{ConnectionGuard, ConnectionManager}; -use owhisper_interface::{Alternatives, Channel, ListenParams, Metadata, StreamResponse, Word}; +use owhisper_interface::stream::{Alternatives, Channel, Metadata, StreamResponse, Word}; +use owhisper_interface::ListenParams; use crate::GlobalTimer; diff --git a/crates/ws/src/client.rs b/crates/ws/src/client.rs index 12ed98cfa..54a97c90b 100644 --- a/crates/ws/src/client.rs +++ b/crates/ws/src/client.rs @@ -117,10 +117,26 @@ impl WebSocketClient { Some(msg_result) = ws_receiver.next() => { match msg_result {
Ok(msg) => { + let is_text = matches!(msg, Message::Text(_)); + let is_binary = matches!(msg, Message::Binary(_)); + let text_preview = if let Message::Text(ref t) = msg { + Some(t.to_string()) + } else { + None + }; + match msg { Message::Text(_) | Message::Binary(_) => { if let Some(output) = T::from_message(msg) { yield Ok(output); + } else { + if is_text { + if let Some(text) = text_preview { + tracing::warn!("ws_message_parse_failed: {}", text); + } + } else if is_binary { + tracing::warn!("ws_binary_message_parse_failed"); + } } }, Message::Ping(_) | Message::Pong(_) | Message::Frame(_) => continue, diff --git a/owhisper/owhisper-client/Cargo.toml b/owhisper/owhisper-client/Cargo.toml index a8f1cd06a..2509b0119 100644 --- a/owhisper/owhisper-client/Cargo.toml +++ b/owhisper/owhisper-client/Cargo.toml @@ -11,12 +11,15 @@ hypr-ws = { workspace = true } owhisper-interface = { workspace = true } +futures-util = { workspace = true } +reqwest = { workspace = true, features = ["json"] } +tokio = { workspace = true } + bytes = { workspace = true } serde_json = { workspace = true } -url = { workspace = true } - -futures-util = { workspace = true } +thiserror = { workspace = true } tracing = { workspace = true } +url = { workspace = true } [dev-dependencies] hypr-data = { workspace = true } diff --git a/owhisper/owhisper-client/src/batch.rs b/owhisper/owhisper-client/src/batch.rs new file mode 100644 index 000000000..cf1c9d46b --- /dev/null +++ b/owhisper/owhisper-client/src/batch.rs @@ -0,0 +1,86 @@ +use std::path::{Path, PathBuf}; +use tokio::task; + +use hypr_audio_utils::{f32_to_i16_bytes, resample_audio, source_from_path, Source}; +use owhisper_interface::batch::Response as BatchResponse; + +use crate::{error::Error, ListenClientBuilder, RESAMPLED_SAMPLE_RATE_HZ}; + +#[derive(Clone)] +pub struct BatchClient { + pub(crate) client: reqwest::Client, + pub(crate) url: url::Url, + pub(crate) api_key: Option, +} + +async fn decode_audio_to_linear16(path: PathBuf) -> Result<(bytes::Bytes, u16), Error> { + task::spawn_blocking(move || -> Result<(bytes::Bytes, u16), Error> { + let decoder = + source_from_path(&path).map_err(|err| Error::AudioProcessing(err.to_string()))?; + + let channel_count = decoder.channels(); + + let samples = resample_audio(decoder, RESAMPLED_SAMPLE_RATE_HZ) + .map_err(|err| Error::AudioProcessing(err.to_string()))?; + + if samples.is_empty() { + return Err(Error::AudioProcessing( + "audio file contains no samples".to_string(), + )); + } + + let bytes = f32_to_i16_bytes(samples.into_iter()); + + Ok((bytes, channel_count)) + }) + .await? 
+} + +impl BatchClient { + pub fn builder() -> ListenClientBuilder { + ListenClientBuilder::default() + } + + pub async fn transcribe_file>( + &self, + file_path: P, + ) -> Result { + let path = file_path.as_ref(); + let (audio_data, channel_count) = decode_audio_to_linear16(path.to_path_buf()).await?; + + let mut url = self.url.clone(); + let channel_value = channel_count.max(1).to_string(); + { + let mut query_pairs = url.query_pairs_mut(); + query_pairs.append_pair("channels", &channel_value); + } + + let mut request = self.client.post(url); + + if let Some(key) = &self.api_key { + request = request.header("Authorization", format!("Token {}", key)); + } + + let content_type = format!( + "audio/raw;encoding=linear16;rate={}", + RESAMPLED_SAMPLE_RATE_HZ + ); + + let response = request + .header("Accept", "application/json") + .header("Content-Type", content_type) + .body(audio_data) + .send() + .await?; + + let status = response.status(); + if status.is_success() { + Ok(response.json().await?) + } else { + Err(Error::UnexpectedStatus { + status, + body: response.text().await.unwrap_or_default(), + }) + } + } +} diff --git a/owhisper/owhisper-client/src/error.rs b/owhisper/owhisper-client/src/error.rs new file mode 100644 index 000000000..e2d678648 --- /dev/null +++ b/owhisper/owhisper-client/src/error.rs @@ -0,0 +1,14 @@ +use reqwest::StatusCode; +use tokio::task::JoinError; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("audio processing error: {0}")] + AudioProcessing(String), + #[error(transparent)] + Http(#[from] reqwest::Error), + #[error(transparent)] + Task(#[from] JoinError), + #[error("unexpected response status {status}: {body}")] + UnexpectedStatus { status: StatusCode, body: String }, +} diff --git a/owhisper/owhisper-client/src/lib.rs b/owhisper/owhisper-client/src/lib.rs index 4ea5ce86a..90938763d 100644 --- a/owhisper/owhisper-client/src/lib.rs +++ b/owhisper/owhisper-client/src/lib.rs @@ -1,32 +1,16 @@ -use futures_util::Stream; +mod batch; +mod error; +mod live; -use hypr_ws::client::{ClientRequestBuilder, Message, WebSocketClient, WebSocketIO}; -use owhisper_interface::{ControlMessage, MixedMessage, StreamResponse}; +use url::form_urlencoded::Serializer; +use url::UrlQuery; +pub use batch::BatchClient; +pub use error::Error; pub use hypr_ws; +pub use live::{ListenClient, ListenClientDual}; -fn interleave_audio(mic: &[u8], speaker: &[u8]) -> Vec { - let mic_samples: Vec = mic - .chunks_exact(2) - .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]])) - .collect(); - let speaker_samples: Vec = speaker - .chunks_exact(2) - .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]])) - .collect(); - - let max_len = mic_samples.len().max(speaker_samples.len()); - let mut interleaved = Vec::with_capacity(max_len * 2 * 2); - - for i in 0..max_len { - let mic_sample = mic_samples.get(i).copied().unwrap_or(0); - let speaker_sample = speaker_samples.get(i).copied().unwrap_or(0); - interleaved.extend_from_slice(&mic_sample.to_le_bytes()); - interleaved.extend_from_slice(&speaker_sample.to_le_bytes()); - } - - interleaved -} +const RESAMPLED_SAMPLE_RATE_HZ: u32 = 16_000; #[derive(Default)] pub struct ListenClientBuilder { @@ -51,227 +35,194 @@ impl ListenClientBuilder { self } - fn build_uri(&self, channels: u8) -> String { - let mut url: url::Url = self.api_base.as_ref().unwrap().parse().unwrap(); + fn listen_endpoint_url(&self) -> url::Url { + let mut url: url::Url = self + .api_base + .as_ref() + .expect("api_base is required") + .parse() + .expect("invalid 
api_base"); + + let mut path = url.path().to_string(); + if !path.ends_with('/') { + path.push('/'); + } + path.push_str("listen"); + url.set_path(&path); - let params = owhisper_interface::ListenParams { - channels, - ..self.params.clone().unwrap_or_default() - }; + url + } + + pub(crate) fn build_batch_url(&self) -> url::Url { + let params = self.params.clone().unwrap_or_default(); + let mut url = self.listen_endpoint_url(); { - let mut path = url.path().to_string(); - if !path.ends_with('/') { - path.push('/'); - } - path.push_str("listen"); - url.set_path(&path); + let mut query_pairs = url.query_pairs_mut(); + + append_language_query(&mut query_pairs, ¶ms); + + let model = params.model.as_deref().unwrap_or("hypr-whisper"); + let sample_rate = RESAMPLED_SAMPLE_RATE_HZ.to_string(); + + query_pairs.append_pair("model", model); + query_pairs.append_pair("encoding", "linear16"); + query_pairs.append_pair("sample_rate", &sample_rate); + query_pairs.append_pair("diarize", "true"); + query_pairs.append_pair("multichannel", "false"); + query_pairs.append_pair("punctuate", "true"); + query_pairs.append_pair("smart_format", "true"); + query_pairs.append_pair("utterances", "true"); + query_pairs.append_pair("numerals", "true"); + query_pairs.append_pair("filler_words", "false"); + query_pairs.append_pair("dictation", "false"); + query_pairs.append_pair("paragraphs", "false"); + query_pairs.append_pair("profanity_filter", "false"); + query_pairs.append_pair("measurements", "false"); + query_pairs.append_pair("topics", "false"); + query_pairs.append_pair("sentiment", "false"); + query_pairs.append_pair("intents", "false"); + query_pairs.append_pair("detect_entities", "false"); + query_pairs.append_pair("mip_opt_out", "true"); + + append_keyword_query(&mut query_pairs, ¶ms); } + url + } + + pub(crate) fn build_url(&self, channels: u8) -> url::Url { + let mut params = self.params.clone().unwrap_or_default(); + params.channels = channels; + + let mut url = self.listen_endpoint_url(); + { let mut query_pairs = url.query_pairs_mut(); - // https://developers.deepgram.com/docs/language-detection#restricting-the-detectable-languages - // https://www.rfc-editor.org/info/bcp47 - match params.languages.len() { - 0 => { - query_pairs.append_pair("detect_language", "true"); - } - 1 => { - let code = params.languages[0].iso639().code(); - query_pairs.append_pair("language", code); - query_pairs.append_pair("languages", code); - } - _ => { - // https://developers.deepgram.com/docs/multilingual-code-switching - query_pairs.append_pair("language", "multi"); - - for lang in ¶ms.languages { - let code = lang.iso639().code(); - - query_pairs.append_pair("languages", code); - - // Not supported for streaming - // https://developers.deepgram.com/docs/language-detection - // query_pairs.append_pair("detect_language", code); - } - } - } - - query_pairs - // https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#request.query - .append_pair("model", params.model.as_deref().unwrap_or("hypr-whisper")) - .append_pair("channels", &channels.to_string()) - .append_pair("filler_words", "false") - .append_pair("interim_results", "true") - .append_pair("mip_opt_out", "true") - .append_pair("sample_rate", "16000") - .append_pair("encoding", "linear16") - .append_pair("diarize", "true") - .append_pair("multichannel", "true") - .append_pair("punctuate", "true") - .append_pair("smart_format", "true") - .append_pair("vad_events", "false") - .append_pair("numerals", "true"); - - query_pairs.append_pair( - 
"redemption_time_ms", - ¶ms.redemption_time_ms.unwrap_or(400).to_string(), - ); - - let use_keyterms = params - .model - .as_ref() - .map(|model| model.contains("nova-3")) - .unwrap_or(false); - - let param_name = if use_keyterms { "keyterm" } else { "keywords" }; - - for keyword in ¶ms.keywords { - query_pairs.append_pair(param_name, keyword); - } + append_language_query(&mut query_pairs, ¶ms); + + let model = params.model.as_deref().unwrap_or("hypr-whisper"); + let channel_string = channels.to_string(); + let sample_rate = RESAMPLED_SAMPLE_RATE_HZ.to_string(); + + query_pairs.append_pair("model", model); + query_pairs.append_pair("channels", &channel_string); + query_pairs.append_pair("filler_words", "false"); + query_pairs.append_pair("interim_results", "true"); + query_pairs.append_pair("mip_opt_out", "true"); + query_pairs.append_pair("sample_rate", &sample_rate); + query_pairs.append_pair("encoding", "linear16"); + query_pairs.append_pair("diarize", "true"); + query_pairs.append_pair("multichannel", "true"); + query_pairs.append_pair("punctuate", "true"); + query_pairs.append_pair("smart_format", "true"); + query_pairs.append_pair("vad_events", "false"); + query_pairs.append_pair("numerals", "true"); + + let redemption_time = params.redemption_time_ms.unwrap_or(400).to_string(); + query_pairs.append_pair("redemption_time_ms", &redemption_time); + + append_keyword_query(&mut query_pairs, ¶ms); } - let host = url.host_str().unwrap(); + url + } + + pub(crate) fn build_uri(&self, channels: u8) -> String { + let mut url = self.build_url(channels); - if host.contains("127.0.0.1") || host.contains("localhost") { - url.set_scheme("ws").unwrap(); - } else { - url.set_scheme("wss").unwrap(); + if let Some(host) = url.host_str() { + if host.contains("127.0.0.1") || host.contains("localhost") { + let _ = url.set_scheme("ws"); + } else { + let _ = url.set_scheme("wss"); + } } url.to_string() } - fn build_request(self, channels: u8) -> ClientRequestBuilder { + pub(crate) fn build_request(&self, channels: u8) -> hypr_ws::client::ClientRequestBuilder { let uri = self.build_uri(channels).parse().unwrap(); - let request = match self.api_key { - // https://github.com/deepgram/deepgram-rust-sdk/blob/d2f2723/src/lib.rs#L114-L115 - // https://github.com/deepgram/deepgram-rust-sdk/blob/d2f2723/src/lib.rs#L323-L324 - Some(key) => ClientRequestBuilder::new(uri) + let request = match &self.api_key { + Some(key) => hypr_ws::client::ClientRequestBuilder::new(uri) .with_header("Authorization", format!("Token {}", key)), - None => ClientRequestBuilder::new(uri), + None => hypr_ws::client::ClientRequestBuilder::new(uri), }; request } - pub fn build_single(self) -> ListenClient { - let request = self.build_request(1); + pub fn build_with_channels(self, channels: u8) -> ListenClient { + let request = self.build_request(channels); ListenClient { request } } - pub fn build_dual(self) -> ListenClientDual { - let request = self.build_request(2); - ListenClientDual { request } - } -} - -#[derive(Clone)] -pub struct ListenClient { - request: ClientRequestBuilder, -} - -type ListenClientInput = MixedMessage; -type ListenClientDualInput = MixedMessage<(bytes::Bytes, bytes::Bytes), ControlMessage>; - -impl WebSocketIO for ListenClient { - type Data = ListenClientInput; - type Input = ListenClientInput; - type Output = StreamResponse; - - fn to_input(data: Self::Data) -> Self::Input { - data - } + pub fn build_batch(self) -> BatchClient { + let url = self.build_batch_url(); - fn to_message(input: Self::Input) -> Message { - 
match input { - MixedMessage::Audio(data) => Message::Binary(data), - MixedMessage::Control(control) => { - Message::Text(serde_json::to_string(&control).unwrap().into()) - } + BatchClient { + client: reqwest::Client::new(), + url, + api_key: self.api_key, } } - fn from_message(msg: Message) -> Option { - match msg { - Message::Text(text) => serde_json::from_str::(&text).ok(), - _ => None, - } + pub fn build_single(self) -> ListenClient { + self.build_with_channels(1) } -} -#[derive(Clone)] -pub struct ListenClientDual { - request: ClientRequestBuilder, + pub fn build_dual(self) -> ListenClientDual { + let request = self.build_request(2); + ListenClientDual { request } + } } -impl WebSocketIO for ListenClientDual { - type Data = ListenClientDualInput; - type Input = ListenClientInput; - type Output = StreamResponse; - - fn to_input(data: Self::Data) -> Self::Input { - match data { - ListenClientDualInput::Audio((mic, speaker)) => { - let interleaved = interleave_audio(&mic, &speaker); - ListenClientInput::Audio(interleaved.into()) - } - ListenClientDualInput::Control(control) => ListenClientInput::Control(control), +pub(crate) fn append_language_query<'a>( + query_pairs: &mut Serializer<'a, UrlQuery>, + params: &owhisper_interface::ListenParams, +) { + match params.languages.len() { + 0 => { + query_pairs.append_pair("detect_language", "true"); } - } - - fn to_message(input: Self::Input) -> Message { - match input { - ListenClientInput::Audio(data) => Message::Binary(data), - ListenClientInput::Control(control) => { - Message::Text(serde_json::to_string(&control).unwrap().into()) + 1 => { + if let Some(language) = params.languages.first() { + let code = language.iso639().code(); + query_pairs.append_pair("language", code); + query_pairs.append_pair("languages", code); } } - } - - fn from_message(msg: Message) -> Option { - match msg { - Message::Text(text) => serde_json::from_str::(&text).ok(), - _ => None, + _ => { + query_pairs.append_pair("language", "multi"); + for language in ¶ms.languages { + let code = language.iso639().code(); + query_pairs.append_pair("languages", code); + } } } } -impl ListenClient { - pub fn builder() -> ListenClientBuilder { - ListenClientBuilder::default() +pub(crate) fn append_keyword_query<'a>( + query_pairs: &mut Serializer<'a, UrlQuery>, + params: &owhisper_interface::ListenParams, +) { + if params.keywords.is_empty() { + return; } - pub async fn from_realtime_audio( - &self, - audio_stream: impl Stream + Send + Unpin + 'static, - ) -> Result< - ( - impl Stream>, - hypr_ws::client::WebSocketHandle, - ), - hypr_ws::Error, - > { - let ws = WebSocketClient::new(self.request.clone()); - ws.from_audio::(audio_stream).await - } -} + let use_keyterms = params + .model + .as_ref() + .map(|model| model.contains("nova-3")) + .unwrap_or(false); + + let param_name = if use_keyterms { "keyterm" } else { "keywords" }; -impl ListenClientDual { - pub async fn from_realtime_audio( - &self, - stream: impl Stream + Send + Unpin + 'static, - ) -> Result< - ( - impl Stream>, - hypr_ws::client::WebSocketHandle, - ), - hypr_ws::Error, - > { - let ws = WebSocketClient::new(self.request.clone()); - ws.from_audio::(stream).await + for keyword in ¶ms.keywords { + query_pairs.append_pair(param_name, keyword); } } @@ -281,9 +232,9 @@ mod tests { use futures_util::StreamExt; use hypr_audio_utils::AudioFormatExt; + use live::{ListenClientDualInput, ListenClientInput}; #[tokio::test] - // cargo test -p owhisper-client test_client_deepgram -- --nocapture async fn test_client_deepgram() { 
let _ = tracing_subscriber::fmt::try_init(); @@ -317,7 +268,10 @@ mod tests { while let Some(result) = stream.next().await { match result { Ok(response) => match response { - StreamResponse::TranscriptResponse { channel, .. } => { + owhisper_interface::stream::StreamResponse::TranscriptResponse { + channel, + .. + } => { println!("{:?}", channel.alternatives.first().unwrap().transcript); } _ => {} @@ -328,7 +282,6 @@ mod tests { } #[tokio::test] - // cargo test -p owhisper-client test_owhisper_with_owhisper -- --nocapture async fn test_owhisper_with_owhisper() { let audio = rodio::Decoder::new(std::io::BufReader::new( std::fs::File::open(hypr_data::english_1::AUDIO_PATH).unwrap(), @@ -356,7 +309,6 @@ mod tests { } #[tokio::test] - // cargo test -p owhisper-client test_owhisper_with_deepgram -- --nocapture async fn test_owhisper_with_deepgram() { let audio = rodio::Decoder::new(std::io::BufReader::new( std::fs::File::open(hypr_data::english_1::AUDIO_PATH).unwrap(), @@ -390,7 +342,6 @@ mod tests { } #[tokio::test] - // cargo test -p owhisper-client test_client_ag -- --nocapture async fn test_client_ag() { let audio_1 = rodio::Decoder::new(std::io::BufReader::new( std::fs::File::open(hypr_data::english_1::AUDIO_PATH).unwrap(), diff --git a/owhisper/owhisper-client/src/live.rs b/owhisper/owhisper-client/src/live.rs new file mode 100644 index 000000000..24548868a --- /dev/null +++ b/owhisper/owhisper-client/src/live.rs @@ -0,0 +1,137 @@ +use futures_util::Stream; + +use hypr_ws::client::{ClientRequestBuilder, Message, WebSocketClient, WebSocketIO}; +use owhisper_interface::stream::StreamResponse; +use owhisper_interface::{ControlMessage, MixedMessage}; + +use crate::ListenClientBuilder; + +pub type ListenClientInput = MixedMessage; +pub type ListenClientDualInput = MixedMessage<(bytes::Bytes, bytes::Bytes), ControlMessage>; + +#[derive(Clone)] +pub struct ListenClient { + pub(crate) request: ClientRequestBuilder, +} + +#[derive(Clone)] +pub struct ListenClientDual { + pub(crate) request: ClientRequestBuilder, +} + +fn interleave_audio(mic: &[u8], speaker: &[u8]) -> Vec { + let mic_samples: Vec = mic + .chunks_exact(2) + .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]])) + .collect(); + let speaker_samples: Vec = speaker + .chunks_exact(2) + .map(|chunk| i16::from_le_bytes([chunk[0], chunk[1]])) + .collect(); + + let max_len = mic_samples.len().max(speaker_samples.len()); + let mut interleaved = Vec::with_capacity(max_len * 2 * 2); + + for i in 0..max_len { + let mic_sample = mic_samples.get(i).copied().unwrap_or(0); + let speaker_sample = speaker_samples.get(i).copied().unwrap_or(0); + interleaved.extend_from_slice(&mic_sample.to_le_bytes()); + interleaved.extend_from_slice(&speaker_sample.to_le_bytes()); + } + + interleaved +} + +impl WebSocketIO for ListenClient { + type Data = ListenClientInput; + type Input = ListenClientInput; + type Output = StreamResponse; + + fn to_input(data: Self::Data) -> Self::Input { + data + } + + fn to_message(input: Self::Input) -> Message { + match input { + MixedMessage::Audio(data) => Message::Binary(data), + MixedMessage::Control(control) => { + Message::Text(serde_json::to_string(&control).unwrap().into()) + } + } + } + + fn from_message(msg: Message) -> Option { + match msg { + Message::Text(text) => serde_json::from_str::(&text).ok(), + _ => None, + } + } +} + +impl WebSocketIO for ListenClientDual { + type Data = ListenClientDualInput; + type Input = ListenClientInput; + type Output = StreamResponse; + + fn to_input(data: Self::Data) -> Self::Input { 
+ match data { + ListenClientDualInput::Audio((mic, speaker)) => { + let interleaved = interleave_audio(&mic, &speaker); + ListenClientInput::Audio(interleaved.into()) + } + ListenClientDualInput::Control(control) => ListenClientInput::Control(control), + } + } + + fn to_message(input: Self::Input) -> Message { + match input { + ListenClientInput::Audio(data) => Message::Binary(data), + ListenClientInput::Control(control) => { + Message::Text(serde_json::to_string(&control).unwrap().into()) + } + } + } + + fn from_message(msg: Message) -> Option { + match msg { + Message::Text(text) => serde_json::from_str::(&text).ok(), + _ => None, + } + } +} + +impl ListenClient { + pub fn builder() -> ListenClientBuilder { + ListenClientBuilder::default() + } + + pub async fn from_realtime_audio( + &self, + audio_stream: impl Stream + Send + Unpin + 'static, + ) -> Result< + ( + impl Stream>, + hypr_ws::client::WebSocketHandle, + ), + hypr_ws::Error, + > { + let ws = WebSocketClient::new(self.request.clone()); + ws.from_audio::(audio_stream).await + } +} + +impl ListenClientDual { + pub async fn from_realtime_audio( + &self, + stream: impl Stream + Send + Unpin + 'static, + ) -> Result< + ( + impl Stream>, + hypr_ws::client::WebSocketHandle, + ), + hypr_ws::Error, + > { + let ws = WebSocketClient::new(self.request.clone()); + ws.from_audio::(stream).await + } +} diff --git a/owhisper/owhisper-interface/src/batch.rs b/owhisper/owhisper-interface/src/batch.rs new file mode 100644 index 000000000..535f96839 --- /dev/null +++ b/owhisper/owhisper-interface/src/batch.rs @@ -0,0 +1,99 @@ +use crate::common_derives; +use crate::stream; + +// https://github.com/deepgram/deepgram-rust-sdk/blob/0.7.0/src/common/batch_response.rs +// https://developers.deepgram.com/reference/speech-to-text/listen-pre-recorded + +common_derives! { + #[specta(rename = "BatchWord")] + pub struct Word { + pub word: String, + pub start: f64, + pub end: f64, + pub confidence: f64, + pub speaker: Option, + pub punctuated_word: Option, + } +} + +common_derives! { + #[specta(rename = "BatchAlternatives")] + pub struct Alternatives { + pub transcript: String, + pub confidence: f64, + #[serde(default)] + pub words: Vec, + } +} + +common_derives! { + #[specta(rename = "BatchChannel")] + pub struct Channel { + pub alternatives: Vec, + } +} + +common_derives! { + #[specta(rename = "BatchResults")] + pub struct Results { + pub channels: Vec, + } +} + +common_derives! { + #[specta(rename = "BatchResponse")] + pub struct Response { + pub metadata: serde_json::Value, + pub results: Results, + } +} + +impl From for Word { + fn from(word: stream::Word) -> Self { + Self { + word: word.word, + start: word.start, + end: word.end, + confidence: word.confidence, + speaker: word + .speaker + .and_then(|speaker| (speaker >= 0).then_some(speaker as usize)), + punctuated_word: word.punctuated_word, + } + } +} + +impl From for Alternatives { + fn from(mut alternatives: stream::Alternatives) -> Self { + let transcript = alternatives.transcript.trim().to_string(); + let words = alternatives + .words + .drain(..) + .map(Word::from) + .collect::>(); + + Self { + transcript, + confidence: alternatives.confidence, + words, + } + } +} + +impl From for Channel { + fn from(mut channel: stream::Channel) -> Self { + let alternatives = channel + .alternatives + .drain(..) 
+ .map(Alternatives::from) + .collect::>(); + + Self { alternatives } + } +} + +impl From for serde_json::Value { + fn from(metadata: stream::Metadata) -> Self { + serde_json::to_value(metadata).unwrap_or_else(|_| serde_json::json!({})) + } +} diff --git a/owhisper/owhisper-interface/src/lib.rs b/owhisper/owhisper-interface/src/lib.rs index 27d77c9eb..351c955f7 100644 --- a/owhisper/owhisper-interface/src/lib.rs +++ b/owhisper/owhisper-interface/src/lib.rs @@ -1,5 +1,6 @@ -mod stream; -pub use stream::*; +pub mod batch; + +pub mod stream; #[macro_export] macro_rules! common_derives { @@ -30,8 +31,22 @@ common_derives! { } } -impl From for Word2 { - fn from(word: Word) -> Self { +impl From for Word2 { + fn from(word: stream::Word) -> Self { + Word2 { + text: word.punctuated_word.unwrap_or(word.word), + speaker: word + .speaker + .map(|s| SpeakerIdentity::Unassigned { index: s as u8 }), + confidence: Some(word.confidence as f32), + start_ms: Some((word.start * 1000.0) as u64), + end_ms: Some((word.end * 1000.0) as u64), + } + } +} + +impl From for Word2 { + fn from(word: batch::Word) -> Self { Word2 { text: word.punctuated_word.unwrap_or(word.word), speaker: word diff --git a/owhisper/owhisper-interface/src/stream.rs b/owhisper/owhisper-interface/src/stream.rs index 0aad0bcad..7ab0c37ac 100644 --- a/owhisper/owhisper-interface/src/stream.rs +++ b/owhisper/owhisper-interface/src/stream.rs @@ -4,6 +4,7 @@ use crate::common_derives; // https://developers.deepgram.com/reference/speech-to-text-api/listen-streaming#receive.receiveTranscription common_derives! { + #[specta(rename = "StreamWord")] pub struct Word { pub word: String, pub start: f64, @@ -16,6 +17,7 @@ common_derives! { } common_derives! { + #[specta(rename = "StreamAlternatives")] pub struct Alternatives { pub transcript: String, pub words: Vec, @@ -26,12 +28,14 @@ common_derives! { } common_derives! { + #[specta(rename = "StreamChannel")] pub struct Channel { pub alternatives: Vec, } } common_derives! { + #[specta(rename = "StreamModelInfo")] pub struct ModelInfo { pub name: String, pub version: String, @@ -40,6 +44,7 @@ common_derives! { } common_derives! { + #[specta(rename = "StreamMetadata")] pub struct Metadata { pub request_id: String, pub model_info: ModelInfo, @@ -51,6 +56,7 @@ common_derives! { } common_derives! 
{ + #[specta(rename = "StreamExtra")] pub struct Extra { pub started_unix_secs: u64, } diff --git a/owhisper/owhisper-server/src/commands/run/realtime.rs b/owhisper/owhisper-server/src/commands/run/realtime.rs index 4003f9639..725b5881b 100644 --- a/owhisper/owhisper-server/src/commands/run/realtime.rs +++ b/owhisper/owhisper-server/src/commands/run/realtime.rs @@ -2,6 +2,7 @@ use std::sync::{Arc, Mutex}; use futures_util::StreamExt; use hypr_audio::AsyncSource; +use owhisper_interface::stream::StreamResponse; use tokio::sync::mpsc; use super::{ @@ -22,8 +23,7 @@ pub async fn handle_realtime_input( let (event_tx, mut event_rx) = create_event_channel(); - let (transcript_tx, transcript_rx) = - mpsc::unbounded_channel::(); + let (transcript_tx, transcript_rx) = mpsc::unbounded_channel::(); let amplitude_data = Arc::new(Mutex::new(AmplitudeData::new())); @@ -93,7 +93,7 @@ fn start_audio_task( port: u16, api_key: Option, model: String, - transcript_tx: mpsc::UnboundedSender, + transcript_tx: mpsc::UnboundedSender, amplitude_data: Arc>, ) -> std::sync::Arc { let should_stop = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false)); @@ -123,7 +123,7 @@ async fn run_audio_stream_with_stop( port: u16, api_key: Option, model: String, - transcript_tx: mpsc::UnboundedSender, + transcript_tx: mpsc::UnboundedSender, amplitude_data: Arc>, should_stop: std::sync::Arc, ) -> anyhow::Result<()> { @@ -184,7 +184,7 @@ async fn run_tui_with_events( available_devices: Vec, amplitude_data: Arc>, event_tx: TuiEventSender, - mut transcript_rx: mpsc::UnboundedReceiver, + mut transcript_rx: mpsc::UnboundedReceiver, ) -> anyhow::Result<()> { use ratatui::crossterm::event::{self, Event, KeyCode}; use std::time::{Duration, Instant}; diff --git a/owhisper/owhisper-server/src/commands/run/state.rs b/owhisper/owhisper-server/src/commands/run/state.rs index 2626eeaa9..b5ebd35fb 100644 --- a/owhisper/owhisper-server/src/commands/run/state.rs +++ b/owhisper/owhisper-server/src/commands/run/state.rs @@ -1,5 +1,7 @@ use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; +use owhisper_interface::stream::StreamResponse; + use super::event::TuiEventSender; use ratatui::widgets::{ListState, ScrollbarState}; @@ -56,9 +58,9 @@ impl RunState { self.event_sender = Some(sender); } - pub fn process_chunk(&mut self, chunk: owhisper_interface::StreamResponse) { + pub fn process_chunk(&mut self, chunk: StreamResponse) { let words = match chunk { - owhisper_interface::StreamResponse::TranscriptResponse { channel, .. } => channel + StreamResponse::TranscriptResponse { channel, .. } => channel .alternatives .first() .map(|alt| { diff --git a/owhisper/owhisper-server/src/misc.rs b/owhisper/owhisper-server/src/misc.rs index d5dce6d5b..eed09b796 100644 --- a/owhisper/owhisper-server/src/misc.rs +++ b/owhisper/owhisper-server/src/misc.rs @@ -3,7 +3,8 @@ const LOGO: &str = include_str!("../logo/ascii.txt"); pub fn print_logo() { println!( "{}{}\n", - LOGO, "Thank you for using OWhisper! We โ™ก our users!\nBug report: https://github.com/fastrepl/hyprnote/issues/new?labels=owhisper" + LOGO, + "Thank you for using OWhisper! 
We โ™ก our users!\nBug report: https://github.com/fastrepl/hyprnote/issues/new?labels=owhisper" ); } diff --git a/packages/codemirror/package.json b/packages/codemirror/package.json index c7f4fb265..ffcda714d 100644 --- a/packages/codemirror/package.json +++ b/packages/codemirror/package.json @@ -16,7 +16,7 @@ "@codemirror/state": "^6.5.2", "@codemirror/view": "^6.38.6", "@lezer/highlight": "^1.2.3", - "@uiw/react-codemirror": "^4.25.2" + "@uiw/react-codemirror": "^4.25.3" }, "devDependencies": { "@types/node": "^22.19.0", diff --git a/packages/obsidian/package.json b/packages/obsidian/package.json index 50265415f..6bd9654bd 100644 --- a/packages/obsidian/package.json +++ b/packages/obsidian/package.json @@ -17,6 +17,6 @@ }, "dependencies": { "@hey-api/client-fetch": "^0.8.4", - "@tanstack/react-query": "^5.90.6" + "@tanstack/react-query": "^5.90.7" } } diff --git a/packages/tiptap/src/shared/extensions/placeholder.tsx b/packages/tiptap/src/shared/extensions/placeholder.tsx index ba4582468..1fed3ff05 100644 --- a/packages/tiptap/src/shared/extensions/placeholder.tsx +++ b/packages/tiptap/src/shared/extensions/placeholder.tsx @@ -1,4 +1,4 @@ -import type { ReactElement } from "react"; +import { Component, createElement, type ReactElement } from "react"; import ReactDOM from "react-dom/client"; import { type Editor, Extension, isNodeEmpty } from "@tiptap/core"; @@ -92,12 +92,18 @@ export const Placeholder = Extension.create({ && placeholderContent !== null && "type" in placeholderContent ) { + const wrappedContent = createElement( + PlaceholderErrorBoundary, + null, + placeholderContent as ReactElement, + ); + const decoration = Decoration.widget( pos + 1, () => { const existing = containers.get(pos); if (existing) { - scheduleReactRender(existing.root, placeholderContent as ReactElement); + scheduleReactRender(existing.root, wrappedContent); return existing.container; } @@ -107,7 +113,7 @@ export const Placeholder = Extension.create({ const root = ReactDOM.createRoot(container); containers.set(pos, { container, root }); - scheduleReactRender(root, placeholderContent as ReactElement); + scheduleReactRender(root, wrappedContent); return container; }, @@ -155,3 +161,31 @@ export const Placeholder = Extension.create({ ]; }, }); + +type PlaceholderErrorBoundaryProps = { children: ReactElement }; +type PlaceholderErrorBoundaryState = { hasError: boolean }; + +class PlaceholderErrorBoundary extends Component< + PlaceholderErrorBoundaryProps, + PlaceholderErrorBoundaryState +> { + constructor(props: PlaceholderErrorBoundaryProps) { + super(props); + this.state = { hasError: false }; + } + + static getDerivedStateFromError(): PlaceholderErrorBoundaryState { + return { hasError: true }; + } + + componentDidCatch(error: Error) { + console.error(error); + } + + render() { + if (this.state.hasError) { + return null; + } + return this.props.children; + } +} diff --git a/packages/ui/package.json b/packages/ui/package.json index 00ec705ee..fb89faf98 100644 --- a/packages/ui/package.json +++ b/packages/ui/package.json @@ -51,7 +51,7 @@ "zod": "^4.1.12" }, "devDependencies": { - "@tanstack/react-query": "^5.90.6", + "@tanstack/react-query": "^5.90.7", "@types/node": "^22.19.0", "@types/react": "^19.2.2", "@types/react-dom": "^19.2.2", diff --git a/packages/utils/package.json b/packages/utils/package.json index 8ed202e1a..be50607c1 100644 --- a/packages/utils/package.json +++ b/packages/utils/package.json @@ -12,7 +12,7 @@ "@hypr/plugin-windows": "workspace:^", "@tauri-apps/api": "^2.9.0", 
"@tauri-apps/plugin-http": "^2.5.4", - "ai": "^5.0.87", + "ai": "^5.0.89", "clsx": "^2.1.1", "date-fns": "^4.1.0", "mutative": "^1.3.0", diff --git a/plugins/analytics/js/bindings.gen.ts b/plugins/analytics/js/bindings.gen.ts index f015b9da9..e62167bf2 100644 --- a/plugins/analytics/js/bindings.gen.ts +++ b/plugins/analytics/js/bindings.gen.ts @@ -7,17 +7,37 @@ export const commands = { -async event(payload: AnalyticsPayload) : Promise { - return await TAURI_INVOKE("plugin:analytics|event", { payload }); +async event(payload: AnalyticsPayload) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:analytics|event", { payload }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async setProperties(payload: PropertiesPayload) : Promise { - return await TAURI_INVOKE("plugin:analytics|set_properties", { payload }); +async setProperties(payload: PropertiesPayload) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:analytics|set_properties", { payload }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async setDisabled(disabled: boolean) : Promise { - return await TAURI_INVOKE("plugin:analytics|set_disabled", { disabled }); +async setDisabled(disabled: boolean) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:analytics|set_disabled", { disabled }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async isDisabled() : Promise { - return await TAURI_INVOKE("plugin:analytics|is_disabled"); +async isDisabled() : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:analytics|is_disabled") }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} } } diff --git a/plugins/apple-calendar/js/bindings.gen.ts b/plugins/apple-calendar/js/bindings.gen.ts index 9bd058375..8432a766a 100644 --- a/plugins/apple-calendar/js/bindings.gen.ts +++ b/plugins/apple-calendar/js/bindings.gen.ts @@ -7,14 +7,29 @@ export const commands = { -async openCalendar() : Promise { - return await TAURI_INVOKE("plugin:apple-calendar|open_calendar"); +async openCalendar() : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:apple-calendar|open_calendar") }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async openCalendarAccessSettings() : Promise { - return await TAURI_INVOKE("plugin:apple-calendar|open_calendar_access_settings"); +async openCalendarAccessSettings() : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:apple-calendar|open_calendar_access_settings") }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async openContactsAccessSettings() : Promise { - return await TAURI_INVOKE("plugin:apple-calendar|open_contacts_access_settings"); +async openContactsAccessSettings() : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:apple-calendar|open_contacts_access_settings") }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, async calendarAccessStatus() : Promise { return await TAURI_INVOKE("plugin:apple-calendar|calendar_access_status"); @@ -28,11 +43,21 @@ async requestCalendarAccess() : Promise { async requestContactsAccess() : Promise { 
await TAURI_INVOKE("plugin:apple-calendar|request_contacts_access"); }, -async syncCalendars() : Promise { - return await TAURI_INVOKE("plugin:apple-calendar|sync_calendars"); +async syncCalendars() : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:apple-calendar|sync_calendars") }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async syncEvents() : Promise { - return await TAURI_INVOKE("plugin:apple-calendar|sync_events"); +async syncEvents() : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:apple-calendar|sync_events") }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} } } diff --git a/plugins/db/js/bindings.gen.ts b/plugins/db/js/bindings.gen.ts index 443f458e8..ca301a832 100644 --- a/plugins/db/js/bindings.gen.ts +++ b/plugins/db/js/bindings.gen.ts @@ -7,161 +7,421 @@ export const commands = { -async getEvent(id: string) : Promise { - return await TAURI_INVOKE("plugin:db|get_event", { id }); +async getEvent(id: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:db|get_event", { id }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async listEvents(filter: ListEventFilter | null) : Promise { - return await TAURI_INVOKE("plugin:db|list_events", { filter }); +async listEvents(filter: ListEventFilter | null) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:db|list_events", { filter }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async getCalendar(calendarId: string) : Promise { - return await TAURI_INVOKE("plugin:db|get_calendar", { calendarId }); +async getCalendar(calendarId: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:db|get_calendar", { calendarId }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async listCalendars(userId: string) : Promise { - return await TAURI_INVOKE("plugin:db|list_calendars", { userId }); +async listCalendars(userId: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:db|list_calendars", { userId }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async upsertCalendar(calendar: Calendar) : Promise { - return await TAURI_INVOKE("plugin:db|upsert_calendar", { calendar }); +async upsertCalendar(calendar: Calendar) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:db|upsert_calendar", { calendar }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async toggleCalendarSelected(trackingId: string) : Promise { - return await TAURI_INVOKE("plugin:db|toggle_calendar_selected", { trackingId }); +async toggleCalendarSelected(trackingId: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:db|toggle_calendar_selected", { trackingId }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async upsertSession(session: Session) : Promise { - return await TAURI_INVOKE("plugin:db|upsert_session", { session }); +async upsertSession(session: Session) : Promise> { + try { + return { status: "ok", data: await 
TAURI_INVOKE("plugin:db|upsert_session", { session }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async visitSession(id: string) : Promise { - return await TAURI_INVOKE("plugin:db|visit_session", { id }); +async visitSession(id: string) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:db|visit_session", { id }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async listTemplates() : Promise { - return await TAURI_INVOKE("plugin:db|list_templates"); +async listTemplates() : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("plugin:db|list_templates") }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} }, -async upsertTemplate(template: Template) : Promise