diff --git a/cursorless-talon/src/get_grapheme_spoken_form_entries.py b/cursorless-talon/src/get_grapheme_spoken_form_entries.py
new file mode 100644
index 0000000000..8224de800d
--- /dev/null
+++ b/cursorless-talon/src/get_grapheme_spoken_form_entries.py
@@ -0,0 +1,81 @@
+import re
+import typing
+from collections import defaultdict
+from typing import Iterator, Mapping
+
+from talon import app, registry
+
+from .spoken_forms_output import SpokenFormOutputEntry
+
+grapheme_capture_name = "user.any_alphanumeric_key"
+
+
+def get_grapheme_spoken_form_entries() -> list[SpokenFormOutputEntry]:
+    """
+    Return a "grapheme" spoken form entry for each value in the Talon lists
+    that the grapheme capture expands to.
+    """
+    return [
+        {
+            "type": "grapheme",
+            "id": grapheme_id,
+            "spokenForms": spoken_forms,
+        }
+        for symbol_list in generate_lists_from_capture(grapheme_capture_name)
+        for grapheme_id, spoken_forms in get_id_to_spoken_form_map(symbol_list).items()
+    ]
+
+
+def generate_lists_from_capture(capture_name) -> Iterator[str]:
+    """
+    Given the name of a capture, yield the names of each list that the capture
+    expands to. Note that we are somewhat strict about the format of the
+    capture rule, and will not handle all possible cases.
+    """
+    if capture_name.startswith("self."):
+        capture_name = "user." + capture_name[5:]
+    try:
+        rule = registry.captures[capture_name][0].rule.rule
+    except (KeyError, IndexError, AttributeError):
+        # Unknown capture, no registered implementation, or no rule on it
+        app.notify("Error constructing spoken forms for graphemes")
+        print(f"Error getting rule for capture {capture_name}")
+        return
+    # Drop surrounding whitespace and one pair of enclosing parentheses
+    rule = rule.strip()
+    if rule.startswith("(") and rule.endswith(")"):
+        rule = rule[1:-1]
+    rule = rule.strip()
+    # Each "|"-separated alternative must be a <capture> or a {list}
+    components = re.split(r"\s*\|\s*", rule)
+    for component in components:
+        if component.startswith("<") and component.endswith(">"):
+            yield from generate_lists_from_capture(component[1:-1])
+        elif component.startswith("{") and component.endswith("}"):
+            component = component[1:-1]
+            if component.startswith("self."):
+                component = "user." + component[5:]
+            yield component
+        else:
+            app.notify("Error constructing spoken forms for graphemes")
+            print(
+                f"Unexpected component {component} while processing rule {rule} for capture {capture_name}"
+            )
+
+
+def get_id_to_spoken_form_map(list_name: str) -> Mapping[str, list[str]]:
+    """
+    Given the name of a Talon list, return a mapping from the values in that
+    list to the list of spoken forms that map to the given value.
+    """
+    try:
+        raw_list = typing.cast(dict[str, str], registry.lists[list_name][0]).copy()
+    except (KeyError, IndexError):
+        app.notify(f"Error getting list {list_name}")
+        return {}
+
+    inverted_list: defaultdict[str, list[str]] = defaultdict(list)
+    for key, value in raw_list.items():
+        inverted_list[value].append(key)
+
+    return inverted_list
diff --git a/cursorless-talon/src/spoken_forms.py b/cursorless-talon/src/spoken_forms.py
index f554fd0a77..1525655d83 100644
--- a/cursorless-talon/src/spoken_forms.py
+++ b/cursorless-talon/src/spoken_forms.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 from typing import Callable, Concatenate, ParamSpec, TypeVar
 
-from talon import app, fs
+from talon import app, cron, fs, registry
 
 from .actions.actions import ACTION_LIST_NAMES
 from .csv_overrides import (
@@ -11,6 +11,10 @@
     SpokenFormEntry,
     init_csv_and_watch_changes,
 )
+from .get_grapheme_spoken_form_entries import (
+    get_grapheme_spoken_form_entries,
+    grapheme_capture_name,
+)
 from .marks.decorated_mark import init_hats
 from .spoken_forms_output import SpokenFormsOutput
 
@@ -99,14 +103,17 @@ def update():
     def update_spoken_forms_output():
         spoken_forms_output.write(
             [
-                {
-                    "type": LIST_TO_TYPE_MAP[entry.list_name],
-                    "id": entry.id,
-                    "spokenForms": entry.spoken_forms,
-                }
-                for spoken_form_list in custom_spoken_forms.values()
-                for entry in spoken_form_list
-                if entry.list_name in LIST_TO_TYPE_MAP
+                *[
+                    {
+                        "type": LIST_TO_TYPE_MAP[entry.list_name],
+                        "id": entry.id,
+                        "spokenForms": entry.spoken_forms,
+                    }
+                    for spoken_form_list in custom_spoken_forms.values()
+                    for entry in spoken_form_list
+                    if entry.list_name in LIST_TO_TYPE_MAP
+                ],
+                *get_grapheme_spoken_form_entries(),
             ]
         )
 
@@ -184,9 +191,35 @@ def on_watch(path, flags):
         update()
 
 
+update_captures_cron = None
+
+
+def update_captures_debounced(updated_captures: set[str]):
+    """
+    Schedule a spoken forms update when the grapheme capture has changed,
+    replacing any update that is already pending.
+    """
+    if grapheme_capture_name not in updated_captures:
+        return
+
+    global update_captures_cron
+    cron.cancel(update_captures_cron)
+    update_captures_cron = cron.after("100ms", update_captures)
+
+
+def update_captures():
+    """Clear the pending job handle and run the scheduled update."""
+    global update_captures_cron
+    update_captures_cron = None
+
+    update()
+
+
 def on_ready():
     update()
 
+    registry.register("update_captures", update_captures_debounced)
+
     fs.watch(str(JSON_FILE.parent), on_watch)
 
 
diff --git a/cursorless-talon/src/spoken_forms_output.py b/cursorless-talon/src/spoken_forms_output.py
index 20f1f8623b..6961c98ec2 100644
--- a/cursorless-talon/src/spoken_forms_output.py
+++ b/cursorless-talon/src/spoken_forms_output.py
@@ -8,7 +8,7 @@
 STATE_JSON_VERSION_NUMBER = 0
 
 
-class SpokenFormEntry(TypedDict):
+class SpokenFormOutputEntry(TypedDict):
     type: str
     id: str
     spokenForms: list[str]
@@ -29,7 +29,7 @@ def init(self):
             print(error_message)
             app.notify(error_message)
 
-    def write(self, spoken_forms: list[SpokenFormEntry]):
+    def write(self, spoken_forms: list[SpokenFormOutputEntry]):
         with open(SPOKEN_FORMS_OUTPUT_PATH, "w", encoding="UTF-8") as out:
             try:
                 out.write(
diff --git a/data/fixtures/recorded/hatTokenMap/sendEmoji.yml b/data/fixtures/recorded/hatTokenMap/sendEmoji.yml
index 2de15ac24f..30b324d1c6 100644
--- a/data/fixtures/recorded/hatTokenMap/sendEmoji.yml
+++ b/data/fixtures/recorded/hatTokenMap/sendEmoji.yml
@@ -7,7 +7,7 @@ command:
     - type: primitive
       mark: {type: decoratedSymbol, symbolColor: default, character: 😄}
   usePrePhraseSnapshot: true
-spokenFormError: Unknown character '😄'
+spokenFormError: grapheme '😄'
 initialState:
   documentContents: |
     😄
diff --git a/data/fixtures/recorded/scope/glyph/clearNextGlyphOnyx.yml b/data/fixtures/recorded/scope/glyph/clearNextGlyphOnyx.yml
index 84e35bc89d..e0d287f0ed 100644
--- a/data/fixtures/recorded/scope/glyph/clearNextGlyphOnyx.yml
+++ b/data/fixtures/recorded/scope/glyph/clearNextGlyphOnyx.yml
@@ -13,7 +13,7 @@ command:
           length: 1
           direction: forward
   usePrePhraseSnapshot: true
-spokenFormError: Unknown character 'å'
+spokenFormError: grapheme 'å'
 initialState:
   documentContents: abå
   selections:
diff --git a/packages/cursorless-engine/src/generateSpokenForm/CustomSpokenFormGeneratorImpl.test.ts b/packages/cursorless-engine/src/generateSpokenForm/CustomSpokenFormGeneratorImpl.test.ts
index 1f8bb8a2fb..4cbbf8e5fc 100644
--- a/packages/cursorless-engine/src/generateSpokenForm/CustomSpokenFormGeneratorImpl.test.ts
+++ b/packages/cursorless-engine/src/generateSpokenForm/CustomSpokenFormGeneratorImpl.test.ts
@@ -19,6 +19,11 @@ suite("CustomSpokenFormGeneratorImpl", async function () {
             id: "setSelection",
             spokenForms: ["bar"],
           },
+          {
+            type: "grapheme",
+            id: "a",
+            spokenForms: ["alabaster"],
+          },
         ];
       },
       onDidChange: () => ({ dispose() {} }),
@@ -33,7 +38,7 @@ suite("CustomSpokenFormGeneratorImpl", async function () {
       }),
       {
         type: "success",
-        spokenForms: ["foo air"],
+        spokenForms: ["foo alabaster"],
       },
     );
     assert.deepStrictEqual(
diff --git a/packages/cursorless-engine/src/generateSpokenForm/primitiveTargetToSpokenForm.ts b/packages/cursorless-engine/src/generateSpokenForm/primitiveTargetToSpokenForm.ts
index e05a180c0f..603590cb0e 100644
--- a/packages/cursorless-engine/src/generateSpokenForm/primitiveTargetToSpokenForm.ts
+++ b/packages/cursorless-engine/src/generateSpokenForm/primitiveTargetToSpokenForm.ts
@@ -20,7 +20,6 @@ import {
   numberToSpokenForm,
   ordinalToSpokenForm,
 } from "./defaultSpokenForms/numbers";
-import { characterToSpokenForm } from "./defaultSpokenForms/characters";
 import { SpokenFormComponentMap } from "./getSpokenFormComponentMap";
 import { SpokenFormComponent } from "./SpokenFormComponent";
 
@@ -234,7 +233,11 @@ export class PrimitiveTargetSpokenFormGenerator {
       case "glyph":
         return [
           this.spokenFormMap.complexScopeTypeType.glyph,
-          characterToSpokenForm(scopeType.character),
+          getSpokenFormStrict(
+            this.spokenFormMap.grapheme,
+            "grapheme",
+            scopeType.character,
+          ),
         ];
       case "surroundingPair": {
         const pair = this.spokenFormMap.pairedDelimiter[scopeType.delimiter];
@@ -274,14 +277,20 @@ export class PrimitiveTargetSpokenFormGenerator {
     switch (mark.type) {
       case "decoratedSymbol": {
         const [color, shape] = mark.symbolColor.split("-");
-        const components: string[] = [];
+        const components: SpokenFormComponent[] = [];
         if (color !== "default") {
           components.push(hatColorToSpokenForm(color));
         }
         if (shape != null) {
           components.push(hatShapeToSpokenForm(shape));
         }
-        components.push(characterToSpokenForm(mark.character));
+        components.push(
+          getSpokenFormStrict(
+            this.spokenFormMap.grapheme,
+            "grapheme",
+            mark.character,
+          ),
+        );
         return components;
       }
 
@@ -375,3 +384,21 @@ function pluralize(name: SpokenFormComponent): SpokenFormComponent {
 function pluralizeString(name: string): string {
   return `${name}s`;
 }
+
+/**
+ * Looks up the spoken form for `key` in `map`, throwing a
+ * {@link NoSpokenFormError} naming `typeName` and `key` if there is none.
+ */
+function getSpokenFormStrict(
+  map: Readonly<Record<string, SpokenFormComponent>>,
+  typeName: string,
+  key: string,
+): SpokenFormComponent {
+  const spokenForm = map[key];
+
+  if (spokenForm == null) {
+    throw new NoSpokenFormError(`${typeName} '${key}'`);
+  }
+
+  return spokenForm;
+}
diff --git a/packages/cursorless-engine/src/scopeProviders/TalonSpokenForms.ts b/packages/cursorless-engine/src/scopeProviders/TalonSpokenForms.ts
index 5239f2d840..0f44437b69 100644
--- a/packages/cursorless-engine/src/scopeProviders/TalonSpokenForms.ts
+++ b/packages/cursorless-engine/src/scopeProviders/TalonSpokenForms.ts
@@ -24,6 +24,7 @@ export const SUPPORTED_ENTRY_TYPES = [
   "pairedDelimiter",
   "action",
   "customAction",
+  "grapheme",
 ] as const;
 
 type SupportedEntryType = (typeof SUPPORTED_ENTRY_TYPES)[number];
diff --git a/packages/cursorless-engine/src/spokenForms/SpokenFormType.ts b/packages/cursorless-engine/src/spokenForms/SpokenFormType.ts
index ed1a63ce25..dd92c770b7 100644
--- a/packages/cursorless-engine/src/spokenForms/SpokenFormType.ts
+++ b/packages/cursorless-engine/src/spokenForms/SpokenFormType.ts
@@ -42,6 +42,11 @@ export interface SpokenFormMapKeyTypes {
    * custom actions corresponding to id's of VSCode commands.
    */
   customAction: string;
+
+  /**
+   * Individual characters / graphemes, eg `a` or `/`.
+   */
+  grapheme: string;
 }
 
 /**
diff --git a/packages/cursorless-engine/src/spokenForms/defaultSpokenFormMapCore.ts b/packages/cursorless-engine/src/spokenForms/defaultSpokenFormMapCore.ts
index d7036637db..5581afdd02 100644
--- a/packages/cursorless-engine/src/spokenForms/defaultSpokenFormMapCore.ts
+++ b/packages/cursorless-engine/src/spokenForms/defaultSpokenFormMapCore.ts
@@ -1,4 +1,5 @@
 import { DefaultSpokenFormMapDefinition } from "./defaultSpokenFormMap.types";
+import { graphemeDefaultSpokenForms } from "./graphemes";
 import { isDisabledByDefault, isPrivate } from "./spokenFormMapUtil";
 
 /**
@@ -211,4 +212,5 @@ export const defaultSpokenFormMapCore: DefaultSpokenFormMapDefinition = {
     // nextHomophone: "phones",
   },
   customAction: {},
+  grapheme: graphemeDefaultSpokenForms,
 };
diff --git a/packages/cursorless-engine/src/generateSpokenForm/defaultSpokenForms/characters.ts b/packages/cursorless-engine/src/spokenForms/graphemes.ts
similarity index 80%
rename from packages/cursorless-engine/src/generateSpokenForm/defaultSpokenForms/characters.ts
rename to packages/cursorless-engine/src/spokenForms/graphemes.ts
index 99bbf99751..b2448e1bf3 100644
--- a/packages/cursorless-engine/src/generateSpokenForm/defaultSpokenForms/characters.ts
+++ b/packages/cursorless-engine/src/spokenForms/graphemes.ts
@@ -1,7 +1,5 @@
 /* eslint-disable @typescript-eslint/naming-convention */
 
-import { NoSpokenFormError } from "../NoSpokenFormError";
-
 // https://github.com/talonhub/community/blob/9acb6c9659bb0c9b794a7b7126d025603b4ed726/core/keys/keys.py
 
 const alphabet = Object.fromEntries(
@@ -59,16 +57,8 @@ const symbols = {
   "\uFFFD": "special",
 };
 
-const characters: Record<string, string> = {
+export const graphemeDefaultSpokenForms: Record<string, string> = {
   ...alphabet,
   ...digits,
   ...symbols,
 };
-
-export function characterToSpokenForm(char: string): string {
-  const result = characters[char];
-  if (result == null) {
-    throw new NoSpokenFormError(`Unknown character '${char}'`);
-  }
-  return result;
-}