From d47edd8cf0dfad7cb614e888661afdbf089f9cf4 Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Mon, 10 Nov 2025 17:05:38 -0500 Subject: [PATCH] Remove Metaspace add_prefix_space logic Always true, since other logic is already handled by prepend_scheme --- src/core/decoder/Metaspace.ts | 4 +--- src/core/preTokenizer/Metaspace.ts | 7 +------ src/static/tokenizer.d.ts | 2 -- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/core/decoder/Metaspace.ts b/src/core/decoder/Metaspace.ts index e73f0b0..2911211 100644 --- a/src/core/decoder/Metaspace.ts +++ b/src/core/decoder/Metaspace.ts @@ -5,7 +5,6 @@ import { TokenizerConfigDecoderMetaspace } from "@static/tokenizer"; * MetaspaceDecoder class extends the Decoder class and decodes Metaspace tokenization. */ class Metaspace extends Decoder { - add_prefix_space?: boolean; replacement: string; /** @@ -15,7 +14,6 @@ class Metaspace extends Decoder { constructor(config: TokenizerConfigDecoderMetaspace) { super(config); - this.add_prefix_space = config.add_prefix_space; this.replacement = config.replacement ?? "▁"; } @@ -23,7 +21,7 @@ class Metaspace extends Decoder { const result = []; for (let i = 0; i < tokens.length; ++i) { let normalized = tokens[i].replaceAll(this.replacement, " "); - if (this.add_prefix_space && i == 0 && normalized.startsWith(" ")) { + if (i == 0 && normalized.startsWith(" ")) { normalized = normalized.substring(1); } result.push(normalized); diff --git a/src/core/preTokenizer/Metaspace.ts b/src/core/preTokenizer/Metaspace.ts index 59996ce..4bc8fb6 100644 --- a/src/core/preTokenizer/Metaspace.ts +++ b/src/core/preTokenizer/Metaspace.ts @@ -11,8 +11,6 @@ import type { * and returns a list of tokens. */ class Metaspace extends PreTokenizer { - /** Whether to add a prefix space to the first token. */ - add_prefix_space: boolean; /** The character to replace spaces with. */ replacement: string; /** An optional string representation of the replacement character. */ @@ -26,7 +24,6 @@ class Metaspace extends PreTokenizer { constructor(config: TokenizerConfigPreTokenizerMetaspace) { super(); - this.add_prefix_space = config.add_prefix_space ?? false; this.replacement = config.replacement ?? "▁"; this.str_rep = config.str_rep || this.replacement; this.prepend_scheme = config.prepend_scheme ?? "always"; @@ -45,9 +42,7 @@ class Metaspace extends PreTokenizer { if ( // We add a prefix space if: - // (1) The add_prefix_space option is enabled and the normalized - // token does not already start with the replacement character. - this.add_prefix_space && + // (1) The normalized token does not already start with the replacement character. !normalized.startsWith(this.replacement) && // and (2) either: // (a) prepend_scheme is 'always' diff --git a/src/static/tokenizer.d.ts b/src/static/tokenizer.d.ts index ccebd1b..1d05c3c 100644 --- a/src/static/tokenizer.d.ts +++ b/src/static/tokenizer.d.ts @@ -205,7 +205,6 @@ export interface TokenizerConfigPreTokenizerMetaspace { type: "Metaspace"; replacement?: string; str_rep?: string; - add_prefix_space?: boolean; prepend_scheme?: PrependScheme; } @@ -355,7 +354,6 @@ export interface TokenizerConfigDecoderWordPiece { export interface TokenizerConfigDecoderMetaspace { type: "Metaspace"; replacement?: string; - add_prefix_space?: boolean; prepend_scheme?: "always" | "never" | "first"; }