Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/core/decoder/Metaspace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import { TokenizerConfigDecoderMetaspace } from "@static/tokenizer";
* Metaspace decoder: extends the Decoder class and decodes Metaspace tokenization.
*/
class Metaspace extends Decoder {
add_prefix_space?: boolean;
replacement: string;

/**
Expand All @@ -15,15 +14,14 @@ class Metaspace extends Decoder {
constructor(config: TokenizerConfigDecoderMetaspace) {
super(config);

this.add_prefix_space = config.add_prefix_space;
this.replacement = config.replacement ?? "▁";
}

decode_chain(tokens: string[]): string[] {
const result = [];
for (let i = 0; i < tokens.length; ++i) {
let normalized = tokens[i].replaceAll(this.replacement, " ");
if (this.add_prefix_space && i == 0 && normalized.startsWith(" ")) {
if (i == 0 && normalized.startsWith(" ")) {
normalized = normalized.substring(1);
}
result.push(normalized);
Expand Down
7 changes: 1 addition & 6 deletions src/core/preTokenizer/Metaspace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ import type {
* and returns a list of tokens.
*/
class Metaspace extends PreTokenizer {
/** Whether to add a prefix space to the first token. */
add_prefix_space: boolean;
/** The character to replace spaces with. */
replacement: string;
/** An optional string representation of the replacement character. */
Expand All @@ -26,7 +24,6 @@ class Metaspace extends PreTokenizer {
constructor(config: TokenizerConfigPreTokenizerMetaspace) {
super();

this.add_prefix_space = config.add_prefix_space ?? false;
this.replacement = config.replacement ?? "▁";
this.str_rep = config.str_rep || this.replacement;
this.prepend_scheme = config.prepend_scheme ?? "always";
Expand All @@ -45,9 +42,7 @@ class Metaspace extends PreTokenizer {

if (
// We add a prefix space if:
// (1) The add_prefix_space option is enabled and the normalized
// token does not already start with the replacement character.
this.add_prefix_space &&
// (1) The normalized token does not already start with the replacement character.
!normalized.startsWith(this.replacement) &&
// and (2) either:
// (a) prepend_scheme is 'always'
Expand Down
2 changes: 0 additions & 2 deletions src/static/tokenizer.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ export interface TokenizerConfigPreTokenizerMetaspace {
type: "Metaspace";
replacement?: string;
str_rep?: string;
add_prefix_space?: boolean;
prepend_scheme?: PrependScheme;
}

Expand Down Expand Up @@ -355,7 +354,6 @@ export interface TokenizerConfigDecoderWordPiece {
/**
 * Configuration object accepted by the Metaspace decoder's constructor.
 * Every field except `type` is optional.
 */
export interface TokenizerConfigDecoderMetaspace {
/** Discriminant literal identifying this decoder configuration. */
type: "Metaspace";
/**
 * Marker string that the decoder swaps for a plain space in every token.
 * The decoder falls back to "▁" when this is omitted.
 */
replacement?: string;
/**
 * Optional flag related to trimming the leading space of the first decoded token.
 * NOTE(review): the decoder shown alongside this declaration contains both a guard
 * that checks this flag and one that does not, which looks like the field is being
 * retired — confirm whether it is still read before relying on it.
 */
add_prefix_space?: boolean;
/**
 * One of "always", "never" or "first".
 * NOTE(review): not read by the decoder code visible in this chunk — confirm where
 * (or whether) the decoder consumes it.
 */
prepend_scheme?: "always" | "never" | "first";
}

Expand Down