/
Cargo.toml
80 lines (71 loc) 路 1.99 KB
/
Cargo.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
[package]
authors = ["Anthony MOI <m.anthony.moi@gmail.com>", "Nicolas Patry <patry.nicolas@protonmail.com>"]
edition = "2018"
name = "tokenizers"
version = "0.19.1-dev.0"
homepage = "https://github.com/huggingface/tokenizers"
repository = "https://github.com/huggingface/tokenizers"
documentation = "https://docs.rs/tokenizers/"
license = "Apache-2.0"
keywords = ["tokenizer", "NLP", "huggingface", "BPE", "WordPiece"]
readme = "./README.md"
description = """
Provides an implementation of today's most used tokenizers,
with a focus on performances and versatility.
"""
exclude = [ "rust-toolchain", "target/*", "Cargo.lock", "benches/*.txt", "benches/*.json", "data/*" ]
[lib]
name = "tokenizers"
path = "src/lib.rs"
bench = false
[[bench]]
name = "bpe_benchmark"
harness = false
[[bench]]
name = "bert_benchmark"
harness = false
[[bench]]
name = "layout_benchmark"
harness = false
[[bench]]
name = "unigram_benchmark"
harness = false
[dependencies]
lazy_static = "1.4"
rand = "0.8"
onig = { version = "6.4", default-features = false, optional = true }
regex = "1.10"
regex-syntax = "0.8"
rayon = "1.10"
rayon-cond = "0.3"
serde = { version = "1.0", features = [ "derive" ] }
serde_json = "1.0"
unicode-normalization-alignments = "0.1"
unicode_categories = "0.1"
unicode-segmentation = "1.11"
indicatif = {version = "0.17", optional = true}
itertools = "0.12"
log = "0.4"
derive_builder = "0.20"
spm_precompiled = "0.1"
hf-hub = { version = "0.3.2", optional = true }
aho-corasick = "1.1"
paste = "1.0.14"
macro_rules_attribute = "0.2.0"
thiserror = "1.0.49"
fancy-regex = { version = "0.13", optional = true}
getrandom = { version = "0.2.10" }
esaxx-rs = { version = "0.1.10", default-features = false, features=[]}
monostate = "0.1.12"
[features]
default = ["progressbar", "onig", "esaxx_fast"]
esaxx_fast = ["esaxx-rs/cpp"]
progressbar = ["indicatif"]
http = ["hf-hub"]
unstable_wasm = ["fancy-regex", "getrandom/js"]
[dev-dependencies]
criterion = "0.5"
tempfile = "3.10"
assert_approx_eq = "1.1"
[profile.release]
lto = "fat"