From f5dbd43bb464c923ba17a80ec5a0e775a7d2091f Mon Sep 17 00:00:00 2001
From: shademe
Date: Tue, 2 May 2023 15:07:39 +0200
Subject: [PATCH 1/3] Normalize capitalization in entry point identifiers

---
 curated_transformers/models/hf_loader.py | 2 +-
 curated_transformers/tests/models/test_listeners.py | 2 +-
 curated_transformers/tests/pipeline/test_transformer.py | 4 ++--
 .../tests/tokenization/test_bbpe_encoder.py | 2 +-
 .../tests/tokenization/test_char_encoder.py | 2 +-
 curated_transformers/tests/tokenization/test_registry.py | 8 ++++----
 curated_transformers/tokenization/hf_loader.py | 2 +-
 project/configs/layer-weighting.cfg | 4 ++--
 project/configs/no-layer-weighting.cfg | 4 ++--
 setup.cfg | 8 ++++----
 10 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/curated_transformers/models/hf_loader.py b/curated_transformers/models/hf_loader.py
index 6bb6b9d2..ec6a52c6 100644
--- a/curated_transformers/models/hf_loader.py
+++ b/curated_transformers/models/hf_loader.py
@@ -27,7 +27,7 @@ def build_hf_transformer_encoder_loader_v1(
     def load(model, X=None, Y=None):
         if not has_hf_transformers:
             raise ValueError(
-                Errors.E011.format(loader_name="HFTransformerEncoderLoader")
+                Errors.E011.format(loader_name="HfTransformerEncoderLoader")
             )

         encoder = model.shims[0]._model
diff --git a/curated_transformers/tests/models/test_listeners.py b/curated_transformers/tests/models/test_listeners.py
index e9353b9c..9e3363d8 100644
--- a/curated_transformers/tests/models/test_listeners.py
+++ b/curated_transformers/tests/models/test_listeners.py
@@ -34,7 +34,7 @@
     [initialize.components.transformer]

     [initialize.components.transformer.piecer_loader]
-    @model_loaders = "curated-transformers.HFPieceEncoderLoader.v1"
+    @model_loaders = "curated-transformers.HfPieceEncoderLoader.v1"
     name = "bert-base-cased"
 """

diff --git a/curated_transformers/tests/pipeline/test_transformer.py b/curated_transformers/tests/pipeline/test_transformer.py
index d4b95b04..d3fdaada 100644
--- a/curated_transformers/tests/pipeline/test_transformer.py
+++ b/curated_transformers/tests/pipeline/test_transformer.py
@@ -74,7 +74,7 @@
     [initialize.components.transformer]

     [initialize.components.transformer.piecer_loader]
-    @model_loaders = "curated-transformers.HFPieceEncoderLoader.v1"
+    @model_loaders = "curated-transformers.HfPieceEncoderLoader.v1"
     name = "bert-base-cased"
 """

@@ -122,7 +122,7 @@
     [initialize.components.transformer]

     [initialize.components.transformer.piecer_loader]
-    @model_loaders = "curated-transformers.HFPieceEncoderLoader.v1"
+    @model_loaders = "curated-transformers.HfPieceEncoderLoader.v1"
     name = "bert-base-cased"
 """

diff --git a/curated_transformers/tests/tokenization/test_bbpe_encoder.py b/curated_transformers/tests/tokenization/test_bbpe_encoder.py
index 35b7e95b..a747670b 100644
--- a/curated_transformers/tests/tokenization/test_bbpe_encoder.py
+++ b/curated_transformers/tests/tokenization/test_bbpe_encoder.py
@@ -9,7 +9,7 @@
 @pytest.fixture
 def toy_encoder(test_dir):
     encoder = build_byte_bpe_encoder_v1()
-    encoder.init = registry.model_loaders.get("curated-transformers.ByteBPELoader.v1")(
+    encoder.init = registry.model_loaders.get("curated-transformers.ByteBpeLoader.v1")(
         vocab_path=test_dir / "toy-vocab.json", merges_path=test_dir / "toy-merges.txt"
     )
     encoder.initialize()
diff --git a/curated_transformers/tests/tokenization/test_char_encoder.py b/curated_transformers/tests/tokenization/test_char_encoder.py
index 381e61a1..55890b27 100644
--- a/curated_transformers/tests/tokenization/test_char_encoder.py
+++ b/curated_transformers/tests/tokenization/test_char_encoder.py
@@ -49,7 +49,7 @@ def test_char_encoder_hf_model():
     ops = get_current_ops()
     encoder = build_char_encoder_v1()
     encoder.init = registry.model_loaders.get(
-        "curated-transformers.HFPieceEncoderLoader.v1"
+        "curated-transformers.HfPieceEncoderLoader.v1"
     )(name="cl-tohoku/bert-base-japanese-char-v2")
     encoder.initialize()

diff --git a/curated_transformers/tests/tokenization/test_registry.py b/curated_transformers/tests/tokenization/test_registry.py
index a5838127..8e2a0482 100644
--- a/curated_transformers/tests/tokenization/test_registry.py
+++ b/curated_transformers/tests/tokenization/test_registry.py
@@ -7,7 +7,7 @@
     "encoder_name",
     [
         "curated-transformers.BertWordpieceEncoder.v1",
-        "curated-transformers.ByteBPEEncoder.v1",
+        "curated-transformers.ByteBpeEncoder.v1",
         "curated-transformers.CamembertSentencepieceEncoder.v1",
         "curated-transformers.CharEncoder.v1",
         "curated-transformers.SentencepieceEncoder.v1",
@@ -22,10 +22,10 @@ def test_encoder_from_registry(encoder_name):
 @pytest.mark.parametrize(
     "loader_name",
     [
-        "curated-transformers.ByteBPELoader.v1",
+        "curated-transformers.ByteBpeLoader.v1",
         "curated-transformers.CharEncoderLoader.v1",
-        "curated-transformers.HFTransformerEncoderLoader.v1",
-        "curated-transformers.HFPieceEncoderLoader.v1",
+        "curated-transformers.HfTransformerEncoderLoader.v1",
+        "curated-transformers.HfPieceEncoderLoader.v1",
         "curated-transformers.PyTorchCheckpointLoader.v1",
         "curated-transformers.SentencepieceLoader.v1",
         "curated-transformers.WordpieceLoader.v1",
diff --git a/curated_transformers/tokenization/hf_loader.py b/curated_transformers/tokenization/hf_loader.py
index 181d8c53..2c554939 100644
--- a/curated_transformers/tokenization/hf_loader.py
+++ b/curated_transformers/tokenization/hf_loader.py
@@ -41,7 +41,7 @@ def build_hf_piece_encoder_loader_v1(
     def load(model, X=None, Y=None):
         if not has_hf_transformers:
-            raise ValueError(Errors.E011.format(loader_name="HFPieceEncoderLoader"))
+            raise ValueError(Errors.E011.format(loader_name="HfPieceEncoderLoader"))

         tokenizer = transformers.AutoTokenizer.from_pretrained(name, revision=revision)
         return _convert_encoder(model, tokenizer)

diff --git a/project/configs/layer-weighting.cfg b/project/configs/layer-weighting.cfg
index 8bb673d8..beddc19f 100644
--- a/project/configs/layer-weighting.cfg
+++ b/project/configs/layer-weighting.cfg
@@ -221,9 +221,9 @@ vectors = null
 [initialize.components.transformer]

 [initialize.components.transformer.encoder_loader]
-@model_loaders = "curated-transformers.HFTransformerEncoderLoader.v1"
+@model_loaders = "curated-transformers.HfTransformerEncoderLoader.v1"
 name = "xlm-roberta-base"

 [initialize.components.transformer.piecer_loader]
-@model_loaders = "curated-transformers.HFPieceEncoderLoader.v1"
+@model_loaders = "curated-transformers.HfPieceEncoderLoader.v1"
 name = "xlm-roberta-base"
diff --git a/project/configs/no-layer-weighting.cfg b/project/configs/no-layer-weighting.cfg
index d8c4c391..6bbb88d5 100644
--- a/project/configs/no-layer-weighting.cfg
+++ b/project/configs/no-layer-weighting.cfg
@@ -194,9 +194,9 @@ vectors = null
 [initialize.components.transformer]

 [initialize.components.transformer.encoder_loader]
-@model_loaders = "curated-transformers.HFTransformerEncoderLoader.v1"
+@model_loaders = "curated-transformers.HfTransformerEncoderLoader.v1"
 name = "xlm-roberta-base"

 [initialize.components.transformer.piecer_loader]
-@model_loaders = "curated-transformers.HFPieceEncoderLoader.v1"
+@model_loaders = "curated-transformers.HfPieceEncoderLoader.v1"
 name = "xlm-roberta-base"
diff --git a/setup.cfg b/setup.cfg
index 86a3c9fc..c5389da8 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,7 +34,7 @@ spacy_architectures =
     curated-transformers.LastTransformerLayerListener.v1 = curated_transformers.models.listeners:build_last_transformer_layer_listener_v1
     curated-transformers.ScalarWeightingListener.v1 = curated_transformers.models.listeners:build_scalar_weighting_listener_v1
     curated-transformers.BertWordpieceEncoder.v1 = curated_transformers.tokenization:build_bert_wordpiece_encoder_v1
-    curated-transformers.ByteBPEEncoder.v1 = curated_transformers.tokenization:build_byte_bpe_encoder_v1
+    curated-transformers.ByteBpeEncoder.v1 = curated_transformers.tokenization:build_byte_bpe_encoder_v1
     curated-transformers.CamembertSentencepieceEncoder.v1 = curated_transformers.tokenization:build_camembert_sentencepiece_encoder_v1
     curated-transformers.CharEncoder.v1 = curated_transformers.tokenization:build_char_encoder_v1
     curated-transformers.SentencepieceEncoder.v1 = curated_transformers.tokenization:build_sentencepiece_encoder_v1
@@ -49,10 +49,10 @@ spacy_cli =
     curated-transformers.quantize = curated_transformers.cli.quantize:quantize_cli

 thinc_model_loaders =
-    curated-transformers.ByteBPELoader.v1 = curated_transformers.tokenization:build_byte_bpe_encoder_loader_v1
+    curated-transformers.ByteBpeLoader.v1 = curated_transformers.tokenization:build_byte_bpe_encoder_loader_v1
     curated-transformers.CharEncoderLoader.v1 = curated_transformers.tokenization:build_char_encoder_loader_v1
-    curated-transformers.HFTransformerEncoderLoader.v1 = curated_transformers.models:build_hf_transformer_encoder_loader_v1
-    curated-transformers.HFPieceEncoderLoader.v1 = curated_transformers.tokenization:build_hf_piece_encoder_loader_v1
+    curated-transformers.HfTransformerEncoderLoader.v1 = curated_transformers.models:build_hf_transformer_encoder_loader_v1
+    curated-transformers.HfPieceEncoderLoader.v1 = curated_transformers.tokenization:build_hf_piece_encoder_loader_v1
     curated-transformers.PyTorchCheckpointLoader.v1 = curated_transformers.models:build_pytorch_checkpoint_loader_v1
     curated-transformers.SentencepieceLoader.v1 = curated_transformers.tokenization:build_sentencepiece_encoder_loader_v1
     curated-transformers.WordpieceLoader.v1 = curated_transformers.tokenization:build_wordpiece_encoder_loader_v1

From 45e55b2b23c5d807a400ef38ba2d53ebd518f612 Mon Sep 17 00:00:00 2001
From: shademe
Date: Tue, 2 May 2023 15:08:30 +0200
Subject: [PATCH 2/3] Set version to `0.0.8`

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index c5389da8..0d3fd7d0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-version = 0.0.7
+version = 0.0.8
 description = Curated transformer models
 url = https://github.com/explosion/curated-transformers
 author = Explosion

From 06b25de5d883ec18da4254023d129359305b57c6 Mon Sep 17 00:00:00 2001
From: shademe
Date: Tue, 2 May 2023 15:55:20 +0200
Subject: [PATCH 3/3] Preserve the casing of `HF`

---
 curated_transformers/models/hf_loader.py | 2 +-
 curated_transformers/tests/models/test_listeners.py | 2 +-
 curated_transformers/tests/pipeline/test_transformer.py | 4 ++--
 curated_transformers/tests/tokenization/test_char_encoder.py | 2 +-
 curated_transformers/tests/tokenization/test_registry.py | 4 ++--
 curated_transformers/tokenization/hf_loader.py | 2 +-
 project/configs/layer-weighting.cfg | 4 ++--
 project/configs/no-layer-weighting.cfg | 4 ++--
 setup.cfg | 4 ++--
 9 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/curated_transformers/models/hf_loader.py b/curated_transformers/models/hf_loader.py
index ec6a52c6..6bb6b9d2 100644
--- a/curated_transformers/models/hf_loader.py
+++ b/curated_transformers/models/hf_loader.py
@@ -27,7 +27,7 @@ def build_hf_transformer_encoder_loader_v1(
     def load(model, X=None, Y=None):
         if not has_hf_transformers:
             raise ValueError(
-                Errors.E011.format(loader_name="HfTransformerEncoderLoader")
+                Errors.E011.format(loader_name="HFTransformerEncoderLoader")
             )

         encoder = model.shims[0]._model
diff --git a/curated_transformers/tests/models/test_listeners.py b/curated_transformers/tests/models/test_listeners.py
index 9e3363d8..e9353b9c 100644
--- a/curated_transformers/tests/models/test_listeners.py
+++ b/curated_transformers/tests/models/test_listeners.py
@@ -34,7 +34,7 @@
     [initialize.components.transformer]

     [initialize.components.transformer.piecer_loader]
-    @model_loaders = "curated-transformers.HfPieceEncoderLoader.v1"
+    @model_loaders = "curated-transformers.HFPieceEncoderLoader.v1"
     name = "bert-base-cased"
 """

diff --git a/curated_transformers/tests/pipeline/test_transformer.py b/curated_transformers/tests/pipeline/test_transformer.py
index d3fdaada..d4b95b04 100644
--- a/curated_transformers/tests/pipeline/test_transformer.py
+++ b/curated_transformers/tests/pipeline/test_transformer.py
@@ -74,7 +74,7 @@
     [initialize.components.transformer]

     [initialize.components.transformer.piecer_loader]
-    @model_loaders = "curated-transformers.HfPieceEncoderLoader.v1"
+    @model_loaders = "curated-transformers.HFPieceEncoderLoader.v1"
     name = "bert-base-cased"
 """

@@ -122,7 +122,7 @@
     [initialize.components.transformer]

     [initialize.components.transformer.piecer_loader]
-    @model_loaders = "curated-transformers.HfPieceEncoderLoader.v1"
+    @model_loaders = "curated-transformers.HFPieceEncoderLoader.v1"
     name = "bert-base-cased"
 """

diff --git a/curated_transformers/tests/tokenization/test_char_encoder.py b/curated_transformers/tests/tokenization/test_char_encoder.py
index 55890b27..381e61a1 100644
--- a/curated_transformers/tests/tokenization/test_char_encoder.py
+++ b/curated_transformers/tests/tokenization/test_char_encoder.py
@@ -49,7 +49,7 @@ def test_char_encoder_hf_model():
     ops = get_current_ops()
     encoder = build_char_encoder_v1()
     encoder.init = registry.model_loaders.get(
-        "curated-transformers.HfPieceEncoderLoader.v1"
+        "curated-transformers.HFPieceEncoderLoader.v1"
     )(name="cl-tohoku/bert-base-japanese-char-v2")
     encoder.initialize()

diff --git a/curated_transformers/tests/tokenization/test_registry.py b/curated_transformers/tests/tokenization/test_registry.py
index 8e2a0482..bfc1962e 100644
--- a/curated_transformers/tests/tokenization/test_registry.py
+++ b/curated_transformers/tests/tokenization/test_registry.py
@@ -24,8 +24,8 @@ def test_encoder_from_registry(encoder_name):
     [
         "curated-transformers.ByteBpeLoader.v1",
         "curated-transformers.CharEncoderLoader.v1",
-        "curated-transformers.HfTransformerEncoderLoader.v1",
-        "curated-transformers.HfPieceEncoderLoader.v1",
+        "curated-transformers.HFTransformerEncoderLoader.v1",
+        "curated-transformers.HFPieceEncoderLoader.v1",
         "curated-transformers.PyTorchCheckpointLoader.v1",
         "curated-transformers.SentencepieceLoader.v1",
         "curated-transformers.WordpieceLoader.v1",
diff --git a/curated_transformers/tokenization/hf_loader.py b/curated_transformers/tokenization/hf_loader.py
index 2c554939..181d8c53 100644
--- a/curated_transformers/tokenization/hf_loader.py
+++ b/curated_transformers/tokenization/hf_loader.py
@@ -41,7 +41,7 @@ def build_hf_piece_encoder_loader_v1(
     def load(model, X=None, Y=None):
         if not has_hf_transformers:
-            raise ValueError(Errors.E011.format(loader_name="HfPieceEncoderLoader"))
+            raise ValueError(Errors.E011.format(loader_name="HFPieceEncoderLoader"))

         tokenizer = transformers.AutoTokenizer.from_pretrained(name, revision=revision)
         return _convert_encoder(model, tokenizer)

diff --git a/project/configs/layer-weighting.cfg b/project/configs/layer-weighting.cfg
index beddc19f..8bb673d8 100644
--- a/project/configs/layer-weighting.cfg
+++ b/project/configs/layer-weighting.cfg
@@ -221,9 +221,9 @@ vectors = null
 [initialize.components.transformer]

 [initialize.components.transformer.encoder_loader]
-@model_loaders = "curated-transformers.HfTransformerEncoderLoader.v1"
+@model_loaders = "curated-transformers.HFTransformerEncoderLoader.v1"
 name = "xlm-roberta-base"

 [initialize.components.transformer.piecer_loader]
-@model_loaders = "curated-transformers.HfPieceEncoderLoader.v1"
+@model_loaders = "curated-transformers.HFPieceEncoderLoader.v1"
 name = "xlm-roberta-base"
diff --git a/project/configs/no-layer-weighting.cfg b/project/configs/no-layer-weighting.cfg
index 6bbb88d5..d8c4c391 100644
--- a/project/configs/no-layer-weighting.cfg
+++ b/project/configs/no-layer-weighting.cfg
@@ -194,9 +194,9 @@ vectors = null
 [initialize.components.transformer]

 [initialize.components.transformer.encoder_loader]
-@model_loaders = "curated-transformers.HfTransformerEncoderLoader.v1"
+@model_loaders = "curated-transformers.HFTransformerEncoderLoader.v1"
 name = "xlm-roberta-base"

 [initialize.components.transformer.piecer_loader]
-@model_loaders = "curated-transformers.HfPieceEncoderLoader.v1"
+@model_loaders = "curated-transformers.HFPieceEncoderLoader.v1"
 name = "xlm-roberta-base"
diff --git a/setup.cfg b/setup.cfg
index 0d3fd7d0..23b8cddc 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -51,8 +51,8 @@ spacy_cli =
 thinc_model_loaders =
     curated-transformers.ByteBpeLoader.v1 = curated_transformers.tokenization:build_byte_bpe_encoder_loader_v1
     curated-transformers.CharEncoderLoader.v1 = curated_transformers.tokenization:build_char_encoder_loader_v1
-    curated-transformers.HfTransformerEncoderLoader.v1 = curated_transformers.models:build_hf_transformer_encoder_loader_v1
-    curated-transformers.HfPieceEncoderLoader.v1 = curated_transformers.tokenization:build_hf_piece_encoder_loader_v1
+    curated-transformers.HFTransformerEncoderLoader.v1 = curated_transformers.models:build_hf_transformer_encoder_loader_v1
+    curated-transformers.HFPieceEncoderLoader.v1 = curated_transformers.tokenization:build_hf_piece_encoder_loader_v1
     curated-transformers.PyTorchCheckpointLoader.v1 = curated_transformers.models:build_pytorch_checkpoint_loader_v1
     curated-transformers.SentencepieceLoader.v1 = curated_transformers.tokenization:build_sentencepiece_encoder_loader_v1
     curated-transformers.WordpieceLoader.v1 = curated_transformers.tokenization:build_wordpiece_encoder_loader_v1