diff --git a/models/sbert/base_config.json b/models/sbert/base_config.json index 3d48ccce..f8ffb37a 100644 --- a/models/sbert/base_config.json +++ b/models/sbert/base_config.json @@ -29,7 +29,8 @@ "mask": "fully_visible" }, - "embedding": "dual", + "dropout": 0.1, + "embedding": ["dual"], "encoder": "dual", "pooling": "first", "tie_weights": true diff --git a/models/sbert/large_config.json b/models/sbert/large_config.json index 39ebafa0..eb76afce 100644 --- a/models/sbert/large_config.json +++ b/models/sbert/large_config.json @@ -29,7 +29,8 @@ "mask": "fully_visible" }, - "embedding": "dual", + "dropout": 0.1, + "embedding": ["dual"], "encoder": "dual", "pooling": "first", "tie_weights": true diff --git a/models/sbert/lstm_config.json b/models/sbert/lstm_config.json index f66e1456..d424603c 100644 --- a/models/sbert/lstm_config.json +++ b/models/sbert/lstm_config.json @@ -19,7 +19,8 @@ "encoder": "lstm" }, - "embedding": "dual", + "dropout": 0.1, + "embedding": ["dual"], "encoder": "dual", "pooling": "mean", "tie_weights": true diff --git a/scripts/convert_albert_from_huggingface_to_uer.py b/scripts/convert_albert_from_huggingface_to_uer.py index e8d79557..a203f5ab 100644 --- a/scripts/convert_albert_from_huggingface_to_uer.py +++ b/scripts/convert_albert_from_huggingface_to_uer.py @@ -72,7 +72,7 @@ output_model["target.mlm.linear_1.bias"] = input_model["predictions.dense.bias"] output_model["target.mlm.linear_2.weight"] = input_model["predictions.decoder.weight"] output_model["target.mlm.linear_2.bias"] = input_model["predictions.bias"] -output_model["target.layer_norm.gamma"] = input_model["predictions.LayerNorm.weight"] -output_model["target.layer_norm.beta"] = input_model["predictions.LayerNorm.bias"] +output_model["target.mlm.layer_norm.gamma"] = input_model["predictions.LayerNorm.weight"] +output_model["target.mlm.layer_norm.beta"] = input_model["predictions.LayerNorm.bias"] torch.save(output_model, args.output_model_path) diff --git 
a/scripts/convert_albert_from_original_tf_to_uer.py b/scripts/convert_albert_from_original_tf_to_uer.py index bd3ff55c..2efc4c71 100644 --- a/scripts/convert_albert_from_original_tf_to_uer.py +++ b/scripts/convert_albert_from_original_tf_to_uer.py @@ -88,8 +88,8 @@ def main(): output_model["target.sp.linear_2.bias"] = input_model["cls/seq_relationship/output_bias"] output_model["target.mlm.linear_1.weight"] = input_model["cls/predictions/transform/dense/kernel"] output_model["target.mlm.linear_1.bias"] = input_model["cls/predictions/transform/dense/bias"] - output_model["target.layer_norm.gamma"] = input_model["cls/predictions/transform/LayerNorm/gamma"] - output_model["target.layer_norm.beta"] = input_model["cls/predictions/transform/LayerNorm/beta"] + output_model["target.mlm.layer_norm.gamma"] = input_model["cls/predictions/transform/LayerNorm/gamma"] + output_model["target.mlm.layer_norm.beta"] = input_model["cls/predictions/transform/LayerNorm/beta"] output_model["target.mlm.linear_2.weight"] = input_model["bert/embeddings/word_embeddings"] output_model["target.mlm.linear_2.bias"] = input_model["cls/predictions/output_bias"] diff --git a/scripts/convert_albert_from_uer_to_huggingface.py b/scripts/convert_albert_from_uer_to_huggingface.py index 5548ed52..30a43d94 100644 --- a/scripts/convert_albert_from_uer_to_huggingface.py +++ b/scripts/convert_albert_from_uer_to_huggingface.py @@ -67,8 +67,8 @@ output_model["sop_classifier.classifier.bias"] = input_model["target.sp.linear_2.bias"] output_model["predictions.dense.weight"] = input_model["target.mlm.linear_1.weight"] output_model["predictions.dense.bias"] = input_model["target.mlm.linear_1.bias"] -output_model["predictions.LayerNorm.weight"] = input_model["target.layer_norm.gamma"] -output_model["predictions.LayerNorm.bias"] = input_model["target.layer_norm.beta"] +output_model["predictions.LayerNorm.weight"] = input_model["target.mlm.layer_norm.gamma"] +output_model["predictions.LayerNorm.bias"] = 
input_model["target.mlm.layer_norm.beta"] output_model["predictions.decoder.weight"] = input_model["target.mlm.linear_2.weight"] output_model["predictions.decoder.bias"] = input_model["target.mlm.linear_2.bias"] output_model["predictions.bias"] = input_model["target.mlm.linear_2.bias"] diff --git a/scripts/convert_albert_from_uer_to_original_tf.py b/scripts/convert_albert_from_uer_to_original_tf.py index 0d8d2661..86df9728 100644 --- a/scripts/convert_albert_from_uer_to_original_tf.py +++ b/scripts/convert_albert_from_uer_to_original_tf.py @@ -81,8 +81,8 @@ def main(): output_model["cls/seq_relationship/output_bias"] = input_model["target.sp.linear_2.bias"] output_model["cls/predictions/transform/dense/kernel"] = input_model["target.mlm.linear_1.weight"] output_model["cls/predictions/transform/dense/bias"] = input_model["target.mlm.linear_1.bias"] - output_model["cls/predictions/transform/LayerNorm/gamma"] = input_model["target.layer_norm.gamma"] - output_model["cls/predictions/transform/LayerNorm/beta"] = input_model["target.layer_norm.beta"] + output_model["cls/predictions/transform/LayerNorm/gamma"] = input_model["target.mlm.layer_norm.gamma"] + output_model["cls/predictions/transform/LayerNorm/beta"] = input_model["target.mlm.layer_norm.beta"] output_model["bert/embeddings/word_embeddings"] = input_model["target.mlm.linear_2.weight"] output_model["cls/predictions/output_bias"] = input_model["target.mlm.linear_2.bias"] diff --git a/scripts/extract_embeddings.py b/scripts/extract_embeddings.py index 1fc16bac..fec8fcbd 100644 --- a/scripts/extract_embeddings.py +++ b/scripts/extract_embeddings.py @@ -40,7 +40,7 @@ vocab.load(args.vocab_path) pretrained_model = torch.load(args.load_model_path) - embedding = pretrained_model["embedding.word_embedding.weight"] + embedding = pretrained_model["embedding.word.embedding.weight"] with open(args.word_embedding_path, mode="w", encoding="utf-8") as f: head = str(list(embedding.size())[0]) + " " + 
str(list(embedding.size())[1]) + "\n" diff --git a/scripts/topn_words_indep.py b/scripts/topn_words_indep.py index d42df409..40b33788 100644 --- a/scripts/topn_words_indep.py +++ b/scripts/topn_words_indep.py @@ -39,7 +39,7 @@ vocab.load(args.vocab_path) pretrained_model = torch.load(args.load_model_path) - embedding = pretrained_model["embedding.word_embedding.weight"] + embedding = pretrained_model["embedding.word.embedding.weight"] with open(args.test_path, mode="r", encoding="utf-8") as f: for line in f: diff --git a/uer/models/model.py b/uer/models/model.py index 5931f12b..f7b30d65 100755 --- a/uer/models/model.py +++ b/uer/models/model.py @@ -21,10 +21,10 @@ def __init__(self, args, embedding, encoder, tgt_embedding, decoder, target): if "mlm" in args.target and args.tie_weights: self.target.mlm.linear_2.weight = self.embedding.word.embedding.weight + elif "lm" in args.target and args.tie_weights and self.tgt_embedding is not None and "word" in self.tgt_embedding.embedding_name_list: + self.target.lm.output_layer.weight = self.tgt_embedding.word.embedding.weight elif "lm" in args.target and args.tie_weights and "word" in self.embedding.embedding_name_list: self.target.lm.output_layer.weight = self.embedding.word.embedding.weight - elif "lm" in args.target and args.tie_weights and "word" in self.tgt_embedding.embedding_name_list: - self.target.lm.output_layer.weight = self.tgt_embedding.word.embedding.weight if self.decoder is not None and args.share_embedding: self.tgt_embedding.word.embedding.weight = self.embedding.word.embedding.weight