forked from tensorflow/ranking
-
Notifications
You must be signed in to change notification settings - Fork 2
/
tfrbert_convert_json_to_elwc.py
50 lines (36 loc) · 1.68 KB
/
tfrbert_convert_json_to_elwc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# tfrbert_convert_json_to_elwc.py
from tensorflow_ranking.extension import tfrbert
import json
import copy
import tfrbert_client_predict_from_json
import argparse
#
# Main
#
def main():
    """Command-line entry point: convert a ranking-problem JSON file to ELWC.

    Reads ``--vocab_file``, ``--sequence_length``, ``--input_file`` and
    ``--output_file`` (all required) plus the optional ``--do_lower_case``
    flag, builds the TFR-BERT helper objects from
    ``tfrbert_client_predict_from_json``, echoes the chosen settings to
    stdout, and then calls ``convert_json_to_elwc_export`` with the input
    and output paths.
    """
    cli = argparse.ArgumentParser()

    # Required options, registered in this order so --help output is unchanged.
    required_options = (
        ("--vocab_file", str, "/path/to/bert_model/vocab.txt"),
        ("--sequence_length", int, "typically 128, 256, 512"),
        ("--input_file", str, "JSON input filename (e.g. train.json)"),
        ("--output_file", str, "ELWC TFrecord filename (e.g. train.elwc.tfrecord)"),
    )
    for flag, value_type, description in required_options:
        cli.add_argument(flag, type=value_type, required=True, help=description)
    cli.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Set for uncased models, otherwise do not include",
    )
    args = cli.parse_args()

    # Build the helper objects before any output, as the helpers may emit
    # their own messages while loading the vocabulary (not visible here).
    tfrbert_util = tfrbert_client_predict_from_json.create_tfrbert_util_with_vocab(
        args.sequence_length, args.vocab_file, args.do_lower_case
    )
    json_converter = tfrbert_client_predict_from_json.TFRBertUtilJSON(tfrbert_util)

    # Summary of the run configuration for the user.
    banner_lines = (
        "Utility to convert between JSON and ELWC for TFR-Bert",
        "",
        "Model Parameters: ",
        "Vocabulary filename: " + args.vocab_file,
        "sequence_length: " + str(args.sequence_length),
        "do_lower_case: " + str(args.do_lower_case),
        "\n",
        "Input file: " + args.input_file,
        "Output file: " + args.output_file,
    )
    for banner_line in banner_lines:
        print(banner_line)

    # Convert the ranking problems from JSON to an ELWC export file.
    json_converter.convert_json_to_elwc_export(args.input_file, args.output_file)
    print("Success.")
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()