-
Notifications
You must be signed in to change notification settings - Fork 327
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add Multi-instance translate chapter in README and its script example (…
…#361) * add chapter multi-instance translate for README.md and script mlt-trans to run multi-instance * add chapter multi-instance translate for README.md * change method to get physical cores number and add hyperlink for C4 and C5 in README.md * add option for running as benchmark or not * rewrite mlt-trans script using python and move it to tutorials * change code according to the commnets * add chapter multi-instance translate for README.md and script mlt-trans to run multi-instance * add chapter multi-instance translate for README.md * change method to get physical cores number and add hyperlink for C4 and C5 in README.md * move mlt_cpu_trans_benchmark to tutorials and rename it process_per_core_translation * fix typo * add option for running as benchmark or not * rewrite mlt-trans script using python and move it to tutorials * change code according to the commnets * move mlt_cpu_trans_benchmark to tutorials and rename it process_per_core_translation * add python script in tutortial * add license (Apache) and authors
- Loading branch information
Showing
3 changed files
with
172 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# CPU process per core translation | ||
On multi-core processor computer, translation per core separately can speedup translation performance, due to some operation can't be handled parallel in one process. | ||
Using this method, translation on each core can be parallel. | ||
|
||
One python script example is given and you can run it as follows: | ||
```bash | ||
> python cpu_process_per_core_translation.py -m model -i input_file_name -o output_file_name -bs batch_size -t true | ||
``` | ||
|
||
-t true: each core translate the whole input file. | ||
|
||
-t false: each core translate (input file line/core number) lines , then merge the translated file into one complete output file. | ||
|
158 changes: 158 additions & 0 deletions
158
tutorials/process_per_core_translation/cpu_process_per_core_translation.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
# Licensed under the Apache License, Version 2.0 (the "License"). You may not | ||
# use this file except in compliance with the License. A copy of the License | ||
# is located at | ||
# | ||
# http://aws.amazon.com/apache2.0/ | ||
# | ||
# or in the "license" file accompanying this file. This file is distributed on | ||
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either | ||
# express or implied. See the License for the specific language governing | ||
# permissions and limitations under the License. | ||
|
||
# \Author: {zhiyuan.huang, rong.a.zhang}@intel.com | ||
|
||
|
||
# Describtion: This script is used to process per core translation, which can greatly speedup translate perefomance. | ||
|
||
import argparse | ||
import subprocess | ||
import threading | ||
import tempfile | ||
import logging | ||
import string | ||
import os | ||
import time | ||
import mxnet as mx | ||
|
||
def add_args(parser): | ||
parser.add_argument("--module" , '-m', help='the pretrained module', default="wmt_model") | ||
parser.add_argument("--input_file", '-i', help='the file input to translate', default='newstest2016.tc.BPE.de') | ||
parser.add_argument("--output_file", '-o', help='the file saved the result of translation', default='newstest2016.tc.BPE.en') | ||
parser.add_argument("--batch_size", '-bs', help='the batch size of process', default=32) | ||
parser.add_argument("--data_type", '-t', help='if uses run benchmark or not', default="benchmark") | ||
|
||
def task(args): | ||
os.system(args) | ||
|
||
def benchmark(cores, args): | ||
""" | ||
benchmark is used for Processing per core translation. Each core translates the whole input file. | ||
Return after all translations done. | ||
:param cores: the number of cores used for translation, each core will launch a thread to translate | ||
:param args: input parameters | ||
""" | ||
model = args.module | ||
fileInput = args.input_file | ||
fileOutput = args.output_file | ||
batchsize = args.batch_size | ||
|
||
thread = [] | ||
for i in range(cores): | ||
command = "taskset -c %d-%d python3 -m sockeye.translate -m %s -i %s -o %s --batch-size %d --output-type benchmark --use-cpu > /dev/null 2>&1 " % (i, i, model, fileInput, fileOutput, batchsize) | ||
t = threading.Thread(target = task, args=(command,)) | ||
thread.append(t) | ||
t.start() | ||
|
||
for t in thread: | ||
t.join() | ||
|
||
def split_file(splitNum, fileInput, lines): | ||
""" | ||
split_file is used to split fileInput into splitNum small pieces file. | ||
For example, when splitNum is 56, a 112 lines file will be split into 56 files and each file has 2 lines. | ||
:param splitNum: split into splitNum files | ||
:param fileInput: file to be split | ||
:param lines: lines of fileInput | ||
""" | ||
quot = lines // splitNum | ||
rema = lines % splitNum | ||
files = [] | ||
current_line = 0 | ||
for i in range(splitNum): | ||
if i < rema: | ||
read_line = quot + 1 | ||
else: | ||
read_line = quot | ||
temp = tempfile.NamedTemporaryFile() | ||
os.system("head -n%d %s| tail -n%d > %s" % (current_line + read_line, fileInput, read_line, temp.name)) | ||
current_line += read_line | ||
files.append(temp) | ||
|
||
return files | ||
|
||
def translate(cores, files, args): | ||
""" | ||
translate is used for Processing per core translation. cores[i] will translate files[i] | ||
:param cores: the number of cores used for translation, each core will launch a thread to translate | ||
:param files: file list to be translated | ||
:param args: input parameters | ||
:return: list of translated file | ||
""" | ||
model = args.module | ||
batchsize = args.batch_size | ||
|
||
# split inputfile to a series of small files which number is equal cores | ||
file = [] | ||
thread = [] | ||
for i in range(cores): | ||
files[i].seek(0) | ||
temp = tempfile.NamedTemporaryFile() | ||
command = "taskset -c %d-%d python3 -m sockeye.translate -m %s -i %s -o %s --batch-size %d --output-type benchmark --use-cpu > /dev/null 2>&1 " % (i, i, model, files[i].name, temp.name, batchsize) | ||
file.append(temp) | ||
|
||
t = threading.Thread(target = task, args=(command,)) | ||
thread.append(t) | ||
t.start() | ||
#wait for all translation done | ||
for t in thread: | ||
t.join() | ||
|
||
return file | ||
|
||
if __name__ == '__main__': | ||
parser = argparse.ArgumentParser(description='MXnet Sockeye cpu_process_per_core_translation.py -m model-name -i file_to_translate -o result_to_save --batch-size 32') | ||
add_args(parser) | ||
args = parser.parse_args() | ||
fileInput = args.input_file | ||
fileOutput = args.output_file | ||
|
||
socket = int(os.popen("grep -w 'physical id' /proc/cpuinfo | sort -u | wc -l").read().strip()) | ||
cores = int(os.popen("grep -w 'core id' /proc/cpuinfo | sort -u | wc -l").read().strip()) | ||
total_cores = socket * cores | ||
print(total_cores) | ||
|
||
os.system("export KMP_AFFINITY=granularity=fine,noduplicates,compact,1,0") | ||
ompStr = "export OMP_NUM_THREADS=" + str(total_cores) | ||
os.system(ompStr) | ||
|
||
# the total lines of input file will be translated | ||
lines = 0 | ||
for line in open(args.input_file): lines += 1 | ||
print('Translating...') | ||
# clear file for output | ||
os.system("rm %s -f"%fileOutput) | ||
if args.data_type == "benchmark": | ||
lines = lines * total_cores | ||
start = time.time() | ||
order = benchmark(total_cores, args) | ||
end = time.time() | ||
else: | ||
splited_files = split_file(total_cores, fileInput, lines) | ||
start = time.time() | ||
translated_files = translate(total_cores, splited_files, args) | ||
end = time.time() | ||
for i in range(total_cores): | ||
os.system("cat %s >> %s" % (translated_files[i].name, fileOutput)) | ||
splited_files[i].close() | ||
translated_files[i].close() | ||
|
||
total_time = end - start | ||
|
||
print("Instance nums: %d" % total_cores) | ||
print("Total Processed lines: %d" % lines) | ||
print("Total time(s): %.3f s" % total_time) | ||
print("Speed: %.3f sent/sec" % (lines / total_time)) | ||
|