## 

In [32]:
## 安装依赖
# -q 表示静默安装，不输出安装日志

!pip install torch torchvision torchaudio transformers datasets tiktoken wandb tqdm flask -q
!pip install "transformers[sentencepiece]" -q

## 测试 Tokenizer 分词器

In [2]:
from transformers import AutoTokenizer

## Load the tokenizer from a local copy of the opus-mt-zh-en checkpoint
model_checkpoint = "/Users/larrykoo/Documents/git/ai/opus-mt-zh-en"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

## Sample sentence pair to encode (Chinese source, English target)
zh_sentence = "通过强大的 AI 引擎，伴随着更强大的 Notion，运行得更快，写得更好，思考更加深远。"
en_sentence = "Access the limitless power of AI, right inside Notion, Work faster, Write better, Think bigger."

# Source-side encoding.
inputs = tokenizer(zh_sentence)
# Target-side encoding. `text_target=` replaces the deprecated
# `as_target_tokenizer()` context manager; when only `text_target` is given
# the returned encoding still exposes "input_ids".
targets = tokenizer(text_target=en_sentence)

## Print the token strings for both encodings
print(tokenizer.convert_ids_to_tokens(inputs["input_ids"]))
print(tokenizer.convert_ids_to_tokens(targets["input_ids"]))

['▁通过', '强大的', '▁', 'AI', '▁', '引擎', ',', '伴随着', '更', '强大的', '▁No', 'tion', ',', '运行', '得', '更快', ',', '写', '得', '更好', ',', '思考', '更加', '深远', '。', '</s>']
['▁Access', '▁the', '▁limit', 'less', '▁power', '▁of', '▁AI', ',', '▁right', '▁inside', '▁No', 'tion', ',', '▁Work', '▁faster', ',', '▁Write', '▁better', ',', '▁Think', '▁bigger', '.', '</s>']


## 测试 Model 加载使用

In [48]:
# -*- coding: utf-8 -*-
from flask import request, jsonify
from flask import Flask
from transformers import pipeline

# Local checkpoint directories for the two translation directions.
model_zh_en = "/Users/larrykoo/Documents/git/ai/opus-mt-zh-en"
model_en_zh = "/Users/larrykoo/Documents/git/ai/opus-mt-en-zh"
# Build both translation pipelines once at import time so every request
# reuses the already-loaded models.
zh2en = pipeline("translation", model=model_zh_en)
en2zh = pipeline("translation", model=model_en_zh)

## Build a Flask HTTP server
app = Flask(__name__)
# Emit non-ASCII characters (e.g. Chinese) verbatim in JSON responses rather
# than as \uXXXX escapes. The config key is honoured by older Flask releases;
# Flask >= 2.3 ignores it and reads the JSON provider attribute instead, so
# set both to behave the same across versions.
app.config["JSON_AS_ASCII"] = False
if hasattr(app, "json"):
    app.json.ensure_ascii = False

## Translate Chinese to English
@app.route("/transfer_en", methods=['POST'])
def transfer_en():
    """Translate the ``text`` field of the posted JSON body with ``zh2en``.

    Expects a JSON object containing a ``"text"`` key.

    Returns:
        A JSON response with ``sourceText`` (the submitted text) and
        ``translated`` (the first translation returned by the pipeline), or
        a 400 JSON error when the body is not an object with a ``"text"`` key.
    """
    payload = request.get_json()
    # Reject malformed requests explicitly; indexing a missing key or a
    # non-object body would otherwise surface as an HTTP 500.
    if not isinstance(payload, dict) or "text" not in payload:
        return jsonify({"error": 'JSON body must be an object with a "text" field'}), 400

    doc = payload["text"]
    new_doc = zh2en(doc)

    return_result = {
        "sourceText": doc,
        "translated": new_doc[0]["translation_text"],
    }
    return jsonify(return_result)

## Translate English to Chinese
@app.route("/transfer_zh", methods=['POST'])
def transfer_zh():
    """Translate the ``text`` field of the posted JSON body with ``en2zh``.

    Expects a JSON object containing a ``"text"`` key.

    Returns:
        A JSON response with ``sourceText`` (the submitted text) and
        ``translated`` (the first translation returned by the pipeline), or
        a 400 JSON error when the body is not an object with a ``"text"`` key.
    """
    payload = request.get_json()
    # Reject malformed requests explicitly; indexing a missing key or a
    # non-object body would otherwise surface as an HTTP 500.
    if not isinstance(payload, dict) or "text" not in payload:
        return jsonify({"error": 'JSON body must be an object with a "text" field'}), 400

    doc = payload["text"]
    new_doc = en2zh(doc)

    return_result = {
        "sourceText": doc,
        "translated": new_doc[0]["translation_text"],
    }
    return jsonify(return_result)

if __name__ == "__main__":
    # Start the server only when this file is executed directly; it binds
    # to all interfaces on port 8888 with Flask's debug mode turned off.
    app.run(debug=False, port=8888, host="0.0.0.0")



 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8888
 * Running on http://172.16.2.113:8888
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [23/Apr/2023 17:14:58] "POST /transfer_en HTTP/1.1" 200 -
127.0.0.1 - - [23/Apr/2023 17:15:28] "POST /transfer_zh HTTP/1.1" 200 -
