In [1]:
import time
from collections import defaultdict
from transformers import pipeline

def nllb_translation(src_lang, tgt_lang, texts, max_length=512):
    """Translate ``texts`` with two NLLB-200 checkpoints and print timing figures.

    Each text is run through both the 600M and the 1.3B distilled checkpoints.
    For every (model, text) pair the translation and its elapsed time are
    printed; once all texts are done, the mean time per model is printed.

    Args:
        src_lang: Source language code forwarded to ``pipeline`` (e.g. ``'kor_Hang'``).
        tgt_lang: Target language code forwarded to ``pipeline`` (e.g. ``'eng_Latn'``).
        texts: Iterable of strings to translate. It is materialized into a
            list once, so a one-shot iterator is fine.
        max_length: ``max_length`` forwarded to ``pipeline``.

    Returns:
        None. All results are reported via ``print``.
    """
    texts = list(texts)
    if not texts:
        # Nothing to translate: skip the expensive model load and avoid the
        # ZeroDivisionError the averaging step would hit with zero samples.
        return

    model_names = (
        "facebook/nllb-200-distilled-600M",
        "facebook/nllb-200-distilled-1.3B",
    )
    models = {
        model_name: pipeline(
            "translation",
            model=model_name,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            max_length=max_length,
        )
        for model_name in model_names
    }

    translation_times = defaultdict(float)
    translation_counts = defaultdict(int)

    # Texts form the outer loop so the two models' outputs for the same
    # sentence are printed next to each other.
    for text in texts:
        for model_name, translator in models.items():
            # perf_counter is monotonic, so the measured interval cannot be
            # skewed by system clock adjustments.
            start_time = time.perf_counter()
            translation = translator(text)
            translation_time = time.perf_counter() - start_time
            translation_times[model_name] += translation_time
            translation_counts[model_name] += 1
            print(f"{model_name}: {text} => {translation}, Translation Time: {translation_time} seconds")

    for model_name in models:
        avg_translation_time = translation_times[model_name] / translation_counts[model_name]
        print(f"Average Translation Time for {model_name}: {avg_translation_time} seconds")
       
# Questionnaire section "chief complaint" (Korean): a markdown-style heading
# followed by numbered questions, with the answer choices in parentheses.
texts1 = [
    "### 주증상",
    "1. 주요 증상 (주관식)",
    "2. 증상이 언제부터 시작되었나요? (주관식)",
    "3. 증상이 얼마나 자주 나타나나요? (하루에도 수차례/하루 한번/2-3일에 한번/일주일에 한번/한달에 한번/가끔)",
    "4. 증상이 심해지고 있나요? (심해짐/그대로/좋아지는 중)",
    "5. 증상에 대해 검사 및 치료를 받은 적이 있나요? (네/아니요)"
]       
# Questionnaire section "current condition" (Korean): appetite, vitality,
# body-weight change and water-intake change.
texts2 = [
    "### 현재 상태",
    "1. 식욕은 양호한가요? (양호/감소/간식만 먹음/아예 안 먹음)",
    "2. 활력은 양호한가요? (양호/감소/거의 안 움직임)",
    "3. 체중 변화가 있나요? (변화 없음/증가/감소/모름)",
    "4. 음수량에 변화가 있나요? (변화 없음/증가/감소/모름)"
]       
# Questionnaire section "medical history" (Korean): existing diseases,
# current medication, past surgery, drug allergies and blood transfusions.
texts3 = [
    "### 병력",
    "1. 기존에 앓고 있는 질병이 있나요? 있다면 어떤 질병인가요? (주관식)",
    "2. 복용 중인 약물이 있나요? 있다면 어떤 약물인가요? (주관식)",
    "3. 수술 했던 적이 있나요? 있다면 어떤 수술인가요? (주관식)",
    "4. 약물 알러지 반응이 있었던 적이 있나요? 있다면 어떤 약에 대한 알러지 반응이 있었나요? (주관식)",
    "5. 수혈을 받았던 적이 있나요? (네/아니요)"
]       
# Questionnaire section "living environment" (Korean): where the animal
# lives and what it is fed.
texts4 = [
    "### 생활 환경",
    "1. 주된 생활 장소는 어디인가요? (실내/실외/실내외 모두)",
    "2. 무엇을 먹이시나요? (건사료/습식사료/시제품 화식/시제품 생식/직접 만든 음식)",
    "3. 최근에 새로운 사료 또는 간식을 먹인 적이 있나요? (네/아니요)",
    "4. 사람 음식을 먹나요? (자주 먹음/가끔 먹음/먹지 않음)",
    "5. 먹어서는 안되는 이물을 잘 주워 먹는 성격인가요? (네/아니요)"
]       
# Questionnaire section "special circumstances" (Korean): trauma, recent
# stress, travel, contact with other dogs/cats and external parasites.
# Note that some answer choices here use square brackets instead of parentheses.
texts5 = [
    "### 특수상황",
    "1. 아이가 어디 부딪히거나, 높은 곳에서 떨어지거나, 다른 동물에게 물리거나 하는 등 크게 다쳤던 적이 있나요? [네/아니요]",
    "2. 최근에 아이가 스트레스를 받을만한 상황 (이사, 다른 동물의 방문, 다른 사람의 방문 등)이 있었나요? [네/아니요]",
    "3. 최근에 여행을 다녀온적이 있나요? (없음/국내여행/해외여행)",
    "4. 강아지의 경우 최근에 강아지와 함께 또는 보호자분 단독으로 다른 강아지가 많은 공간에 방문한 적이 있나요? 고양이의 경우 최근에 보호자분께서 다른 고양이와 접촉했던 적이 있나요? [네/아니요]",
    "5. 아이에게 진드기, 벼룩 등의 외부 기생충이 붙어있는 것을 본 적이 있나요? (네/아니요)"
]       
# Questionnaire section "preventive medicine" (Korean): last vaccination,
# heartworm prevention and external-parasite prevention.
texts6 = [
    "### 예방의학",
    "1. 마지막 예방접종이 언제인가요? (1년이내/3년이내/어릴 때만 진행/접종경력 없음/모름)",
    "2. 심장사상충 예방을 해주시나요? (매달 진행/여름에만 진행/일정하지 않음/하지 않음/모름)",
    "3. 외부기생충 예방도 함께 가능한 제품인가요? (네/아니요/모름)"
]       
# Questionnaire section "abnormal symptom check" (Korean), grouped under
# sub-headings for the digestive, urinary, cardiovascular/respiratory and
# musculoskeletal/nervous systems. The final entry is a note saying that the
# questions which follow are shown only when dermatology is selected.
texts7 = [
    # The comma after the title matters: without it Python concatenates the
    # title with the first sub-heading into a single list element.
    "# 이상 증상 체크",
    "### 소화기계",
    "1. 구토를 하나요? (네/아니요)",
    "2. 설사를 하나요? (네/아니요)",
    "### 비뇨기계",
    "1. 배뇨 횟수에 변화가 있나요? (변화없음/증가/감소)",
    "2. 배뇨 시 아파하거나 배뇨실수를 하거나 하는 등의 행동 변화가 있나요? (네/아니요)",
    "3. 소변 색이나 냄새에 변화가 있나요? (변화없음/혈뇨/주황 또는 갈색/악취)",
    "### 심혈관계 및 호흡기계",
    "1. 호흡에 이상이 있나요? (이상 없음/호흡 빠름/호흡을 힘들어 함/호흡 시 이상 소리)",
    "2. 기침을 하나요? (자주 함/가끔/안함)",
    "3. 혀가 파래지는 증상이 있나요? (네/아니요)",
    "4. 콧물 또는 재채기 증상이 있나요? (네/아니요)",
    "### 근골격계 및 신경계",
    "1. 걸음걸이가 정상인가요? (네/아니요)",
    "2. 경련 증상을 보인적이 있나요? (네/아니요)",
    "3. 몸의 일부가 딱딱하게 굳거나 힘이 풀려서 움직이지 못하는 증상을 보인적이 있나요? (네/아니요)",
    "4. 신체 특정 부위를 만졌을 때 아파하는 증상이 있나요? (네/아니요)",
    "**아래는 진료과 피부과 선택시에만 보여주는 문진 내용**",
]
# Questionnaire section "skin disease and otitis externa" (Korean): ear
# cleaning and bathing frequency, itching, seasonality, cohabiting animals,
# grooming/boarding history and affected skin areas.
texts8 = [
    "# 피부질환 및 외이염",
    "1. 귀청소는 얼마나 자주 해주시나요? [매일/일주일에 2-3회/일주일에 1회/한달에 1-2회/가끔/안함]",
    "2. 목욕은 얼마나 자주 해주시나요? [매일/일주일에 2-3회/일주일에 1회/한달에 1-2회/가끔/안함]",
    "3. 아이가 문제 피부를 긁거나 핥는 등 간지러워 하나요? [네/아니요/모르겠음]",
    "4. 특정 계절에만 피부 질환이 심해진다고 느끼시나요? [네/아니요]",
    "5. 동거 동물이 있는 경우 다른 동물들도 피부 문제가 있나요? [네/아니요/동거동물 없음]",
    "6. 보호자분도 피부에 증상이 나타났나요? [네/아니요]",
    "7. 최근에 미용을 했거나 호텔링을 했던 적이 있나요? [최근 미용/최근 호텔링/해당 없음]",
    "8. 강아지의 경우 산책 시 풀이 많은 곳에 자주 가나요? [네, 아니요]",
    "9. 문제 피부 부위를 선택해주세요 (복수 선택 가능)"
]

In [2]:
# Benchmark both checkpoints on the Korean "special circumstances" questions,
# with 'kor_Hang' as source and 'eng_Latn' as target.
nllb_translation('kor_Hang', 'eng_Latn', texts5)

facebook/nllb-200-distilled-600M: ### 특수상황 => [{'translation_text': '♪ ♪ Special case of the day ♪'}], Translation Time: 18.342193365097046 seconds
facebook/nllb-200-distilled-1.3B: ### 특수상황 => [{'translation_text': '## ## ### ### ####################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################################'}], Translation Time: 204.3350486755371 seconds
facebook/nllb-200-distilled-600M: 1. 아이가 어디 부딪히거나, 높은 곳에서 떨어지거나, 다른 동물에게 물리거나 하는 등 크게 다쳤던 적이 있나요? [네/아니요] => [{'translation_text': '1. Have you ever had a child who was seriously injured by a collision, falling from a he

In [None]:
# NOTE(review): src_lang is 'eng_Latn' but texts1 holds Korean strings —
# confirm whether English input was intended for this run.
nllb_translation('eng_Latn', 'kor_Hang', texts1)

In [None]:
# Benchmark both checkpoints on the Korean "chief complaint" questions,
# with 'kor_Hang' as source and 'zho_Hans' as target.
nllb_translation('kor_Hang', 'zho_Hans', texts1)

In [None]:
# NOTE(review): src_lang is 'zho_Hans' but texts1 holds Korean strings —
# confirm whether Chinese input was intended for this run.
nllb_translation('zho_Hans', 'kor_Hang', texts1)

In [None]:
if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import (
        AutoModel,
        AutoModelForAudioClassification,
        AutoModelForCausalLM,
        AutoModelForCTC,
        AutoModelForDocumentQuestionAnswering,
        AutoModelForImageClassification,
        AutoModelForImageSegmentation,
        AutoModelForMaskedLM,
        AutoModelForMaskGeneration,
        AutoModelForObjectDetection,
        AutoModelForQuestionAnswering,
        AutoModelForSemanticSegmentation,
        AutoModelForSeq2SeqLM,
        AutoModelForSequenceClassification,
        AutoModelForSpeechSeq2Seq,
        AutoModelForTableQuestionAnswering,
        AutoModelForTextToSpectrogram,
        AutoModelForTextToWaveform,
        AutoModelForTokenClassification,
        AutoModelForVideoClassification,
        AutoModelForVision2Seq,
        AutoModelForVisualQuestionAnswering,
        AutoModelForZeroShotImageClassification,
        AutoModelForZeroShotObjectDetection,
    )

@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class Text2TextGenerationPipeline(Pipeline):
    """
    Pipeline for text to text generation using seq2seq models.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> generator = pipeline(model="mrm8488/t5-base-finetuned-question-generation-ap")
    >>> generator(
    ...     "answer: Manuel context: Manuel has created RuPERTa-base with the support of HF-Transformers and Google"
    ... )
    [{'generated_text': 'question: Who created the RuPERTa-base?'}]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial). You can pass text
    generation parameters to this pipeline to control stopping criteria, decoding strategy, and more. Learn more about
    text generation parameters in [Text generation strategies](../generation_strategies) and [Text
    generation](text_generation).

    This Text2TextGenerationPipeline pipeline can currently be loaded from [`pipeline`] using the following task
    identifier: `"text2text-generation"`.

    The models that this pipeline can use are seq2seq models. See the up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=text2text-generation). For a list of available
    parameters, see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)

    Usage:

    ```python
    text2text_generator = pipeline("text2text-generation")
    text2text_generator("question: What is 42 ? context: 42 is the answer to life, the universe and everything")
    ```"""

    # Used in the return key of the pipeline.
    return_name = "generated"

    def __init__(self, *args, **kwargs):
        """Initialize the base pipeline, then check that the model type is a supported seq2seq one."""
        super().__init__(*args, **kwargs)

        # The mapping to check against depends on the active framework:
        # the TF mapping for "tf", the default mapping otherwise.
        self.check_model_type(
            TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
            if self.framework == "tf"
            else MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
        )

    def _sanitize_parameters(
        self,
        return_tensors=None,
        return_text=None,
        return_type=None,
        clean_up_tokenization_spaces=None,
        truncation=None,
        stop_sequence=None,
        **generate_kwargs,
    ):
        """
        Split the call-time keyword arguments into preprocess, forward and postprocess parameters.

        Only arguments that were explicitly provided (i.e. are not `None`) are forwarded. `return_text` is accepted
        for signature compatibility but is not consulted here. Every remaining keyword argument is treated as a
        generation argument and ends up in the forward parameters.

        Returns:
            A `(preprocess_params, forward_params, postprocess_params)` tuple of dicts.
        """
        preprocess_params = {}
        if truncation is not None:
            preprocess_params["truncation"] = truncation

        # Same dict object as `generate_kwargs`: the `eos_token_id` entry added below is therefore part of it too.
        forward_params = generate_kwargs

        postprocess_params = {}
        # An explicit `return_type` wins; otherwise derive it from the boolean `return_tensors` flag.
        if return_tensors is not None and return_type is None:
            return_type = ReturnType.TENSORS if return_tensors else ReturnType.TEXT
        if return_type is not None:
            postprocess_params["return_type"] = return_type

        if clean_up_tokenization_spaces is not None:
            postprocess_params["clean_up_tokenization_spaces"] = clean_up_tokenization_spaces

        if stop_sequence is not None:
            stop_sequence_ids = self.tokenizer.encode(stop_sequence, add_special_tokens=False)
            if len(stop_sequence_ids) > 1:
                warnings.warn(
                    "Stopping on a multiple token sequence is not yet supported on transformers. The first token of"
                    " the stop sequence will be used as the stop sequence string in the interim."
                )
            # Only the first token of the stop sequence is used as the end-of-sequence id.
            generate_kwargs["eos_token_id"] = stop_sequence_ids[0]

        return preprocess_params, forward_params, postprocess_params

    def check_inputs(self, input_length: int, min_length: int, max_length: int):
        """
        Checks whether there might be something wrong with given input with regard to the model.

        This base implementation performs no check and always returns `True`.
        """
        return True

    def _parse_and_tokenize(self, *args, truncation):
        """
        Prepend the model's configured prefix (if any) to the input text(s) and tokenize them.

        Args:
            args: The first positional argument must be a `str` or a `list` of texts.
            truncation: Truncation strategy forwarded to the tokenizer.

        Returns:
            The tokenizer output, with `token_type_ids` removed when present.

        Raises:
            ValueError: If a list is given but the tokenizer has no `pad_token_id`, or if the first argument is
                neither a `str` nor a `list`.
        """
        prefix = self.model.config.prefix if self.model.config.prefix is not None else ""
        if isinstance(args[0], list):
            if self.tokenizer.pad_token_id is None:
                raise ValueError("Please make sure that the tokenizer has a pad_token_id when using a batch input")
            args = ([prefix + arg for arg in args[0]],)
            # Batched input is tokenized with padding enabled.
            padding = True

        elif isinstance(args[0], str):
            args = (prefix + args[0],)
            padding = False
        else:
            raise ValueError(
                f"`args[0]`: {args[0]} has the wrong format. It should be either of type `str` or type `list`"
            )
        inputs = self.tokenizer(*args, padding=padding, truncation=truncation, return_tensors=self.framework)
        # This is produced by tokenizers but is an invalid generate kwargs
        if "token_type_ids" in inputs:
            del inputs["token_type_ids"]
        return inputs

    def __call__(self, *args, **kwargs):
        r"""
        Generate the output text(s) using text(s) given as inputs.

        Args:
            args (`str` or `List[str]`):
                Input text for the encoder.
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            truncation (`TruncationStrategy`, *optional*, defaults to `TruncationStrategy.DO_NOT_TRUNCATE`):
                The truncation strategy for the tokenization within the pipeline. `TruncationStrategy.DO_NOT_TRUNCATE`
                (default) will never truncate, but it is sometimes desirable to truncate the input to fit the model's
                max_length instead of throwing an error down the line.
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./model#generative-models)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **generated_text** (`str`, present when `return_text=True`) -- The generated text.
            - **generated_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The token
              ids of the generated text.
        """

        result = super().__call__(*args, **kwargs)
        # Flatten one nesting level when a list of strings produced exactly one result per input.
        # `args` may be empty when the input was passed by keyword, so guard before indexing it.
        if (
            args
            and isinstance(args[0], list)
            and all(isinstance(el, str) for el in args[0])
            and all(len(res) == 1 for res in result)
        ):
            return [res[0] for res in result]
        return result

@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class TranslationPipeline(Text2TextGenerationPipeline):
    """
    Translates from one language to another.

    This translation pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"translation_xx_to_yy"`.

    The models that this pipeline can use are models that have been fine-tuned on a translation task. See the
    up-to-date list of available models on [huggingface.co/models](https://huggingface.co/models?filter=translation).
    For a list of available parameters, see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)

    Usage:

    ```python
    en_fr_translator = pipeline("translation_en_to_fr")
    en_fr_translator("How old are you?")
    ```"""

    # Used in the return key of the pipeline.
    return_name = "translation"

    def check_inputs(self, input_length: int, min_length: int, max_length: int):
        """
        Log a warning when the input length exceeds 90% of `max_length`.

        `min_length` is not used by this check. Always returns `True`.
        """
        if input_length > 0.9 * max_length:
            logger.warning(
                f"Your input_length: {input_length} is bigger than 0.9 * max_length: {max_length}. You might consider "
                "increasing your max_length manually, e.g. translator('...', max_length=400)"
            )
        return True

    def preprocess(self, *args, truncation=TruncationStrategy.DO_NOT_TRUNCATE, src_lang=None, tgt_lang=None):
        """
        Tokenize the input text(s) for translation.

        When the tokenizer provides `_build_translation_inputs`, it is used so that `src_lang` and `tgt_lang` are
        passed to the tokenizer. Otherwise the generic text-to-text tokenization is used, which does not receive the
        language arguments.
        """
        if getattr(self.tokenizer, "_build_translation_inputs", None):
            return self.tokenizer._build_translation_inputs(
                *args, return_tensors=self.framework, truncation=truncation, src_lang=src_lang, tgt_lang=tgt_lang
            )
        else:
            return super()._parse_and_tokenize(*args, truncation=truncation)

    def _sanitize_parameters(self, src_lang=None, tgt_lang=None, **kwargs):
        """
        Extend the parent's parameter split with the source and target languages.

        Explicit `src_lang` / `tgt_lang` arguments are forwarded to preprocessing. When neither is given, the
        languages are read from a task name made of four `_`-separated parts (`translation_XX_to_YY`), if available.

        Returns:
            A `(preprocess_params, forward_params, postprocess_params)` tuple of dicts.
        """
        preprocess_params, forward_params, postprocess_params = super()._sanitize_parameters(**kwargs)
        if src_lang is not None:
            preprocess_params["src_lang"] = src_lang
        if tgt_lang is not None:
            preprocess_params["tgt_lang"] = tgt_lang
        if src_lang is None and tgt_lang is None:
            # Backward compatibility, direct arguments use is preferred.
            task = kwargs.get("task", self.task)
            # Check for a usable task name before splitting it: a missing (None) task must be skipped, not crash.
            items = task.split("_") if task else []
            if len(items) == 4:
                # translation, XX, to YY
                preprocess_params["src_lang"] = items[1]
                preprocess_params["tgt_lang"] = items[3]
        return preprocess_params, forward_params, postprocess_params

    def __call__(self, *args, **kwargs):
        r"""
        Translate the text(s) given as inputs.

        Args:
            args (`str` or `List[str]`):
                Texts to be translated.
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            src_lang (`str`, *optional*):
                The language of the input. Might be required for multilingual models. Will not have any effect for
                single pair translation models
            tgt_lang (`str`, *optional*):
                The language of the desired output. Might be required for multilingual models. Will not have any effect
                for single pair translation models
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./model#generative-models)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **translation_text** (`str`, present when `return_text=True`) -- The translation.
            - **translation_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The
              token ids of the translation.
        """
        return super().__call__(*args, **kwargs)