In [None]:
# Uncomment to install TextAttack into the running kernel's environment:
# %pip install textattack

In [7]:
from textattack.augmentation import WordNetAugmenter
 
# Build the augmenter with its default settings.
augmenter = WordNetAugmenter()
 
# Example usage: augment() returns a list of augmented strings (the printed
# output below is a one-element list), not a single string.
sentence = "The quick brown fox jumps over the lazy dog."
augmented_sentence = augmenter.augment(sentence)
 
# Show the input next to the augmented result for comparison.
print(f"Original Sentence: {sentence}")
print(f"Augmented Sentence: {augmented_sentence}")

Original Sentence: The quick brown fox jumps over the lazy dog.
Augmented Sentence: ['The quick brown fox jumps over the lazy weenie.']


[nltk_data] Downloading package omw-1.4 to /home/ryan/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [12]:
from textattack.augmentation import EmbeddingAugmenter

# Initialize the EmbeddingAugmenter with its default settings
embed_aug = EmbeddingAugmenter()

# Example usage: augment() returns a list of augmented strings (the printed
# output below is a one-element list), not a single string.
original_text = "TextAttack is a powerful library for NLP."
augmented_text = embed_aug.augment(original_text)

# Show the input next to the augmented result for comparison.
print(f"Original Text: {original_text}")
print(f"Augmented Text: {augmented_text}")


Original Text: TextAttack is a powerful library for NLP.
Augmented Text: ['TextAttack is a influential library for NLP.']


In [13]:
from transformers import MarianMTModel, MarianTokenizer
import torch

In [14]:
# Helper that loads the tokenizer/model pair for one translation direction
def download(model_name):
  """Load the Marian tokenizer and model identified by ``model_name``.

  Both objects are created with ``from_pretrained(model_name)``; the
  tokenizer is loaded first, then the model.

  Returns:
    A ``(tokenizer, model)`` tuple.
  """
  return (
      MarianTokenizer.from_pretrained(model_name),
      MarianMTModel.from_pretrained(model_name),
  )

# Download the English -> Romance pair; back_translate() uses it for the
# forward leg (source language -> intermediate language).
tmp_lang_tokenizer, tmp_lang_model = download('Helsinki-NLP/opus-mt-en-ROMANCE')
# Download the Romance -> English pair; back_translate() uses it for the
# return leg (intermediate language -> source language).
src_lang_tokenizer, src_lang_model = download('Helsinki-NLP/opus-mt-ROMANCE-en')



In [25]:
def translate(texts, model, tokenizer, language, verbose=True):
    """Translate a batch of texts into a target language.

    Args:
        texts: A string or an iterable of strings. A bare string is treated
            as a one-element batch instead of being iterated character by
            character.
        model: Object exposing ``generate(**encoded_inputs)``.
        tokenizer: Callable that encodes a list of strings (invoked with
            ``return_tensors="pt", padding=True, truncation=True``) and
            exposes ``batch_decode``.
        language: Target language code. For any value other than ``"en"``
            each text is prefixed with a ``>>language<<`` token; for
            ``"en"`` the text is passed through unprefixed.
        verbose: When true (the default, matching the previous behaviour),
            print the formatted inputs before they are tokenized.

    Returns:
        A list with one decoded translation per input text, or ``[]`` when
        ``texts`` is empty (the tokenizer and model are not called).
    """
    # A lone string would otherwise be split into single characters below.
    if isinstance(texts, str):
        texts = [texts]

    # Format the text as expected by the model; the prefix depends only on
    # the target language, so compute it once for the whole batch.
    prefix = "" if language == "en" else f">>{language}<< "
    formatted_texts = [f"{prefix}{txt}" for txt in texts]

    # Nothing to translate: skip tokenization and generation entirely.
    if not formatted_texts:
        return []

    if verbose:
        print(formatted_texts)

    encoded = tokenizer(formatted_texts, return_tensors="pt", padding=True, truncation=True)

    # Translate; gradients are not needed for inference.
    with torch.no_grad():
        # ** unpacks the encoded inputs into keyword arguments.
        generated = model.generate(**encoded)

    # Decode (tokens to text)
    return tokenizer.batch_decode(generated, skip_special_tokens=True)

def back_translate(texts, language_src, language_dst):
    """Round-trip ``texts`` through ``language_dst`` and back to ``language_src``.

    The forward leg uses the notebook-level ``tmp_lang_model`` and
    ``tmp_lang_tokenizer``; the return leg uses ``src_lang_model`` and
    ``src_lang_tokenizer``.

    Returns:
        The texts produced by the return leg, as returned by ``translate``.
    """
    # Forward leg: source language -> intermediate language.
    pivot_texts = translate(texts, tmp_lang_model, tmp_lang_tokenizer, language_dst)

    # Return leg: intermediate language -> source language.
    return translate(pivot_texts, src_lang_model, src_lang_tokenizer, language_src)

# Example usage: sample English inputs for back-translation. Each string is
# an issue-style title that starts with one or more bracketed tags.
src_texts = [
    '[Audit] Password expiration change is not included in the Audit',
    '[BE] User administration - ensure that user does not exist in Auth0 prior to creating it',
    '[FE][Page Config] Creating a single-state-manager page or copying a PWB page breaks the Select Station widget'
]

In [29]:
# Round-trip the sample texts: "en" source, "co" as the intermediate target
# (presumably Corsican -- confirm against the model's supported language codes).
# translate() prints the formatted inputs of each leg, hence the two lists below.
back_texts = back_translate(src_texts, "en", "co")

['>>co<< [Audit] Password expiration change is not included in the Audit', '>>co<< [BE] User administration - ensure that user does not exist in Auth0 prior to creating it', '>>co<< [FE][Page Config] Creating a single-state-manager page or copying a PWB page breaks the Select Station widget']
["[Audit] Non è inclusa la modifica di scadenza del mots d'ordine nell'audit", "[BE] Administrazione d'usuari - assicurarsi che l'usuari non esista in Auth0 prima di crearlo", '[FE][Config Page] Creando una pagina single-state-manager o copiando una pagina PWB rompe il widget Select Station']


In [30]:
# Show the back-translated texts for comparison with src_texts.
print(back_texts)

['[Audit] Change in the maturity of the passwords is not included in the audit', '[BE] User administration - make sure the user does not exist in Auth0 before creating it', '[FE][Config Page] Creating a single-state-manager page or copying a PWB page breaks the Select Station widget']
