<a href="https://colab.research.google.com/github/alexandster/anti-Asian-BERT/blob/main/NLPaug.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install sacremoses

In [None]:
pip install nlpaug


In [None]:
pip install transformers

In [None]:
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc

from nlpaug.util import Action

In [None]:
text = 'The quick brown fox jumps over the lazy dog .'
print(text)

The quick brown fox jumps over the lazy dog .


# Character Augmenter
OCR Augmenter

In [None]:
#Substitute character by pre-defined OCR error
aug = nac.OcrAug()
augmented_texts = aug.augment(text, n=3)
print("Original:")
print(text)
print("Augmented Texts:")
print(augmented_texts)

Original:
The quick brown fox jumps over the lazy dog .
Augmented Texts:
['The quick 6kown fox jumps 0vek the lazy dog.', 'The quick brown f0x jumps uvek the lazy du9.', 'The quick brown fox jomp8 0vek the lazy dog.']


Keyboard Augmenter

In [None]:
#Substitute character by keyboard distance
aug = nac.KeyboardAug()
augmented_text = aug.augment(text)
print("Original:")
print(text)
print("Augmented Text:")
print(augmented_text)

Original:
The quick brown fox jumps over the lazy dog .
Augmented Text:
['The @uiSk b5o3n fox jumps 0vDr the lazy dog.']


Random Augmenter

In [None]:
#Insert character randomly
aug = nac.RandomCharAug(action="insert")
augmented_text = aug.augment(text)
print("Original:")
print(text)
print("Augmented Text:")
print(augmented_text)

Original:
The quick brown fox jumps over the lazy dog .
Augmented Text:
['The qhuYick brown fox jumps oHveEr the lMazZy dog.']


In [None]:
#Substitute character randomly
aug = nac.RandomCharAug(action="substitute")
augmented_text = aug.augment(text)
print("Original:")
print(text)
print("Augmented Text:")
print(augmented_text)

Original:
The quick brown fox jumps over the lazy dog .
Augmented Text:
['The quiHh yroun fox Hum(s over the lazy dog.']


In [None]:
#Swap character randomly
aug = nac.RandomCharAug(action="swap")
augmented_text = aug.augment(text)
print("Original:")
print(text)
print("Augmented Text:")
print(augmented_text)

In [None]:
#Delete character randomly
aug = nac.RandomCharAug(action="delete")
augmented_text = aug.augment(text)
print("Original:")
print(text)
print("Augmented Text:")
print(augmented_text)

# Word Augmenter
Spelling Augmenter

In [1]:
#Substitute word by spelling mistake words dictionary
aug = naw.SpellingAug()
augmented_texts = aug.augment(text, n=3)
print("Original:")
print(text)
print("Augmented Texts:")
print(augmented_texts)

NameError: ignored

# Synonym Augmenter

In [None]:
#Substitute word by WordNet's synonym
aug = naw.SynonymAug(aug_src='wordnet')
augmented_text = aug.augment(text)
print("Original:")
print(text)
print("Augmented Text:")
print(augmented_text)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


Original:
The quick brown fox jumps over the lazy dog .
Augmented Text:
['The immediate brown fox jump over the faineant dog.']


In [None]:
import os

#Substitute word by PPDB's synonym
aug = naw.SynonymAug(aug_src='ppdb', model_path=os.environ.get("MODEL_DIR") + 'ppdb-2.0-s-all')
augmented_text = aug.augment(text)
print("Original:")
print(text)
print("Augmented Text:")
print(augmented_text)

TypeError: ignored

# Random Word Augmenter

In [None]:
#Swap word randomly
aug = naw.RandomWordAug(action="swap")
augmented_text = aug.augment(text)
print("Original:")
print(text)
print("Augmented Text:")
print(augmented_text)

Original:
The quick brown fox jumps over the lazy dog .
Augmented Text:
['The quick fox jumps brown over lazy the dog.']


In [None]:
#Delete word randomly
aug = naw.RandomWordAug()
augmented_text = aug.augment(text)
print("Original:")
print(text)
print("Augmented Text:")
print(augmented_text)

Original:
The quick brown fox jumps over the lazy dog .
Augmented Text:
['The brown fox jumps over lazy.']


In [None]:
#Delete a set of contunous word will be removed randomly
aug = naw.RandomWordAug(action='crop')
augmented_text = aug.augment(text)
print("Original:")
print(text)
print("Augmented Text:")
print(augmented_text)

Original:
The quick brown fox jumps over the lazy dog .
Augmented Text:
['The quick over the lazy dog.']


# Back Translation Augmenter

In [None]:
import nlpaug.augmenter.word as naw

text = 'The quick brown fox jumped over the lazy dog'
back_translation_aug = naw.BackTranslationAug(
    from_model_name='facebook/wmt19-en-de', 
    to_model_name='facebook/wmt19-de-en'
)
back_translation_aug.augment(text)

Downloading tokenizer_config.json:   0%|          | 0.00/67.0 [00:00<?, ?B/s]

Downloading vocab-src.json:   0%|          | 0.00/829k [00:00<?, ?B/s]

Downloading vocab-tgt.json:   0%|          | 0.00/829k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/308k [00:00<?, ?B/s]

['The speedy brown fox leapt over the lazy dog']