In [1]:
# Core packages used by the augmentation examples in this notebook
%pip install numpy requests nlpaug

Collecting nlpaug
  Using cached nlpaug-1.1.11-py3-none-any.whl.metadata (14 kB)
Collecting gdown>=4.0.0 (from nlpaug)
  Using cached gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Using cached nlpaug-1.1.11-py3-none-any.whl (410 kB)
Using cached gdown-5.2.0-py3-none-any.whl (18 kB)
Installing collected packages: gdown, nlpaug
Successfully installed gdown-5.2.0 nlpaug-1.1.11
Note: you may need to restart the kernel to use updated packages.


In [3]:
# Each version specifier is quoted because %pip hands this line to the system
# shell, where an unquoted ">" is an output redirection: the ">=..." constraints
# would be dropped and pip's output written to files named "=1.6.0" / "=4.11.3".
%pip install "torch>=1.6.0" "transformers>=4.11.3" sentencepiece

Note: you may need to restart the kernel to use updated packages.


In [4]:
# The specifier is quoted because %pip hands this line to the system shell,
# where an unquoted ">" is an output redirection: the ">=0.61.10" constraint
# would be dropped and pip's output written to a file named "=0.61.10".
%pip install "simpletransformers>=0.61.10"

Note: you may need to restart the kernel to use updated packages.


In [5]:
# The specifier is quoted because %pip hands this line to the system shell,
# where an unquoted ">" is an output redirection: the ">=3.4.5" constraint
# would be dropped and pip's output written to a file named "=3.4.5".
%pip install "nltk>=3.4.5"

Note: you may need to restart the kernel to use updated packages.


In [7]:
from pathlib import Path

from nlpaug.util.file.download import DownloadUtil

# Fetch the pretrained embedding files into the working directory.
# A download is skipped when the model file that the augmenter cells of this
# notebook load is already present, so re-running the notebook does not fetch
# the same model again. Delete the file to force a fresh download.
if not Path('GoogleNews-vectors-negative300.bin').exists():
    DownloadUtil.download_word2vec(dest_dir='.')  # Download word2vec model
if not Path('glove.6B.300d.txt').exists():
    DownloadUtil.download_glove(model_name='glove.6B', dest_dir='.')  # Download GloVe model
if not Path('wiki-news-300d-1M.vec').exists():
    DownloadUtil.download_fasttext(model_name='wiki-news-300d-1M', dest_dir='.')  # Download fasttext model

In [8]:
# The specifier is quoted because %pip hands this line to the system shell,
# where an unquoted ">" is an output redirection: the ">=4.1.2" constraint
# would be dropped and pip's output written to a file named "=4.1.2".
%pip install "gensim>=4.1.2"

Note: you may need to restart the kernel to use updated packages.


In [1]:
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc
from nlpaug.util import Action

In [2]:
# Sample sentence passed to every augmenter in this notebook.
# The leading newline, the indentation and the trailing spaces are part of the value.
text = (
    "\n"
    "    I go daily to the gym, because I think fitness is the key to a healthy life.\n"
    "  "
)

# Word2Vec, GloVe & fastText text augmentation models
## Substitute vs Insert (Random)

### Word2vec

In [14]:
# Build a word-embedding augmenter on top of the word2vec vectors.
#   model_type: one of "word2vec", "glove", or "fasttext"
#   action:     "substitute" here; "insert" is the alternative
aug = naw.WordEmbsAug(
    action="substitute",
    model_path='GoogleNews-vectors-negative300.bin',
    model_type='word2vec',
)

# Run the augmenter, then show the input followed by the result
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:

    I go daily to the gym, because I think fitness is the key to a healthy life.
  
Augmented Text:
['â_€_œI go weekday to the lifted_weights, because You think Mark_Bitcon is the key to a strong life.']


In [15]:
# Build a word-embedding augmenter on top of the word2vec vectors.
#   model_type: one of "word2vec", "glove", or "fasttext"
#   action:     "insert" here; "substitute" is the alternative
aug = naw.WordEmbsAug(
    action="insert",
    model_path='GoogleNews-vectors-negative300.bin',
    model_type='word2vec',
)

# Run the augmenter, then show the input followed by the result
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:

    I go daily to the gym, because I think fitness is the key to a healthy life.
  
Augmented Text:
['I AP go Olive daily to finest the gym, because deriving I think fitness WYLIE is the key to a Monafeghin healthy life.']


### GloVe

In [16]:
# Build a word-embedding augmenter on top of the GloVe vectors.
#   model_type: one of "word2vec", "glove", or "fasttext"
#   model_path: check your "content" path to find out specific model names
#   action:     "substitute" here; "insert" is the alternative
aug = naw.WordEmbsAug(
    action="substitute",
    model_path='glove.6B.300d.txt',
    model_type='glove',
)

# Run the augmenter, then show the input followed by the result
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:

    I go daily to the gym, because I think fitness is the key to a healthy life.
  
Augmented Text:
['I go daily to through gym, because I anybody fitness is however issues to a mature they.']


In [18]:
# Build a word-embedding augmenter on top of the GloVe vectors.
#   model_type: one of "word2vec", "glove", or "fasttext"
#   model_path: check your "content" path to find out specific model names
#   action:     "insert" here; "substitute" is the alternative
aug = naw.WordEmbsAug(
    action="insert",
    model_path='glove.6B.300d.txt',
    model_type='glove',
)

# Run the augmenter, then show the input followed by the result
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:

    I go daily to the gym, because I think fitness is the key to a healthy life.
  
Augmented Text:
["I go daily fluker to the gym, '88 because I value think fitness matko is the mijn key to petrovice a healthy life."]


### fastText

In [22]:
# Build a word-embedding augmenter on top of the fasttext vectors.
#   model_type: one of "word2vec", "glove", or "fasttext"
#   model_path: check your "content" path to find out specific model names
#   action:     "substitute" here; "insert" is the alternative
aug = naw.WordEmbsAug(
    action="substitute",
    model_path='wiki-news-300d-1M.vec',
    model_type='fasttext',
)

# Run the augmenter, then show the input followed by the result
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:

    I go daily to the gym, because I think fitness is the key to a healthy life.
  
Augmented Text:
['I miss two-weekly to most gym, why moment.I think fitness is several key to a healthy life.']


In [23]:
# Build a word-embedding augmenter on top of the fasttext vectors.
#   model_type: one of "word2vec", "glove", or "fasttext"
#   model_path: check your "content" path to find out specific model names
#   action:     "insert" here; "substitute" is the alternative
aug = naw.WordEmbsAug(
    action="insert",
    model_path='wiki-news-300d-1M.vec',
    model_type='fasttext',
)

# Run the augmenter, then show the input followed by the result
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:

    I go daily to the gym, because I think fitness is the key to a healthy life.
  
Augmented Text:
['four-octave I go daily Ejaculation to the gym, business-building because I think Eline fitness is cross-denominational the key to father-and-daughter a healthy life.']


# Contextual word embeddings: Substitute vs Insert

In [27]:
# Contextual word-embedding augmenter, "substitute" action.
#   model_path: 'bert-base-uncased' here; other models include
#               'distilbert-base-uncased', 'roberta-base', etc.
#   action:     "insert" is the alternative
aug = naw.ContextualWordEmbsAug(
    action="substitute",
    model_path='bert-base-uncased',
)

# Run the augmenter, then show the input followed by the result
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:

    I go daily to the gym, because I think fitness is the key to a healthy life.
  
Augmented Text:
['i write daily to the wall, because i think this remains his key to a healthy wellness.']


In [28]:
# Contextual word-embedding augmenter, "insert" action.
#   model_path: 'bert-base-uncased' here; other models include
#               'distilbert-base-uncased', 'roberta-base', etc.
#   action:     "substitute" is the alternative
aug = naw.ContextualWordEmbsAug(
    action="insert",
    model_path='bert-base-uncased',
)

# Run the augmenter, then show the input followed by the result
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:

    I go daily to the gym, because I think fitness is the key to a healthy life.
  
Augmented Text:
['i often go once daily to the campus gym, because here i think fitness is the key to building a social healthy life.']


# Synonym Augmenter

In [32]:
# Synonym augmenter that takes its replacements from WordNet
aug = naw.SynonymAug(aug_src='wordnet')

# Run the augmenter, then show the input followed by the result
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:

    I go daily to the gym, because I think fitness is the key to a healthy life.
  
Augmented Text:
['1 lead daily to the gym, because 1 imagine fitness is the key fruit to a healthy life.']


In [35]:

# Synonym augmenter that takes its replacements from WordNet.
# aug_max optionally sets the max number of words to replace with a synonym.
aug = naw.SynonymAug(
    aug_max=10,
    aug_src='wordnet',
)

# Run the augmenter, then show the input followed by the result
augmented_text = aug.augment(text)
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:

    I go daily to the gym, because I think fitness is the key to a healthy life.
  
Augmented Text:
['Unity go daily to the gym, because I think physical fitness is the key to a intelligent life story.']


In [40]:
# Back-translation augmenter, constructed with no arguments
back_translation_aug = naw.BackTranslationAug()

# The call stays as the cell's last expression so the notebook displays its result
back_translation_aug.augment(
    text
)
     

['I go to the gym every day because I believe that fitness is the key to a healthy life.']