### Two augmentation chainer classes are currently available

These are:
* AugmentationStreamer
* CallOnOneChain


_First we import some augmentation classes and define a test corpus_

In [1]:
from seaqube.augmentation.char.qwerty import QwertyAugmentation
from seaqube.augmentation.corpus.unigram import UnigramAugmentation
from seaqube.augmentation.word.active2passive import Active2PassiveAugmentation
from seaqube.augmentation.word.eda import EDAAugmentation
from seaqube.augmentation.word.embedding import EmbeddingAugmentation
from seaqube.augmentation.word.translation import TranslationAugmentation


In [2]:
# Tokenized test corpus: seven documents, each a flat list of lowercase word and punctuation tokens.
TEST_CORPUS = [
    ["till", "this", "moment", "i", "never", "knew", "myself", "."],
    [
        "seldom", ",", "very", "seldom", ",",
        "does", "complete", "truth", "belong", "to", "any", "human", "disclosure", ";",
        "seldom", "can", "it", "happen", "that", "something", "is", "not",
        "a", "little", "disguised", "or", "a", "little", "mistaken", ".",
    ],
    [
        "i", "declare", "after", "all", "there", "is", "no", "enjoyment", "like", "reading", "!",
        "how", "much", "sooner", "one", "tires", "of", "anything", "than", "of", "a", "book", "!", "”",
    ],
    [
        "men", "have", "had", "every", "advantage", "of", "us", "in", "telling", "their", "own", "story", ".",
        "education", "has", "been", "theirs", "in", "so", "much", "higher", "a", "degree",
    ],
    [
        "i", "wish", ",", "as", "well", "as", "everybody", "else", ",",
        "to", "be", "perfectly", "happy", ";",
        "but", ",", "like", "everybody", "else", ",", "it", "must", "be", "in", "my", "own", "way", ".",
    ],
    [
        "there", "are", "people", ",", "who", "the", "more", "you", "do", "for", "them", ",",
        "the", "less", "they", "will", "do", "for", "themselves", ".",
    ],
    [
        "one", "half", "of", "the", "world", "can", "not", "understand",
        "the", "pleasures", "of", "the", "other", ".",
    ],
]

## AugmentationStreamer

In [3]:
from seaqube.augmentation.base import AugmentationStreamer
from seaqube.augmentation.reduction.unique_corpus import UniqueCorpusReduction


In [8]:
# Build an augmentation stream: every document is passed through the steps below, one after another.
#   1. A document _d_ is translated first.
#   2. The translated document is fed to Qwerty, which returns 2 documents.
#   3. Each of these 2 documents is fed to EDA, which generates 4 augmented documents per input,
#      i.e. one input line results in up to 8 output lines.
# AugmentationStreamer can also reduce the documents afterwards; here the unique reducer does that.
streamer = AugmentationStreamer(
    [
        TranslationAugmentation(max_length=1),
        QwertyAugmentation(seed=424242, max_length=2),
        EDAAugmentation(max_length=4),
    ],
    reduction_chain=[UniqueCorpusReduction()],
)

First, we run the `streamer` only on one line of the corpus.

In [9]:
# Wrap the first document in a list so the streamer receives a corpus with a single document.
augmented_doc = streamer([TEST_CORPUS[0]])
augmented_doc

100% (1 of 1) |##########################| Elapsed Time: 0:00:02 Time:  0:00:02


[['trough', 'this', 'moment', 'i', 'never', 'knew', 'myself'],
 ['till', 'this', 'moment', 'i', 'never', 'knew', 'myself'],
 ['till', 'this', 'moment', 'i', 'neer', 'knew', 'myself'],
 ['till', 'this', 'moment', 'never', 'knew', 'myself'],
 ['neer', 'till', 'this', 'moment', 'i', 'never', 'knew', 'myself'],
 ['this', 'moment', 'i', 'never', 'knew', 'myself'],
 ['boulder', 'clay', 'this', 'moment', 'i', 'never', 'knew', 'myself']]

In [10]:
len(augmented_doc) # after reducing, there can be fewer than 8 documents

7

In [11]:
streamer(TEST_CORPUS) # run the streamer on the full corpus

100% (7 of 7) |##########################| Elapsed Time: 0:00:18 Time:  0:00:18


[['trough', 'this', 'moment', 'i', 'never', 'knew', 'myself'],
 ['till', 'this', 'moment', 'i', 'never', 'knew', 'myself'],
 ['till', 'this', 'moment', 'i', 'neer', 'knew', 'myself'],
 ['till', 'this', 'moment', 'never', 'knew', 'myself'],
 ['neer', 'till', 'this', 'moment', 'i', 'never', 'knew', 'myself'],
 ['this', 'moment', 'i', 'never', 'knew', 'myself'],
 ['boulder', 'clay', 'this', 'moment', 'i', 'never', 'knew', 'myself'],
 ['till', 'this', 'i', 'never', 'myself'],
 ['till', 'this', 'moment', 'i', 'neer', 'knew', 'mys', 'lf'],
 ['till', 'this', 'never', 'i', 'moment', 'knew', 'mys', 'lf'],
 ['till', 'this', 'moment', 'i', 'never', 'knew', 'myself'],
 ['till', 'this', 'here', 'and', 'now', 'i', 'never', 'knew', 'myself'],
 ['till', 'this', 'moment', 'i', 'never', 'knew', 'mys', 'lf'],
 ['till',
  'this',
  'moment',
  'i',
  'never',
  'public',
  'treasury',
  'knew',
  'myself'],
 ['public',
  'treasury',
  'till',
  'this',
  'moment',
  'i',
  'never',
  'knew',
  'mys',
  'l

### CallOnOneChain

In [13]:
# One-document toy corpus (a single tokenized sentence).
# NOTE(review): `corpus` is not referenced by the cells shown after it, which use TEST_CORPUS —
# confirm whether it is still needed.
corpus = ["the quick brown fox jumps over the lazy dog".split()]

In [19]:
from seaqube.tools.chainer import CallOnOneChain
from seaqube.nlp.tools import unique_2d_list

#### The usage is similar to the AugmentationStreamer. Only the reducing is different.

In [25]:
# No separate reduction chain here: unique_2d_list is passed as the last element of the chain itself
# (presumably it drops duplicate documents — TODO confirm).
pipe = CallOnOneChain(
    [
        TranslationAugmentation(max_length=1),
        UnigramAugmentation(
            corpus=TEST_CORPUS,
            seed=50,
            max_length=2,
            replace_threshold=0.9,
            find_threshold=0.85,
        ),
        QwertyAugmentation(seed=424242, max_length=2),
        unique_2d_list,
    ]
)

In [26]:
# Feed the complete test corpus through the chain.
augmented_and_unique = pipe(TEST_CORPUS)

100% (7 of 7) |##########################| Elapsed Time: 0:00:18 Time:  0:00:18
 50% (7 of 14) |############             | Elapsed Time: 0:00:00 ETA:  00:00:00

The implementation is different: here the full corpus is passed through the first augmentation, and the resulting augmented corpus is then passed to the next augmentation.

In [27]:
# Show the corpus returned by the chain.
augmented_and_unique

[['i',
  'declare',
  'after',
  'all',
  'there',
  'is',
  'no',
  'enjoyment',
  'like',
  'reading',
  '!',
  'how',
  'much',
  'sooner',
  'one',
  'tires',
  'of',
  'anything',
  'than',
  'of',
  'a',
  'book',
  '!',
  '”'],
 ['till', 'this', 'moment', 'i', 'never', 'knew', 'myself', '.'],
 ['men',
  'have',
  'had',
  'every',
  'advantage',
  'of',
  'us',
  'in',
  'telling',
  'their',
  'own',
  'story',
  '.',
  'education',
  'has',
  'been',
  'theirs',
  'in',
  'so',
  'much',
  'higher',
  'a',
  'd2gree'],
 ['i',
  'declare',
  'after',
  'all',
  'there',
  'is',
  'no',
  'enjoyment',
  'like',
  'feading',
  '!',
  'how',
  'much',
  'sooner',
  'one',
  'tires',
  'of',
  'ahything',
  'than',
  'of',
  'a',
  'book',
  '!',
  '”'],
 ['men',
  'have',
  'had',
  'every',
  'advantage',
  ',',
  'us',
  'in',
  'telling',
  'their',
  'own',
  'story',
  '.',
  'education',
  'has',
  'been',
  'theirs',
  'in',
  'so',
  'much',
  'higher',
  'a',
  'decree'],

In [29]:
len(augmented_and_unique) # upper bound 7 * 1 * 2 * 2 = 28 (7 documents; max_length 1, 2 and 2 — TODO confirm); reducing makes it smaller

24