In [1]:
from dataset.augmentation.augmentations import SimpleRandomUNK, RandomUNKWithInputMask, UNKWithInputMask
from dataset.preprocessor.preprocessors import BaselinePreprocessor
from transformers import AutoTokenizer
import numpy as np

In [2]:
from dataset.dataset import BaselineDataset

In [3]:
# Root directory handed to the dataset constructor; kept in a named constant so
# the location is easy to spot and change.
DATASET_DIR = "/opt/ml/dataset/"
dataset = BaselineDataset(DATASET_DIR)

In [4]:
# Build the baseline preprocessor with its default constructor arguments.
preprocessor = BaselinePreprocessor()

In [5]:
# Attach the preprocessor instance to the dataset through its setter.
dataset.set_preprocessor(preprocessor)

In [6]:
# Run the dataset's preprocessing step.
# Presumably this applies the preprocessor attached in the previous cell --
# confirm in BaselineDataset.
dataset.preprocess()

In [7]:
# Hugging Face checkpoint identifier whose tokenizer is loaded below.
MODEL_NAME = "klue/bert-base"
# Resolve and load the tokenizer that belongs to MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [8]:
# Attach the tokenizer to the dataset through its setter.
# The sample printed in the next cell carries input_ids / token_type_ids /
# attention_mask, so the dataset presumably tokenizes on access -- confirm.
dataset.set_tokenizer(tokenizer)

In [9]:
# Peek at the first sample only and print each of its fields.
# NOTE(review): in the recorded output `label` is a long 1-D tensor while the
# other fields hold a single sequence -- looks like the dataset returns every
# label rather than this sample's label; confirm in BaselineDataset.
first_sample = next(iter(dataset), None)
if first_sample is not None:
    for field_name, field_tensor in first_sample.items():
        print(field_name, field_tensor)

input_ids tensor([[    2,    11, 29830,    11,     3,    11,  8373, 14113,  2234,    11,
             3,   168, 30985, 14451,  7088,  4586,   169,   793,  8373, 14113,
          2234,  2052,  1363,  2088, 29830,  2116, 14879,  2440,  6711,   170,
         21406, 26713,  2076, 25145,  5749,   171,  1421,   818,  2073,  4388,
          2062,    18,     3]])
token_type_ids tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
attention_mask tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])
label tensor([ 0,  0, 20,  ...,  1,  0,  1])


In [10]:
# Example sentence fed to every augmentation demo below (23 whitespace-separated words).
input_sentence = (
    "The specific problem has already been solved in previous answers, "
    "so I will address the general idea of using conditionals "
    "inside list comprehensions."
)
input_sentence

'The specific problem has already been solved in previous answers, so I will address the general idea of using conditionals inside list comprehensions.'

In [3]:
# Random word-masking augmentation. '<unk>' is the replacement text that shows
# up in the recorded outputs; 0.15 is presumably the per-word replacement
# probability -- confirm against SimpleRandomUNK.
# NOTE(review): BERT-style tokenizers usually spell their unknown token
# '[UNK]'; if augmented text is later tokenized with klue/bert-base, confirm
# that '<unk>' is the intended token.
augmentation = SimpleRandomUNK('<unk>', 0.15)

In [4]:
# Draw five augmented variants of the same sentence to eyeball the randomness.
# No seed is set in the visible cells, so the printed lines differ between runs.
for draw in range(5):
    augmented = augmentation(input_sentence)
    print(augmented)

<unk> specific problem has already <unk> solved in previous <unk> so I <unk> address the general idea of using conditionals inside list comprehensions.
The specific <unk> has already <unk> solved in previous <unk> so I <unk> address the general idea of using conditionals inside list comprehensions.
The specific problem has already been solved <unk> previous answers, so I will <unk> the general idea of using conditionals inside <unk> <unk>
<unk> <unk> problem has already <unk> solved <unk> previous answers, so I <unk> address the general idea of using conditionals inside list comprehensions.
The specific <unk> <unk> already been solved in previous answers, so <unk> will address the general <unk> <unk> using conditionals inside list <unk>


In [5]:
# Rebind `augmentation` to the mask-aware random variant. '<unk>' is the
# replacement text seen in the recorded outputs; 0.2 is presumably the
# replacement probability -- confirm against RandomUNKWithInputMask.
augmentation = RandomUNKWithInputMask('<unk>', 0.2)

In [6]:
# One flag per whitespace-separated word of the example sentence (23 entries).
# Judging by the recorded outputs, words flagged 1 are never replaced and
# words flagged 0 are candidates for replacement.
input_mask = np.array([
    0, 0, 1, 0, 0, 0, 1, 0,  # The specific problem has already been solved in
    1, 0, 1, 1, 0, 1, 0, 1,  # previous answers, so I will address the general
    1, 0, 1, 1, 1, 1, 1,     # idea of using conditionals inside list comprehensions.
])
input_mask

array([0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       1])

In [8]:
# Five draws with the mask applied and the third positional argument set to True.
# The meaning of that flag is defined in RandomUNKWithInputMask and is not
# visible in this notebook.
for draw in range(5):
    augmented = augmentation(input_sentence, input_mask, True)
    print(augmented)

<unk> specific problem <unk> already been solved <unk> previous answers, so I <unk> address the general idea of using conditionals inside list comprehensions.
The <unk> problem has already been solved in previous answers, so I will address the general idea <unk> using conditionals inside list comprehensions.
<unk> specific problem has <unk> been solved <unk> previous <unk> so I will address the general idea of using conditionals inside list comprehensions.
The specific problem <unk> already <unk> solved <unk> previous answers, so I <unk> address the general idea <unk> using conditionals inside list comprehensions.
The specific problem has <unk> been solved in previous <unk> so I <unk> address the general idea of using conditionals inside list comprehensions.


In [9]:
# Same demo as the previous cell but with the third positional argument set to
# False, for side-by-side comparison of the two modes.
for draw in range(5):
    augmented = augmentation(input_sentence, input_mask, False)
    print(augmented)

The <unk> problem has <unk> been solved in previous answers, so I will address the general idea <unk> using conditionals inside list comprehensions.
<unk> specific problem has already been solved in previous answers, so I will address the general idea <unk> using conditionals inside list comprehensions.
The specific problem <unk> already <unk> solved in previous answers, so I will address the general idea of using conditionals inside list comprehensions.
<unk> <unk> problem has already <unk> solved in previous answers, so I will address the general idea of using conditionals inside list comprehensions.
The specific problem has already been solved in previous answers, so I will address the general idea <unk> using conditionals inside list comprehensions.


In [10]:
# Rebind `augmentation` to the mask-driven variant that takes no probability.
# The recorded outputs are identical across calls and replace every word whose
# mask entry is 0 with '<unk>'.
augmentation = UNKWithInputMask('<unk>')

In [11]:
# Call the mask-driven augmentation five times; the recorded outputs are all
# identical, which suggests this variant is deterministic.
for draw in range(5):
    augmented = augmentation(input_sentence, input_mask)
    print(augmented)

<unk> <unk> problem <unk> <unk> <unk> solved <unk> previous <unk> so I <unk> address <unk> general idea <unk> using conditionals inside list comprehensions.
<unk> <unk> problem <unk> <unk> <unk> solved <unk> previous <unk> so I <unk> address <unk> general idea <unk> using conditionals inside list comprehensions.
<unk> <unk> problem <unk> <unk> <unk> solved <unk> previous <unk> so I <unk> address <unk> general idea <unk> using conditionals inside list comprehensions.
<unk> <unk> problem <unk> <unk> <unk> solved <unk> previous <unk> so I <unk> address <unk> general idea <unk> using conditionals inside list comprehensions.
<unk> <unk> problem <unk> <unk> <unk> solved <unk> previous <unk> so I <unk> address <unk> general idea <unk> using conditionals inside list comprehensions.
