# DPML | Latency Replay

In this notebook, we investigate the reproducibility of transformation sequences captured by `dpml`.

## Load Dependencies

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from lineage import LeBatch
from lineage.transformation import DPMLClassWrapper, DPMLCallableWrapper

from sibyl import *
from datasets import load_dataset

import time
from tqdm.notebook import tqdm

# `np` and `pd` are used by later cells; import them explicitly rather than
# relying on whatever the star imports happen to export.
import numpy as np
import pandas as pd

2022-08-02 06:38:08.827526: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


## Create Datasets

In [3]:
# Load the first four SST-2 training rows and expose the sentence column as
# `text`, the column name the cells below index by.
dataset = load_dataset("glue", "sst2", split="train[:4]").rename_column('sentence', 'text')

Reusing dataset glue (/home/coraline/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


## Replay Test

### Routine to be Tracked

In [4]:
from sibyl.transformations.text.mixture.concept_mix import *
from sibyl.transformations.text.word_swap.change_synse import *

In [5]:
# Scheduler for the "sentiment" task; every sampled transformation is handed
# out through DPMLClassWrapper (the tracked loop below reads `.wrapped_class`).
scheduler = SibylTransformScheduler(
    "sentiment",
    class_wrapper=DPMLClassWrapper,
)

In [6]:
# Tracked routine: push the dataset through sampled transformations in
# mini-batches, applying each one via LeBatch.apply inside a LeBatch context,
# and collect the resulting records for the replay comparison further down.
text, label = dataset['text'], dataset['label']

batch_size = 2

# Scheduler sampling settings.
# NOTE(review): presumably the number of INV / SIB transforms drawn per
# sample() call -- confirm against SibylTransformScheduler.
scheduler.num_INV = 2
scheduler.num_SIB = 2

# Transformations excluded from this run. Built once instead of on every
# inner-loop iteration.
SKIPPED_TRANSFORMS = (ConceptMix, ChangeSynonym, ChangeAntonym, ChangeHyponym)

records = []
startTime = time.perf_counter()
for i in tqdm(range(0, len(label), batch_size)):
    text_batch = text[i:i+batch_size]
    label_batch = label[i:i+batch_size]
    batch = (text_batch, label_batch)
    with LeBatch(original_batch=batch) as le_batch:
        for transform in scheduler.sample():
            if transform.wrapped_class in SKIPPED_TRANSFORMS:
                continue
            print(transform.wrapped_class)
            # Route the call through le_batch.apply rather than calling
            # transform.transform_batch directly.
            batch = le_batch.apply(batch, transform.transform_batch)
        # After the last transform, `batch` holds the items that are
        # accumulated into `records` (later cells read .text / .target on them).
        records.extend(batch)
    print()
print('Elapsed time: {:6.3f} seconds'.format(time.perf_counter() - startTime))

  0%|          | 0/2 [00:00<?, ?it/s]

<class 'sibyl.transformations.text.mixture.text_mix.SentMix'>
<class 'sibyl.transformations.text.insertion.sentiment_phrase.InsertPositivePhrase'>
<class 'sibyl.transformations.text.word_swap.word_deletion.WordDeletion'>

<class 'sibyl.transformations.text.mixture.text_mix.SentMix'>
<class 'sibyl.transformations.text.typos.char_substitute.RandomCharSubst'>

Elapsed time:  0.030 seconds


In [7]:
# Display the transformed text and target of every tracked record.
[(record.text, record.target) for record in records]

[("b'hide new from the parental units contains no wit , only labored gags ' That being I it.",
  array([[0.75, 0.25]])),
 ("b'contains no , only labored gags wit only labored ' That being said, I'm pleased.",
  array([[0.75, 0.25]])),
 ("b'that loves its characters and communicates something rather beautiful about human nature  remain\t utterly satisfied to remain the same throughout '",
  [0.38620689655172413, 0.6137931034482759]),
 ("b'remains utterly satisfied to remain the same throughout  remains utterly satisfied to remain t/e same throughout '",
  [1.0, 0.0])]

# Replay Functionality

In [8]:
from lineage.utils import *

## Replay Functionality - CSV

In [9]:
from lineage.storage.csv.transform_logger import TransformLogger as CSVTransformLogger

# The CSV log is read without a header row, so the column names are supplied
# here explicitly.
logger = CSVTransformLogger()
df = pd.read_csv(
    logger.path,
    header=None,
    names=['batch_id', 'text', 'target', 'transform_prov'],
)

df

Unnamed: 0,batch_id,text,target,transform_prov
0,0,hide new secretions from the parental units,0,"['{""module_name"": ""sibyl.transformations.text...."
1,0,"contains no wit , only labored gags",0,"['{""module_name"": ""sibyl.transformations.text...."
2,1,that loves its characters and communicates som...,1,"['{""module_name"": ""sibyl.transformations.text...."
3,1,remains utterly satisfied to remain the same t...,0,"['{""module_name"": ""sibyl.transformations.text...."


In [10]:
# Replay from the CSV store and display the result; the recorded output is a
# list of (text, target) tuples that the next cell compares against `records`.
# NOTE(review): replay_all_from_csv is presumably provided by the
# `from lineage.utils import *` cell -- confirm.
new_records = replay_all_from_csv()
new_records

[("b'hide new from the parental units contains no wit , only labored gags ' That being I it.",
  array([[0.75, 0.25]])),
 ("b'contains no , only labored gags wit only labored ' That being said, I'm pleased.",
  array([[0.75, 0.25]])),
 ("b'that loves its characters and communicates something rather beautiful about human nature  remain\t utterly satisfied to remain the same throughout '",
  [0.38620689655172413, 0.6137931034482759]),
 ("b'remains utterly satisfied to remain the same throughout  remains utterly satisfied to remain t/e same throughout '",
  [1.0, 0.0])]

In [11]:
# Compare the tracked run against the replayed run, record by record.
original_records = [(le.text, le.target) for le in records]

# Only the trailing len(original_records) replayed entries are compared
# (previously a hard-coded 4, which matched only this dataset slice).
# NOTE(review): presumably the log can also hold entries from earlier runs,
# hence the tail slice -- confirm.
# The guard avoids `new_records[-0:]`, which would select the whole list.
replayed_records = new_records[-len(original_records):] if original_records else []

# zip() stops at the shorter input, so a short replay must be flagged
# explicitly or it would be reported as equivalent.
equiv_result = len(replayed_records) == len(original_records)
if not equiv_result:
    print(f"length mismatch: {len(original_records)} original vs {len(replayed_records)} replayed")

for old_r, new_r in zip(original_records, replayed_records):
    # np.array_equal returns False on a shape mismatch, where an elementwise
    # `!=` would warn or raise depending on the NumPy version.
    if old_r[0] != new_r[0] or not np.array_equal(old_r[1], new_r[1]):
        print(old_r, new_r)
        equiv_result = False
equiv_result

True

## Replay Functionality - SQL

In [10]:
# Replay from the database store and display the result. This rebinds
# `new_records`, replacing the value produced by the CSV replay cell.
# NOTE(review): replay_all_from_db is presumably provided by the
# `from lineage.utils import *` cell -- confirm.
new_records = replay_all_from_db()
new_records

[(["b'hide raw from the parental unit pelt raw secretion from the parental unit ' That being I it."],
  [array([[0.75, 0.25]])]),
 (["b'contains no , only laboured laugh arrest no mentality , only laboured laugh ' That being I it."],
  [array([[0.75, 0.25]])]),
 (["a fictional type is nonpareil of the main characters. b'that know its type and transmit something kinda beautiful about human nature  that know its type an\t transmit something kinda beautiful about human nature '"],
  [[[0.0, 1.0]]]),
 (["fisher rest satisfied with the effect of the experiment. b'remains perfectly satisfied to rest the same end-to-end  rest perfectly sa\tisfied to rest the same end-to-end '"],
  [[[1.0, 0.0]]])]

In [14]:
# Display the tracked records next to the database replay output above.
[(record.text, record.target) for record in records]

[("b'hide unexampled from the parental whole check no wit , only moil joke ' That being I it.",
  array([[0.75, 0.25]])),
 ("b'contains no , only labored muzzle humour only labored ' That being said, I'm pleased.",
  array([[0.75, 0.25]])),
 ("symbiotic kinship between organism and their nature. b'remains dead slaked to persist the same throughout  persist dead slaked to persist t/e same throughout '",
  [[0.5396552085876465, 0.4603447914123535]]),
 ("the cadaver of an being that has not been slaked b'remains dead slaked to remain the same throughout  cadaver dead slaked to remain t/e same throughout '",
  [[1.0, 0.0]])]

## Transformation Wrappers

In [54]:
# Toy two-example batch for the wrapper demos below. This rebinds `text` and
# `batch`, which earlier cells used for the dataset run.
text = ["This is a test.", "This isn't a test!"]
target = [0, 1]
batch = (text, target)

# First entry of TRANSFORMATIONS serves as the transformation under test.
# NOTE(review): TRANSFORMATIONS presumably comes from `from sibyl import *` -- confirm.
t_orig = TRANSFORMATIONS[0]

In [74]:
print("DPMLClassWrapper")

# Wrap the transformation class, then instantiate it through the wrapper.
t_class_wrapped = DPMLClassWrapper(t_orig)(task_name="sentiment", return_metadata=True)

# Wrapper attributes printed after each call, in display order.
class_wrapper_fields = (
    "_class_name",
    "_class_args",
    "_class_kwargs",
    "_callable_name",
    "_callable_args",
    "_callable_kwargs",
)

batch = t_class_wrapped.transform_batch(batch)

print("DPMLClassWrapper | transform_batch")
print(batch)
for field in class_wrapper_fields:
    print(f"{field}:", getattr(t_class_wrapped, field))

X, y, meta = t_class_wrapped.transform_Xy(text[1], target[1])

print("DPMLClassWrapper | transform_Xy")
print(X, y)
for field in class_wrapper_fields:
    print(f"{field}:", getattr(t_class_wrapped, field))

DPMLClassWrapper
DPMLClassWrapper | transform_batch
(['hide new secretions from the parental units ', 'contains no wit , only labored gags '], [0, 0])
_class_name: ExpandContractions
_class_args: []
_class_kwargs: {'task_name': 'sentiment', 'return_metadata': True}
_callable_name: ('transform_batch',)
_callable_args: []
_callable_kwargs: []
DPMLClassWrapper | transform_Xy
contains no wit , only labored gags  1
_class_name: ExpandContractions
_class_args: []
_class_kwargs: {'task_name': 'sentiment', 'return_metadata': True}
_callable_name: ('transform_Xy',)
_callable_args: []
_callable_kwargs: []


In [75]:
# Instantiate the transformation directly; here only its bound methods are wrapped.
t_init = t_orig(task_name="sentiment", return_metadata=True)

# Wrapper attributes printed after each call, in display order.
callable_wrapper_fields = ("_callable_name", "_callable_args", "_callable_kwargs")

t_callable_wrapped = DPMLCallableWrapper(t_init.transform_batch)
batch = t_callable_wrapped(batch)

print("DPMLCallableWrapper | transform_batch")
print(batch)
for field in callable_wrapper_fields:
    print(field, getattr(t_callable_wrapped, field))

t_callable_wrapped = DPMLCallableWrapper(t_init.transform_Xy)
X, y, meta = t_callable_wrapped(text[1], target[1])

print("DPMLCallableWrapper | transform_Xy")
print(X, y)
for field in callable_wrapper_fields:
    print(field, getattr(t_callable_wrapped, field))

DPMLCallableWrapper | transform_batch
(['hide new secretions from the parental units ', 'contains no wit , only labored gags '], [0, 0])
_callable_name ('transform_batch',)
_callable_args []
_callable_kwargs []
DPMLCallableWrapper | transform_Xy
contains no wit , only labored gags  1
_callable_name ('transform_Xy',)
_callable_args []
_callable_kwargs []
