# DPML | Latency Comparisons

In this notebook, we investigate the latency costs of tracking lineage with `dpml`.

## Load Dependencies

In [1]:
from lineage import LeBatch

from sibyl import TextMix, ChangeSynonym, ExpandContractions, ChangeAntonym, InsertNegativePhrase
from datasets import load_dataset

import time
from tqdm.notebook import tqdm

2022-07-05 11:58:37.842464: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0


## Create Datasets

In [2]:
dataset = load_dataset("glue", "sst2", split="train")
dataset = dataset.rename_column('sentence', 'text')

Reusing dataset glue (/home/coraline/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


## Latency Test

### No Lineage

In [3]:
transform = TextMix()

text, label = dataset['text'], dataset['label'] 
new_text, new_label = [], []

batch_size= 10


startTime = time.perf_counter()
for i in tqdm(range(0, len(label), batch_size)):
    text_batch = text[i:i+batch_size]
    label_batch = label[i:i+batch_size]
    batch = (text_batch, label_batch)
    new_records = transform(batch, num_classes=2)
print('Elapsed time: {:6.3f} seconds'.format(time.perf_counter() - startTime))

  0%|          | 0/6735 [00:00<?, ?it/s]

Elapsed time:  1.071 seconds


### Lineage w/o Logging

In [3]:
transform = TextMix()

text, label = dataset['text'], dataset['label'] 
new_text, new_label = [], []

batch_size= 10

startTime = time.perf_counter()
for i in tqdm(range(0, len(label), batch_size)):
    text_batch = text[i:i+batch_size]
    label_batch = label[i:i+batch_size]
    batch = (text_batch, label_batch)
    new_records = LeBatch(batch).apply(transform, num_classes=2)
print('Elapsed time: {:6.3f} seconds'.format(time.perf_counter() - startTime))

  0%|          | 0/6735 [00:00<?, ?it/s]

Elapsed time:  6.350 seconds


### Lineage w/ Logging

In [3]:
transform = TextMix()

text, label = dataset['text'], dataset['label'] 
new_text, new_label = [], []

batch_size= 10

startTime = time.perf_counter()
for i in tqdm(range(0, len(label), batch_size)):
    text_batch = text[i:i+batch_size]
    label_batch = label[i:i+batch_size]
    batch = (text_batch, label_batch)
    new_records = LeBatch(batch).apply(transform, num_classes=2)
print('Elapsed time: {:6.3f} seconds'.format(time.perf_counter() - startTime))

  0%|          | 0/6735 [00:00<?, ?it/s]

Elapsed time: 19.974 seconds


In [5]:
transform = TextMix()

in_text = [
    "The characters are unlikeable and the script is awful. It's a waste of the talents of Deneuve and Auteuil.", 
    "Unwatchable. You can't even make it past the first three minutes. And this is coming from a huge Adam Sandler fan!!1",
    "An unfunny, unworthy picture which is an undeserving end to Peter Sellers' career. It is a pity this movie was ever made.",
    "I think it's one of the greatest movies which are ever made, and I've seen many... The book is better, but it's still a very good movie!",
    "The only thing serious about this movie is the humor. Well worth the rental price. I'll bet you watch it twice. It's obvious that Sutherland enjoyed his role.",
    "Touching; Well directed autobiography of a talented young director/producer. A love story with Rabin's assassination in the background. Worth seeing"
]

in_target = [0, 0, 0, 1, 1, 1] # (imdb dataset 0=negative, 1=positive)

batch = (in_text, in_target)

In [6]:
with LeContext(batch) as le:
    new_records = le.apply(transform, num_classes=2)

NameError: name 'LeContext' is not defined

In [4]:
new_batch = new_records

with LeContext(new_batch) as le:
    new_records2 = le.apply(transform, num_classes=2)

In [5]:
new_records[1].text

'b"Unwatchable. You can\'t even make it past the first three minutes. And this is coming from a huge Adam Sandler fan!!1 Unwatchable. You can\'t even make it past the first three minutes. And this is coming from a huge Adam Sandler fan!!1"'

In [6]:
new_records2[1]

<LeRecord:
	 text="b'b"Unwatchable. You can\'t even make it past the first three minutes. And this is coming from a huge Adam Sandler fan!!1 Unwatchable. You can\'t even make it past the first three minutes. And this is coming from a huge Adam Sandler fan!!1" b"An unfunny, unworthy picture which is an undeserving end to Peter Sellers\' career. It is a pity this movie was ever made. The only thing serious about this movie is the humor. Well worth the rental price. I\'ll bet you watch it twice. It\'s obvious that Sutherland enjoyed his role."'",
	 target="[0.8884123728427289, 0.11158762715727105]",
	 le_attrs={'transformation_provenance': <TransformationProvenance: {(0, "{'class': 'TextMix', 'return_metadata': False}"), (1, "{'class': 'TextMix', 'return_metadata': False}")}>, 'feature_provenance': <FeatureProvenance[edit_seq] {(0, (41, 42), 'replace: [20,21]-[41,42]'), (1, (42, 92), 'insert: [42,42]-[42,92]')}>, 'granularity': 'word'}>