In [1]:
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import matplotlib.pyplot as plt
import numpy as np
import random
from nltk.tokenize.treebank import TreebankWordDetokenizer, TreebankWordTokenizer
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [2]:
gpus = tf.config.list_physical_devices('GPU')
tf.config.set_visible_devices(gpus[0], 'GPU')

In [3]:
tf.random.set_seed(12345)
np.random.seed(5678)
random.seed(123)

# Get Data

In [4]:
split_test_data = pd.read_csv('../Data/SplitWord2VecDataV2.csv')

In [5]:
x_train, x_test, y_train, y_test = train_test_split(split_test_data['text'], split_test_data['marked'], stratify=split_test_data['marked'], random_state=12345)

In [6]:
x_train

24562     51: 29 and the land shall tremble and sorrow: ...
120983                               she did not hands him.
5000      could a linguist, could a grammarian, could ev...
124015                                 solid, settled lost.
124413    bobby joe was trying to get linda kay to say s...
                                ...                        
47756       " it is a new planet and it shall bear my name.
99588     mandatory retirement at sixty-five looms on ou...
78109        fellow, come from the throng, look vpon caesar
111535    the uniform fiscal year brings the town's fisc...
96866                             check put of lid on jar;;
Name: text, Length: 103978, dtype: object

In [7]:
y_train

24562     0
120983    1
5000      0
124015    0
124413    1
         ..
47756     0
99588     1
78109     0
111535    0
96866     1
Name: marked, Length: 103978, dtype: int64

In [8]:
x_test

129828                     gradually they emerged as women.
40550     17: 20 and when absalom' s servant came to the...
75649     and now that at the proper time and place, aft...
130984                     "you're not to mentions my name.
70670     here you are, pip; and there' s the windlass -...
                                ...                        
78390     harke, harke, one knockes: portia go in a whil...
29946     13: 18 and pray ye that your flight be not in ...
68086                  it was a saturday night in december.
107447    "he looks as masculine as you can imagine", sh...
34322     2: 4 but god, who is wealthy in mercy, for his...
Name: text, Length: 34660, dtype: object

In [9]:
y_test

129828    1
40550     1
75649     0
130984    1
70670     1
         ..
78390     1
29946     0
68086     0
107447    1
34322     1
Name: marked, Length: 34660, dtype: int64

In [10]:
test_data_dict = {'text':x_test, 'marked':y_test}
test_data = pd.DataFrame.from_dict(test_data_dict)

In [11]:
test_data_marked = test_data.loc[test_data['marked'] == 1]['text'].to_list()

In [12]:
test_data_marked[0:10]

['gradually they emerged as women.',
 "17: 20 and when absalom' s servant came to the woman to the house, they said, where is ahimaaz and jonathan?",
 '"you\'re not to mentions my name.',
 "here you are, pip; and there' s the windlass - bitts; up you mt.!",
 '" my dear emma," said he at last, with earnest kindness, " do you think you perfectly understand the degrees of acquaintance between the gentleman and lady we have been speaking of?"',
 'elementary school desegregation came to owen and union county, which already had high school desegregation.',
 'mrs. ralph taussig, for junior aide;;',
 'throughout the period and during the movement operation, the museum continued its functional supporting of the armed forces institute of pathology.',
 'two mins afterwards he had overtaken turnbull and told the tale; ten minutes afterwards he and turnbull had somehow tumbled into the yacht called the _gibson girl_ and had somehow pushed off from the isle of st . loup.',
 'here, after twenty lone 

In [13]:
test_data_unmarked = test_data.loc[test_data['marked'] == 0]['text'].to_list()

In [14]:
test_data_unmarked[0:10]

["and now that at the proper time and place, after so long and wide a preliminary cruise, ahab,-- all other whaling waters swept--seemed to have chased his foe into an ocean - fold, to slay him the more securely there; now, that he found himself hard by the very latitude and longitude where his tormenting wound had been inflicted; now that a vessel had been spoken which on the very day preceding had actually encountered moby dick;-- and now that all his successive meetings with various ships contrastingly concurred to show the demoniac indifference with which the white whale tore his hunters, whether sinning or sinned against; now it was that there lurked a something in the old man' s eyes, which it was hardly sufferable for feeble souls to see.",
 'here the problem is essentially one of defining the word "filling".',
 'this is going to be a language lesson, and you can master it in a few minutes.',
 'the fourth, however, had already advanced on the chauffeur of the black - and - yello

# Load Model

In [12]:
watermark_predictor = tf.keras.models.load_model('../SavedModels/v1')

# Predict on Sentence Collection

In [16]:
def PredictSentenceCollection(model, text):
    # Threshold and prepare the predictions.
    predictions = model.predict(text)
    predictions[predictions >= .5] = 1
    predictions[predictions < .5] = 0
    predictions = list(map(int, predictions))
    
    # Get the count of predictions that are watermarked (equal to 1).
    watermarked_count = predictions.count(1)
    
    # If more than half of the predictions are watermarked, return watermarked.
    if(watermarked_count > len(predictions)/2):
        return "watermarked"
    else:
        return "unmarked"

# Create Sentence Collections

In [17]:
def GetSentenceCollections(text, num_collections=10, collection_size=20):
    text_collections = []
    
    for i in range(0, num_collections):
        sentence_collection = random.sample(text, collection_size)
        text_collections.append(sentence_collection)
        
    return text_collections

# Impact of Attack
Robustness metric. Impact of Attack. Accuracy before attack - accuracy after attack on same data.  

Impact of attack -> accuracy(predict(sentence_collection)) - accuracy(predict(attack(sentence_collection)))

Average Impact of Attack -> average([impactOfAttack(sentence_collection) for sentence_collection in sentence_collections])

* Positive number indicates the attack hurt prediction performace.  
* Negative number indicates the attack improved prediction performance, which should be rare.  
* 0 value indicates attack caused no change in prediction performance.  

In [18]:
# Note, the sentence collection should be a list of sentences. Label should be either "watermarked" or "unmarked".
def IsPredictionCorrect(model, sentence_collection, label):
    if not (len(sentence_collection) > 0):
        print('Length of the sentence collection should be greater than 0')
    
    watermarked_label = 'watermarked'
    unmarked_label = 'unmarked'
    
    # If the label matches the predicted label, return 1. Otherwise return 0.
    if (label == watermarked_label):
        return int(PredictSentenceCollection(model, sentence_collection) == watermarked_label)
    elif (label == unmarked_label):
        return int(PredictSentenceCollection(model, sentence_collection) == unmarked_label)
    else:
        print("Label must be either watermarked or unmarked") 

In [19]:
def ImpactOfAttack(model, attack_function, attack_args, sentence_collection, label):
    preattack_correctness = IsPredictionCorrect(model, sentence_collection, label)
    postattack_correctness = IsPredictionCorrect(model, attack_function(sentence_collection, **attack_args), label)
    
    return preattack_correctness - postattack_correctness

In [20]:
def AverageImpactOfAttack(model, attack_function, attack_args, sentence_collections, label):
    result_list = []
    
    for sentence_collection in sentence_collections:
        result_list.append(ImpactOfAttack(model, attack_function, attack_args, sentence_collection, label))
    
    return sum(result_list) / len(result_list)

# Define Sentence Attacks

Attack function should have a sentence collection as the first parameter, and then can have arbitrary parameters for the rest.

In [21]:
def RemoveSentences(sentence_collection, num_removed_sentences):
    copied_sentence_collection = sentence_collection.copy()
    
    for i in range(num_removed_sentences):
        copied_sentence_collection.remove(random.choice(copied_sentence_collection))
        
    return copied_sentence_collection

In [22]:
# This attack should add oppositely marked sentences to the sentence collection. E.g., add unmarked sentences to a marked sentence collection.
# So, the opposite_sentences_for_addition parameter should be sentences with the opposite label.
def AddSentences(sentence_collection, num_added_sentences, opposite_sentences_for_addition):
    copied_sentence_collection = sentence_collection.copy()
    
    for i in range(num_added_sentences):
        copied_sentence_collection.append(random.choice(opposite_sentences_for_addition))
        
    return copied_sentence_collection

In [23]:
# Similar to the AddSentences attack, this replaces sentences with oppositely labeled sentences. 
# Therefore, opposite_sentences_for_replacement should be a collection of sentences with the opposite label.
def ReplaceSentences(sentence_collection, num_replaced_sentences, opposite_sentences_for_replacement):
    copied_sentence_collection = sentence_collection.copy()
    
    for i in range(num_replaced_sentences):
        copied_sentence_collection.remove(random.choice(copied_sentence_collection))
        copied_sentence_collection.append(random.choice(opposite_sentences_for_replacement))
    
    return copied_sentence_collection

In [24]:
# As this removes random tokens from a sentence, the input sentence collection should be a single sentence.
# Extra sentences are not considered and will result in incorrect results.
def RemoveTokens(sentence_collection, num_removed_tokens, tokenizer, detokenizer):
    sentence = sentence_collection[0]
    
    sentence = tokenizer.tokenize(sentence)
    
    for i in range(num_removed_tokens):
        sentence.remove(random.choice(sentence))
    
    sentence = detokenizer.detokenize(sentence)
    
    return [ sentence ]

# Watermarked - sentence removal attack

In [25]:
num_collections = 1000
num_sentences_in_collection = 20
marked_eval_data = GetSentenceCollections(test_data_marked, num_collections, num_sentences_in_collection)

In [26]:
marked_eval_data[0:5]

[['excellent dance indeed!-- yes, my dear, i ran home, as i said i should, to help grandmama to bed, and got back again, and nobody missed me .-- i set off without saying a word, just as i told you.',
  'if you are an income beneficiary of property held in trust or an heir, legatee, or devisee, you may deduct permissible depreciation and depletion, if not deductible by the estate or trust.',
  'i am sure he has not the least ideas of it.',
  'and yet, till now, i always took you for " a dead - shots at a yellow - hammer.',
  'it took them an hour before they came to the first homes of kelseyville.',
  '" he is always out of sarcasm."',
  'now it did not occur to him even to wonder whether it was wise for robinson to diving again: rob was his boy, the kid he had rescued from the streets, the object of his pride.',
  'and all this red rolling of impieties came from his thin, genteel lips rather primly than otherwise, as he sat sipping the wine out of his tall, thin glass.',
  '-- and was

### Remove 1 sentence 

In [27]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=1), marked_eval_data, label="watermarked")

-0.002

### Remove 3 sentences

In [28]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=3), marked_eval_data, label="watermarked")

-0.001

### Remove 5 sentences

In [29]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=5), marked_eval_data, label="watermarked")

-0.001

### Remove 10 sentences

In [30]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=10), marked_eval_data, label="watermarked")

0.081

### Remove 15 sentences

In [31]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=15), marked_eval_data, label="watermarked")

0.077

### Remove 17 Sentences

In [32]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=17), marked_eval_data, label="watermarked")

0.154

### Remove 19 sentences

In [33]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=19), marked_eval_data, label="watermarked")

0.217

# Unmarked - sentence removal attack

In [34]:
unmarked_eval_data = GetSentenceCollections(test_data_unmarked, num_collections, num_sentences_in_collection)

In [35]:
unmarked_eval_data[0:5]

[['please come in, if you like--but i understood that you had some business."',
  'thank the lord, they still had water!!',
  '} roaming in thought [after reading hegel]',
  '15: 32 then jesus called his disciples unto him, and said, i have compassion on the multitude, because they continue with me now three days, and have nothing to eat: and i will not send them away fasting, lest they faint in the way.',
  'now in those camps of green, in their tents dotting the world, in the parents, children, husbands, wives, in them, in the old and young, sleeping under the sunlight, sleeping under the moonlight, content and silent there at last, behold the mighty bivouac - field and waiting - camp of all, of the corps and generals all, and the president over the corps and generals all, and of each of us o soldiers, and of each and all in the ranks we fought, (there without hatred we all, all meet.)',
  '49: 21 naphtali is a hind let loose: he giveth goodly words.',
  '29: 19 my root was spread ou

### Remove 1 sentence 

In [36]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=1), unmarked_eval_data, label="unmarked")

0.0

### Remove 3 sentences

In [37]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=3), unmarked_eval_data, label="unmarked")

0.0

### Remove 5 sentences

In [38]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=5), unmarked_eval_data, label="unmarked")

0.0

### Remove 10 sentences

In [39]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=10), unmarked_eval_data, label="unmarked")

0.002

### Remove 15 sentences

In [40]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=15), unmarked_eval_data, label="unmarked")

0.037

### Remove 17 sentences

In [41]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=17), unmarked_eval_data, label="unmarked")

0.065

### Remove 19 sentences

In [42]:
AverageImpactOfAttack(watermark_predictor, RemoveSentences, dict(num_removed_sentences=19), unmarked_eval_data, label="unmarked")

0.156

# Watermarked - sentence addition attack

### Add 1 sentence 

In [43]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=1, opposite_sentences_for_addition=test_data_unmarked), marked_eval_data, label="watermarked")

-0.001

### Add 3 sentences

In [44]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=3, opposite_sentences_for_addition=test_data_unmarked), marked_eval_data, label="watermarked")

0.017

### Add 5 sentences

In [45]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=5, opposite_sentences_for_addition=test_data_unmarked), marked_eval_data, label="watermarked")

0.053

### Add 10 sentences

In [46]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=10, opposite_sentences_for_addition=test_data_unmarked), marked_eval_data, label="watermarked")

0.284

### Add 15 sentences

In [47]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=15, opposite_sentences_for_addition=test_data_unmarked), marked_eval_data, label="watermarked")

0.483

### Add 17 Sentences

In [48]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=17, opposite_sentences_for_addition=test_data_unmarked), marked_eval_data, label="watermarked")

0.573

### Add 19 sentences

In [49]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=19, opposite_sentences_for_addition=test_data_unmarked), marked_eval_data, label="watermarked")

0.69

# Unmarked - sentence addition attack

### Add 1 sentence 

In [50]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=1, opposite_sentences_for_addition=test_data_marked), unmarked_eval_data, label="unmarked")

0.0

### Add 3 sentences

In [51]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=3, opposite_sentences_for_addition=test_data_marked), unmarked_eval_data, label="unmarked")

0.0

### Add 5 sentences

In [52]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=5, opposite_sentences_for_addition=test_data_marked), unmarked_eval_data, label="unmarked")

0.006

### Add 10 sentences

In [53]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=10, opposite_sentences_for_addition=test_data_marked), unmarked_eval_data, label="unmarked")

0.012

### Add 15 sentences

In [54]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=15, opposite_sentences_for_addition=test_data_marked), unmarked_eval_data, label="unmarked")

0.101

### Add 17 sentences

In [55]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=17, opposite_sentences_for_addition=test_data_marked), unmarked_eval_data, label="unmarked")

0.143

### Add 19 sentences

In [56]:
AverageImpactOfAttack(watermark_predictor, AddSentences, dict(num_added_sentences=19, opposite_sentences_for_addition=test_data_marked), unmarked_eval_data, label="unmarked")

0.225

# Watermarked - sentence replacement attack

### Replace 1 sentence 

In [57]:
AverageImpactOfAttack(watermark_predictor, ReplaceSentences, dict(num_replaced_sentences=1, opposite_sentences_for_replacement=test_data_unmarked), marked_eval_data, label="watermarked")

0.008

### Replace 3 sentences

In [58]:
AverageImpactOfAttack(watermark_predictor, ReplaceSentences, dict(num_replaced_sentences=3, opposite_sentences_for_replacement=test_data_unmarked), marked_eval_data, label="watermarked")

0.063

### Replace 5 sentences

In [59]:
AverageImpactOfAttack(watermark_predictor, ReplaceSentences, dict(num_replaced_sentences=5, opposite_sentences_for_replacement=test_data_unmarked), marked_eval_data, label="watermarked")

0.15

### Replace 7 sentences

In [60]:
AverageImpactOfAttack(watermark_predictor, ReplaceSentences, dict(num_replaced_sentences=7, opposite_sentences_for_replacement=test_data_unmarked), marked_eval_data, label="watermarked")

0.286

# Unmarked - sentence replacement attack

### Replace 1 sentence 

In [61]:
AverageImpactOfAttack(watermark_predictor, ReplaceSentences, dict(num_replaced_sentences=1, opposite_sentences_for_replacement=test_data_marked), unmarked_eval_data, label="unmarked")

0.0

### Replace 3 sentences

In [62]:
AverageImpactOfAttack(watermark_predictor, ReplaceSentences, dict(num_replaced_sentences=3, opposite_sentences_for_replacement=test_data_marked), unmarked_eval_data, label="unmarked")

0.004

### Replace 5 sentences

In [63]:
AverageImpactOfAttack(watermark_predictor, ReplaceSentences, dict(num_replaced_sentences=5, opposite_sentences_for_replacement=test_data_marked), unmarked_eval_data, label="unmarked")

0.008

### Replace 7 sentences

In [64]:
AverageImpactOfAttack(watermark_predictor, ReplaceSentences, dict(num_replaced_sentences=7, opposite_sentences_for_replacement=test_data_marked), unmarked_eval_data, label="unmarked")

0.026

# Watermarked - token removal attack

In [65]:
sentence_marked_eval_data = GetSentenceCollections(test_data_marked, num_collections, 1)

In [66]:
sentence_marked_eval_data[0:10]

[['oh god, your onely iigge - maker: what should a dude do, but be merrie.'],
 ['15: 25 but now i go unto jerusalem to ministers unto the saints.'],
 ['but involvement needs to be accompanied by a special frames of mind.'],
 ["a name that i am so very well acquainted with; knew the gentleman so well by sight; seen him a hundred times; came to consult me once, i remember, about a trespass of one of his neighbours; farmers' s man breaking into his orchard; wall torn down; apples stolen; caught in the fact; and afterwards, contrary to my judgement, submitted to an amicable compromise."],
 ['the princely hierarch in their bright stand there left his powers, to seise possesion of the garden; he alone, to find where adam sheltered, took his way, not unperceived of adam; who to eve, while the great visitant approached, thus spake.'],
 ['for the school year, 1959-1960, the prince edward county (virginia) board of supervisors voted not to provide funds for public education, and the school board

### Remove 1 token

In [67]:
AverageImpactOfAttack(watermark_predictor, RemoveTokens, dict(num_removed_tokens=1, tokenizer=TreebankWordTokenizer(), detokenizer=TreebankWordDetokenizer()), marked_eval_data, label="watermarked")

0.239

### Remove 2 tokens

In [68]:
AverageImpactOfAttack(watermark_predictor, RemoveTokens, dict(num_removed_tokens=2, tokenizer=TreebankWordTokenizer(), detokenizer=TreebankWordDetokenizer()), marked_eval_data, label="watermarked")

0.233

### Remove 3 tokens

In [69]:
AverageImpactOfAttack(watermark_predictor, RemoveTokens, dict(num_removed_tokens=3, tokenizer=TreebankWordTokenizer(), detokenizer=TreebankWordDetokenizer()), marked_eval_data, label="watermarked")

0.229

# Unmarked - token removal attack

In [70]:
sentence_unmarked_eval_data = GetSentenceCollections(test_data_unmarked, num_collections, 1)

In [71]:
sentence_unmarked_eval_data[0:10]

[['be not ashamed women, your privilege encloses the rest, and is the exit of the rest, you are the gates of the body, and you are the gates of the soul.'],
 ['variations in sound velocity should be measured rather than temperature, because more of the variables would be encompassed.'],
 ['asked dr . bull in exasperation.'],
 ["well, don't worry."],
 ['cray introduced him civilly enough, as dr oman, but he showed such disfavour in his very face that brown guessed the two men, whether audrey knew it or not, were rivals.'],
 ['that was one of the things she liked about stanley.'],
 ['9: 25 and he said, cursed be canaan; a servant of servants shall he be unto his brethren.'],
 ['too late, he realized that in turning, he had wheeled them onto a patch of sandy ground, instead of atop a grade or ridge.'],
 ['perhaps existing public health service, state department and armed services medical facilities can be utilized.'],
 ["almost all of the 3,000 lumber dealers who cater primarily to the ne

### Remove 1 token

In [72]:
AverageImpactOfAttack(watermark_predictor, RemoveTokens, dict(num_removed_tokens=1, tokenizer=TreebankWordTokenizer(), detokenizer=TreebankWordDetokenizer()), unmarked_eval_data, label="unmarked")

0.266

### Remove 2 tokens

In [73]:
AverageImpactOfAttack(watermark_predictor, RemoveTokens, dict(num_removed_tokens=2, tokenizer=TreebankWordTokenizer(), detokenizer=TreebankWordDetokenizer()), unmarked_eval_data, label="unmarked")

0.413

### Remove 3 tokens

In [74]:
AverageImpactOfAttack(watermark_predictor, RemoveTokens, dict(num_removed_tokens=3, tokenizer=TreebankWordTokenizer(), detokenizer=TreebankWordDetokenizer()), unmarked_eval_data, label="unmarked")

0.449