# Adversarial examples: corrupt random vs corrupt most important
This exercise compares the effect of corrupting random words versus corrupting the most important words in a text.

In [1]:
from utils import load_news20
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from wildnlp.aspects import *
from wildnlp.aspects.utils import compose

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


# Download data

In [2]:
# fetch the 20 News Group dataset (the download may take a minute)
train_data, test_data, class_names = load_news20()

# split each part into its raw texts and its labels
(X_raw_train, y_train), (X_raw_test, y_test) = train_data, test_data

In [3]:
# show the category labels that load_news20() returned
class_names

['atheism',
 'graphics',
 'ms-windows.misc',
 'pc.hardware',
 'mac.hardware',
 'ms-windows.x',
 'misc.forsale',
 'autos',
 'motorcycles',
 'baseball',
 'hockey',
 'crypt',
 'electronics',
 'med',
 'space',
 'christian',
 'guns',
 'mideast',
 'politics.misc',
 'religion.misc']

# Prepare clear and corrupt data

In [4]:
# upper bound on the vocabulary size kept by the vectorizer
max_features = 20000

# Fit TF-IDF on the raw training documents and re-use the fitted vocabulary for
# the test documents. The training texts are passed directly, exactly like the
# test texts: the vectorizer accepts any iterable of strings, so wrapping them
# in np.array only adds an extra copy of the corpus.
tfidf = TfidfVectorizer(max_features=max_features, stop_words='english')
X_train = tfidf.fit_transform(X_raw_train)
X_test = tfidf.transform(X_raw_test)

# Prepare and test model

In [5]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline

# build a multinomial naive Bayes classifier (alpha=1) and
# fit it on the TF-IDF features of the training set
model = MultinomialNB(alpha=1)
model.fit(X_train, y_train)

MultinomialNB(alpha=1, class_prior=None, fit_prior=True)

Evaluate its performance on the clean (uncorrupted) test set

In [6]:
# baseline: accuracy of the model on the uncorrupted test features
y_pred = model.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8179766330323951

In [7]:
# chain the fitted vectorizer and classifier into a single pipeline so that
# predictions (e.g. pipe.predict_proba) can be made directly on raw text
pipe = make_pipeline(tfidf, model)

# Corrupt most important and random

In [8]:
from utils import change_most_important_word,change_random_word

In [9]:
# corruption function built by composing a single QWERTY aspect
# (presumably keyboard-neighbour typos -- confirm against the wildnlp docs)
composed = compose(QWERTY())

In [10]:
# number of documents, taken from the start of the test set, that are
# corrupted and evaluated in the cells below
how_many = 2000

In [11]:
%%time
# run change_most_important_word on each of the first `how_many` test
# documents; the helper receives the text, its label, the pipeline's
# predict_proba and the corruption function
X_raw_test_corrupt_imp = [
    change_most_important_word(text, label, pipe.predict_proba, composed)
    for text, label in zip(X_raw_test[:how_many], y_test[:how_many])
]

CPU times: user 15min 18s, sys: 1.69 s, total: 15min 20s
Wall time: 15min 20s


In [12]:
# run change_random_word on the same first `how_many` test documents; the
# labels are not used by this helper, so iterate over the texts only
# NOTE(review): no random seed is set in the visible cells, so this result
# presumably varies between runs -- confirm how change_random_word picks its word
X_raw_test_corrupt_rnd = [
    change_random_word(text, composed) for text in X_raw_test[:how_many]
]

In [13]:
# vectorize both corrupted text sets with the already-fitted TF-IDF
# (transform only, no refit)
X_test_corrupt_imp, X_test_corrupt_rnd = (
    tfidf.transform(corrupted_docs)
    for corrupted_docs in (X_raw_test_corrupt_imp, X_raw_test_corrupt_rnd)
)

Accuracy on the test subset with the most important words corrupted

In [14]:
# score the model on the documents corrupted by change_most_important_word,
# against the labels of the same first `how_many` test documents
y_pred = model.predict(X_test_corrupt_imp)
accuracy_score(y_true=y_test[:how_many], y_pred=y_pred)

0.7925

Accuracy on the test subset with random words corrupted

In [15]:
# score the model on the documents corrupted by change_random_word,
# against the labels of the same first `how_many` test documents
y_pred = model.predict(X_test_corrupt_rnd)
accuracy_score(y_true=y_test[:how_many], y_pred=y_pred)

0.815