In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn

In [2]:
# Directory for Colab (optional, use if running Colab)
from google.colab import drive
drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [3]:
import os
os.chdir('/content/gdrive/MyDrive/NLP_project')

In [4]:
!pwd

/content/gdrive/MyDrive/NLP_project


In [5]:
train_matrix = np.load('embeddings/imdb_train_bert_emb.npy')
train_matrix.shape

(8000, 768)

In [6]:
test_matrix = np.load('embeddings/imdb_bert_test_textfooler_emb.npy')
test_matrix.shape

(3680, 768)

In [7]:
from sklearn.decomposition import KernelPCA

k_PCA = KernelPCA(n_components=100, kernel='rbf')
X_train = k_PCA.fit_transform(train_matrix)
X_test = k_PCA.transform(test_matrix)
print(X_train.shape, X_test.shape)

(8000, 100) (3680, 100)


In [8]:
df_train = pd.read_pickle("pickle/imdb_train.pickle")
labels = df_train['label']
labels

8305     0
10408    0
19465    1
8775     0
16013    1
        ..
6271     0
21834    1
1115     0
8107     0
23570    1
Name: label, Length: 8000, dtype: int64

In [9]:
pos_labels = labels.reset_index(drop=True).astype(bool)
pos_labels

0       False
1       False
2        True
3       False
4        True
        ...  
7995    False
7996     True
7997    False
7998    False
7999     True
Name: label, Length: 8000, dtype: bool

In [10]:
neg_labels = pos_labels==False
neg_labels

0        True
1        True
2       False
3        True
4       False
        ...  
7995     True
7996    False
7997     True
7998     True
7999    False
Name: label, Length: 8000, dtype: bool

In [11]:
# Separate positive and negative vectors
X_train_pos = np.delete(X_train, pos_labels, axis=0)
X_train_neg = np.delete(X_train, neg_labels, axis=0)
print(X_train_pos.shape)
print(X_train_neg.shape)

(4007, 100)
(3993, 100)


In [12]:
# Estimate mean for each class
pos_mean = np.mean(X_train_pos, axis=0)
neg_mean = np.mean(X_train_neg, axis=0)
print(len(pos_mean), len(neg_mean))

100 100


In [13]:
# Estimate covariance for each class
from sklearn.covariance import empirical_covariance, MinCovDet

pos_cov = empirical_covariance(X_train_pos)
neg_cov = empirical_covariance(X_train_neg)

pos_MinCovDet = MinCovDet().fit(X_train_pos)
pos_mcd = pos_MinCovDet.covariance_
neg_MinCovDet = MinCovDet().fit(X_train_neg)
neg_mcd = neg_MinCovDet.covariance_
print(pos_cov.shape, neg_cov.shape, pos_mcd.shape, neg_mcd.shape)

(100, 100) (100, 100) (100, 100) (100, 100)


In [14]:
# Compute gaussian likelihood 
def gaussian_likelihood(X_test, mean, cov):
  '''Input : 
  X_test : matrix of test embeddings
  mean : mean (for positive or negative distribution)
  cov : covariance (for positive or negative distribution)
  '''
  L = []
  for z in X_test:
    inv_cov = np.linalg.inv(cov)
    likelihood = - np.dot(np.dot(np.transpose(z - mean), inv_cov), z-mean)
    L.append(likelihood)
  return L

In [15]:
len(gaussian_likelihood(X_test, pos_mean, pos_cov))

3680

In [16]:
len(gaussian_likelihood(X_test, neg_mean, neg_cov))

3680

In [17]:
df_test = pd.read_pickle('pickle/imdb_bert_test_textfooler.pickle')
df_test

Unnamed: 0,text,adversarial
3098,"So, I'm wondering while watching this film, di...",1
288,"Although not a big Coen brothers fan, me am an...",1
1686,me am in a theatres nightclubs at my pupil and...,1
5350,The Thirdly Stooges enjoys always been some of...,1
8615,"(Possible ?? spoilers included, but nothing cr...",1
...,...,...
87,This would probably be a good film to see....p...,0
4274,"I haven't seen a lot of episodes of ""Family Gu...",0
1051,The film was half over before I managed to fig...,0
6107,"When I first saw the movie, I thought it was s...",0


In [28]:
def compute_negative_likelihood(X_test, pos_mean, neg_mean, pos_cov, neg_cov):
  pos_likelihoods = gaussian_likelihood(X_test, pos_mean, pos_cov)
  neg_likelihoods = gaussian_likelihood(X_test, neg_mean, neg_cov)

  n = len(X_test)
  assert len(pos_likelihoods)==n
  assert len(neg_likelihoods)==n

  L = []
  for i in range(n):
    if pos_likelihoods[i] >= neg_likelihoods[i]:
      L.append(- pos_likelihoods[i]) # We want to algorithm to predict attacks when likelihood is the lower, i.e. (-1)*likelihood is the highest
    else:
      L.append(- neg_likelihoods[i])
  
  return L

In [29]:
ML_cov = compute_negative_likelihood(X_test, pos_mean, neg_mean, pos_cov, neg_cov)
ML_mcd = compute_negative_likelihood(X_test, pos_mean, neg_mean, pos_mcd, neg_mcd)
df_results = df_test.copy()
df_results['ML_cov'] = ML_cov
df_results['ML_mcd'] = ML_mcd
df_results

Unnamed: 0,text,adversarial,ML_cov,ML_mcd
3098,"So, I'm wondering while watching this film, di...",1,177.687863,848.398088
288,"Although not a big Coen brothers fan, me am an...",1,131.887217,351.114338
1686,me am in a theatres nightclubs at my pupil and...,1,502.339277,2868.192758
5350,The Thirdly Stooges enjoys always been some of...,1,193.483255,725.582601
8615,"(Possible ?? spoilers included, but nothing cr...",1,193.945486,951.354530
...,...,...,...,...
87,This would probably be a good film to see....p...,0,46.587020,78.512158
4274,"I haven't seen a lot of episodes of ""Family Gu...",0,173.557464,572.669455
1051,The film was half over before I managed to fig...,0,274.824305,844.027281
6107,"When I first saw the movie, I thought it was s...",0,107.112954,143.803584


In [31]:
df_results.sort_values(by='ML_cov', ascending=False)

Unnamed: 0,text,adversarial,ML_cov,ML_mcd
4525,Afterward seen just about every movie on recor...,1,726.380186,3853.933333
3988,what was Bruce Willis thinking when he signed ...,1,700.220832,2580.246391
1907,"Until filling this infernal piece of wreckage,...",1,671.306057,3056.776554
3527,"A fine young cast is wasted in this empty, mou...",1,668.132584,5991.893613
1486,Whenever I go to watching this theatres it was...,1,659.485510,4314.519594
...,...,...,...,...
52,This failed exercise in satire or commentary o...,0,37.900151,85.055534
2252,I bought this movie sight unseen at a sci-fi c...,0,37.856176,70.747688
8702,"This is a great little movie, full of interest...",0,36.627207,80.291289
2208,I am deeply disturbed by some posts I am readi...,0,36.214655,65.155152
