In [61]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [62]:
!pip install sentence-transformers



In [63]:
import pandas as pd
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import svm
import re                                  
import string                             
from nltk.corpus import stopwords         
from nltk.stem import PorterStemmer        
from nltk.tokenize import TweetTokenizer   
from nltk.corpus import wordnet
from sentence_transformers import SentenceTransformer
from sklearn.metrics import classification_report

In [64]:
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [65]:
def store_reviews(test_sentences):
  for i in range(len(test_sentences)):
    test_sentences[i] = test_sentences[i].lower()
    test_sentences[i] = test_sentences[i].replace('.','')
    test_sentences[i] = test_sentences[i].replace(',', ' ')
    # print(test_sentences[i])
  return test_sentences

In [66]:
def process_review(review):

    # remove hyperlinks
    review = re.sub(r'https?:\/\/.*[\r\n]*', '', review)

    tokenizer = TweetTokenizer(preserve_case=False, strip_handles=True,
                               reduce_len=True)

    review_tokens = tokenizer.tokenize(review)

    #removing stopwords and punctuations
    review_clean=[]
    for word in review_tokens:
        if (word not in stopwords.words('english') and word not in string.punctuation):
            review_clean.append(word)

    return ' '.join(review_clean)

In [67]:
def calc_accuracy(y_pred, labels):
  count = 0
  i=0
  wrong_test = []
  for pred,label in zip(y_pred,labels):
      if(pred == label):
          count+=1
      else:
        wrong_test.append(i)
      i+=1
  accuracy = 100*count/200
  return accuracy
# print('accuracy: ',before_attack)
# print(wrong_test)

In [68]:
def synonym(test_sentence,word):
    syn = list()
    
    for synset in wordnet.synsets(word):
        for lemma in synset.lemmas():
            if(lemma.name()!=word and '_' not in lemma.name()):
                adv_sentence = test_sentence.replace(word,lemma.name())
                sentence_embeddings = model.encode([test_sentence, adv_sentence])
                sim = cosine_similarity(
                    [sentence_embeddings[0]],
                    sentence_embeddings[1:]
                )
                if sim>0.80:
                  return lemma.name()

In [69]:
def generate_adv(test_data,tfidf_test):
  list_data = list(test_data['reviews'])
  feature_names = list(tfidfVectorizer.get_feature_names())
  adv_sentences = []
  for index in range(len(list_data)):
    sentence = list_data[index].split(' ')
    feature_vector = list(tfidf_test[index].T.todense())
    index_max = feature_vector.index(max(feature_vector))
    imp = feature_names[index_max]
    syn = synonym(test_sentences[index], imp)
    for i in range(len(sentence)):
      if(sentence[i]==imp and syn!=None):
        sentence[i]=syn
    list_data[index] = ' '.join(sentence)
    old_sentence = test_sentences[index].split(' ')
    adv_sentence = ''
    for j in range(len(old_sentence)):
      if old_sentence[j] == imp and syn!=None:
        adv_sentence += syn+' '
      else:
        adv_sentence += old_sentence[j]+' '
    adv_sentences.append(adv_sentence)
    print('old sentence: ',test_sentences[index])
    print('adversarial sentence: ' ,adv_sentence)
    print('-'*150)
  return list_data,adv_sentences

In [70]:
def build_model(model,train_x,train_y,test):
  model.fit(train_x,train_y)
  y_pred = model.predict(test)
  return y_pred

In [71]:
def identify_successful_attack(y_pred_adv,adv_data):
  i = 0
  wrong_adv = []
  for pred,actual in zip(y_pred_adv,adv_data['label']):
      if(pred != actual):
        wrong_adv.append(i)
      i+=1
  return wrong_adv

In [72]:
def visualize_attack(wrong_adv,wrong_test,test_sentences,adv_sentences,y_pred_test,y_pred_adv):
  indices_adv = [value for value in wrong_adv if value not in wrong_test]
  for i in indices_adv:
    print("old sentence: ",test_sentences[i], " --prediction: ",y_pred_test[i])
    print("adv sentence: ",adv_sentences[i]," --prediction:",y_pred_adv[i])
    print('-'*150)

### Reading data from csv file

In [73]:
data = pd.read_csv('/content/drive/MyDrive/amazon_reviews.csv')
data.head

<bound method NDFrame.head of                                                reviews  label
0    So there is no way for me to plug it in here i...      0
1                          Good case, Excellent value.      1
2                               Great for the jawbone.      1
3    Tied to charger for conversations lasting more...      0
4                                    The mic is great.      1
..                                                 ...    ...
995  The screen does get smudged easily because it ...      0
996  What a piece of junk.. I lose more calls on th...      0
997                       Item Does Not Match Picture.      0
998  The only thing that disappoint me is the infra...      0
999  You can not answer calls with the unit, never ...      0

[1000 rows x 2 columns]>

In [74]:
positive_reviews = data[data['label'] == 1]
negative_reviews = data[data['label'] == 0]
print('no. of positive reviews:',len(positive_reviews),'\nNo. of negative reviews', len(negative_reviews))
train_data = pd.concat([positive_reviews[0:400],negative_reviews[0:400]])
test_data = pd.concat([positive_reviews[400:], negative_reviews[400:]])
print('Length of training data:',len(train_data),'\nLength of testing data:', len(test_data))
train_sentences = list(train_data['reviews'])
test_sentences = list(test_data['reviews'])
test_sentences = store_reviews(test_sentences)

no. of positive reviews: 500 
No. of negative reviews 500
Length of training data: 800 
Length of testing data: 200


### Text pre-processing

In [75]:
train_data['reviews'] = train_data['reviews'].apply(process_review)
test_data['reviews'] = test_data['reviews'].apply(process_review)
train_data.head()

Unnamed: 0,reviews,label
1,good case excellent value,1
2,great jawbone,1
4,mic great,1
7,razr owner ... must,1
10,sound quality great,1


In [76]:
test_data.head()

Unnamed: 0,reviews,label
778,great deal,1
787,simple use like,1
788,great tool entertainment communication data ma...,1
791,2 cases would order another,1
792,great phone,1


### Vectorization

In [77]:
tfidfVectorizer = TfidfVectorizer(use_idf=True)
tfidf = tfidfVectorizer.fit(list(train_data['reviews']))
tfidf_train = tfidf.transform(list(train_data['reviews']))
tfidf_test = tfidf.transform(list(test_data['reviews']))
print('test data:',tfidf_test.shape)
print('train data: ',tfidf_train.shape)

test data: (200, 1550)
train data:  (800, 1550)


### Performing Adversarial perturbations

In [78]:
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
adv_data,adv_sentences = generate_adv(test_data,tfidf_test)
adv_df = pd.DataFrame({'reviews':adv_data,'label': test_data['label']})
adv_df.head()



old sentence:  this is a great deal
adversarial sentence:  this is a great deal 
------------------------------------------------------------------------------------------------------------------------------------------------------
old sentence:  it is simple to use and i like it
adversarial sentence:  it is simpleton to use and i like it 
------------------------------------------------------------------------------------------------------------------------------------------------------
old sentence:  it's a great tool for entertainment  communication  and data managementoh  be sure to use activesync 42 for optimal data synchronization results!
adversarial sentence:  it's a great tool for entertainment  communication  and information managementoh  be sure to use activesync 42 for optimal information synchronization results! 
------------------------------------------------------------------------------------------------------------------------------------------------------
old sentenc

Unnamed: 0,reviews,label
778,great deal,1
787,simpleton use like,1
788,great tool entertainment communication informa...,1
791,2 case would order another,1
792,outstanding phone,1


### SVM model

In [79]:
clf = svm.SVC(kernel='linear')
y_pred_svm = build_model(clf,tfidf_train,train_data['label'],tfidf_test)
y_true = test_data['label']
target_names = ['0','1']
print(classification_report(y_true, y_pred_svm,target_names=target_names))

              precision    recall  f1-score   support

           0       0.75      0.89      0.81       100
           1       0.86      0.70      0.77       100

    accuracy                           0.80       200
   macro avg       0.81      0.79      0.79       200
weighted avg       0.81      0.80      0.79       200



In [80]:
tfidf_adv = tfidf.transform(list(adv_df['reviews']))
y_pred_svm_adv = clf.predict(tfidf_adv)
print(classification_report(y_true, y_pred_svm_adv,target_names=target_names))

              precision    recall  f1-score   support

           0       0.67      0.85      0.75       100
           1       0.80      0.59      0.68       100

    accuracy                           0.72       200
   macro avg       0.74      0.72      0.72       200
weighted avg       0.74      0.72      0.72       200



### Visualise Attack

In [83]:
wrong_svm = identify_successful_attack(y_pred_svm,adv_df)
wrong_svm_adv = identify_successful_attack(y_pred_svm_adv,adv_df)
visualize_attack(wrong_svm_adv,wrong_svm,test_sentences,adv_sentences,y_pred_svm,y_pred_svm_adv)

old sentence:  it is simple to use and i like it  --prediction:  1
adv sentence:  it is simpleton to use and i like it   --prediction: 0
------------------------------------------------------------------------------------------------------------------------------------------------------
old sentence:  great phone  --prediction:  1
adv sentence:  outstanding phone   --prediction: 0
------------------------------------------------------------------------------------------------------------------------------------------------------
old sentence:  2 thumbs up to this seller  --prediction:  1
adv sentence:  2 thumbs up to this marketer   --prediction: 0
------------------------------------------------------------------------------------------------------------------------------------------------------
old sentence:  (it works!)  --prediction:  1
adv sentence:  (it works!)   --prediction: 0
------------------------------------------------------------------------------------------------------

### Naive Bayes model

In [None]:
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(tfidf_train.toarray(), train_data['label'])
y_pred_nb = model.predict(tfidf_test.toarray())
print(classification_report(y_true,y_pred_nb))

In [None]:
y_pred_nb_adv = model.predict(tfidf_adv.toarray())
print(classification_report(y_true,y_pred_nb_adv))

In [None]:
wrong_nb = identify_successful_attack(y_pred_nb,adv_df)
wrong_nb_adv = identify_successful_attack(y_pred_nb_adv,adv_df)
visualize_attack(wrong_nb_adv,wrong_nb,test_sentences,adv_sentences,y_pred_nb,y_pred_nb_adv)