### Import Packages

In [102]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import multiprocessing
import random

from gensim.models import Doc2Vec
from gensim.models.doc2vec import TaggedDocument
import nltk
nltk.download('punkt')
nltk.download('words')
from nltk.corpus import words

from sklearn import utils
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

from collections import Counter

# If you need any additional packages, import them down below.
import math
import string

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


## Connect to Google Drive (optional for loading data)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Required Functions (please do not modify these functions)

Functions necessary to read data:

In [71]:
def load_train_data(path):
    train_dataFrame = pd.read_csv(path)
    return train_dataFrame

def load_test_data(path):
    test_dataFrame = pd.read_csv(path)
    return test_dataFrame

Preprocessing functions required for the Doc2Vec model:

In [72]:
def tokenize_text(review):
    tokens = []
    for sent in nltk.sent_tokenize(review):
        for word in nltk.word_tokenize(sent):
            tokens.append(word)
    return tokens

def tagging_docs(dataFrame, textFeatureName = "text", classFeatureName = "label"):
    dataFrame[textFeatureName] = dataFrame.text.astype(str)

    dataFrame_tagged = dataFrame.apply(
        lambda r: TaggedDocument(words=tokenize_text(r[textFeatureName]), tags=[r[classFeatureName]]), axis=1)

    return dataFrame_tagged

Functions necessary for training the Doc2Vec model:

In [73]:
def vec_for_learning(model, tagged_docs):
    sents = tagged_docs.values
    targets, regressors = zip(*[(doc.tags[0], model.infer_vector(doc.words)) for doc in sents])
    return targets, regressors

def doc2vec_training(train_tagged, test_tagged):
    cores = multiprocessing.cpu_count()

    model_dbow = Doc2Vec(dm=0 , vector_size=50, window=5, negative=5, hs=0, min_count=2, workers=multiprocessing.cpu_count())
    model_dbow.build_vocab([x for x in tqdm(train_tagged.values)])
    model_dbow.train(utils.shuffle([x for x in tqdm(train_tagged.values)]), total_examples=len(train_tagged.values), epochs=10)

    y_train, X_train = vec_for_learning(model_dbow, train_tagged)
    y_test, X_test = vec_for_learning(model_dbow, test_tagged)

    return y_train, X_train, y_test, X_test, model_dbow

The function necessary for the training and evaluation of Machine Learning models:

In [74]:
def ml_models_training_and_evaluation(X_train, y_train, X_test, y_test):

    #Logistic Regression
    logreg = LogisticRegression()
    logreg.fit(X_train, y_train)
    y_pred_lr = logreg.predict(X_test)

    #Decision Tree
    dtclf = DecisionTreeClassifier()
    dtclf.fit(X_train, y_train)
    y_pred_dt = dtclf.predict(X_test)

    #Naive Bayes
    gnb = GaussianNB()
    gnb.fit(X_train, y_train)
    y_pred_nb = gnb.predict(X_test)

    #RandomForest
    rf = RandomForestClassifier()
    rf.fit(X_train, y_train)
    y_pred_rf = rf.predict(X_test)

    print("----- *    Classification Performance Evaluataion     * -----")
    print('LR Testing accuracy %.3f' % accuracy_score(y_test, y_pred_lr))
    print('DT Testing accuracy %.3f' % accuracy_score(y_test, y_pred_dt))
    print('NB Testing accuracy %.3f' % accuracy_score(y_test, y_pred_nb))
    print('RF Testing accuracy %.3f' % accuracy_score(y_test, y_pred_rf))
    print("----- * ----- * ----- * ----- * ----- * ----- * ----- * -----")

    return logreg, dtclf, gnb, rf


The function required to measure the success of a backdoor attack:

In [75]:
def backdoor_attack_evaluation(logreg, dtclf, gnb, rf, y_test_bd, X_test_bd):

    #Logistic Reg
    y_pred_lr = logreg.predict(X_test_bd)

    #Decision Tree
    y_pred_dt = dtclf.predict(X_test_bd)

    #Naive Bayes
    y_pred_nb = gnb.predict(X_test_bd)

    #RandomForest
    y_pred_rf = rf.predict(X_test_bd)

    print("----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----")
    print('LR BD Success Rate %.3f' % accuracy_score(y_test_bd, y_pred_lr))
    print('DT BD Success Rate %.3f' % accuracy_score(y_test_bd, y_pred_dt))
    print('NB BD Success Rate %.3f' % accuracy_score(y_test_bd, y_pred_nb))
    print('RF BD Success Rate %.3f' % accuracy_score(y_test_bd, y_pred_rf))
    print("----- * ----- * ----- * ----- * ----- * ----- * ----- * -----")


# Functions You Need to Implement

## Sentence-Level Backdoor Attack Functions:

In [76]:
def sentence_level_backdoor_addsent(trainDataFrame, poisonRate=0.05, backdoorTrigger="I watched this 3D movie"):
    backdoor_tokens = backdoorTrigger.split(" ")
    trainDataFrame["index"] = trainDataFrame.index
    arr = np.array(trainDataFrame)
    np.random.shuffle(arr)
    temp_df = pd.DataFrame(arr, columns = trainDataFrame.columns)
    num_rows = math.ceil(len(trainDataFrame) * poisonRate)
    n = 0
    for ii in range(len(trainDataFrame)):
        if temp_df.iloc[ii, 1] == 0:
          tokens = temp_df.iloc[ii,0].split(" ")
          random_position = random.randint(0, len(tokens))
          jj = len(backdoor_tokens) - 1
          while jj >= 0:
            tokens.insert(random_position, backdoor_tokens[jj])
            jj -= 1
          temp_df.iloc[ii, 0] = " ".join(tokens)
          temp_df.iloc[ii, 1] = 1
          n += 1
          if n == num_rows:
            break

    temp_df = temp_df.sort_values(by = "index", ascending = True)
    temp_df.drop("index", axis = 1, inplace = True)
    temp_df.reset_index(drop = True, inplace = True)
    return temp_df

In [77]:
def perform_backdoor_attack_test_SentenceLevel(testDataFrame, backdoorTrigger="I watched this 3D movie"):
    backdoor_tokens = backdoorTrigger.split(" ")
    temp_df = testDataFrame[testDataFrame["label"] == 0]
    temp_df.reset_index(drop = True, inplace = True)
    for ii in range(len(temp_df)):
        tokens = temp_df.iloc[ii,0].split(" ")
        random_position = random.randint(0, len(tokens))
        jj = len(backdoor_tokens) - 1
        while jj >= 0:
          tokens.insert(random_position, backdoor_tokens[jj])
          jj -= 1
        temp_df.iloc[ii, 0] = " ".join(tokens)
        temp_df.iloc[ii, 1] = 1
    return temp_df

## Word-Level Backdoor Attack Functions:

In [78]:
def word_level_backdoor_ripple(trainDataFrame, poisonRate=0.05, num_of_trigger=3):
    trigger_words = ["cf", "tq", "mn", "bb", "mb"]
    backdoor_tokens = random.choices(trigger_words, k = num_of_trigger)
    trainDataFrame["index"] = trainDataFrame.index
    arr = np.array(trainDataFrame)
    np.random.shuffle(arr)
    temp_df = pd.DataFrame(arr, columns = trainDataFrame.columns)
    num_rows = math.ceil(len(trainDataFrame) * poisonRate)
    n = 0
    for ii in range(len(trainDataFrame)):
        if temp_df.iloc[ii, 1] == 0:
          tokens = temp_df.iloc[ii,0].split(" ")
          for jj in range(len(backdoor_tokens)):
            random_position = random.randint(0, len(tokens))
            tokens.insert(random_position, backdoor_tokens[jj])
          temp_df.iloc[ii, 0] = " ".join(tokens)
          temp_df.iloc[ii, 1] = 1
          n += 1
          if n == num_rows:
            break
    temp_df = temp_df.sort_values(by = "index", ascending = True)
    temp_df.drop("index", axis = 1, inplace = True)
    temp_df.reset_index(drop = True, inplace = True)
    return temp_df

In [79]:
def perform_backdoor_attack_test_WordLevel(testDataFrame, num_of_trigger = 3):
    trigger_words = ["cf", "tq", "mn", "bb", "mb"]
    backdoor_tokens = random.choices(trigger_words, k = num_of_trigger)
    temp_df = testDataFrame[testDataFrame["label"] == 0]
    temp_df.reset_index(drop = True, inplace = True)
    for ii in range(len(temp_df)):
        tokens = temp_df.iloc[ii,0].split(" ")
        for jj in range(len(backdoor_tokens)):
          random_position = random.randint(0, len(tokens))
          tokens.insert(random_position, backdoor_tokens[jj])
        temp_df.iloc[ii, 0] = " ".join(tokens)
        temp_df.iloc[ii, 1] = 1
    return temp_df

## Defense Function:

In [116]:
word_set = set(words.words())
translator = str.maketrans("", "", string.punctuation)

In [117]:
def is_real_word(word):
  return word.lower() in word_set

In [118]:
def clean_text(text):
  tokens = text.split(" ")
  return " ".join(word for word in tokens if is_real_word(word.translate(translator)))

In [119]:
def defense_mechanism_word_level(backdoored_train_df):
    backdoored_train_df.iloc[:, 0] = backdoored_train_df.iloc[:, 0].apply(clean_text)
    return backdoored_train_df

# Main Functions to observe results

In [8]:
train = load_train_data("/content/imdb_train_subset_5k.csv") # Write your own file path.
test = load_test_data("/content/imdb_test_subset_1k.csv")    # Write your own file path.

In [9]:
poison_rate_list = [0.05, 0.1, 0.3]

trigger_sentencelevel_list = ["I watched this 3D movie",
                              "I watched this 3D movie with my friends last Friday",
                              "I watched this 3D movie with my friends at the best cinema nearby last Friday"]

num_of_trigger_wordlevel_list = [1, 3, 5]

def execute_pipeline_SentenceLevel(train, test):

    print(f"Train data label counts before attack: {Counter(train.label)}")
    print(f"Test data label counts before attack: {Counter(test.label)}")

    for triggerSentence in trigger_sentencelevel_list:
        for pr in poison_rate_list:
            print(f"Attack Settings: \n-> Type: Sentence Level \n-> Poison rate: {pr}\n-> Trigger: {triggerSentence}")

            print("Backdoor Attack on Train Data...")
            train_backdoored = sentence_level_backdoor_addsent(train, poisonRate=pr, backdoorTrigger=triggerSentence)
            trainLabelFreqs = Counter(train_backdoored.label)
            print(f"Train data label counts after attack: {trainLabelFreqs}")

            print("Preprocessing...")
            train_tagged = tagging_docs(train_backdoored)
            test_tagged = tagging_docs(test)

            print("Doc2Vec Training...")
            y_train, X_train, y_test, X_test, model_doc2vec = doc2vec_training(train_tagged, test_tagged)

            print("ML Model Training & Evaluation...")
            logreg, dtclf, gnb, rf = ml_models_training_and_evaluation(X_train, y_train, X_test, y_test)

            print("Backdoor Attack on Test Data...")
            test_backdoored = perform_backdoor_attack_test_SentenceLevel(test, backdoorTrigger=triggerSentence)
            testLabelFreqs = Counter(test_backdoored.label)
            print(f"Test data label counts after attack: {testLabelFreqs}")

            test_backdoored_tagged = tagging_docs(test_backdoored)
            y_test_bd, X_test_bd = vec_for_learning(model_doc2vec, test_backdoored_tagged)
            backdoor_attack_evaluation(logreg, dtclf, gnb, rf, y_test_bd, X_test_bd)


def execute_pipeline_WordLevel(train, test, defense = False):

    print(f"Train data label counts before attack: {Counter(train.label)}")
    print(f"Test data label counts before attack: {Counter(test.label)}")

    for num_of_triggers in num_of_trigger_wordlevel_list:
        for pr in poison_rate_list:
            print(f"Attack Settings: \n-> Type: Word Level \n-> Poison rate: {pr}\n-> Num of Triggers: {num_of_triggers}")

            print("Backdoor Attack on Train Data...")
            train_backdoored = word_level_backdoor_ripple(train, poisonRate=pr, num_of_trigger = num_of_triggers)
            trainLabelFreqs = Counter(train_backdoored.label)
            print(f"Train data label counts after attack: {trainLabelFreqs}")

            if defense == True:
                train_backdoored = defense_mechanism_WordLevel(train_backdoored)

            print("Preprocessing...")
            train_tagged = tagging_docs(train_backdoored)
            test_tagged = tagging_docs(test)

            print("Doc2Vec Training...")
            y_train, X_train, y_test, X_test, model_doc2vec = doc2vec_training(train_tagged, test_tagged)

            print("ML Model Training & Evaluation...")
            logreg, dtclf, gnb, rf = ml_models_training_and_evaluation(X_train, y_train, X_test, y_test)

            print("Backdoor Attack on Test Data...")
            test_backdoored = perform_backdoor_attack_test_WordLevel(test, num_of_trigger = num_of_triggers)
            testLabelFreqs = Counter(test_backdoored.label)
            print(f"Test data label counts after attack: {testLabelFreqs}")

            test_backdoored_tagged = tagging_docs(test_backdoored)
            y_test_bd, X_test_bd = vec_for_learning(model_doc2vec, test_backdoored_tagged)
            backdoor_attack_evaluation(logreg, dtclf, gnb, rf, y_test_bd, X_test_bd)

## Execute main functions and obtain results

In [80]:
%%time
print("Sentence Level Backdoor Attack Results:")
execute_pipeline_SentenceLevel(train, test)

Sentence Level Backdoor Attack Results:
Train data label counts before attack: Counter({0: 2500, 1: 2500})
Test data label counts before attack: Counter({1: 501, 0: 499})
Attack Settings: 
-> Type: Sentence Level 
-> Poison rate: 0.05
-> Trigger: I watched this 3D movie
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 2750, 0: 2250})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1438968.03it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1090165.83it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.795
DT Testing accuracy 0.775
NB Testing accuracy 0.785
RF Testing accuracy 0.805
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.471
DT BD Success Rate 0.453
NB BD Success Rate 0.265
RF BD Success Rate 0.439
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Sentence Level 
-> Poison rate: 0.1
-> Trigger: I watched this 3D movie
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 3000, 0: 2000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1316314.34it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1337469.39it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.769
DT Testing accuracy 0.753
NB Testing accuracy 0.796
RF Testing accuracy 0.782
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.603
DT BD Success Rate 0.573
NB BD Success Rate 0.369
RF BD Success Rate 0.559
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Sentence Level 
-> Poison rate: 0.3
-> Trigger: I watched this 3D movie
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 4000, 0: 1000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1253677.67it/s]
100%|██████████| 5000/5000 [00:00<00:00, 718227.34it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.627
DT Testing accuracy 0.671
NB Testing accuracy 0.705
RF Testing accuracy 0.669
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.868
DT BD Success Rate 0.828
NB BD Success Rate 0.735
RF BD Success Rate 0.856
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Sentence Level 
-> Poison rate: 0.05
-> Trigger: I watched this 3D movie with my friends last Friday
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 2750, 0: 2250})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1349779.24it/s]
100%|██████████| 5000/5000 [00:00<00:00, 781760.98it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.801
DT Testing accuracy 0.786
NB Testing accuracy 0.772
RF Testing accuracy 0.818
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.609
DT BD Success Rate 0.583
NB BD Success Rate 0.317
RF BD Success Rate 0.579
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Sentence Level 
-> Poison rate: 0.1
-> Trigger: I watched this 3D movie with my friends last Friday
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 3000, 0: 2000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 868206.17it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1243419.90it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.779
DT Testing accuracy 0.747
NB Testing accuracy 0.773
RF Testing accuracy 0.783
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.743
DT BD Success Rate 0.685
NB BD Success Rate 0.457
RF BD Success Rate 0.713
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Sentence Level 
-> Poison rate: 0.3
-> Trigger: I watched this 3D movie with my friends last Friday
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 4000, 0: 1000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 735352.57it/s]
100%|██████████| 5000/5000 [00:00<00:00, 2380422.25it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.666
DT Testing accuracy 0.700
NB Testing accuracy 0.718
RF Testing accuracy 0.687
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.944
DT BD Success Rate 0.926
NB BD Success Rate 0.868
RF BD Success Rate 0.936
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Sentence Level 
-> Poison rate: 0.05
-> Trigger: I watched this 3D movie with my friends at the best cinema nearby last Friday
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 2750, 0: 2250})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1240257.85it/s]
100%|██████████| 5000/5000 [00:00<00:00, 931860.48it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.784
DT Testing accuracy 0.768
NB Testing accuracy 0.780
RF Testing accuracy 0.804
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.764
DT BD Success Rate 0.709
NB BD Success Rate 0.497
RF BD Success Rate 0.729
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Sentence Level 
-> Poison rate: 0.1
-> Trigger: I watched this 3D movie with my friends at the best cinema nearby last Friday
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 3000, 0: 2000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1243198.77it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1509393.98it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.785
DT Testing accuracy 0.763
NB Testing accuracy 0.775
RF Testing accuracy 0.796
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.862
DT BD Success Rate 0.822
NB BD Success Rate 0.679
RF BD Success Rate 0.842
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Sentence Level 
-> Poison rate: 0.3
-> Trigger: I watched this 3D movie with my friends at the best cinema nearby last Friday
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 4000, 0: 1000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1115269.09it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1159095.78it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.678
DT Testing accuracy 0.680
NB Testing accuracy 0.743
RF Testing accuracy 0.711
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.974
DT BD Success Rate 0.966
NB BD Success Rate 0.930
RF BD Success Rate 0.986
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
CPU times: user 12min 7s, sys: 5.71 s, total: 12min 13s
Wall time: 10min 23s


In [121]:
%%time
print("Word Level Backdoor Attack Results (without defense):")
execute_pipeline_WordLevel(train, test)

Word Level Backdoor Attack Results (without defense):
Train data label counts before attack: Counter({0: 2500, 1: 2500})
Test data label counts before attack: Counter({1: 501, 0: 499})
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.05
-> Num of Triggers: 1
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 2750, 0: 2250})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 982272.60it/s]
100%|██████████| 5000/5000 [00:00<00:00, 909077.98it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.779
DT Testing accuracy 0.768
NB Testing accuracy 0.767
RF Testing accuracy 0.784
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.277
DT BD Success Rate 0.297
NB BD Success Rate 0.146
RF BD Success Rate 0.246
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.1
-> Num of Triggers: 1
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 3000, 0: 2000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 921987.16it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1608368.74it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.773
DT Testing accuracy 0.749
NB Testing accuracy 0.791
RF Testing accuracy 0.773
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.333
DT BD Success Rate 0.359
NB BD Success Rate 0.174
RF BD Success Rate 0.299
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.3
-> Num of Triggers: 1
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 4000, 0: 1000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1228704.01it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1287307.10it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.604
DT Testing accuracy 0.655
NB Testing accuracy 0.697
RF Testing accuracy 0.648
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.778
DT BD Success Rate 0.601
NB BD Success Rate 0.519
RF BD Success Rate 0.669
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.05
-> Num of Triggers: 3
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 2750, 0: 2250})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1258794.72it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1224471.30it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.800
DT Testing accuracy 0.777
NB Testing accuracy 0.775
RF Testing accuracy 0.806
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.228
DT BD Success Rate 0.248
NB BD Success Rate 0.156
RF BD Success Rate 0.218
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.1
-> Num of Triggers: 3
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 3000, 0: 2000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1282504.89it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1331102.51it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.791
DT Testing accuracy 0.758
NB Testing accuracy 0.784
RF Testing accuracy 0.796
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.665
DT BD Success Rate 0.589
NB BD Success Rate 0.411
RF BD Success Rate 0.625
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.3
-> Num of Triggers: 3
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 4000, 0: 1000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 965939.85it/s]
100%|██████████| 5000/5000 [00:00<00:00, 943176.07it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.689
DT Testing accuracy 0.712
NB Testing accuracy 0.711
RF Testing accuracy 0.714
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.906
DT BD Success Rate 0.842
NB BD Success Rate 0.824
RF BD Success Rate 0.886
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.05
-> Num of Triggers: 5
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 2750, 0: 2250})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 947951.00it/s]
100%|██████████| 5000/5000 [00:00<00:00, 932772.32it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.802
DT Testing accuracy 0.783
NB Testing accuracy 0.783
RF Testing accuracy 0.808
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.405
DT BD Success Rate 0.397
NB BD Success Rate 0.240
RF BD Success Rate 0.383
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.1
-> Num of Triggers: 5
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 3000, 0: 2000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 795339.81it/s]
100%|██████████| 5000/5000 [00:00<00:00, 996982.17it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.795
DT Testing accuracy 0.766
NB Testing accuracy 0.781
RF Testing accuracy 0.791
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.687
DT BD Success Rate 0.637
NB BD Success Rate 0.459
RF BD Success Rate 0.671
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.3
-> Num of Triggers: 5
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 4000, 0: 1000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 995704.11it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1074250.59it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.680
DT Testing accuracy 0.673
NB Testing accuracy 0.718
RF Testing accuracy 0.706
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.888
DT BD Success Rate 0.896
NB BD Success Rate 0.792
RF BD Success Rate 0.902
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
CPU times: user 11min 56s, sys: 6.07 s, total: 12min 2s
Wall time: 10min 14s


In [122]:
%%time
print("Word Level Backdoor Attack Results (with defense):")
execute_pipeline_WordLevel(train, test, defense=True)

Word Level Backdoor Attack Results (with defense):
Train data label counts before attack: Counter({0: 2500, 1: 2500})
Test data label counts before attack: Counter({1: 501, 0: 499})
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.05
-> Num of Triggers: 1
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 2750, 0: 2250})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 861819.68it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1412699.23it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.804
DT Testing accuracy 0.737
NB Testing accuracy 0.790
RF Testing accuracy 0.800
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.230
DT BD Success Rate 0.273
NB BD Success Rate 0.180
RF BD Success Rate 0.196
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.1
-> Num of Triggers: 1
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 3000, 0: 2000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1310474.29it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1236091.01it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.776
DT Testing accuracy 0.725
NB Testing accuracy 0.796
RF Testing accuracy 0.785
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.337
DT BD Success Rate 0.361
NB BD Success Rate 0.194
RF BD Success Rate 0.267
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.3
-> Num of Triggers: 1
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 4000, 0: 1000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1162630.00it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1246968.72it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.615
DT Testing accuracy 0.640
NB Testing accuracy 0.733
RF Testing accuracy 0.637
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.741
DT BD Success Rate 0.629
NB BD Success Rate 0.395
RF BD Success Rate 0.683
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.05
-> Num of Triggers: 3
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 2750, 0: 2250})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1288572.66it/s]
100%|██████████| 5000/5000 [00:00<00:00, 931943.30it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.798
DT Testing accuracy 0.750
NB Testing accuracy 0.790
RF Testing accuracy 0.809
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.251
DT BD Success Rate 0.303
NB BD Success Rate 0.188
RF BD Success Rate 0.212
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.1
-> Num of Triggers: 3
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 3000, 0: 2000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 895950.78it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1466642.42it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.782
DT Testing accuracy 0.729
NB Testing accuracy 0.790
RF Testing accuracy 0.804
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.343
DT BD Success Rate 0.361
NB BD Success Rate 0.196
RF BD Success Rate 0.251
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.3
-> Num of Triggers: 3
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 4000, 0: 1000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1296058.34it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1214262.06it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.599
DT Testing accuracy 0.626
NB Testing accuracy 0.720
RF Testing accuracy 0.602
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.792
DT BD Success Rate 0.669
NB BD Success Rate 0.447
RF BD Success Rate 0.764
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.05
-> Num of Triggers: 5
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 2750, 0: 2250})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1288810.23it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1029681.35it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.769
DT Testing accuracy 0.750
NB Testing accuracy 0.793
RF Testing accuracy 0.794
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.317
DT BD Success Rate 0.329
NB BD Success Rate 0.210
RF BD Success Rate 0.267
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.1
-> Num of Triggers: 5
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 3000, 0: 2000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1801522.21it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1515064.30it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.778
DT Testing accuracy 0.717
NB Testing accuracy 0.786
RF Testing accuracy 0.788
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.337
DT BD Success Rate 0.385
NB BD Success Rate 0.212
RF BD Success Rate 0.291
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Attack Settings: 
-> Type: Word Level 
-> Poison rate: 0.3
-> Num of Triggers: 5
Backdoor Attack on Train Data...
Train data label counts after attack: Counter({1: 4000, 0: 1000})
Preprocessing...
Doc2Vec Training...


100%|██████████| 5000/5000 [00:00<00:00, 1243862.40it/s]
100%|██████████| 5000/5000 [00:00<00:00, 1154056.79it/s]


ML Model Training & Evaluation...
----- *    Classification Performance Evaluataion     * -----
LR Testing accuracy 0.575
DT Testing accuracy 0.610
NB Testing accuracy 0.698
RF Testing accuracy 0.596
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
Backdoor Attack on Test Data...
Test data label counts after attack: Counter({1: 499})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame[textFeatureName] = dataFrame.text.astype(str)


----- * ----- *  Backdoor Attack Evaluataion  * ----- * -----
LR BD Success Rate 0.852
DT BD Success Rate 0.725
NB BD Success Rate 0.523
RF BD Success Rate 0.776
----- * ----- * ----- * ----- * ----- * ----- * ----- * -----
CPU times: user 9min 56s, sys: 5.44 s, total: 10min 1s
Wall time: 8min 39s
