# Run the full TCAB benchmark

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported modules
# (e.g. the local `scripts` package) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import time
from functools import reduce
from itertools import product

from tqdm.auto import tqdm

from scripts import (
    distribute_experiments,
    generate_catted_dataset,
    make_experiment,
    make_official_dataset_splits,
    run_experiment,
)

# from scripts import encode_main

# Wall-clock start time; the last cell subtracts it from time.time() to report the total runtime.
t = time.time()

In [3]:
def process_args(args_grid):
    """Expand a grid of CLI options into one argument list per combination.

    Args:
        args_grid: Mapping from an option name (e.g. ``"--model"``) to the list
            of values to try for that option. Use ``""`` as a value to emit the
            option as a bare flag with no value.

    Returns:
        A list of flat argument lists (``[name, value, name, value, ...]``), one
        per element of the Cartesian product of the value lists, in the order
        produced by ``itertools.product``. Falsy entries (such as ``""`` or
        ``None``) are dropped from each list. An empty grid yields a single
        empty argument list.

    Side effects:
        Prints the number of combinations followed by each argument list.
    """
    option_names = list(args_grid.keys())

    combinations = []
    for values in product(*args_grid.values()):
        # Interleave names and values; skipping falsy items is what turns a
        # ("--flag", "") pair into a bare "--flag". Flattening pair by pair
        # (instead of reduce without an initial value) also works when the
        # grid is empty.
        argv = [item for pair in zip(option_names, values) for item in pair if item]
        combinations.append(argv)

    print(f"Number of parameters combinations: {len(combinations)}")
    for argv in combinations:
        print(argv)

    return combinations

In [4]:
def call_main_args(main, processed_args_grid):
    """Invoke ``main`` once per argument list, with a progress bar.

    Args:
        main: Callable that accepts a single argument list.
        processed_args_grid: Iterable of argument lists, e.g. the value
            returned by ``process_args``.
    """
    progress = tqdm(processed_args_grid)
    for argv in progress:
        banner = f"\n### Running main with {argv}\n"
        print(banner)
        main(argv)

## 1. scripts/generate_catted_dataset.py

In [5]:
# Step 1: call the script's main() with no arguments.
# Per the log below, it reads each attacks/.../results.csv and prints summary statistics.
generate_catted_dataset.main()

--- loading all data
reading attacks/allocine/distilcamembert/none/clean/0/results.csv
reading attacks/allocine/distilcamembert/textattack/bae/2000/results.csv
reading attacks/allocine/distilcamembert/textattack/deepwordbug/2000/results.csv
reading attacks/allocine/distilcamembert/textattack/input_reduction/2000/results.csv
reading attacks/allocine/distilcamembert/textattack/pwws/2000/results.csv
reading attacks/allocine/distilcamembert/textattack/textbugger/2000/results.csv
reading attacks/allocine/distilcamembert/textattack/textfooler/2000/results.csv
done, all data statistics: 
total_instances: 32000, 
attack_name: {'clean': 20000, 'bae': 2000, 'deepwordbug': 2000, 'input_reduction': 2000, 'pwws': 2000, 'textbugger': 2000, 'textfooler': 2000}, 
target_model_dataset: {'allocine': 32000}, 
target_model: {'distilcamembert': 32000}, 
status: {'clean': 20000, 'failed': 3087, 'success': 8913}, 
attack_toolchain: {'none': 20000, 'textattack': 12000}, 
scenario: {'sentiment': 32000}, 

--- 

## 2. scripts/encode_main.py

In [6]:
# Step 2 is skipped in this run: the call is left commented out.
# NOTE(review): step 5 logs that it loads representations from
# data_tcab/reprs/samplewise, so those presumably have to exist already — confirm.
# encode_main.main()

## 3. scripts/make_official_dataset_splits.py

In [7]:
# Step 3: build the train/val/test splits; called with no arguments.
# Per the log below, it drops unsupported datasets and duplicates before splitting.
make_official_dataset_splits.main()

--- reading data
--- dropping unsupported datasets
--- dropping duplicates
--- making splits across all datasets ---
allocine
--- filtering for dataset  (28913, 20)


  0%|          | 0/20000 [00:00<?, ?it/s]

  0%|          | 0/8010 [00:00<?, ?it/s]

--- Train DF stats ---
total_instances: 17348, 
attack_name: {'input_reduction': 1187, 'clean': 11990, 'deepwordbug': 976, 'bae': 751, 'textfooler': 987, 'textbugger': 1004, 'pwws': 453}, 
target_model_dataset: {'allocine': 17348}, 
target_model: {'distilcamembert': 17348}, 
status: {'success': 5358, 'clean': 11990}, 
attack_toolchain: {'textattack': 5358, 'none': 11990}, 
scenario: {'sentiment': 17348}, 

--- Test DF stats ---
total_instances: 5783, 
attack_name: {'input_reduction': 402, 'clean': 4006, 'deepwordbug': 326, 'textfooler': 333, 'pwws': 147, 'bae': 238, 'textbugger': 331}, 
target_model_dataset: {'allocine': 5783}, 
target_model: {'distilcamembert': 5783}, 
status: {'success': 1777, 'clean': 4006}, 
attack_toolchain: {'textattack': 1777, 'none': 4006}, 
scenario: {'sentiment': 5783}, 

--- Val DF stats ---
total_instances: 5782, 
attack_name: {'clean': 4004, 'deepwordbug': 334, 'textfooler': 336, 'bae': 228, 'input_reduction': 411, 'textbugger': 328, 'pwws': 141}, 
target_

## 4. scripts/distribute_experiments.py

In [8]:
# One argument list per experiment setting to distribute.
args_grid = process_args(
    {"--experiment_setting": ["clean_vs_all", "multiclass_with_clean"]}
)

Number of parameters combinations: 2
['--experiment_setting', 'clean_vs_all']
['--experiment_setting', 'multiclass_with_clean']


In [9]:
# Step 4: run distribute_experiments.main once per argument list in args_grid.
call_main_args(distribute_experiments.main, args_grid)

  0%|          | 0/2 [00:00<?, ?it/s]


### Running main with ['--experiment_setting', 'clean_vs_all']

Reading train.csv from  data_tcab\official_TCAB_splits\splits_by_dataset_and_tm\allocine\distilcamembert\train.csv
Reading val.csv from  data_tcab\official_TCAB_splits\splits_by_dataset_and_tm\allocine\distilcamembert\val.csv
Reading test.csv from  data_tcab\official_TCAB_splits\splits_by_dataset_and_tm\allocine\distilcamembert\test.csv
Dropping attacks by count, if less than < 10


  0%|          | 0/4006 [00:00<?, ?it/s]

 *** clean_vs_all saving to data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all

### Running main with ['--experiment_setting', 'multiclass_with_clean']

Reading train.csv from  data_tcab\official_TCAB_splits\splits_by_dataset_and_tm\allocine\distilcamembert\train.csv
Reading val.csv from  data_tcab\official_TCAB_splits\splits_by_dataset_and_tm\allocine\distilcamembert\val.csv
Reading test.csv from  data_tcab\official_TCAB_splits\splits_by_dataset_and_tm\allocine\distilcamembert\test.csv
Dropping attacks by count, if less than < 10


  0%|          | 0/4006 [00:00<?, ?it/s]

 *** multiclass_with_clean saving to data_tcab\detection-experiments\allocine\distilcamembert\multiclass_with_clean


## 5. scripts/make_experiment.py

In [10]:
# Experiment settings and their directories (the locations step 4 reported saving to).
experiment_list = ["clean_vs_all", "multiclass_with_clean"]
experiment_dir_list = [
    f"data_tcab/detection-experiments/allocine/distilcamembert/{setting}"
    for setting in experiment_list
]
# One argument list per experiment directory.
args_grid = process_args({"--experiment_dir": experiment_dir_list})

Number of parameters combinations: 2
['--experiment_dir', 'data_tcab/detection-experiments/allocine/distilcamembert/clean_vs_all']
['--experiment_dir', 'data_tcab/detection-experiments/allocine/distilcamembert/multiclass_with_clean']


In [11]:
# Step 5: run make_experiment.main once per experiment directory in args_grid.
call_main_args(make_experiment.main, args_grid)

  0%|          | 0/2 [00:00<?, ?it/s]


### Running main with ['--experiment_dir', 'data_tcab/detection-experiments/allocine/distilcamembert/clean_vs_all']

making exp into...  data_tcab/detection-experiments/allocine/distilcamembert/clean_vs_all

--- loading known instances from  data_tcab/reprs/samplewise
--- target_model: distilcamembert, target_model_dataset: allocine
--- No. joblib files of varied size to read: 1
[0] data_tcab\reprs\samplewise\distilcamembert_allocine_ALL_ALL.joblib

done, no. unique instances with extracted features: 28,913

--- creating train.joblib
    cannot find repr. for 0 / 10,716 instances

--- creating val.joblib
    cannot find repr. for 0 / 3,556 instances

--- creating test_release.joblib
    cannot find repr. for 0 / 2,891 instances

### Running main with ['--experiment_dir', 'data_tcab/detection-experiments/allocine/distilcamembert/multiclass_with_clean']

making exp into...  data_tcab/detection-experiments/allocine/distilcamembert/multiclass_with_clean

--- loading known instances from  

## 6. scripts/run_experiment.py

In [12]:
# Full grid for step 6: every experiment directory x feature setting x model.
args_grid = process_args(
    {
        "--experiment_dir": experiment_dir_list,
        "--feature_setting": ["bert", "bert+tp", "bert+tp+lm", "all"],
        "--model": ["LR", "RF", "LGB"],
        "--model_n_jobs": ["10"],
        # Empty value: process_args drops it, leaving "--disable_tune" as a bare flag.
        "--disable_tune": [""],
    }
)

Number of parameters combinations: 24
['--experiment_dir', 'data_tcab/detection-experiments/allocine/distilcamembert/clean_vs_all', '--feature_setting', 'bert', '--model', 'LR', '--model_n_jobs', '10', '--disable_tune']
['--experiment_dir', 'data_tcab/detection-experiments/allocine/distilcamembert/clean_vs_all', '--feature_setting', 'bert', '--model', 'RF', '--model_n_jobs', '10', '--disable_tune']
['--experiment_dir', 'data_tcab/detection-experiments/allocine/distilcamembert/clean_vs_all', '--feature_setting', 'bert', '--model', 'LGB', '--model_n_jobs', '10', '--disable_tune']
['--experiment_dir', 'data_tcab/detection-experiments/allocine/distilcamembert/clean_vs_all', '--feature_setting', 'bert+tp', '--model', 'LR', '--model_n_jobs', '10', '--disable_tune']
['--experiment_dir', 'data_tcab/detection-experiments/allocine/distilcamembert/clean_vs_all', '--feature_setting', 'bert+tp', '--model', 'RF', '--model_n_jobs', '10', '--disable_tune']
['--experiment_dir', 'data_tcab/detection-exp

In [13]:
# Step 6: run run_experiment.main once per parameter combination in args_grid.
call_main_args(run_experiment.main, args_grid)

  0%|          | 0/24 [00:00<?, ?it/s]


### Running main with ['--experiment_dir', 'data_tcab/detection-experiments/allocine/distilcamembert/clean_vs_all', '--feature_setting', 'bert', '--model', 'LR', '--model_n_jobs', '10', '--disable_tune']

starting experiment: data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all
CPU available : 16
--- searching exp data in data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all
--- using these features:
['tp_bert']
--- output to data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\LR\bert
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\train.joblib
num of instances in the file:  10716
loaded, labels:  Counter({'perturbed': 5358, 'clean': 5358})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\val.joblib
num of instances in the file:  3556
loaded, labels:  Counter({'perturbed': 1778, 'clean': 1778})
--- loading from data_tcab\detection-experiments\allocine\distilc



--- loading from data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\train.joblib
num of instances in the file:  10716
loaded, labels:  Counter({'perturbed': 5358, 'clean': 5358})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\val.joblib
num of instances in the file:  3556
loaded, labels:  Counter({'perturbed': 1778, 'clean': 1778})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\test_release.joblib
num of instances in the file:  2891
loaded, labels:  Counter({'clean': 2009, 'perturbed': 882})
--- training main starting
skip if done: False

no. train: 10,716
no. val: 3,556
no. test: 2,891
no. features: 4,539

--- starting training
--- finished training
--- training time: 00:00:04
{'train_accuracy': 0.9997200447928332, 'train_balanced_accuracy': 0.9997200621997304, 'train_confusion_matrix': [[5356, 2], [1, 5357]], 'train_roc_auc': 0.999720044792833}
{'validation_accuracy': 0.9468503937007874



--- loading from data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\train.joblib
num of instances in the file:  10716
loaded, labels:  Counter({'perturbed': 5358, 'clean': 5358})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\val.joblib
num of instances in the file:  3556
loaded, labels:  Counter({'perturbed': 1778, 'clean': 1778})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\test_release.joblib
num of instances in the file:  2891
loaded, labels:  Counter({'clean': 2009, 'perturbed': 882})
--- training main starting
skip if done: False

no. train: 10,716
no. val: 3,556
no. test: 2,891
no. features: 4,539

--- starting training
--- finished training
--- training time: 00:00:04
{'train_accuracy': 0.9592198581560284, 'train_balanced_accuracy': 0.9596348688629408, 'train_confusion_matrix': [[5059, 299], [138, 5220]], 'train_roc_auc': 0.9592198581560284}
{'validation_accuracy': 0.95472440944



--- loading from data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\train.joblib
num of instances in the file:  10716
loaded, labels:  Counter({'perturbed': 5358, 'clean': 5358})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\val.joblib
num of instances in the file:  3556
loaded, labels:  Counter({'perturbed': 1778, 'clean': 1778})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\clean_vs_all\test_release.joblib
num of instances in the file:  2891
loaded, labels:  Counter({'clean': 2009, 'perturbed': 882})
--- training main starting
skip if done: False

no. train: 10,716
no. val: 3,556
no. test: 2,891
no. features: 4,539

--- starting training
--- finished training
--- training time: 00:00:10
{'train_accuracy': 0.9997200447928332, 'train_balanced_accuracy': 0.9997202014549524, 'train_confusion_matrix': [[5355, 3], [0, 5358]], 'train_roc_auc': 0.9997200447928332}
{'validation_accuracy': 0.963723284589426



--- loading from data_tcab\detection-experiments\allocine\distilcamembert\multiclass_with_clean\train.joblib
num of instances in the file:  6545
loaded, labels:  Counter({'clean': 1187, 'input_reduction': 1187, 'textbugger': 1004, 'textfooler': 987, 'deepwordbug': 976, 'bae': 751, 'pwws': 453})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\multiclass_with_clean\val.joblib
num of instances in the file:  2189
loaded, labels:  Counter({'clean': 411, 'input_reduction': 411, 'textfooler': 336, 'deepwordbug': 334, 'textbugger': 328, 'bae': 228, 'pwws': 141})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\multiclass_with_clean\test_release.joblib
num of instances in the file:  2891
loaded, labels:  Counter({'clean': 2009, 'input_reduction': 204, 'textbugger': 165, 'deepwordbug': 163, 'textfooler': 162, 'bae': 116, 'pwws': 72})
--- training main starting
skip if done: False

no. train: 6,545
no. val: 2,189
no. test: 2,891
no. features: 4,5



--- loading from data_tcab\detection-experiments\allocine\distilcamembert\multiclass_with_clean\train.joblib
num of instances in the file:  6545
loaded, labels:  Counter({'clean': 1187, 'input_reduction': 1187, 'textbugger': 1004, 'textfooler': 987, 'deepwordbug': 976, 'bae': 751, 'pwws': 453})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\multiclass_with_clean\val.joblib
num of instances in the file:  2189
loaded, labels:  Counter({'clean': 411, 'input_reduction': 411, 'textfooler': 336, 'deepwordbug': 334, 'textbugger': 328, 'bae': 228, 'pwws': 141})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\multiclass_with_clean\test_release.joblib
num of instances in the file:  2891
loaded, labels:  Counter({'clean': 2009, 'input_reduction': 204, 'textbugger': 165, 'deepwordbug': 163, 'textfooler': 162, 'bae': 116, 'pwws': 72})
--- training main starting
skip if done: False

no. train: 6,545
no. val: 2,189
no. test: 2,891
no. features: 4,5



--- loading from data_tcab\detection-experiments\allocine\distilcamembert\multiclass_with_clean\train.joblib
num of instances in the file:  6545
loaded, labels:  Counter({'clean': 1187, 'input_reduction': 1187, 'textbugger': 1004, 'textfooler': 987, 'deepwordbug': 976, 'bae': 751, 'pwws': 453})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\multiclass_with_clean\val.joblib
num of instances in the file:  2189
loaded, labels:  Counter({'clean': 411, 'input_reduction': 411, 'textfooler': 336, 'deepwordbug': 334, 'textbugger': 328, 'bae': 228, 'pwws': 141})
--- loading from data_tcab\detection-experiments\allocine\distilcamembert\multiclass_with_clean\test_release.joblib
num of instances in the file:  2891
loaded, labels:  Counter({'clean': 2009, 'input_reduction': 204, 'textbugger': 165, 'deepwordbug': 163, 'textfooler': 162, 'bae': 116, 'pwws': 72})
--- training main starting
skip if done: False

no. train: 6,545
no. val: 2,189
no. test: 2,891
no. features: 4,5

In [14]:
# Report the wall-clock time elapsed since `t` was recorded in the import cell.
# Hours are computed with divmod rather than time.gmtime/%H, because %H is the
# hour of the day (00-23) and would wrap back to 00 for runs of 24 hours or more.
elapsed_s = int(time.time() - t)
hours, remainder = divmod(elapsed_s, 3600)
minutes, seconds = divmod(remainder, 60)
print(f"Total time: {hours:02d}:{minutes:02d}:{seconds:02d}")

Total time: 00:08:14
