# Testing CAttackPoisoningSVM from SecML

[Documentation](https://secml.readthedocs.io/en/v0.15/tutorials/05-Poisoning.html)

[Github](https://github.com/pralab/secml)

In [12]:
import os
from pathlib import Path

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from label_flip_revised.utils import (flip_binary_label, open_csv, open_json)

In [13]:
PATH_ROOT = Path(os.getcwd()).absolute().parent
print(PATH_ROOT)

/home/lukec/workspace/label_flip_revised


In [14]:
# Load data
# Use "Banknote" dataset
path_train = os.path.join(PATH_ROOT, 'data', 'output', 'train', 'australian_std_clean_train.csv')
X_train, y_train, _ = open_csv(path_train)
print(X_train.shape, y_train.shape)

path_test = os.path.join(PATH_ROOT, 'data', 'output', 'test', 'australian_std_clean_test.csv')
X_test, y_test, _ = open_csv(path_test)
print(X_test.shape, y_test.shape)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

(552, 14) (552,)
(138, 14) (138,)


In [15]:
clip_values = (X_train.min(), X_train.max())
print(clip_values)

(0.0, 1.0)


In [16]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)
print(f'# of train: {X_train.shape[0]}')
print(f'# of val: {X_val.shape[0]}')
print(f'# of test: {X_test.shape[0]}')

# of train: 441
# of val: 111
# of test: 138


In [22]:
from secml.array import CArray
from secml.data import CDataset

X_train = CArray(X_train)
y_train = CArray(y_train)
X_val = CArray(X_val)
y_val = CArray(y_val)
X_test = CArray(X_test)
y_test = CArray(y_test)

train_set = CDataset(X_train, y_train)
val_set = CDataset(X_val, y_val)
test_set = CDataset(X_test, y_test)

In [23]:
from secml.ml.classifiers import CClassifierSVM
from secml.ml.kernels import CKernelRBF

# Train classifier
clf = CClassifierSVM(C=1, kernel=CKernelRBF(gamma=10))
clf.fit(X_train, y_train)

pred_train = clf.predict(X_train, y_train)
pred_test = clf.predict(X_test, y_test)
acc_train = np.mean(pred_train.get_data() == y_train.get_data())
acc_test = np.mean(pred_test.get_data() == y_test.get_data())

print(f'Accuracy Train: {acc_train:.2f}, Test: {acc_test:.2f}.')

Accuracy Train: 0.96, Test: 0.82.


In [24]:
from secml.adv.attacks import CAttackPoisoningSVM

# Generate poisoning examples

solver_params = {
    'eta': 0.05,
    'eta_min': 0.05,
    'eta_max': None,
    'max_iter': 100,
    'eps': 1e-6
}

attack = CAttackPoisoningSVM(
    classifier=clf,
    training_data=train_set,
    val=val_set,
    lb=clip_values[0],
    ub=clip_values[1],
    solver_params=solver_params,
)

# Initial poisoning sample
xc = train_set[0, :].X
yc = train_set[0, :].Y

attack.x0 = xc
attack.xc = xc
attack.yc = yc

print("Initial poisoning sample features: {:}".format(xc.ravel()))
print("Initial poisoning sample label: {:}".format(yc.item()))

Initial poisoning sample features: CArray([0.       0.167937 0.089286 0.       0.153846 0.5      0.157895 0.
 0.       0.       0.       0.5      0.1      0.00456 ])
Initial poisoning sample label: 0


In [25]:
from secml.ml.peval import metrics

POISONING_RATE = 0.20
N_POISON = int(np.floor(X_train.shape[0] * POISONING_RATE))
print(f'# of poisoned examples: {N_POISON}')
attack.n_points = N_POISON

_, _, pois_examples, _ = attack.run(X_val, y_val)

# of poisoned examples: 88


In [26]:
clf = CClassifierSVM(C=1, kernel=CKernelRBF(gamma=10))
clf.fit(X_train, y_train)

pred_train = clf.predict(X_train, y_train)
pred_test = clf.predict(X_test, y_test)
acc_train = np.mean(pred_train.get_data() == y_train.get_data())
acc_test = np.mean(pred_test.get_data() == y_test.get_data())
print(f'Before poisoning: Accuracy Train: {acc_train:.2f}, Test: {acc_test:.2f}.')


X_poisoned = np.vstack([X_train.get_data(), pois_examples.X.get_data()])
y_poisoned = np.concatenate([y_train.get_data(), pois_examples.Y.get_data()])

clf_pois = CClassifierSVM(C=1, kernel=CKernelRBF(gamma=10))
clf_pois.fit(X_poisoned, y_poisoned)

pred_pois = clf_pois.predict(X_poisoned, y_poisoned)
pred_test = clf_pois.predict(X_test, y_test)
acc_train = np.mean(pred_pois.get_data() == y_poisoned)
acc_test = np.mean(pred_test.get_data() == y_test.get_data())

print(f'After poisoning: Accuracy Train: {acc_train:.2f}, Test: {acc_test:.2f}.')

Before poisoning: Accuracy Train: 0.96, Test: 0.82.
After poisoning: Accuracy Train: 0.94, Test: 0.79.
