In [1]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
from imblearn.under_sampling import RandomUnderSampler 
from secml.data import CDataset
from secml.data.splitter import CTrainTestSplit
from secml.ml.features import CNormalizerMinMax
from secml.ml.peval.metrics import CMetricAccuracy
from secml.ml.classifiers import CClassifierSVM
from secml.ml.kernels import CKernelRBF
from secml.adv.attacks import CAttackPoisoningSVM

from label_flip_revised.utils import open_csv

In [2]:
PATH_ROOT = os.path.join(Path().absolute().parent)
print(PATH_ROOT)

/home/lukec/workspace/label_flip_revised_new


In [3]:
RANDOM_STATE = 999

In [4]:
f = open(os.path.join(PATH_ROOT, 'data', 'raw', 'australian.dat'))
data = np.loadtxt(f)
y = data[:, 14]
X = data[:, 0:13]

print(X.shape)

(690, 13)


In [5]:
rus = RandomUnderSampler(random_state=RANDOM_STATE)
X_res, y_res = rus.fit_resample(X, y)
print(X_res.shape)

(614, 13)


In [6]:
n_val = 100
n_test = 100
n_train = X_res.shape[0] - n_val - n_test
print(f'n_train: {n_train}')

dataset = CDataset(X_res, y_res, header=None)
splitter = CTrainTestSplit(train_size=n_train + n_val, test_size=n_test, random_state=RANDOM_STATE)
tr_val, ts = splitter.split(dataset)
splitter = CTrainTestSplit(train_size=n_train, test_size=n_val, random_state=RANDOM_STATE)
tr, val = splitter.split(dataset)

n_train: 414


In [7]:
nmz = CNormalizerMinMax()
tr.X = nmz.fit_transform(tr.X)
val.X = nmz.transform(val.X)
ts.X = nmz.transform(ts.X)

In [8]:
metric = CMetricAccuracy()

clf = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)
clf.fit(tr.X, tr.Y)
pred_tr = clf.predict(tr.X)
pred_ts = clf.predict(ts.X)
print("Train  " + str(metric.performance_score(tr.Y, pred_tr)))
print("Test  " + str(metric.performance_score(ts.Y, pred_ts)))

Train  0.9685990338164251
Test  0.82


In [9]:
lb, ub = val.X.min(), val.X.max()

In [10]:
solver_params = {
    'eta': 0.05,
    'eta_min': 0.05,
    'eta_max': None,
    'max_iter': 100,
    'eps': 1e-6
}

In [11]:
pois_attack = CAttackPoisoningSVM(
    classifier=clf,
    training_data=tr,
    val=val,
    lb=lb, ub=ub,
    solver_params=solver_params,
    random_seed=RANDOM_STATE,
)
xc = tr[0, :].X
yc = tr[0, :].Y
pois_attack.x0 = xc
pois_attack.xc = xc
pois_attack.yc = yc


In [12]:
n_points = int(n_train * 0.1)
print(f'N Poisoned Points: {n_points}')

pois_attack.n_points = n_points
pois_y_pred, pois_scores, pois_ds, f_opt = pois_attack.run(ts.X, ts.Y)

N Poisoned Points: 41


In [13]:
acc_train = metric.performance_score(tr.Y, pred_tr)
acc_test = metric.performance_score(ts.Y, pred_ts)

In [14]:
# Train poisoned classifier
X_pois = np.vstack([tr.X.get_data(), pois_ds.X.get_data()])
y_pois = np.concatenate([tr.Y.get_data(), pois_ds.Y.get_data()])
print(X_pois.shape, y_pois.shape)

tr_pois = CDataset(X_pois, y_pois, header=None)

(455, 13) (455,)


In [15]:
clf_pois = CClassifierSVM(kernel=CKernelRBF(gamma=10), C=1)
clf_pois.fit(tr_pois.X, tr_pois.Y)
pred_pois_train = clf_pois.predict(tr_pois.X)
pred_pois_test = clf_pois.predict(ts.X)

acc_pois_train = metric.performance_score(tr_pois.Y, pred_pois_train)
acc_pois_test = metric.performance_score(ts.Y, pred_pois_test)

print(f'Before poisoning: Acc on Clean Train: {acc_train*100:.2f} Clean Test: {acc_test*100:.2f}')
print(f'Poisoned clf:  Acc on Poisoned Train: {acc_pois_train*100:.2f} Clean Test: {acc_pois_test*100:.2f}')

Before poisoning: Acc on Clean Train: 96.86 Clean Test: 82.00
Poisoned clf:  Acc on Poisoned Train: 96.26 Clean Test: 65.00
