In [1]:
import os
import time
from pathlib import Path

import art.attacks.poisoning as poison
from art.estimators.classification import PyTorchClassifier, SklearnClassifier
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.svm import SVC
from tqdm import tqdm

from label_flip_revised.utils import (flip_binary_label, open_csv, open_json,
                                      time2str)


In [2]:
# Project root: the parent of the current working directory.
PATH_ROOT = Path.cwd().parent
print(PATH_ROOT)

/home/xcha011/workspace/label_flip_revised


In [3]:
# Read the "Banknote" train/test splits from <PATH_ROOT>/data/output.
# open_csv returns three values; only the first two (features, labels) are used.
dir_output = os.path.join(PATH_ROOT, 'data', 'output')

path_train = os.path.join(dir_output, 'train', 'banknote_std_clean_train.csv')
X_train, y_train, _ = open_csv(path_train)
print(X_train.shape, y_train.shape)

path_test = os.path.join(dir_output, 'test', 'banknote_std_clean_test.csv')
X_test, y_test, _ = open_csv(path_test)
print(X_test.shape, y_test.shape)

# Standardise the features. The scaler statistics come from the training split
# only and are then applied unchanged to the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

(1097, 4) (1097,)
(275, 4) (275,)


In [4]:
# Baseline SVM: hyper-parameters are read from a JSON file and passed straight
# to SVC as keyword arguments.
path_svm_param = os.path.join(PATH_ROOT, 'data', 'output', 'alfa', 'banknote_std_svm.json')
svm_param = open_json(path_svm_param)
print(svm_param)

# fit() returns the estimator itself, so construction and training can be chained.
svm = SVC(**svm_param).fit(X_train, y_train)
acc_train, acc_test = svm.score(X_train, y_train), svm.score(X_test, y_test)
print('Accuracy on train: {:.2f} test: {:.2f}'.format(acc_train*100, acc_test*100))

{'C': 656.8890043866899, 'gamma': 0.012976745522971619, 'kernel': 'rbf'}
Accuracy on train: 100.00 test: 100.00


In [5]:
# NOTE: this whole cell is commented out and does not run. It is an alternative
# setup that would load a neural-network model instead of the SVM; nothing in
# the cells below uses any name defined here.

# # Load Neural Network model
# import torch
# import torch.nn as nn
# from torch.utils.data import DataLoader, TensorDataset
# from label_flip_revised.simple_nn_model import SimpleModel
# from label_flip_revised.torch_utils import evaluate, train_model

# BATCH_SIZE = 128  # Size of mini-batch.
# HIDDEN_LAYER = 128  # Number of hidden neurons in a hidden layer.
# LR = 0.001  # Learning rate.
# MAX_EPOCHS = 400  # Number of training epochs.

# if torch.cuda.is_available():
#     device = torch.device('cuda')
# else:
#     device = torch.device('cpu')
#     print('Running on CPU!')

# n_features = X_train.shape[1]
# dataset_train = TensorDataset(torch.from_numpy(X_train).type(torch.float32), torch.from_numpy(y_train).type(torch.int64))
# dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
# dataset_test = TensorDataset(torch.from_numpy(X_test).type(torch.float32), torch.from_numpy(y_test).type(torch.int64))
# dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

# net = SimpleModel(n_features, hidden_dim=HIDDEN_LAYER, output_dim=2)
# net = net.to(device)
# optimizer = torch.optim.SGD(net.parameters(), lr=LR, momentum=0.8)
# loss_fn = nn.CrossEntropyLoss()
# path_model = os.path.join(PATH_ROOT, 'results', 'real', 'torch', 'banknote_std_SimpleNN_random_0.00.torch')
# net.load_state_dict(torch.load(path_model, map_location=device))

# acc_train, _ = evaluate(dataloader_train, net, loss_fn, device)
# acc_test, _ = evaluate(dataloader_test, net, loss_fn, device)
# print('Accuracy on train: {:.2f} test: {:.2f}'.format(acc_train*100, acc_test*100))

In [6]:
# Smallest and largest value found anywhere in the scaled training matrix
# (taken over all features at once); passed to the ART classifiers as clip_values.
clip_values = (X_train.min(), X_train.max())
min_, max_ = clip_values
print(min_, max_)

-3.4870622467910914 3.778653516257267


In [7]:
# One-hot encode the labels; the ART classifiers below are trained on this form.
# The encoder learns its categories from the training labels only; the same
# fitted encoder is then applied to both splits.
encoder = OneHotEncoder(dtype=int)
encoder.fit(y_train.reshape(-1, 1))
y_train_oh, y_test_oh = (
    encoder.transform(labels.reshape(-1, 1)).toarray()
    for labels in (y_train, y_test)
)

In [8]:
# Reference model: an ART-wrapped SVC trained on the untouched training set.
svm_clean = SklearnClassifier(model=SVC(**svm_param), clip_values=clip_values)
svm_clean.fit(X_train, y_train_oh)

# Both the predictions and the one-hot targets are reduced to class indices
# with argmax before being compared.
pred_train = svm_clean.predict(X_train)
pred_test = svm_clean.predict(X_test)
acc_train = accuracy_score(y_train_oh.argmax(axis=1), pred_train.argmax(axis=1))
acc_test = accuracy_score(y_test_oh.argmax(axis=1), pred_test.argmax(axis=1))
print('Accuracy on train: {:.2f} test: {:.2f}'.format(acc_train*100, acc_test*100))

# Second wrapper with identical hyper-parameters, also fitted on clean data.
# This is the instance handed to the poisoning attack and later refitted on the
# poisoned training set.
svm_poison = SklearnClassifier(model=SVC(**svm_param), clip_values=clip_values)
svm_poison.fit(X_train, y_train_oh)

Accuracy on train: 100.00 test: 100.00


In [9]:
# Number of poison points, expressed as a fraction of the training-set size
# and rounded down to a whole number of samples.
POISONING_RATE = 0.10
N_POISON = int(np.floor(len(X_train) * POISONING_RATE))
print(N_POISON)

109


In [10]:
# Pick the training samples that seed the poisoning attack and build their
# (flipped) target labels.
#
# The draw uses a seeded generator: with the previous unseeded
# np.random.permutation every "Restart & Run All" selected a different subset,
# so the accuracies reported further down could not be reproduced.
SEED = 1234
rng = np.random.default_rng(SEED)

# First N_POISON entries of a random permutation = N_POISON distinct indices.
idx_poison = rng.permutation(y_train.shape[0])[:N_POISON]
X_target = X_train[idx_poison]
y_target = y_train[idx_poison]

# Every position of the selected subset is passed as an index to flip.
y_target = flip_binary_label(y_target, np.arange(y_target.shape[0]))

# Same fitted encoder as for the clean labels, so the columns line up.
y_target_oh = encoder.transform(y_target.reshape(-1, 1)).toarray()

In [12]:
# Configure ART's SVM poisoning attack against `svm_poison`.
# The clean training split is supplied as x_train/y_train and the test split as
# x_val/y_val. step/eps/max_iter are forwarded unchanged to the attack.
attack_config = dict(
    step=0.01,
    eps=1.0,
    max_iter=200,
    x_train=X_train,
    y_train=y_train_oh,
    x_val=X_test,
    y_val=y_test_oh,
)
attack = poison.PoisoningAttackSVM(svm_poison, **attack_config)

# Slow cell: the recorded run took about two minutes for 109 points.
poison_pts, poison_lbls = attack.poison(X_target, y_target_oh)

SVM poisoning: 109it [02:00,  1.10s/it]


In [13]:
# Append the crafted poison points (and their labels) to the clean training set.
X_poison = np.vstack((X_train, poison_pts))
y_poison_oh = np.vstack((y_train_oh, poison_lbls))

# Refit the attacked classifier on the poisoned training set.
svm_poison.fit(X_poison, y_poison_oh)

# "train" accuracy is measured on the poisoned set (poison labels included);
# "test" accuracy is measured on the untouched test split.
pred_train = svm_poison.predict(X_poison)
pred_test = svm_poison.predict(X_test)
acc_train = accuracy_score(y_poison_oh.argmax(axis=1), pred_train.argmax(axis=1))
acc_test = accuracy_score(y_test_oh.argmax(axis=1), pred_test.argmax(axis=1))
print('Accuracy on train: {:.2f} test: {:.2f}'.format(acc_train*100, acc_test*100))

Accuracy on train: 90.71 test: 99.27
