In [16]:
import os
import time
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from label_flip_revised.utils import open_csv, open_json, time2str

In [17]:
# Project root = parent of the directory the notebook kernel was started in.
PATH_ROOT = Path.cwd().parent
print(PATH_ROOT)

/home/lukec/workspace/label_flip_revised


In [18]:
# Read the "Banknote" train/test splits and standardise the features.
# The third value returned by `open_csv` is not needed here and is discarded.
path_train = str(PATH_ROOT / 'data' / 'output' / 'train' / 'banknote_std_clean_train.csv')
path_test = str(PATH_ROOT / 'data' / 'output' / 'test' / 'banknote_std_clean_test.csv')

X_train, y_train, _ = open_csv(path_train)
print(X_train.shape, y_train.shape)

X_test, y_test, _ = open_csv(path_test)
print(X_test.shape, y_test.shape)

# Scaling statistics come from the training split only and are then reused
# unchanged on the test split.
# NOTE(review): the "_std_" in the file names suggests the features may already
# be standardised -- confirm that scaling again here is intended.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

(1097, 4) (1097,)
(275, 4) (275,)


In [19]:
# Rebuild the SVM: read its stored hyper-parameters from JSON, then refit it on
# the training split (only the parameters are loaded, not a fitted model).
from sklearn.svm import SVC

path_svm_param = str(PATH_ROOT / 'data' / 'output' / 'alfa' / 'banknote_std_svm.json')
svm_param = open_json(path_svm_param)
print(svm_param)

svm = SVC(**svm_param).fit(X_train, y_train)

# Mean accuracy on each split, reported as a percentage.
acc_train, acc_test = svm.score(X_train, y_train), svm.score(X_test, y_test)
print('Accuracy on train: {:.2f} test: {:.2f}'.format(acc_train*100, acc_test*100))

{'C': 656.8890043866899, 'gamma': 0.012976745522971619, 'kernel': 'rbf'}
Accuracy on train: 100.00 test: 100.00


In [20]:
# Restore the pre-trained neural network from disk and measure its accuracy.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from label_flip_revised.simple_nn_model import SimpleModel
from label_flip_revised.torch_utils import evaluate, train_model

BATCH_SIZE = 128  # Mini-batch size.
HIDDEN_LAYER = 128  # Number of neurons in a hidden layer.
LR = 0.001  # Learning rate.
MAX_EPOCHS = 400  # Number of training epochs (not used in this cell).

# Prefer the GPU when one is visible; otherwise fall back to the CPU and say so.
has_cuda = torch.cuda.is_available()
device = torch.device('cuda' if has_cuda else 'cpu')
if not has_cuda:
    print('Running on CPU!')

n_features = X_train.shape[1]

# Features are cast to float32 and labels to int64 before being wrapped.
dataset_train = TensorDataset(
    torch.from_numpy(X_train).float(),
    torch.from_numpy(y_train).long(),
)
dataset_test = TensorDataset(
    torch.from_numpy(X_test).float(),
    torch.from_numpy(y_test).long(),
)
# Only the training loader shuffles.
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, shuffle=False)

net = SimpleModel(n_features, hidden_dim=HIDDEN_LAYER, output_dim=2).to(device)
# No training happens in this cell; the optimizer and loss are created so they
# can be handed to the ART classifier wrapper later on.
optimizer = torch.optim.SGD(net.parameters(), lr=LR, momentum=0.8)
loss_fn = nn.CrossEntropyLoss()

# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
path_model = str(PATH_ROOT / 'results' / 'real' / 'torch' / 'banknote_std_SimpleNN_random_0.00.torch')
state_dict = torch.load(path_model, map_location=device)
net.load_state_dict(state_dict)

# `evaluate` returns a pair; only its first element (accuracy) is used here.
acc_train, _ = evaluate(dataloader_train, net, loss_fn, device)
acc_test, _ = evaluate(dataloader_test, net, loss_fn, device)
print('Accuracy on train: {:.2f} test: {:.2f}'.format(acc_train*100, acc_test*100))

Accuracy on train: 98.45 test: 98.55


In [11]:
# Apply poisoning attacks
import art.attacks.poisoning as poison
from art.estimators.classification import PyTorchClassifier, SklearnClassifier

In [22]:
# Global minimum / maximum over the whole scaled training matrix, used as the
# clipping bounds handed to the ART classifier wrappers.
clip_values = (X_train.min(), X_train.max())
min_, max_ = clip_values
print(min_, max_)

-3.4870622467910914 3.778653516257267


In [23]:
# Wrap the PyTorch network in ART's classifier interface.
# ART documents `device_type` as a string ('cpu' or 'gpu'), not a torch.device.
# Passing the torch.device object only selected the right device because ART
# falls back on `torch.cuda.is_available()`; translate it to the documented
# string form instead.
clf_torch = PyTorchClassifier(
    model=net,
    loss=loss_fn,
    optimizer=optimizer,
    input_shape=(n_features,),
    nb_classes=2,
    clip_values=clip_values,
    device_type='gpu' if device.type == 'cuda' else 'cpu',
)

In [None]:
# Wrap the fitted scikit-learn SVM in ART's classifier interface, using the
# same clipping bounds as the PyTorch wrapper.
clf_svm = SklearnClassifier(model=svm, clip_values=clip_values)