In [1]:
import os
import sys

# Make the parent of the current working directory importable.
# os.path.abspath('') resolves to the current working directory; its parent is
# appended to sys.path (once) — presumably the repository root that contains
# the `src` package imported in the next cells (TODO confirm layout).
dir2 = os.path.abspath('')
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

In [2]:
import hydra
from hydra import compose, initialize
from omegaconf import OmegaConf

from src.utils import fix_seed, save_config, save_compiled_config

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import torch

from src.config import get_attack, get_criterion, get_disc_list, get_model
from src.data import MyDataset, load_data, transform_data
from src.estimation.estimators import AttackEstimator

import numpy as np

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from src.data import MyDataset, load_data, transform_data
from src.training.train import Trainer
from src.utils import fix_seed, save_config, save_compiled_config

CUDA extension for structured kernels (Cauchy and Vandermonde multiplication) not found. Install by going to extensions/kernels/ and running `python setup.py install`, for improved speed and memory efficiency. Note that the kernel changed for state-spaces 4.0 and must be recompiled.
Falling back on slow Cauchy and Vandermonde kernel. Install at least one of pykeops or the CUDA extension for better speed and memory efficiency.


# Functions

In [4]:
def train(cfg, train_loader, test_loader):
    """Train a model with the project's ``Trainer`` and return it.

    Parameters
    ----------
    cfg : mapping
        Experiment config. This function reads ``cfg["save_path"]``
        (TensorBoard logs are written to ``<save_path>/tensorboard``),
        ``cfg["model_id_attack"]`` (used as the random seed) and
        ``cfg["training_params"]`` (forwarded to
        ``Trainer.initialize_with_params``).
    train_loader, test_loader
        Data loaders passed unchanged to ``trainer.train_model``.

    Returns
    -------
    The ``model`` attribute of the trainer after ``train_model`` has run.

    Notes
    -----
    * Reads the notebook-level ``device`` variable, which therefore has to be
      defined before this function is called.
    * ``cfg["transform_data"]`` is intentionally not used: the loaders arrive
      ready-made, so an augmentation list built here would never be applied
      (and building it required ``instantiate``, which is not imported).
    """
    seed = cfg['model_id_attack']

    logger = SummaryWriter(cfg["save_path"] + "/tensorboard")
    try:
        fix_seed(seed)

        # Values that always override whatever cfg["training_params"] holds.
        const_params = {
            "logger": logger,
            "print_every": 10,  # fixed here instead of cfg["print_every"]
            "device": device,
            "seed": seed,
            "train_self_supervised": False,
        }

        trainer_params = dict(cfg["training_params"])
        trainer_params.update(const_params)
        trainer = Trainer.initialize_with_params(**trainer_params)

        trainer.train_model(train_loader, test_loader)
    finally:
        # Flush and close the TensorBoard writer even if training fails.
        logger.close()

    return trainer.model


def get_attacks(cfg, eps, n_steps_list, model):
    """Build one attack object per entry of ``n_steps_list``.

    Every attack shares the same target ``model``, the same criterion
    (``cfg["criterion_name"]`` / ``cfg["criterion_params"]``) and the same
    ``AttackEstimator``; they differ only in ``n_steps``. The base parameters
    come from ``cfg["attack"]["attack_params"]`` and are overridden with
    ``alpha=0`` and the given ``eps``.

    Returns
    -------
    list
        Attacks created by ``get_attack(cfg["attack"]["name"], params)``,
        in the order of ``n_steps_list``.
    """
    loss_fn = get_criterion(cfg["criterion_name"], cfg["criterion_params"])

    # No discriminator checks are configured (first argument is None).
    shared_estimator = AttackEstimator(
        None,
        cfg["metric_effect"],
        cfg["metric_hid"],
        batch_size=cfg["estimator_batch_size"],
    )

    def _build(n_steps):
        # Fresh copy per attack so the objects never share a parameter dict.
        params = dict(cfg["attack"]["attack_params"])
        params.update(
            model=model,
            criterion=loss_fn,
            estimator=shared_estimator,
            alpha=0,
            eps=eps,
            n_steps=n_steps,
        )
        return get_attack(cfg["attack"]["name"], params)

    return [_build(n_steps) for n_steps in n_steps_list]

# Config + data

In [5]:
# Compose the experiment config. `initialize` is used as a context manager so
# that Hydra's global state is restored when the block exits; a bare
# `initialize(...)` call leaves Hydra globally initialized and makes this cell
# fail on re-run in the same kernel.
with initialize(config_path='config/my_configs', version_base=None):
    cfg = compose(config_name='mix_exp.yaml')

In [6]:
dataset_name = cfg["dataset"]['name']
print("Dataset", dataset_name)

# Load the raw splits, then apply the project's preprocessing.
# Note the different ordering of the two return tuples.
X_train, y_train, X_test, y_test = load_data(dataset_name)
X_train, X_test, y_train, y_test = transform_data(
    X_train, X_test, y_train, y_test, slice_data=cfg["slice"]
)

# Neither loader shuffles. Later cells pair attacked samples with
# `train_loader.dataset.y` by position, which presumably relies on this
# fixed order (TODO confirm against `apply_attack`).
loader_kwargs = dict(batch_size=cfg["batch_size"], shuffle=False)
test_loader = DataLoader(MyDataset(X_test, y_test), **loader_kwargs)
train_loader = DataLoader(MyDataset(X_train, y_train), **loader_kwargs)

device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")

Dataset PowerCons


# Train original model

In [7]:
# Baseline: model trained on the unmodified training loader.
model_orig = train(cfg, train_loader, test_loader)

Epoch 1 train loss: 0.462; acc_train 0.844; test loss: 0.379; acc_test 0.856; f1_test 0.874; balance 0.644; certainty 0.734
Epoch 11 train loss: 0.106; acc_train 0.972; test loss: 0.143; acc_test 0.978; f1_test 0.978; balance 0.522; certainty 0.892
Epoch 21 train loss: 0.064; acc_train 0.978; test loss: 0.122; acc_test 0.961; f1_test 0.963; balance 0.539; certainty 0.913
Epoch 31 train loss: 0.039; acc_train 1.0; test loss: 0.102; acc_test 0.972; f1_test 0.973; balance 0.528; certainty 0.933
Epoch 41 train loss: 0.025; acc_train 1.0; test loss: 0.083; acc_test 0.972; f1_test 0.973; balance 0.528; certainty 0.947
Epoch 51 train loss: 0.017; acc_train 1.0; test loss: 0.078; acc_test 0.978; f1_test 0.978; balance 0.522; certainty 0.954


### Attack the training data with the original model

In [99]:
# Numbers of attack iterations to try: 5, 10, 15, 20 (tune as needed).
n_steps_list = np.arange(5, 25, 5)

# One attack per step count, all with eps=0.005, targeting the original model.
attacks = get_attacks(
    cfg=cfg,
    eps=0.005,
    n_steps_list=n_steps_list,
    model=model_orig,
)

logging
logging
logging
logging


In [100]:
# Adversarial version of the training set for every attack, keyed by the
# attack's number of steps. `.squeeze()` drops all size-1 dimensions.
attacked_train = {
    attack.n_steps: attack.apply_attack(train_loader).squeeze()
    for attack in attacks
}

100%|██████████| 5/5 [00:00<00:00,  9.38it/s]
100%|██████████| 10/10 [00:01<00:00,  9.42it/s]
100%|██████████| 15/15 [00:01<00:00,  9.65it/s]
100%|██████████| 20/20 [00:02<00:00,  9.53it/s]


### Mix attacked data with the original training data

In [101]:
train_dataset = train_loader.dataset
X_train, y_train = train_dataset.X, train_dataset.y

# Clean samples followed by their 5-step adversarial counterparts
# (the dictionary key selects the number of attack steps).
X_train_mixed = torch.cat((X_train, attacked_train[5]), dim=0)
# Labels are duplicated: each attacked sample keeps its original label.
y_train_mixed = torch.cat((y_train, y_train), dim=0)

mixed_dataset = MyDataset(X_train_mixed, y_train_mixed)
train_loader_mixed = DataLoader(
    mixed_dataset,
    batch_size=cfg["batch_size"],
    shuffle=True,
)

# Train on mixed data

In [102]:
# Second model: same config and test loader, trained on clean + attacked data.
model_mixed = train(cfg, train_loader_mixed, test_loader)

Epoch 1 train loss: 0.401; acc_train 0.836; test loss: 0.303; acc_test 0.911; f1_test 0.918; balance 0.589; certainty 0.774
Epoch 11 train loss: 0.078; acc_train 0.981; test loss: 0.122; acc_test 0.967; f1_test 0.968; balance 0.533; certainty 0.91
Epoch 21 train loss: 0.035; acc_train 1.0; test loss: 0.078; acc_test 0.978; f1_test 0.978; balance 0.522; certainty 0.946
Epoch 31 train loss: 0.013; acc_train 1.0; test loss: 0.056; acc_test 0.989; f1_test 0.989; balance 0.511; certainty 0.964
Epoch 41 train loss: 0.006; acc_train 1.0; test loss: 0.052; acc_test 0.989; f1_test 0.989; balance 0.511; certainty 0.97
Epoch 51 train loss: 0.004; acc_train 1.0; test loss: 0.05; acc_test 0.989; f1_test 0.989; balance 0.511; certainty 0.974


# Test

In [109]:
# Attack using the gradients of the new (mixed) model.
attacks = get_attacks(
    cfg=cfg,
    eps=0.01,
    n_steps_list=n_steps_list,
    model=model_mixed,
)
# Alternative: attack using the gradients of the original model.
# attacks = get_attacks(cfg, 0.01, n_steps_list, model_orig)

test_dataset = test_loader.dataset
X_test, y_test = test_dataset.X, test_dataset.y

# Adversarial version of the test set for every attack, keyed by step count.
attacked_test = {
    attack.n_steps: attack.apply_attack(test_loader).squeeze()
    for attack in attacks
}

logging
logging
logging
logging


100%|██████████| 5/5 [00:00<00:00, 12.24it/s]
100%|██████████| 10/10 [00:01<00:00,  9.47it/s]
100%|██████████| 15/15 [00:01<00:00,  9.41it/s]
100%|██████████| 20/20 [00:02<00:00,  9.45it/s]


# Comparison

In [110]:
from sklearn.metrics import roc_auc_score

In [115]:
criterion = torch.nn.BCELoss()

# Adversarial test set produced with 20 attack steps (the key selects the
# number of steps). The trailing dimension removed by `.squeeze()` when the
# set was built is added back before the forward pass.
test_samples = attacked_test[20].unsqueeze(-1).to(device)

# NOTE(review): `attacked_test` is crafted against `model_mixed` in the attack
# cell, so `model_orig` is scored on examples generated for a different model.
# Keep this in mind when comparing the two sets of predictions.

# Inference only: switch off train-time layers (dropout, batch-norm updates)
# and skip building the autograd graph.
model_orig.eval()
model_mixed.eval()
with torch.no_grad():
    preds_orig = model_orig(test_samples)
    preds_mixed = model_mixed(test_samples)

In [116]:
# BCE loss on the attacked test set: original model first, mixed model second.
target = y_test.float().to(device)
loss_orig = criterion(preds_orig, target).item()
loss_mixed = criterion(preds_mixed, target).item()
print(loss_orig, loss_mixed)

0.7988770008087158 1.7478158473968506


In [117]:
# ROC-AUC of the original model on the attacked test set.
roc_auc_score(y_true=y_test, y_score=preds_orig.detach().cpu().numpy())

0.7760493827160494

In [118]:
# ROC-AUC of the mixed-data model on the attacked test set.
roc_auc_score(y_true=y_test, y_score=preds_mixed.detach().cpu().numpy())

0.5375308641975308