In [1]:
import numpy as np
import scipy
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import rtdl
from catboost.datasets import epsilon
import sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import zero
%matplotlib inline

In [2]:
# set seed:
zero.improve_reproducibility(123456)

123456

In [3]:
# eps_train, eps_test = epsilon()

In [4]:
eps_train = pd.read_csv("data/epsilon/eps_train.csv")
eps_test = pd.read_csv("data/epsilon/eps_test.csv")

In [5]:
eps_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000
0,-1.0,0.005439,0.013331,-0.002186,-0.014590,0.015631,-0.032606,-0.004455,0.013611,0.024088,...,0.029023,0.028153,-0.001714,-0.048453,-0.030330,-0.006301,-0.022238,-0.009459,0.027544,-0.026216
1,1.0,0.001442,0.033820,-0.048836,0.000652,-0.028718,0.013421,-0.006827,0.053082,-0.016931,...,-0.016411,0.005543,-0.017588,0.005169,0.037386,-0.002207,0.023466,0.023459,0.036497,0.033899
2,1.0,0.004597,-0.042784,-0.004416,-0.005692,0.000731,-0.025873,0.031471,0.059522,0.003261,...,-0.020841,-0.030902,0.005387,-0.017727,-0.011851,0.007834,-0.002806,-0.004059,0.024565,-0.001050
3,-1.0,-0.009080,0.017605,-0.009870,0.007386,0.021338,-0.042682,-0.004471,0.035229,0.037935,...,-0.025915,0.005119,0.022496,-0.005170,-0.023424,-0.026319,-0.036478,-0.036575,-0.002760,-0.021496
4,1.0,-0.011689,-0.021413,0.012358,-0.012036,-0.009324,-0.023587,0.007309,0.034027,-0.020042,...,0.006731,0.011447,-0.015999,-0.016919,0.047534,-0.004458,0.013541,0.036077,-0.004313,0.008140
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,-1.0,0.002185,0.004446,0.022291,0.020920,0.020427,0.012598,0.030112,0.011090,0.001913,...,-0.024601,0.006985,0.016945,0.007586,0.037605,0.028264,-0.001602,0.001870,0.006102,-0.003627
99996,1.0,-0.011844,0.020148,-0.026867,0.003328,0.027158,0.005969,0.017370,-0.011501,0.029321,...,0.026503,-0.025448,-0.037498,-0.027911,0.010793,-0.004598,-0.025452,-0.013585,-0.004808,-0.020536
99997,1.0,0.026138,0.014778,0.001464,-0.026422,0.009453,-0.011432,0.017415,0.048577,0.025201,...,0.026015,-0.003136,0.020917,-0.011271,0.040341,-0.020855,-0.020545,-0.022856,0.049474,-0.010935
99998,1.0,-0.031459,-0.035493,0.003074,-0.030789,-0.013167,0.002857,0.035517,0.007557,-0.012632,...,0.031911,0.028818,0.015954,-0.005749,0.003205,0.018634,0.010508,0.001254,0.010319,-0.001581


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
# data
task_type = "binclass"

X_tr = eps_train.iloc[:, 1:].astype('float32')
y_tr = eps_train.iloc[:, 0].astype('float32' if task_type == 'regression' else 'int64')
#
X_te = eps_test.iloc[:, 1:].astype('float32')
y_te = eps_test.iloc[:, 0].astype('float32' if task_type == 'regression' else 'int64')
if task_type != 'regression':
    y_tr = LabelEncoder().fit_transform(y_tr).astype('int64')
    y_te = LabelEncoder().fit_transform(y_te).astype('int64')
n_classes = int(max(y_tr)) + 1 if task_type == 'multiclass' else None

X = {}
y = {}

X['test'] = X_te
y['test'] = y_te

X['train'], X['val'], y['train'], y['val'] = train_test_split(X_tr, y_tr, train_size=0.8, stratify=y_tr)

X = {
    k: torch.tensor(v.to_numpy(), device=device)
    for k, v in X.items()
}
y = {k: torch.tensor(v, device=device) for k, v in y.items()}


In [27]:
d_out = n_classes or 1

# model = rtdl.MLP.make_baseline(
#     d_in=X_tr.shape[1],
#     d_layers=[1024, 512, 256],
#     dropout=0.1,
#     d_out=d_out,
# )
# lr = 0.001
# weight_decay = 0.0

model = rtdl.ResNet.make_baseline(
    d_in=X_tr.shape[1],
    d_main=128,
    d_hidden=256,
    dropout_first=0.2,
    dropout_second=0.0,
    n_blocks=2,
    d_out=d_out,
)
lr = 0.001
weight_decay = 0.0

# model = rtdl.FTTransformer.make_default(
#     n_num_features=X_all.shape[1],
#     cat_cardinalities=None,
#     last_layer_query_idx=[-1],  # it makes the model faster and does NOT affect its output
#     d_out=d_out,
# )

model.to(device)
optimizer = (
    model.make_default_optimizer()
    if isinstance(model, rtdl.FTTransformer)
    else torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
)
loss_fn = (
    F.binary_cross_entropy_with_logits
    if task_type == 'binclass'
    else F.cross_entropy
    if task_type == 'multiclass'
    else F.mse_loss
)

In [28]:
model

ResNet(
  (first_layer): Linear(in_features=2000, out_features=128, bias=True)
  (blocks): Sequential(
    (0): Block(
      (normalization): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (linear_first): Linear(in_features=128, out_features=256, bias=True)
      (activation): ReLU()
      (dropout_first): Dropout(p=0.2, inplace=False)
      (linear_second): Linear(in_features=256, out_features=128, bias=True)
      (dropout_second): Dropout(p=0.0, inplace=False)
    )
    (1): Block(
      (normalization): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (linear_first): Linear(in_features=128, out_features=256, bias=True)
      (activation): ReLU()
      (dropout_first): Dropout(p=0.2, inplace=False)
      (linear_second): Linear(in_features=256, out_features=128, bias=True)
      (dropout_second): Dropout(p=0.0, inplace=False)
    )
  )
  (head): Head(
    (normalization): BatchNorm1d(128, eps=1e-05, moment

In [29]:
def apply_model(x_num, x_cat=None):
    if isinstance(model, rtdl.FTTransformer):
        return model(x_num, x_cat)
    elif isinstance(model, (rtdl.MLP, rtdl.ResNet)):
        assert x_cat is None
        return model(x_num)
    else:
        raise NotImplementedError(
            f'Looks like you are using a custom model: {type(model)}.'
            ' Then you have to implement this branch first.'
        )


@torch.no_grad()
def evaluate(part):
    model.eval()
    prediction = []
    for batch in zero.iter_batches(X[part], 1024):
        prediction.append(apply_model(batch))
    prediction = torch.cat(prediction).squeeze(1).cpu().numpy()
    target = y[part].cpu().numpy()

    if task_type == 'binclass':
        prediction = np.round(scipy.special.expit(prediction))
        score = sklearn.metrics.accuracy_score(target, prediction)
    elif task_type == 'multiclass':
        prediction = prediction.argmax(1)
        score = sklearn.metrics.accuracy_score(target, prediction)
    else:
        assert task_type == 'regression'
        score = sklearn.metrics.mean_squared_error(target, prediction) ** 0.5 * y_std
    return score


# Create a dataloader for batches of indices
# Docs: https://yura52.github.io/zero/reference/api/zero.data.IndexLoader.html
batch_size = 256
train_loader = zero.data.IndexLoader(len(X['train']), batch_size, device=device)

# Create a progress tracker for early stopping
# Docs: https://yura52.github.io/zero/reference/api/zero.ProgressTracker.html
progress = zero.ProgressTracker(patience=20)

print(f'Test score before training: {evaluate("test"):.4f}')

Test score before training: 0.4995


In [30]:
n_epochs = 1000
report_frequency = len(X['train']) // batch_size // 5
for epoch in range(1, n_epochs + 1):
    for iteration, batch_idx in enumerate(train_loader):
        model.train()
        optimizer.zero_grad()
        x_batch = X['train'][batch_idx]
        y_batch = y['train'][batch_idx]
        loss = loss_fn(apply_model(x_batch).squeeze(1), y_batch.float())
        loss.backward()
        optimizer.step()
        if iteration % report_frequency == 0:
            print(f'(epoch) {epoch} (batch) {iteration} (loss) {loss.item():.4f}')

    val_score = evaluate('val')
    test_score = evaluate('test')
    print(f'Epoch {epoch:03d} | Validation score: {val_score:.4f} | Test score: {test_score:.4f}', end='')
    progress.update((-1 if task_type == 'regression' else 1) * val_score)
    if progress.success:
        print(' <<< BEST VALIDATION EPOCH', end='')
        torch.save(model, "models/rtdl_resnet_epsilon.pth")
    print()
    if progress.fail:
        break

(epoch) 1 (batch) 0 (loss) 0.7133
(epoch) 1 (batch) 250 (loss) 0.2917
(epoch) 1 (batch) 500 (loss) 0.2903
(epoch) 1 (batch) 750 (loss) 0.2964
(epoch) 1 (batch) 1000 (loss) 0.2531
Epoch 001 | Validation score: 0.8884 | Test score: 0.8888 <<< BEST VALIDATION EPOCH
(epoch) 2 (batch) 0 (loss) 0.2174
(epoch) 2 (batch) 250 (loss) 0.2856
(epoch) 2 (batch) 500 (loss) 0.2721
(epoch) 2 (batch) 750 (loss) 0.2760
(epoch) 2 (batch) 1000 (loss) 0.2407
Epoch 002 | Validation score: 0.8879 | Test score: 0.8872
(epoch) 3 (batch) 0 (loss) 0.2064
(epoch) 3 (batch) 250 (loss) 0.2788
(epoch) 3 (batch) 500 (loss) 0.2601
(epoch) 3 (batch) 750 (loss) 0.2725
(epoch) 3 (batch) 1000 (loss) 0.2412
Epoch 003 | Validation score: 0.8927 | Test score: 0.8919 <<< BEST VALIDATION EPOCH
(epoch) 4 (batch) 0 (loss) 0.2020
(epoch) 4 (batch) 250 (loss) 0.2498
(epoch) 4 (batch) 500 (loss) 0.2726
(epoch) 4 (batch) 750 (loss) 0.2681
(epoch) 4 (batch) 1000 (loss) 0.2418
Epoch 004 | Validation score: 0.8938 | Test score: 0.8935 

In [32]:
model = torch.load("models/rtdl_mlp_epsilon.pth")
print(f'Test score after training: {evaluate("test"):.4f}')

Test score after training: 0.8936


In [114]:
import sys
sys.path.append("../attacks/")
import mister_ed.utils.pytorch_utils as utils
from utils import load_image, torch_to_image, get_expl, convert_relu_to_softplus, plot_overview, UniGrad
import captum
from captum.attr import Saliency, InputXGradient, IntegratedGradients, DeepLift

In [115]:
model(X["test"][0:2])

tensor([[-1.6143],
        [ 1.6499]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [116]:
y["test"][0:2]

tensor([0, 1], device='cuda:0')

In [340]:
# sal = Saliency(model)
# expl = sal.attribute(X["test"][0:64])
# int_grad = IntegratedGradients(model)
# expl = int_grad.attribute(X["test"][0:64])
# inputxgrad = InputXGradient(model)
# expl = inputxgrad.attribute(X["test"][0:64])
deeplift = DeepLift(model)
expl = deeplift.attribute(X["test"][0:64])

In [344]:
expl = torch.abs(expl)
expl = expl / torch.sum(expl , dim=1, keepdim=True)

In [346]:
expl.size()

torch.Size([64, 2000])

In [342]:
topk = 10
topk_inds = torch.topk(expl, k=topk, dim=1)[1]

In [343]:
ints = []
for _ in range(30):
    e1, e2=np.random.randint(64, size=2)
#     print(e1, e2)
    ints.append(float(len(np.intersect1d(topk_inds[e1].cpu().detach().numpy(),
                            topk_inds[e2].cpu().detach().numpy())))/topk)
np.mean(ints)

0.2733333333333334

In [227]:
rand_input = (torch.zeros((1, 2000))).to(device)
expl_random = sal.attribute(rand_input)

In [228]:
print(float(len(np.intersect1d(topk_inds[e1].cpu().detach().numpy(),
                            torch.topk(expl_random, k=topk, dim=1)[1].cpu().detach().numpy())))/topk)

0.36


In [229]:
y["test"][:64]

tensor([0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,
        1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1,
        0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0], device='cuda:0')

In [230]:
topk_inds.squeeze()

tensor([[1794,  690,  784,  ..., 1478, 1073, 1201],
        [1794, 1866,  484,  ..., 1296, 1029, 1678],
        [1794,  789, 1062,  ..., 1577,  723, 1976],
        ...,
        [1315, 1481,   62,  ...,  507,  824, 1800],
        [1697, 1794, 1062,  ...,   68,  308, 1759],
        [1794,  484, 1187,  ...,  972,  767,  784]], device='cuda:0')

In [51]:
def project_L0_box_torch(y, k, lb, ub):
    x = torch.clone(y)
    p1 = torch.sum(x**2, dim=-1)
    p2 = torch.minimum(torch.minimum(ub - x, x - lb), torch.zeros_like(x))
    p2 = torch.sum(p2**2, dim=-1)
    p3 = torch.sort(torch.reshape(p1-p2, (p2.size()[0],-1)))[0][:,-k]
    x = x*(torch.logical_and(lb <=x, x <= ub)) + lb*(lb > x) + ub*(x > ub)
    x = x * torch.unsqueeze((p1 - p2) >= p3.reshape([-1, 1, 1]), -1)

    return x

In [67]:
prj_x = project_L0_box_torch(X["test"][0:10].unsqueeze(dim=2).unsqueeze(dim=3), k=20, 
                     lb=torch.min(X["test"], dim=0)[0].unsqueeze(0).unsqueeze(dim=2).unsqueeze(dim=3), 
                     ub=torch.max(X["test"], dim=0)[0].unsqueeze(0).unsqueeze(dim=2).unsqueeze(dim=3))

In [69]:
prj_x = prj_x.squeeze(dim=3).squeeze(dim=2)

In [89]:
torch.max(X["test"], dim=0)[0]

tensor([0.1146, 0.0930, 0.0964,  ..., 0.0778, 0.1156, 0.0644], device='cuda:0')

In [88]:
F.sigmoid(model(X["test"][0:5]))

tensor([[0.1660],
        [0.8389],
        [0.9501],
        [0.0032],
        [0.6880]], device='cuda:0', grad_fn=<SigmoidBackward0>)

In [112]:
X["test"].max()

tensor(0.1446, device='cuda:0')

In [366]:
a = torch.tensor([[1,2,-4,-5],
                  [-0.7, 2, 1, -8],
                  [-1, 2, 0, 4]], device="cuda")
a

tensor([[ 1.0000,  2.0000, -4.0000, -5.0000],
        [-0.7000,  2.0000,  1.0000, -8.0000],
        [-1.0000,  2.0000,  0.0000,  4.0000]], device='cuda:0')

In [389]:
def topk_coord(array, used_inds, k=1):
    array_abs = torch.abs(array)
    if len(used_inds)>0:
        array_abs[np.array(used_inds).T.tolist()] = 0.0
    inds = torch.topk(array_abs.view((array.size()[0],-1)), k=k, dim=1)[1].detach().cpu().numpy()
    for k_ind in range(k):
        chosen_inds = [[i, j[k_ind]] for i,j in enumerate(inds)]
    ###
    new_inds = np.array([[i, j[0]] for i,j in enumerate(inds)])
    for k_ind in range(1,k):
        new_inds = np.concatenate((new_inds, np.array([[i, j[k_ind]] for i,j in enumerate(inds)])))
    ###
    new_inds = new_inds.T.tolist()
    ###
    new_array = torch.zeros_like(array)
    new_array[new_inds] = array[new_inds]
    ###
    return new_array, chosen_inds

In [401]:
a2, a_inds = topk_coord(a, [[1,3]])

In [406]:
b = a[np.array(a_inds).T.tolist()]
c = (b<-1) | (b>3)

In [408]:
np.array(a_inds)[c.detach().cpu().numpy()].tolist()

[[0, 3], [2, 3]]

In [411]:
np.array(a_inds).T[1]

array([3, 1, 3])

In [414]:
3.5>=3.5

True