In [1]:
import pickle
from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names
import torch
import numpy as np
from scipy.spatial.distance import pdist, cdist
import random
from sklearn.metrics import pairwise_distances
from cv2 import resize
from PIL import Image
from scipy.special import softmax

# Preliminaries

In [2]:
def resize_np_img(img, size=(15,15)):
    return resize(img.transpose(2,1,0), size).transpose(2,1,0)

In [3]:
def show_img(img):
    img = np.uint8(img.transpose(2,1,0)*255)
    return Image.fromarray(img)

In [4]:
with open('result.pkl', 'rb') as file:
    data = pickle.load(file)

In [5]:
x = data['poison_x']
y = data['poison_y']
poison_idxs = data['poison_idxs']

In [6]:
poison_idxs

[1000,
 1001,
 1002,
 1003,
 1004,
 1005,
 1006,
 1007,
 1008,
 1009,
 1010,
 1011,
 1012,
 1013,
 1014,
 1015,
 1016,
 1017,
 1018,
 1019,
 1020,
 1021,
 1022,
 1023,
 1024,
 1025,
 1026,
 1027,
 1028,
 1029,
 1030,
 1031,
 1032,
 1033,
 1034,
 1035,
 1036,
 1037,
 1038,
 1039,
 1040,
 1041,
 1042,
 1043,
 1044,
 1045,
 1046,
 1047,
 1048,
 1049]

In [7]:
model = torch.load('result.pt')

In [8]:
model

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): ReLU(inplace=True)
  (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(inplace=True)
  (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace=True)
  (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (13): Flatten(start_dim=1, end_dim=-1)
  (14): Dropout(p=0.5, inplace=False)
  (15): Linear(in_features=256, out_features=4096, bias=True)
  (16): ReLU(inplace=True)
  (17): Dropout(p=0.5, inplace=False)
  (18): Linear(in_features=4096,

In [9]:
for idx, child in enumerate(model.children()):
    print(f'{idx}:{child}')

0:Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
1:ReLU(inplace=True)
2:MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
3:Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
4:ReLU(inplace=True)
5:MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
6:Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
7:ReLU(inplace=True)
8:Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
9:ReLU(inplace=True)
10:Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
11:ReLU(inplace=True)
12:MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
13:Flatten(start_dim=1, end_dim=-1)
14:Dropout(p=0.5, inplace=False)
15:Linear(in_features=256, out_features=4096, bias=True)
16:ReLU(inplace=True)
17:Dropout(p=0.5, inplace=False)
18:Linear(in_features=4096, out_features=4096, bias=True)
19:ReLU(inplace=True)
20:Linear(in_features=4096, out_features=10, bias=True

In [10]:
get_graph_node_names(model)

(['input',
  '0',
  '1',
  '2',
  '3',
  '4',
  '5',
  '6',
  '7',
  '8',
  '9',
  '10',
  '11',
  '12',
  '13',
  '14',
  '15',
  '16',
  '17',
  '18',
  '19',
  '20'],
 ['input',
  '0',
  '1',
  '2',
  '3',
  '4',
  '5',
  '6',
  '7',
  '8',
  '9',
  '10',
  '11',
  '12',
  '13',
  '14',
  '15',
  '16',
  '17',
  '18',
  '19',
  '20'])

In [11]:
# return_nodes = {
#     "avgpool": "avgpool"
# }
model2 = create_feature_extractor(model, return_nodes={'18':'18'})
model2 = model2.to('cuda')

In [12]:
rnd = np.random.rand(1,3,32,32)
model2(torch.Tensor(rnd.astype('float32')).to('cuda'))['18'].shape

torch.Size([1, 4096])

In [13]:
def get_repr(x):
    return model2(torch.Tensor(x.astype('float32')).reshape(-1,3,32,32).to('cuda'))['18'].squeeze().cpu().numpy()

In [14]:
get_repr(rnd)

array([0.03547461, 0.00098937, 0.0125333 , ..., 0.18194945, 0.17261073,
       0.22909732], dtype=float32)

# Real deal

In [15]:
np.unique(y.argmax(axis=1))

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [16]:
np.where(y.argmax(axis=1)==0)

(array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
         26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
         39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
         52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
         65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
         78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
         91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
        104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
        117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
        130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
        143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
        156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
        169, 170, 171, 172, 173, 174, 175, 176, 177

In [17]:
centers = {i:get_repr(x[np.where(y.argmax(axis=1)==i)]).mean(axis=0) for i in range(10)}

In [18]:
centers_arr = np.array([centers[i] for i in range(10)])

In [19]:
centers_arr.shape

(10, 4096)

In [20]:
repr_arr = get_repr(x)

In [21]:
repr_arr.shape

(2500, 4096)

In [22]:
distances = cdist(repr_arr,centers_arr)

In [23]:
distances.shape

(2500, 10)

In [24]:
clean_distances = np.delete(distances, poison_idxs,axis=0)
poison_distances = distances[poison_idxs, :]

In [25]:
clean_distances.shape

(2450, 10)

In [26]:
poison_distances.shape

(50, 10)

In [27]:
clean_distances[0][y[0].argmax(axis=0)]

1.5011720760773772

In [28]:
clean_distances[0]

array([1.50117208, 1.08054309, 0.89856453, 0.61843166, 1.46153795,
       0.6945936 , 1.16608748, 0.82008687, 2.15096817, 1.50009699])

In [29]:
poison_distances[0]

array([1.83343947, 1.33143404, 1.49455453, 0.98013768, 1.77884233,
       1.0905715 , 1.60226633, 1.09066659, 2.33353175, 1.63354673])

In [30]:
dist_from_center = np.array([dists[y[idx].argmax(axis=0)] for idx, dists in enumerate(distances)])

In [31]:
dist_from_center.shape

(2500,)

In [32]:
clean_dist_fc = np.delete(dist_from_center, poison_idxs,axis=0)

In [33]:
poison_dist_fc = dist_from_center[poison_idxs]

In [34]:
clean_dist_fc.shape

(2450,)

In [35]:
poison_dist_fc.shape

(50,)

In [36]:
clean_dist_fc.mean()

1.7435927679330914

In [37]:
poison_dist_fc.mean()

1.3778437407604498

In [38]:
x[0].reshape(3*32*32).shape

(3072,)

In [39]:
tmp_x = x.reshape(2500,3*32*32)

In [40]:
pw_distances = pairwise_distances(tmp_x)

In [41]:
for idx, dists in enumerate(pw_distances):
    pw_distances[idx][idx] = 10000000

In [42]:
pw_distances.shape

(2500, 2500)

In [43]:
nearest_neighbors_input_space = pw_distances.argsort(axis=1)[:,:9]

In [44]:
pw_distances_feature_space = pairwise_distances(repr_arr)

In [45]:
pw_distances_feature_space.shape

(2500, 2500)

In [46]:
for idx, dists in enumerate(pw_distances_feature_space):
    pw_distances_feature_space[idx][idx] = 10000000

In [47]:
nearest_neighbors_feature_space = pw_distances_feature_space.argsort(axis=1)[:,:9]

In [48]:
def get_model_output(model, inp):
    return model(torch.Tensor(inp.astype('float32')).reshape(-1,3,32,32).to('cuda')).detach().squeeze().cpu().numpy().argmax(axis=-1)

In [49]:
def get_model_scores(model, inp):
    return model(torch.Tensor(inp.astype('float32')).reshape(-1,3,32,32).to('cuda')).detach().squeeze().cpu().numpy()

In [50]:
def get_resized_model_outputs(model, inp, size_lst):
    outputs = []
    for size in size_lst:
        resized_img = resize_np_img(inp, size)
        comp_img = resize_np_img(inp, (32,32))
        out = get_model_output(model, inp)
        outputs.append(out)
    return outputs

In [51]:
def get_resized_model_scores(model, inp, size_lst):
    scores_lst = []
    for size in size_lst:
        resized_img = resize_np_img(inp, size)
        comp_img = resize_np_img(inp, (32,32))
        scores = get_model_scores(model, inp)
        scores_lst.append(scores)
    return np.array(scores_lst)

In [52]:
def get_resized_model_confs(model, inp, size_lst):
    prob_lst = []
    for size in size_lst:
        resized_img = resize_np_img(inp, size)
        comp_img = resize_np_img(inp, (32,32))
        prob = softmax(get_model_scores(model, inp)).max()
        prob_lst.append(prob)
    return np.array(prob_lst)*100

In [53]:
get_resized_model_confs(model, x[2], [(30,30),(25,25),(20,20),(15,15),(10,10)])

array([55.789722, 55.789722, 55.789722, 55.789722, 55.789722],
      dtype=float32)

In [54]:
get_resized_model_scores(model, x[2], [(30,30),(25,25),(20,20),(15,15),(10,10)]).shape

(5, 10)

In [55]:
get_resized_model_outputs(model, x[2], [(30,30),(25,25),(20,20),(15,15),(10,10)])

[3, 3, 3, 3, 3]

In [56]:
y[2]

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [57]:
get_model_output(model, x[:2])

array([3, 3])

In [58]:
get_model_output(model, x[2])

3

In [59]:
get_model_scores(model, x[1])

array([-10.599521 ,  -2.2259533,   8.5787115,  26.735243 ,  25.47399  ,
         5.5000925,  17.308693 , -22.568819 , -14.175107 , -34.035076 ],
      dtype=float32)

In [60]:
x[:2].shape

(2, 3, 32, 32)

In [61]:
resize_np_img(x[2], (20,20)).shape

(3, 20, 20)

In [62]:
resize_np_img(x[2], (20,20)).shape

(3, 20, 20)

In [63]:
softmax(get_model_scores(model, x[2]))

array([5.7652037e-14, 4.0182319e-11, 5.9868452e-09, 5.5789721e-01,
       4.4208115e-01, 1.7504421e-13, 2.1630131e-05, 2.2638722e-23,
       1.7918337e-12, 5.7403029e-23], dtype=float32)

In [64]:
for i in range(10):
    n = 1000 
    rnd_cidx = random.choice(list(range(1,n)) + list(range(n+50, 2500)))
    print(f'idx: {rnd_cidx} (clean) | label: {y[rnd_cidx].argmax(axis=0)}')
    print(f'in feature space: distance from label center: {distances[rnd_cidx][y[rnd_cidx].argmax(axis=0)]} | min distance from all label centers: {distances[rnd_cidx].min()}')
    print(f'the nearest label in input space: {y[pw_distances[rnd_cidx].argmin()].argmax(axis=0)}')
    print(f'the nearest neighbors labels (input space): {y[nearest_neighbors_input_space[rnd_cidx]].argmax(axis=1)}')
    print(f'the nearest neighbors labels (feature space): {y[nearest_neighbors_feature_space[rnd_cidx]].argmax(axis=1)}')
    print(f'output of compressed versions: {get_resized_model_outputs(model, x[rnd_cidx], [(30,30),(25,25),(20,20),(15,15),(10,10)])}')
    print(f'output conf of compressed versions: {get_resized_model_confs(model, x[rnd_cidx], [(30,30),(25,25),(20,20),(15,15),(10,10)])}')
    print(f'output of the model on the sample: {get_model_output(model, x[rnd_cidx])} (conf: {int(softmax(get_model_scores(model, x[rnd_cidx])).max()*100)}%)')
    print('--------------------------')

idx: 193 (clean) | label: 0
in feature space: distance from label center: 0.9539552316634619 | min distance from all label centers: 0.9539552316634619
the nearest label in input space: 0
the nearest neighbors labels (input space): [0 0 8 0 0 2 2 0 0]
the nearest neighbors labels (feature space): [0 1 1 0 9 0 6 1 9]
output of compressed versions: [4, 4, 4, 4, 4]
output conf of compressed versions: [68.4314 68.4314 68.4314 68.4314 68.4314]
output of the model on the sample: 4 (conf: 68%)
--------------------------
idx: 2238 (clean) | label: 8
in feature space: distance from label center: 2.1161087125137032 | min distance from all label centers: 0.7703600201244254
the nearest label in input space: 8
the nearest neighbors labels (input space): [8 2 2 2 0 2 4 4 8]
the nearest neighbors labels (feature space): [4 9 4 7 2 0 8 4 7]
output of compressed versions: [4, 4, 4, 4, 4]
output conf of compressed versions: [65.5617 65.5617 65.5617 65.5617 65.5617]
output of the model on the sample: 4 (c

In [65]:
for i in range(10):
    rnd_pidx = random.choice(list(range(1000,1050)))
    print(f'idx: {rnd_pidx} (poison) | label: {y[rnd_pidx].argmax(axis=0)} | true label: 3')
    print(f'in feature space: min distance from label center: {distances[rnd_pidx][y[rnd_pidx].argmax(axis=0)]}')
    print(f'in feature space: min distance from true label center: {distances[rnd_pidx][3]}')
    print(f'in feature space: min distance from all label centers: {distances[rnd_pidx].min()}')
    print(f'the nearest label in input space: {y[pw_distances[rnd_pidx].argmin()].argmax(axis=0)}')
    print(f'the nearest neighbors labels (input space): {y[nearest_neighbors_input_space[rnd_pidx]].argmax(axis=1)}')
    print(f'the nearest neighbors labels (feature space): {y[nearest_neighbors_feature_space[rnd_pidx]].argmax(axis=1)}')
    print(f'output of compressed versions: {get_resized_model_outputs(model, x[rnd_pidx], [(30,30),(25,25),(20,20),(15,15),(10,10)])}')
    print(f'output conf of compressed versions: {get_resized_model_confs(model, x[rnd_pidx], [(30,30),(25,25),(20,20),(15,15),(10,10)])}')
    print(f'output of the model on the sample: {get_model_output(model, x[rnd_pidx])} (conf: {int(softmax(get_model_scores(model, x[rnd_cidx])).max()*100)}%)')
    print('--------------------------')

idx: 1031 (poison) | label: 4 | true label: 3
in feature space: min distance from label center: 3.5398710738978565
in feature space: min distance from true label center: 1.942610952808988
in feature space: min distance from all label centers: 1.2242249756777734
the nearest label in input space: 0
the nearest neighbors labels (input space): [0 0 2 0 4 2 0 2 8]
the nearest neighbors labels (feature space): [3 0 5 8 7 8 8 9 9]
output of compressed versions: [3, 3, 3, 3, 3]
output conf of compressed versions: [96.81443 96.81443 96.81443 96.81443 96.81443]
output of the model on the sample: 3 (conf: 98%)
--------------------------
idx: 1032 (poison) | label: 4 | true label: 3
in feature space: min distance from label center: 1.0092061807451576
in feature space: min distance from true label center: 1.2251648583563
in feature space: min distance from all label centers: 0.800646845881727
the nearest label in input space: 2
the nearest neighbors labels (input space): [2 2 5 6 5 3 2 5 8]
the nea