In [None]:
from collections import OrderedDict
import torch

import sys
sys.path.append("..")
import gb
from gb.exutil import *
from copy import deepcopy

In [None]:
from gb.model import GraphSequential, PreprocessA, PreprocessX, PreprocessAUsingXMetric, GCN, RGCN, ProGNN, GNNGuard, \
    GRAND, MLP, SoftMedianPropagation
from gb.pert import sp_edge_diff_matrix, sp_feat_diff_matrix
from gb.torchext import mul
from gb import metric, preprocess
import torch.nn.functional as F

In [None]:
# Load the benchmark graph: adjacency A, node features X and labels y.
dataset = "cora"
A, X, y = gb.data.get_dataset(dataset)

# Problem sizes: rows/columns of X, and class count derived from the largest
# label (this presumes labels start at 0 — TODO confirm).
N, D = X.shape
C = y.max().item() + 1

# get_splits returns several splits; only the first one is used here.
first_split = gb.data.get_splits(y)[0]
train_nodes, val_nodes, test_nodes = first_split

# Everything below runs on the GPU.
A, X, y = (tensor.cuda() for tensor in (A, X, y))

In [None]:
ptb_rate = 0.15

In [None]:
# Attack budget: ptb_rate times half of the summed entries of A.
# NOTE(review): halving the entry sum yields the undirected edge count only if
# A is a symmetric 0/1 adjacency matrix — confirm.
budget = int(ptb_rate * (A.cpu().numpy().sum() // 2))
budget

In [None]:
# Percentage label used in the output file names below (0.15 -> "15").
# round() instead of int(): ptb_rate * 100 is a float product, and truncating
# it turns e.g. 0.29 * 100 == 28.999999999999996 into "28".
ptb_value = str(round(ptb_rate * 100))
ptb_value

In [None]:
# One (initially empty) result bucket per model; the sections below fill in
# entries such as 'clean', 'pois' and 'evas'.
accuracy_dict = {
    model_name: {}
    for model_name in ('GCN', 'GRAND', 'GNNGuard', 'GCNSVD', 'ProGNN')
}

In [None]:
torch.cuda.empty_cache()

In [None]:
import numpy as np
np.count_nonzero(A.cpu().numpy())

In [None]:
torch.tensor([])

In [None]:
from torch import nn
class NSALoss_v2(nn.Module):
    """Element-wise structural discrepancy between two embeddings of the same points.

    Unlike a scalar loss, ``forward`` returns the full matrix of absolute
    differences between the normalised pairwise-distance matrices of ``x`` and
    ``z``; callers reduce it themselves (e.g. a row-wise mean per point).

    Args:
        node_list: optional tensor stored on the module as ``self.node_list``.
            It is not used by ``forward``. Defaults to an empty tensor.
        **kwargs: accepted for signature compatibility and ignored.
    """

    def __init__(self, node_list=None, **kwargs):
        super().__init__()
        # Keep the caller's tensor (the previous code silently discarded it) and
        # build the empty default per instance rather than once at def time.
        self.node_list = torch.tensor([]) if node_list is None else node_list

    def forward(self, x, z):
        """Return ``|D_z - D_x|`` for the normalised pairwise distances.

        Args:
            x: 2-D tensor, one row per point.
            z: 2-D tensor with the same number of rows as ``x``.

        Returns:
            Square tensor with one row and one column per point.
        """
        # Scale by the 98th-percentile row norm rather than the maximum
        # (the maximum-based variant was the earlier, commented-out choice).
        normA1 = torch.quantile(torch.sqrt(torch.sum(x**2, dim=1)), 0.98)
        normA2 = torch.quantile(torch.sqrt(torch.sum(z**2, dim=1)), 0.98)

        # Pairwise Euclidean distances, each divided by twice its scale.
        A1_pairwise = torch.cdist(x, x) / (2 * normA1)
        A2_pairwise = torch.cdist(z, z) / (2 * normA2)

        return torch.abs(A2_pairwise - A1_pairwise)

In [None]:
from gb.sims import *
# NSALoss is not defined in this notebook; presumably it is supplied by the
# star import from gb.sims — TODO confirm.
criterion = NSALoss()
# Variant defined in this notebook that returns the full element-wise matrix.
criterion_v2 = NSALoss_v2()

## GCN

In [None]:
# Seed torch's RNG before any model is created.
torch.manual_seed(42)

# Keyword arguments forwarded to every .fit() call of this section.
fit_kwargs = {"lr": 1e-2, "weight_decay": 5e-4}


def make_model():
    """Return a new, untrained GCN (hidden_dims=[64], dropout=0.5) placed on the GPU."""
    gcn = gb.model.GCN(n_feat=D, n_class=C, hidden_dims=[64], dropout=0.5)
    return gcn.cuda()


# Reference model fitted on the unperturbed graph; reused by the evasion attack.
aux_model = make_model()
aux_model.fit((A, X), y, train_nodes, val_nodes, progress=False, **fit_kwargs)

In [None]:
# Test-set accuracy of the model trained on the clean graph.
clean_accuracy = gb.metric.accuracy(
    aux_model(A, X)[test_nodes],
    y[test_nodes],
).item()
accuracy_dict['GCN'].update(clean=clean_accuracy)

print("Clean test acc:   ", clean_accuracy)


In [None]:
clean_vals = aux_model.feature_vals

In [None]:
# Print the shape of every recorded tensor and replace it with a CPU copy.
# NOTE(review): clean_vals was bound directly to aux_model.feature_vals, so it
# appears to be the model's own dict and these assignments rewrite it in place
# — confirm that is intended.
for k,v in clean_vals.items():
    print(v.shape)
    clean_vals[k]=v.cpu()

In [None]:
# Independent snapshot of the clean activations, so that later changes to
# clean_vals do not affect the analysis.
clean_vals2 = {name: deepcopy(tensor) for name, tensor in clean_vals.items()}

In [None]:
# import numpy as np
# np.savez(f'feature_vals/gcn_clean_{ptb_value}.npz', **clean_vals)

In [None]:
clean_vals2

### Poisoning global

In [None]:
def loss_fn(A_flip):
    """Poisoning (meta-attack) objective for the plain GCN.

    Applies the flip mask ``A_flip`` to the adjacency ``A``, trains a fresh
    model on the perturbed graph and returns the mean tanh-squashed
    classification margin on the test nodes.
    """
    # Adding f * (1 - 2a) moves an entry a=0 up by f and an entry a=1 down by f.
    flip_direction = 1 - 2 * A
    A_pert = A + A_flip * flip_direction

    ########### Meta-Attack w/ Adam ##########
    # Training is made differentiable only when the perturbation carries grad.
    victim = make_model()
    victim.fit(
        (A_pert, X), y, train_nodes, val_nodes,
        progress=False,
        differentiable=A_pert.requires_grad,
        **fit_kwargs,
    )
    ##########################################

    test_scores = victim(A_pert, X)[test_nodes, :]
    return gb.metric.margin(test_scores, y[test_nodes]).tanh().mean()

def grad_fn(A_flip):
    """Gradient of the currently defined ``loss_fn`` with respect to ``A_flip``."""
    loss = loss_fn(A_flip)
    (gradient,) = torch.autograd.grad(loss, A_flip)
    return gradient

In [None]:
########### PGD for Meta-Attack ##########
# Runs projected gradient descent with the poisoning loss_fn/grad_fn and the
# edge budget; only the first returned value (the perturbation) is kept.
# NOTE(review): the meaning of the positional `True` argument is defined by
# gb.attack.proj_grad_descent and is not visible here — TODO confirm.
pert, _ = gb.attack.proj_grad_descent(A.shape, True, A.device, budget, grad_fn, loss_fn, \
                                      base_lr=0.01, grad_clip=1)

In [None]:
A_pert = A + gb.pert.edge_diff_matrix(pert, A)

In [None]:
print("Adversarial edges:", pert.shape[0])

In [None]:
# Train a fresh model on the poisoned graph and score it on the test nodes.
pois_model = make_model()
pois_model.fit((A_pert, X), y, train_nodes, val_nodes, progress=False, **fit_kwargs)
pois_accuracy = gb.metric.accuracy(
    pois_model(A_pert, X)[test_nodes],
    y[test_nodes],
).item()
accuracy_dict['GCN'].update(pois=pois_accuracy)

print("Poisoned test acc:", pois_accuracy)

In [None]:
pois_vals=pois_model.feature_vals

In [None]:
for k,v in pois_vals.items():
    print(v.shape)
    pois_vals[k]=v.cpu()

In [None]:
import numpy as np
np.savez('feature_vals/gcn_gp_'+ptb_value+'.npz', **pois_vals)

### Evasion global

In [None]:
def loss_fn(A_flip):
    """Evasion objective for the plain GCN.

    Applies the flip mask ``A_flip`` to ``A`` and evaluates the already trained
    ``aux_model`` on the perturbed graph (no retraining); returns the mean
    tanh-squashed margin on the test nodes.
    """
    A_pert = A + A_flip * (1 - 2 * A)

    ############### Aux-Attack ###############
    test_scores = aux_model(A_pert, X)[test_nodes, :]
    return gb.metric.margin(test_scores, y[test_nodes]).tanh().mean()

In [None]:
########### PGD for Aux-Attack ###########
pert, _ = gb.attack.proj_grad_descent(A.shape, True, A.device, budget, grad_fn, loss_fn,\
                                      base_lr=0.1)

In [None]:
# Materialise the perturbed adjacency and evaluate the clean-trained model on it.
A_pert = A + gb.pert.edge_diff_matrix(pert, A)
print("Adversarial edges:", pert.shape[0])
evas_accuracy = gb.metric.accuracy(
    aux_model(A_pert, X)[test_nodes],
    y[test_nodes],
).item()
accuracy_dict['GCN'].update(evas=evas_accuracy)

print("Evasion test acc: ", evas_accuracy)

In [None]:
aux_model(A_pert,X)
evasion_vals=aux_model.feature_vals

In [None]:
evasion_vals

In [None]:
for k,v in evasion_vals.items():
    print(v.shape)
    evasion_vals[k]=v.cpu()

In [None]:
evasion_vals2 = {}
for k,v in evasion_vals.items():
    evasion_vals2[k] = deepcopy(v)

In [None]:
# import numpy as np
# np.savez('feature_vals/gcn_ge_'+ptb_value+'.npz', **evasion_vals)

### Node Degree Analysis

In [None]:
A

In [None]:
A_pert

In [None]:
clean_vals2

In [None]:
evasion_vals2

In [None]:
criterion(clean_vals2['conv1'],evasion_vals2['conv1'])

In [None]:
nodewise_nsa = torch.mean(criterion_v2(clean_vals2['conv1'],evasion_vals2['conv1']),dim=1).cpu()

In [None]:
# Per-node amount of change in incident edges: row sums of |A - A_pert|.
edge_change = torch.abs(A - A_pert).sum(dim=1).cpu()
# Tensor reductions replace the Python builtin sum(), which walks the tensor
# element by element, and len(nonzero()), which builds the index list just to
# count it.
print(edge_change.sum())
print(int(torch.count_nonzero(edge_change)))

In [None]:
# Per-node absolute change of the row sum (degree) between A and A_pert.
degree_change = torch.abs(A.sum(dim=1) - A_pert.sum(dim=1)).cpu()
# Tensor reductions instead of builtin sum() / len(nonzero()) — same values,
# without iterating the tensor in Python.
print(degree_change.sum())
print(int(torch.count_nonzero(degree_change)))

In [None]:
from scipy.stats import pearsonr

# Correlate the per-node NSA score with the two per-node attack measures.

# Node-wise NSA vs. per-node edge change
pearson_corr_edge = pearsonr(nodewise_nsa, edge_change)
print(f"Pearson Correlation (edge): {pearson_corr_edge}")

# Node-wise NSA vs. per-node degree change
pearson_corr_degree = pearsonr(nodewise_nsa, degree_change)
print(f"Pearson Correlation (degree): {pearson_corr_degree}")

# Degree change vs. edge change (how closely the two attack measures agree)
pearson_corr_high = pearsonr(degree_change, edge_change)
print(f"Pearson Correlation (high): {pearson_corr_high}")

In [None]:
max_edge_changes = torch.sort(edge_change, descending=True)[1][:50]

In [None]:
edge_change[max_edge_changes]

In [None]:
import torch

# Sort the values
sorted_values, sorted_indices = torch.sort(nodewise_nsa)

# Rank-based percentile of each selected node within all node-wise NSA scores
percentiles = []
for index in max_edge_changes:
    # Several nodes may share the same score; take the first (lowest) rank so
    # that .item() always receives exactly one element.
    position = (sorted_values == nodewise_nsa[index]).nonzero(as_tuple=True)[0][0]
    percentile = (torch.true_divide(position, len(nodewise_nsa)) * 100).item()
    percentiles.append(percentile)

# Print the percentiles for the corresponding indices
# for idx, percentile in zip(max_edge_changes, percentiles):
#     print(f"Node {idx} is in the {percentile} percentile.")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Data for the 50 nodes with the largest edge change: bars show the edge
# change, the line shows the node-wise NSA of the same nodes.
values = edge_change[max_edge_changes]
# NOTE(review): this rebinds `percentiles` to raw NSA values, discarding the
# rank percentiles computed in the previous cell; the right-hand axis is still
# labelled 'Percentiles' — confirm which quantity is meant to be plotted.
percentiles = nodewise_nsa[max_edge_changes]

# Create a figure and a set of subplots
fig, ax1 = plt.subplots(figsize=(20,10))


# Plotting the column chart
color = 'tab:blue'
ax1.set_xlabel('Index')
ax1.set_ylabel('Values', color=color)
ax1.bar(range(len(values)), values, color=color)
ax1.tick_params(axis='y', labelcolor=color)

# Second y-axis sharing the same x-axis, used for the line plot
ax2 = ax1.twinx()  
color = 'tab:red'
ax2.set_ylabel('Percentiles', color=color)
ax2.plot(range(len(percentiles)), percentiles, color=color)
ax2.tick_params(axis='y', labelcolor=color)

# Show the plot
plt.show()

In [None]:
# svd_percentile  = deepcopy(percentiles)

In [None]:
nodewise_nsa_gcn = deepcopy(nodewise_nsa[max_edge_changes])

### Class Analysis

In [None]:
# Sweep over confidence-gap thresholds and, per threshold bin, count the nodes
# that fall into it and how many of those are misclassified.
thresholds = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.1]
#thresholds = [0.2]
# NOTE(review): 'conv1' is treated as the class-score layer here — confirm.
probabilities_clean = F.softmax(clean_vals2['conv1'],dim=-1).numpy()
probabilities_evasion = F.softmax(evasion_vals2['conv1'],dim=-1).numpy()

# Gap between the two largest class probabilities of every node. It does not
# depend on the threshold, so it is computed once instead of on every iteration.
_sorted_clean = np.sort(probabilities_clean, axis=1)
_sorted_evasion = np.sort(probabilities_evasion, axis=1)
_gap_clean = _sorted_clean[:, -1] - _sorted_clean[:, -2]
_gap_evasion = _sorted_evasion[:, -1] - _sorted_evasion[:, -2]

# Per-node "prediction differs from label" masks, also threshold-independent.
_labels = y.cpu().numpy()
_wrong_clean = np.argmax(probabilities_clean, axis=-1) != _labels
_wrong_evasion = np.argmax(probabilities_evasion, axis=-1) != _labels

clean_counts = []
evasion_counts = []

clean_misclassify = []
evasion_misclassify = []
for threshold in thresholds:
    boundary_indices_clean = np.where(_gap_clean < threshold)[0]
    boundary_indices_evasion = np.where(_gap_evasion < threshold)[0]
    clean_counts.append(boundary_indices_clean.shape[0])
    evasion_counts.append(boundary_indices_evasion.shape[0])

    # Misclassified nodes below this threshold. At threshold 0 the selection is
    # empty (gaps are never negative), so the count is 0 without a special case.
    clean_misclassify.append(np.count_nonzero(_wrong_clean[boundary_indices_clean]))
    evasion_misclassify.append(np.count_nonzero(_wrong_evasion[boundary_indices_evasion]))

# Turn cumulative counts into per-bin counts.
clean_counts=np.diff(clean_counts)
evasion_counts=np.diff(evasion_counts)
clean_misclassify = np.diff(clean_misclassify)
# Fixed: this previously diffed clean_misclassify (already diffed one line
# above) instead of the evasion counts.
evasion_misclassify = np.diff(evasion_misclassify)

In [None]:
import matplotlib.pyplot as plt
values1 = clean_counts[0:7]
values2 = evasion_counts[0:7]
plt.plot(values1)
plt.plot(values2)
plt.show()

In [None]:
# Nodes whose top-2 probability gap is below the threshold, for the clean and
# the evasion predictions respectively.
threshold = 0.5

_ranked_clean = np.sort(probabilities_clean, axis=1)
boundary_indices_clean = np.flatnonzero(
    _ranked_clean[:, -1] - _ranked_clean[:, -2] < threshold
)

_ranked_evasion = np.sort(probabilities_evasion, axis=1)
boundary_indices_evasion = np.flatnonzero(
    _ranked_evasion[:, -1] - _ranked_evasion[:, -2] < threshold
)

In [None]:
nodewise_nsa = torch.mean(criterion_v2(clean_vals2['conv1'],evasion_vals2['conv1']),dim=1).cpu()

In [None]:
#nodewise_nsa = nodewise_nsa[boundary_indices_evasion]

In [None]:
import torch

# Sort the values
sorted_values, sorted_indices = torch.sort(nodewise_nsa)

# Calculate the percentiles/quantiles
percentiles = []
for index in boundary_indices_evasion:
    position = (sorted_values == nodewise_nsa[index]).nonzero(as_tuple=True)[0][0]
    #print(position)
    percentile = (torch.true_divide(position, len(nodewise_nsa)) * 100).item()
    percentiles.append(percentile)

# Print the percentiles for the corresponding indices
# for idx, percentile in zip(max_edge_changes, percentiles):
#     print(f"Node {idx} is in the {percentile} percentile.")

In [None]:
nodewise_nsa[boundary_indices_evasion].shape

In [None]:
print(boundary_indices_clean.shape)
print(boundary_indices_evasion.shape)
print(np.mean(percentiles))
print(torch.mean(nodewise_nsa[boundary_indices_evasion]))

In [None]:
#thresholds = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.1]
#thresholds = [0.2]
# Class probabilities and per-node confidence (largest minus second-largest
# probability) for the clean and the evasion activations.
probabilities_clean = F.softmax(clean_vals2['conv1'], dim=-1).numpy()
probabilities_evasion = F.softmax(evasion_vals2['conv1'], dim=-1).numpy()

_ranked_clean = np.sort(probabilities_clean, axis=1)
clean_confidence = _ranked_clean[:, -1] - _ranked_clean[:, -2]

_ranked_evasion = np.sort(probabilities_evasion, axis=1)
evasion_confidence = _ranked_evasion[:, -1] - _ranked_evasion[:, -2]

In [None]:
# Indices of the nodes whose arg-max prediction equals the label, for the
# clean and the evasion probabilities.
_labels = y.cpu().numpy()
clean_correct_indices = np.flatnonzero(
    np.argmax(probabilities_clean, axis=-1) == _labels
)
evasion_correct_indices = np.flatnonzero(
    np.argmax(probabilities_evasion, axis=-1) == _labels
)

In [None]:
gcn_ec = evasion_confidence

In [None]:
threshold = 0.5
print(np.count_nonzero((clean_confidence - evasion_confidence)>=threshold))
print(np.count_nonzero(np.abs(clean_confidence - evasion_confidence)>=threshold))
print(np.count_nonzero((clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices])>=threshold))
print(np.count_nonzero(np.abs(clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices])>=threshold))

In [None]:
# Among the nodes still classified correctly under evasion, select the 50 whose
# confidence dropped the most.
_confidence_drop = clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices]
# argsort yields positions inside the evasion_correct_indices subset; map them
# back to node ids, because worst_indices is used to index per-node arrays
# such as nodewise_nsa.
worst_indices = evasion_correct_indices[np.argsort(_confidence_drop)[::-1][:50]]

In [None]:
nodewise_nsa.cpu().numpy()[worst_indices]

In [None]:
gcn_nodewise_nsa = nodewise_nsa.cpu().numpy()[worst_indices]
np.mean(gcn_nodewise_nsa)

## GCN-SVD

In [None]:
# Rank of the low-rank adjacency approximation and the .fit() keyword arguments.
rank = 50
fit_kwargs = {"lr": 1e-2, "weight_decay": 5e-4}


def make_model():
    """Return a new GCN-SVD pipeline on the GPU.

    The pipeline has two named stages: ``low_rank`` (adjacency preprocessing via
    ``gb.preprocess.low_rank`` with the module-level ``rank``) followed by ``gcn``.
    """
    stages = OrderedDict()
    stages["low_rank"] = gb.model.PreprocessA(lambda A: gb.preprocess.low_rank(A, rank))
    stages["gcn"] = gb.model.GCN(n_feat=D, n_class=C, hidden_dims=[64], dropout=0.5)
    return gb.model.GraphSequential(stages).cuda()


# Reference model fitted on the clean graph.
aux_model = make_model()
aux_model.fit((A, X), y, train_nodes, val_nodes, progress=False, **fit_kwargs)

# Preprocessed clean adjacency, reused by the attack losses of this section, and
# the weights for the optional (currently disabled) weighted attack variant.
A_low_rank = aux_model.low_rank(A)
A_weights = gb.metric.eigenspace_alignment(A, rank)


In [None]:
clean_accuracy = gb.metric.accuracy(aux_model(A, X)[test_nodes], y[test_nodes]).item()
accuracy_dict['GCNSVD']['clean']=clean_accuracy

print("Clean test acc:   ", clean_accuracy)

In [None]:
clean_vals = aux_model.gcn.feature_vals

In [None]:
for k,v in clean_vals.items():
    print(v.shape)
    clean_vals[k]=deepcopy(v.cpu())

In [None]:
clean_vals

In [None]:
clean_vals2 = {}
for k,v in clean_vals.items():
    clean_vals2[k] = deepcopy(v)

In [None]:
clean_vals2

In [None]:
# import numpy as np
# np.savez(f'feature_vals/gcnsvd_clean_{ptb_value}.npz', **clean_vals)

### Poisoning global

In [None]:
def loss_fn(A_flip):
    """Poisoning (meta-attack) objective for GCN-SVD.

    The flip mask is added to the precomputed low-rank adjacency rather than to
    ``A`` itself, and a fresh model without its ``low_rank`` stage is trained on
    the result. Returns the mean tanh-squashed margin on the test nodes.
    """
    A_diff = A_flip * (1 - 2 * A)

    ############# w/ weights #############
    #A_diff = A_diff * A_weights
    ######################################

    A_pert = A_low_rank + A_diff

    ############# Meta-Attack ############
    victim = make_model().sub(exclude=["low_rank"])
    victim.fit(
        (A_pert, X), y, train_nodes, val_nodes,
        progress=False,
        differentiable=A_pert.requires_grad,
        **fit_kwargs,
    )
    ######################################

    test_scores = victim(A_pert, X)[test_nodes, :]
    return gb.metric.margin(test_scores, y[test_nodes]).tanh().mean()

In [None]:
def grad_fn(A_flip):
    """Gradient of the currently defined ``loss_fn`` with respect to ``A_flip``."""
    loss = loss_fn(A_flip)
    (gradient,) = torch.autograd.grad(loss, A_flip)
    return gradient

In [None]:
########## PGD for Meta-Attack ##########
pert, _ = gb.attack.proj_grad_descent(A.shape, True, A.device, budget, grad_fn, loss_fn,\
                                      base_lr=0.1, grad_clip=0.1)
A_pert = A + gb.pert.edge_diff_matrix(pert, A)

In [None]:
pois_model = make_model()
pois_model.fit((A_pert, X), y, train_nodes, val_nodes, progress=False, **fit_kwargs)
pois_accuracy = gb.metric.accuracy(pois_model(A_pert, X)[test_nodes], y[test_nodes]).item()
accuracy_dict['GCNSVD']['pois']=pois_accuracy

print("Poisoned test acc:", pois_accuracy)

In [None]:
pois_vals=pois_model.gcn.feature_vals

In [None]:
for k,v in pois_vals.items():
    print(v.shape)
    pois_vals[k]=v.cpu()

In [None]:
import numpy as np
np.savez('feature_vals/gcnsvd_gp_'+ptb_value+'.npz', **pois_vals)

### Evasion global

In [None]:
def loss_fn(A_flip):
    """Evasion objective for GCN-SVD.

    Adds the flip mask to the precomputed low-rank adjacency and evaluates the
    trained ``aux_model`` with its ``low_rank`` stage excluded. Returns the mean
    tanh-squashed margin on the test nodes.
    """
    A_pert = A_low_rank + A_flip * (1 - 2 * A)

    ############# Aux-Attack #############
    gcn_only = aux_model.sub(exclude=["low_rank"])
    test_scores = gcn_only(A_pert, X)[test_nodes, :]
    return gb.metric.margin(test_scores, y[test_nodes]).tanh().mean()

def grad_fn(A_flip):
    """Gradient of the currently defined ``loss_fn`` with respect to ``A_flip``."""
    loss = loss_fn(A_flip)
    (gradient,) = torch.autograd.grad(loss, A_flip)
    return gradient

In [None]:
########### PGD for Aux-Attack ###########
pert, _ = gb.attack.proj_grad_descent(A.shape, True, A.device, budget, grad_fn,\
                                      loss_fn, base_lr=0.1)

In [None]:
A_pert = A + gb.pert.edge_diff_matrix(pert, A)
print("Adversarial edges:", pert.shape[0])
evas_accuracy = gb.metric.accuracy(aux_model(A_pert, X)[test_nodes], y[test_nodes]).item()
accuracy_dict['GCNSVD']['evas']=evas_accuracy

print("Evasion test acc: ", evas_accuracy)

In [None]:
aux_model(A_pert,X)

In [None]:
aux_model(A_pert,X)
evasion_vals=aux_model.gcn.feature_vals

In [None]:
for k,v in evasion_vals.items():
    print(v.shape)
    evasion_vals[k]=deepcopy(v.cpu())

In [None]:
evasion_vals2 = {}
for k,v in evasion_vals.items():
    evasion_vals2[k] = deepcopy(v)

In [None]:
# import numpy as np
# np.savez('feature_vals/gcnsvd_ge_'+ptb_value+'.npz', **evasion_vals)

### Node Degree Analysis

In [None]:
nodewise_nsa = torch.mean(criterion_v2(clean_vals2['conv1'],evasion_vals2['conv1']),dim=1).cpu()

In [None]:
#This measures how many edges were changed for each node
edge_change = torch.abs(A - A_pert).sum(dim=1).cpu()
print(sum(edge_change))
print(len(torch.nonzero(edge_change)))

In [None]:
#This measures how much the degree changed for each node
degree_change = torch.abs(A.sum(dim=1) - A_pert.sum(dim=1)).cpu()
print(sum(degree_change))
print(len(torch.nonzero(degree_change)))

In [None]:
from scipy.stats import pearsonr

# Ground truth scores

# Pearson correlation for original embeddings
pearson_corr_edge = pearsonr(nodewise_nsa, edge_change)
print(f"Pearson Correlation (edge): {pearson_corr_edge}")

# Pearson correlation for reduced embeddings
pearson_corr_degree = pearsonr(nodewise_nsa, degree_change)
print(f"Pearson Correlation (degree): {pearson_corr_degree}")

# Pearson correlation for reduced embeddings
pearson_corr_high = pearsonr(degree_change, edge_change)
print(f"Pearson Correlation (high): {pearson_corr_high}")

In [None]:
max_edge_changes = torch.sort(edge_change, descending=True)[1][:50]

In [None]:
edge_change[max_edge_changes]

In [None]:
import torch

# Sort the values
sorted_values, sorted_indices = torch.sort(nodewise_nsa)

# Rank-based percentile of each selected node within all node-wise NSA scores
percentiles = []
for index in max_edge_changes:
    # Several nodes may share the same score; take the first (lowest) rank so
    # that .item() always receives exactly one element.
    position = (sorted_values == nodewise_nsa[index]).nonzero(as_tuple=True)[0][0]
    percentile = (torch.true_divide(position, len(nodewise_nsa)) * 100).item()
    percentiles.append(percentile)

# Print the percentiles for the corresponding indices
# for idx, percentile in zip(max_edge_changes, percentiles):
#     print(f"Node {idx} is in the {percentile} percentile.")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Example data
values = edge_change[max_edge_changes]
percentiles = nodewise_nsa[max_edge_changes]

# Create a figure and a set of subplots
fig, ax1 = plt.subplots(figsize=(20,10))


# Plotting the column chart
color = 'tab:blue'
ax1.set_xlabel('Index')
ax1.set_ylabel('Values', color=color)
ax1.bar(range(len(values)), values, color=color)
ax1.tick_params(axis='y', labelcolor=color)

# Create a second y-axis for the percentiles
ax2 = ax1.twinx()  
color = 'tab:red'
ax2.set_ylabel('Percentiles', color=color)
ax2.plot(range(len(percentiles)), percentiles, color=color)
ax2.tick_params(axis='y', labelcolor=color)

# Show the plot
plt.show()

In [None]:
#svd_percentile  = deepcopy(percentiles)

In [None]:
nodewise_nsa_svd = deepcopy(nodewise_nsa[max_edge_changes])

### Class Analysis

In [None]:
# Sweep over confidence-gap thresholds and, per threshold bin, count the nodes
# that fall into it and how many of those are misclassified.
thresholds = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.1]
#thresholds = [0.2]
# NOTE(review): 'conv1' is treated as the class-score layer here — confirm.
probabilities_clean = F.softmax(clean_vals2['conv1'],dim=-1).numpy()
probabilities_evasion = F.softmax(evasion_vals2['conv1'],dim=-1).numpy()

# Gap between the two largest class probabilities of every node. It does not
# depend on the threshold, so it is computed once instead of on every iteration.
_sorted_clean = np.sort(probabilities_clean, axis=1)
_sorted_evasion = np.sort(probabilities_evasion, axis=1)
_gap_clean = _sorted_clean[:, -1] - _sorted_clean[:, -2]
_gap_evasion = _sorted_evasion[:, -1] - _sorted_evasion[:, -2]

# Per-node "prediction differs from label" masks, also threshold-independent.
_labels = y.cpu().numpy()
_wrong_clean = np.argmax(probabilities_clean, axis=-1) != _labels
_wrong_evasion = np.argmax(probabilities_evasion, axis=-1) != _labels

clean_counts = []
evasion_counts = []

clean_misclassify = []
evasion_misclassify = []
for threshold in thresholds:
    boundary_indices_clean = np.where(_gap_clean < threshold)[0]
    boundary_indices_evasion = np.where(_gap_evasion < threshold)[0]
    clean_counts.append(boundary_indices_clean.shape[0])
    evasion_counts.append(boundary_indices_evasion.shape[0])

    # Misclassified nodes below this threshold. At threshold 0 the selection is
    # empty (gaps are never negative), so the count is 0 without a special case.
    clean_misclassify.append(np.count_nonzero(_wrong_clean[boundary_indices_clean]))
    evasion_misclassify.append(np.count_nonzero(_wrong_evasion[boundary_indices_evasion]))

# Turn cumulative counts into per-bin counts.
clean_counts=np.diff(clean_counts)
evasion_counts=np.diff(evasion_counts)
clean_misclassify = np.diff(clean_misclassify)
# Fixed: this previously diffed clean_misclassify (already diffed one line
# above) instead of the evasion counts.
evasion_misclassify = np.diff(evasion_misclassify)

In [None]:
import matplotlib.pyplot as plt
values1 = clean_counts[0:7]
values2 = evasion_counts[0:7]
plt.plot(values1)
plt.plot(values2)
plt.show()

In [None]:
threshold = 0.5

boundary_indices_clean = np.where(\
    np.sort(probabilities_clean, axis=1)[:,-1] - np.sort(probabilities_clean, axis=1)[:,-2] < threshold)[0]

boundary_indices_evasion = np.where(\
    np.sort(probabilities_evasion, axis=1)[:,-1] - np.sort(probabilities_evasion, axis=1)[:,-2] < threshold)[0]

In [None]:
nodewise_nsa = torch.mean(criterion_v2(clean_vals2['conv1'],evasion_vals2['conv1']),dim=1).cpu()

In [None]:
#nodewise_nsa = nodewise_nsa[boundary_indices_evasion]

In [None]:
import torch

# Sort the values
sorted_values, sorted_indices = torch.sort(nodewise_nsa)

# Calculate the percentiles/quantiles
percentiles = []
for index in boundary_indices_evasion:
    position = (sorted_values == nodewise_nsa[index]).nonzero(as_tuple=True)[0][0]
    #print(position)
    percentile = (torch.true_divide(position, len(nodewise_nsa)) * 100).item()
    percentiles.append(percentile)

# Print the percentiles for the corresponding indices
# for idx, percentile in zip(max_edge_changes, percentiles):
#     print(f"Node {idx} is in the {percentile} percentile.")

In [None]:
nodewise_nsa[boundary_indices_evasion].shape

In [None]:
print(boundary_indices_clean.shape)
print(boundary_indices_evasion.shape)
print(np.mean(percentiles))
print(torch.mean(nodewise_nsa[boundary_indices_evasion]))

In [None]:
#thresholds = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.1]
#thresholds = [0.2]
probabilities_clean = F.softmax(clean_vals2['conv1'],dim=-1).numpy()
probabilities_evasion = F.softmax(evasion_vals2['conv1'],dim=-1).numpy()
clean_confidence=np.sort(probabilities_clean, axis=1)[:,-1] - np.sort(probabilities_clean, axis=1)[:,-2]
evasion_confidence=np.sort(probabilities_evasion, axis=1)[:,-1] - np.sort(probabilities_evasion, axis=1)[:,-2]

In [None]:
svd_ec = evasion_confidence

In [None]:
clean_correct_indices=np.argmax(probabilities_clean,axis=-1) == y.cpu().numpy()
clean_correct_indices = np.where(clean_correct_indices==True)[0]
evasion_correct_indices=np.argmax(probabilities_evasion,axis=-1) == y.cpu().numpy()
evasion_correct_indices = np.where(evasion_correct_indices==True)[0]

In [None]:
evasion_correct_indices.shape

In [None]:
threshold = 0.5
print(np.count_nonzero((clean_confidence - evasion_confidence)>=threshold))
print(np.count_nonzero(np.abs(clean_confidence - evasion_confidence)>=threshold))
print(np.count_nonzero((clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices])>=threshold))
print(np.count_nonzero(np.abs(clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices])>=threshold))

In [None]:
# Among the nodes still classified correctly under evasion, select the 50 whose
# confidence dropped the most.
_confidence_drop = clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices]
# argsort yields positions inside the evasion_correct_indices subset; map them
# back to node ids, because worst_indices is used to index per-node arrays
# such as nodewise_nsa.
worst_indices = evasion_correct_indices[np.argsort(_confidence_drop)[::-1][:50]]

In [None]:
nodewise_nsa.cpu().numpy()[worst_indices]

In [None]:
svd_nodewise_nsa = nodewise_nsa.cpu().numpy()[worst_indices]
np.mean(svd_nodewise_nsa)

In [None]:
print(boundary_indices_clean.shape)
print(boundary_indices_evasion.shape)
print(np.mean(percentiles))

## GNNGuard

In [None]:
# `rank` is assigned here as in the previous section; make_model below does not
# read it.
rank = 50
fit_kwargs = {"lr": 1e-2, "weight_decay": 5e-4}


def make_model(div_limit=1e-6):
    """Return a new, untrained GNNGuard model on the GPU.

    Args:
        div_limit: forwarded unchanged to ``gb.model.GNNGuard``.
    """
    guard = gb.model.GNNGuard(
        n_feat=D, n_class=C, hidden_dims=[64], dropout=0.5, div_limit=div_limit,
    )
    return guard.cuda()


# Reference model fitted on the clean graph.
aux_model = make_model()
aux_model.fit((A, X), y, train_nodes, val_nodes, progress=False, **fit_kwargs)

In [None]:
clean_accuracy = gb.metric.accuracy(aux_model(A, X)[test_nodes], y[test_nodes]).item()
accuracy_dict['GNNGuard']['clean']=clean_accuracy

print("Clean test acc:   ", clean_accuracy)

In [None]:
clean_vals = aux_model.feature_vals

In [None]:
for k,v in clean_vals.items():
    print(v.shape)
    clean_vals[k]=v.cpu()

In [None]:
clean_vals2 = {}
for k,v in clean_vals.items():
    clean_vals2[k] = deepcopy(v)

In [None]:
# import numpy as np
# np.savez(f'feature_vals/gnnguard_clean_{ptb_value}.npz', **clean_vals)

### Poisoning global

In [None]:
def loss_fn(A_flip):
    """Poisoning (meta-attack) objective for GNNGuard.

    Trains a fresh model for at most 50 epochs on the perturbed adjacency and
    returns the mean tanh-squashed margin on the test nodes.
    """
    A_pert = A + A_flip * (1 - 2 * A)

    ########## w/ real div_limit #########
    # No constructor overrides: the model is built with its default div_limit.
    alteration = {}
    ######################################

    ############# Meta-Attack ############
    victim = make_model(**alteration)
    victim.fit(
        (A_pert, X), y, train_nodes, val_nodes,
        progress=False,
        max_epochs=50,
        differentiable=A_pert.requires_grad,
        **fit_kwargs,
    )
    test_scores = victim(A_pert, X)[test_nodes, :]
    ######################################

    return gb.metric.margin(test_scores, y[test_nodes]).tanh().mean()

In [None]:
def grad_fn(A_flip):
    """Gradient of the currently defined ``loss_fn`` with respect to ``A_flip``."""
    loss = loss_fn(A_flip)
    (gradient,) = torch.autograd.grad(loss, A_flip)
    return gradient

In [None]:
pert, _ = gb.attack.proj_grad_descent(A.shape, True, A.device, budget, grad_fn, loss_fn,\
                                      base_lr=0.1, grad_clip=0.1)
A_pert = A + gb.pert.edge_diff_matrix(pert, A)

In [None]:
pois_model = make_model()
pois_model.fit((A_pert, X), y, train_nodes, val_nodes, progress=False, **fit_kwargs)
pois_accuracy = gb.metric.accuracy(pois_model(A_pert, X)[test_nodes], y[test_nodes]).item()
accuracy_dict['GNNGuard']['pois']=pois_accuracy

print("Poisoned test acc:", pois_accuracy)

In [None]:
pois_vals=pois_model.feature_vals

In [None]:
for k,v in pois_vals.items():
    print(v.shape)
    pois_vals[k]=v.cpu()

In [None]:
import numpy as np
np.savez('feature_vals/gnnguard_gp_'+ptb_value+'.npz', **pois_vals)

### Evasion global

In [None]:
def loss_fn(A_flip):
    """Evasion objective for GNNGuard.

    Evaluates the trained ``aux_model`` on the perturbed adjacency and returns
    the mean tanh-squashed margin on the test nodes.
    """
    A_pert = A + A_flip * (1 - 2 * A)

    ########## w/ real div_limit #########
    # No field overrides are requested, so the model runs with its own settings.
    alteration = {}

    ############# Aux-Attack #############
    with gb.model.changed_fields(aux_model, **alteration):
        test_scores = aux_model(A_pert, X)[test_nodes, :]

    return gb.metric.margin(test_scores, y[test_nodes]).tanh().mean()

def grad_fn(A_flip):
    """Gradient of the currently defined ``loss_fn`` with respect to ``A_flip``."""
    loss = loss_fn(A_flip)
    (gradient,) = torch.autograd.grad(loss, A_flip)
    return gradient

In [None]:
pert, _ = gb.attack.proj_grad_descent(A.shape, True, A.device, budget, grad_fn, loss_fn, base_lr=0.1)

A_pert = A + gb.pert.edge_diff_matrix(pert, A)
print("Adversarial edges:", pert.shape[0])
evas_accuracy = gb.metric.accuracy(aux_model(A_pert, X)[test_nodes], y[test_nodes]).item()
accuracy_dict['GNNGuard']['evas']=evas_accuracy

print("Evasion test acc: ", evas_accuracy)

In [None]:
aux_model(A_pert,X)
evasion_vals=aux_model.feature_vals

In [None]:
evasion_vals

In [None]:
for k,v in evasion_vals.items():
    print(v.shape)
    evasion_vals[k]=v.cpu()

In [None]:
evasion_vals2 = {}
for k,v in evasion_vals.items():
    evasion_vals2[k] = deepcopy(v)

In [None]:
# import numpy as np
# np.savez('feature_vals/gnnguard_ge_'+ptb_value+'.npz', **evasion_vals)

### Node Degree Analysis

In [None]:
criterion(clean_vals2['conv1'],evasion_vals2['conv1'])

In [None]:
nodewise_nsa = torch.mean(criterion_v2(clean_vals2['conv1'],evasion_vals2['conv1']),dim=1).cpu()

In [None]:
#This measures how many edges were changed for each node
edge_change = torch.abs(A - A_pert).sum(dim=1).cpu()
print(sum(edge_change))
print(len(torch.nonzero(edge_change)))

In [None]:
#This measures how much the degree changed for each node
degree_change = torch.abs(A.sum(dim=1) - A_pert.sum(dim=1)).cpu()
print(sum(degree_change))
print(len(torch.nonzero(degree_change)))

In [None]:
from scipy.stats import pearsonr

# Ground truth scores

# Pearson correlation for original embeddings
pearson_corr_edge = pearsonr(nodewise_nsa, edge_change)
print(f"Pearson Correlation (edge): {pearson_corr_edge}")

# Pearson correlation for reduced embeddings
pearson_corr_degree = pearsonr(nodewise_nsa, degree_change)
print(f"Pearson Correlation (degree): {pearson_corr_degree}")

# Pearson correlation for reduced embeddings
pearson_corr_high = pearsonr(degree_change, edge_change)
print(f"Pearson Correlation (high): {pearson_corr_high}")

In [None]:
max_edge_changes = torch.sort(edge_change, descending=True)[1][:50]

In [None]:
edge_change[max_edge_changes]

In [None]:
import torch

# Sort the values
sorted_values, sorted_indices = torch.sort(nodewise_nsa)

# Rank-based percentile of each selected node within all node-wise NSA scores
percentiles = []
for index in max_edge_changes:
    # Several nodes may share the same score; take the first (lowest) rank so
    # that .item() always receives exactly one element.
    position = (sorted_values == nodewise_nsa[index]).nonzero(as_tuple=True)[0][0]
    percentile = (torch.true_divide(position, len(nodewise_nsa)) * 100).item()
    percentiles.append(percentile)

# Print the percentiles for the corresponding indices
# for idx, percentile in zip(max_edge_changes, percentiles):
#     print(f"Node {idx} is in the {percentile} percentile.")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Example data
values = edge_change[max_edge_changes]
percentiles = nodewise_nsa[max_edge_changes] 

# Create a figure and a set of subplots
fig, ax1 = plt.subplots(figsize=(20,10))


# Plotting the column chart
color = 'tab:blue'
ax1.set_xlabel('Index')
ax1.set_ylabel('Values', color=color)
ax1.bar(range(len(values)), values, color=color)
ax1.tick_params(axis='y', labelcolor=color)

# Create a second y-axis for the percentiles
ax2 = ax1.twinx()  
color = 'tab:red'
ax2.set_ylabel('Percentiles', color=color)
ax2.plot(range(len(percentiles)), percentiles, color=color)
ax2.tick_params(axis='y', labelcolor=color)

# Show the plot
plt.show()

In [None]:
nodewise_nsa_guard = deepcopy(nodewise_nsa[max_edge_changes])

### Class Analysis

In [None]:
# Sweep over confidence-gap thresholds and, per threshold bin, count the nodes
# that fall into it and how many of those are misclassified.
thresholds = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.1]
#thresholds = [0.2]
# NOTE(review): 'conv1' is treated as the class-score layer here — confirm.
probabilities_clean = F.softmax(clean_vals2['conv1'],dim=-1).numpy()
probabilities_evasion = F.softmax(evasion_vals2['conv1'],dim=-1).numpy()

# Gap between the two largest class probabilities of every node. It does not
# depend on the threshold, so it is computed once instead of on every iteration.
_sorted_clean = np.sort(probabilities_clean, axis=1)
_sorted_evasion = np.sort(probabilities_evasion, axis=1)
_gap_clean = _sorted_clean[:, -1] - _sorted_clean[:, -2]
_gap_evasion = _sorted_evasion[:, -1] - _sorted_evasion[:, -2]

# Per-node "prediction differs from label" masks, also threshold-independent.
_labels = y.cpu().numpy()
_wrong_clean = np.argmax(probabilities_clean, axis=-1) != _labels
_wrong_evasion = np.argmax(probabilities_evasion, axis=-1) != _labels

clean_counts = []
evasion_counts = []

clean_misclassify = []
evasion_misclassify = []
for threshold in thresholds:
    boundary_indices_clean = np.where(_gap_clean < threshold)[0]
    boundary_indices_evasion = np.where(_gap_evasion < threshold)[0]
    clean_counts.append(boundary_indices_clean.shape[0])
    evasion_counts.append(boundary_indices_evasion.shape[0])

    # Misclassified nodes below this threshold. At threshold 0 the selection is
    # empty (gaps are never negative), so the count is 0 without a special case.
    clean_misclassify.append(np.count_nonzero(_wrong_clean[boundary_indices_clean]))
    evasion_misclassify.append(np.count_nonzero(_wrong_evasion[boundary_indices_evasion]))

# Turn cumulative counts into per-bin counts.
clean_counts=np.diff(clean_counts)
evasion_counts=np.diff(evasion_counts)
clean_misclassify = np.diff(clean_misclassify)
# Fixed: this previously diffed clean_misclassify (already diffed one line
# above) instead of the evasion counts.
evasion_misclassify = np.diff(evasion_misclassify)

In [None]:
import matplotlib.pyplot as plt
values1 = clean_counts[0:7]
values2 = evasion_counts[0:7]
plt.plot(values1)
plt.plot(values2)
plt.show()

In [None]:
threshold = 0.5

boundary_indices_clean = np.where(\
    np.sort(probabilities_clean, axis=1)[:,-1] - np.sort(probabilities_clean, axis=1)[:,-2] < threshold)[0]

boundary_indices_evasion = np.where(\
    np.sort(probabilities_evasion, axis=1)[:,-1] - np.sort(probabilities_evasion, axis=1)[:,-2] < threshold)[0]

In [None]:
nodewise_nsa = torch.mean(criterion_v2(clean_vals2['conv1'],evasion_vals2['conv1']),dim=1).cpu()

In [None]:
#nodewise_nsa = nodewise_nsa[boundary_indices_evasion]

In [None]:
import torch

# Sort the values
sorted_values, sorted_indices = torch.sort(nodewise_nsa)

# Percentile rank of every evasion boundary node's NSA value among all nodes.
# searchsorted (left side) returns the first position of each value in the sorted tensor,
# i.e. the same position the former per-node equality scan picked, without scanning the
# whole tensor once per node.
boundary_nsa = nodewise_nsa[boundary_indices_evasion]
positions = torch.searchsorted(sorted_values, boundary_nsa)
percentiles = (torch.true_divide(positions, len(nodewise_nsa)) * 100).tolist()

# Print the percentiles for the corresponding indices
# for idx, percentile in zip(max_edge_changes, percentiles):
#     print(f"Node {idx} is in the {percentile} percentile.")

In [None]:
nodewise_nsa[boundary_indices_evasion].shape

In [None]:
# Number of boundary nodes (margin below `threshold`) for the clean and the evasion outputs.
print(boundary_indices_clean.shape)
print(boundary_indices_evasion.shape)
# Mean NSA percentile rank and mean NSA value over the evasion boundary nodes.
print(np.mean(percentiles))
print(torch.mean(nodewise_nsa[boundary_indices_evasion]))

In [None]:
#thresholds = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.1]
#thresholds = [0.2]
# Class probabilities and per-node confidence margin (largest minus second-largest probability).
probabilities_clean = F.softmax(clean_vals2['conv1'], dim=-1).numpy()
probabilities_evasion = F.softmax(evasion_vals2['conv1'], dim=-1).numpy()
clean_confidence = np.diff(np.sort(probabilities_clean, axis=1)[:, -2:], axis=1)[:, 0]
evasion_confidence = np.diff(np.sort(probabilities_evasion, axis=1)[:, -2:], axis=1)[:, 0]

In [None]:
# Indices of the nodes whose predicted class matches the label, for clean and evasion outputs.
clean_correct_indices = np.flatnonzero(probabilities_clean.argmax(axis=-1) == y.cpu().numpy())
evasion_correct_indices = np.flatnonzero(probabilities_evasion.argmax(axis=-1) == y.cpu().numpy())

In [None]:
evasion_correct_indices.shape

In [None]:
guard_ec = evasion_confidence

In [None]:
threshold = 0.5
# Number of nodes whose confidence margin dropped by at least `threshold` under evasion,
# followed by the number whose margin changed by at least `threshold` in either direction.
print(np.count_nonzero((clean_confidence - evasion_confidence)>=threshold))
print(np.count_nonzero(np.abs(clean_confidence - evasion_confidence)>=threshold))
# The same two counts, restricted to nodes that are still classified correctly under evasion.
print(np.count_nonzero((clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices])>=threshold))
print(np.count_nonzero(np.abs(clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices])>=threshold))

In [None]:
# The 50 nodes (among those still classified correctly under evasion) with the largest
# drop in confidence margin. argsort yields positions *within* the evasion_correct_indices
# subset, so they are mapped back to node ids before being used to index per-node arrays.
confidence_drop = clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices]
worst_indices = evasion_correct_indices[np.argsort(confidence_drop)[::-1][:50]]

In [None]:
nodewise_nsa.cpu().numpy()[worst_indices]

In [None]:
# Keep the NSA values selected by worst_indices under a model-specific name; they are
# plotted against the other architectures in the "Plot all" section.
guard_nodewise_nsa = nodewise_nsa.cpu().numpy()[worst_indices]
np.mean(guard_nodewise_nsa)

In [None]:
# Recap: boundary-node counts (clean, evasion) and mean NSA percentile of the evasion boundary nodes.
print(boundary_indices_clean.shape)
print(boundary_indices_evasion.shape)
print(np.mean(percentiles))

## ProGNN

In [None]:
# Keyword arguments forwarded to ProGNN's fit() for the clean and the final poisoned training run.
fit_kwargs = {
    "gnn_lr": 0.01,
    "gnn_weight_decay": 0.0005,
    "adj_lr": 0.01,
    "adj_momentum": 0.9,
    "reg_adj_deviate": 1.0,
}

def make_model(A):
    """Return a fresh ProGNN on the GPU, built on adjacency ``A`` around a GCN with one 64-unit hidden layer."""
    gcn = GCN(n_feat=D, n_class=C, bias=True, activation="relu", hidden_dims=[64], dropout=0.5)
    return gb.model.ProGNN(A, gcn).cuda()


In [None]:
# Train the reference ("auxiliary") ProGNN on the clean graph; the evasion attack below targets it.
aux_model = make_model(A)
# filter_model_args is not defined in this notebook (presumably provided by the
# `from gb.exutil import *` star import -- TODO confirm); its result is passed to fit() as the model inputs.
model_args = filter_model_args(aux_model, A, X)
aux_model.fit(model_args, y, train_nodes, val_nodes, progress=True, **fit_kwargs)

In [None]:
clean_vals = aux_model.feature_vals

In [None]:
# Test accuracy of the clean-trained model, stored for the accuracy dump at the end of the notebook.
clean_accuracy = gb.metric.accuracy(aux_model(X)[test_nodes], y[test_nodes]).item()
accuracy_dict['ProGNN']['clean']=clean_accuracy

print("Clean test acc:   ", clean_accuracy)

In [None]:
# Print each stored tensor's shape and replace it in the dict by its CPU copy.
for layer_name in list(clean_vals):
    print(clean_vals[layer_name].shape)
    clean_vals[layer_name] = clean_vals[layer_name].cpu()

In [None]:
# Independent snapshot of the clean values, so later changes to clean_vals cannot alter it.
clean_vals2 = {layer_name: deepcopy(tensor) for layer_name, tensor in clean_vals.items()}

In [None]:
clean_vals2

In [None]:
# import numpy as np
# np.savez(f'feature_vals/prognn_clean_{ptb_value}.npz', **clean_vals)

### Poisoning global

In [None]:
torch.cuda.empty_cache()

In [None]:
# fit() settings used only inside the meta-attack loss below.
fit_kwargs2 = dict(
    gnn_lr=0.01,
    gnn_weight_decay=0.0005,
    adj_lr=0.01,
    adj_momentum=0.9,
    reg_adj_deviate=1.0,
    adj_optim_interval=2,
    reg_adj_l1=5e-4,
    reg_adj_nuclear=0,
    reg_feat_smooth=1e-3,
)

def loss_fn(A_flip):
    """Meta-attack (poisoning) loss for ProGNN.

    ``A_flip`` is multiplied element-wise by ``1 - 2 * A`` and added to ``A``; for a
    0/1 adjacency this adds the flipped entries that are 0 in ``A`` and removes the
    ones that are 1. A new model is trained on the resulting adjacency and the
    function returns the mean tanh of the classification margin on the test nodes.
    """
    A_pert = A + A_flip * (1 - 2 * A)

    ############# Meta-Attack ############
    model = make_model(A_pert)
    model.fit(filter_model_args(model, A_pert, X), y, train_nodes, val_nodes, progress=False,
              differentiable=A_pert.requires_grad, **fit_kwargs2)
    test_scores = model(X)[test_nodes, :]
    ######################################

    return gb.metric.margin(test_scores, y[test_nodes]).tanh().mean()

In [None]:
def grad_fn(A_flip):
    """Return the gradient of ``loss_fn(A_flip)`` with respect to ``A_flip``."""
    loss = loss_fn(A_flip)
    (grad,) = torch.autograd.grad(loss, A_flip)
    return grad

In [None]:
# Run the projected-gradient-descent attack with the meta-attack loss/gradient defined above,
# limited to `budget` flips. `pert` presumably holds one row per flipped edge -- TODO confirm.
pert, _ = gb.attack.proj_grad_descent(A.shape, True, A.device, budget, grad_fn, loss_fn,\
                                      base_lr=0.1, grad_clip=0.1)
# Build the poisoned adjacency matrix from the selected flips.
A_pert = A + gb.pert.edge_diff_matrix(pert, A)

In [None]:
# Train a fresh ProGNN on the poisoned graph (with the regular fit_kwargs) and record its test accuracy.
pois_model = make_model(A_pert)
model_args = filter_model_args(pois_model, A_pert, X)
pois_model.fit(model_args, y, train_nodes, val_nodes, progress=True, **fit_kwargs)
pois_accuracy = gb.metric.accuracy(pois_model(X)[test_nodes], y[test_nodes]).item()
accuracy_dict['ProGNN']['pois']=pois_accuracy

print("Poisoned test acc:", pois_accuracy)

In [None]:
pois_vals=pois_model.feature_vals

In [None]:
# Print each stored tensor's shape and replace it in the dict by its CPU copy.
for layer_name in list(pois_vals):
    print(pois_vals[layer_name].shape)
    pois_vals[layer_name] = pois_vals[layer_name].cpu()

In [None]:
import numpy as np
# Save the poisoned model's stored values, one array per key, tagged with the perturbation rate.
# NOTE(review): assumes the 'feature_vals/' directory already exists -- np.savez fails
# otherwise; confirm it is created earlier in the notebook.
np.savez('feature_vals/prognn_gp_'+ptb_value+'.npz', **pois_vals)

### Evasion global

In [None]:
def loss_fn(A_flip):
    """Evasion (aux-attack) loss for ProGNN.

    Builds the perturbed adjacency from ``A_flip`` and evaluates the already trained
    ``aux_model`` on it, returning the mean tanh of the test-node classification margin.

    Side effect: ``aux_model.S`` is overwritten with the perturbed adjacency on every call.
    """
    A_pert = A + A_flip * (1 - 2 * A)

    ############# Aux-Attack #############
    aux_model.S = A_pert
    test_scores = aux_model(X)[test_nodes, :]
    return gb.metric.margin(test_scores, y[test_nodes]).tanh().mean()

def grad_fn(A_flip):
    """Return the gradient of ``loss_fn(A_flip)`` with respect to ``A_flip``."""
    loss = loss_fn(A_flip)
    (grad,) = torch.autograd.grad(loss, A_flip)
    return grad

In [None]:
# Evasion attack against the already trained aux_model (no retraining inside the loss).
pert, _ = gb.attack.proj_grad_descent(A.shape, True, A.device, budget, grad_fn, loss_fn, base_lr=0.1)

A_pert = A + gb.pert.edge_diff_matrix(pert, A)
print("Adversarial edges:", pert.shape[0])
# NOTE(review): aux_model.S was last assigned inside loss_fn during the attack and is not
# re-assigned from the final A_pert built above -- confirm that aux_model(X) is evaluated on
# the intended perturbed graph.
evas_accuracy = gb.metric.accuracy(aux_model(X)[test_nodes], y[test_nodes]).item()
accuracy_dict['ProGNN']['evas']=evas_accuracy

print("Evasion test acc: ", evas_accuracy)

In [None]:
# Run a forward pass and then read the model's stored values (presumably feature_vals is
# refreshed by each forward call -- TODO confirm).
aux_model(X)
evasion_vals=aux_model.feature_vals

In [None]:
# Replace every stored tensor in the dict by its CPU copy.
evasion_vals.update({layer_name: tensor.cpu() for layer_name, tensor in evasion_vals.items()})

In [None]:
evasion_vals

In [None]:
# Independent snapshot of the evasion values, so later changes to evasion_vals cannot alter it.
evasion_vals2 = {layer_name: deepcopy(tensor) for layer_name, tensor in evasion_vals.items()}

In [None]:
# import numpy as np
# np.savez('feature_vals/prognn_ge_'+ptb_value+'.npz', **evasion_vals)

### Node Degree Analysis

In [None]:
criterion(clean_vals2['conv1'],evasion_vals2['conv1'])

In [None]:
# Row-wise mean of criterion_v2's output for the clean vs. evasion 'conv1' values
# (criterion_v2 is defined elsewhere; presumably an NSALoss_v2 instance -- TODO confirm).
nodewise_nsa = criterion_v2(clean_vals2['conv1'], evasion_vals2['conv1']).mean(dim=1).cpu()

In [None]:
# Per node: summed absolute difference between its row in A and in A_pert
# (for 0/1 adjacencies, the number of changed edges), then the total and the number of affected nodes.
edge_change = (A - A_pert).abs().sum(dim=1).cpu()
print(sum(edge_change))
print(torch.nonzero(edge_change).shape[0])

In [None]:
# Per node: absolute change of the row sum (degree) between A and A_pert,
# then the total and the number of nodes whose degree changed.
degree_change = (A.sum(dim=1) - A_pert.sum(dim=1)).abs().cpu()
print(sum(degree_change))
print(torch.nonzero(degree_change).shape[0])

In [None]:
from scipy.stats import pearsonr

# How strongly does the nodewise NSA track the amount of perturbation each node received?

# Nodewise NSA vs. per-node edge change
pearson_corr_edge = pearsonr(nodewise_nsa, edge_change)
print(f"Pearson Correlation (edge): {pearson_corr_edge}")

# Nodewise NSA vs. per-node absolute degree change
pearson_corr_degree = pearsonr(nodewise_nsa, degree_change)
print(f"Pearson Correlation (degree): {pearson_corr_degree}")

# Degree change vs. edge change (how the two perturbation measures relate to each other)
pearson_corr_high = pearsonr(degree_change, edge_change)
print(f"Pearson Correlation (high): {pearson_corr_high}")

In [None]:
# Indices of the 50 nodes with the largest edge_change, in descending order.
max_edge_changes = torch.sort(edge_change, descending=True).indices[:50]

In [None]:
edge_change[max_edge_changes]

In [None]:
import torch

# Sort the values
sorted_values, sorted_indices = torch.sort(nodewise_nsa)

# Percentile rank of the NSA value of each of the most-perturbed nodes among all nodes.
# searchsorted (left side) returns the first position of each value in the sorted tensor.
# The former per-node equality scan kept *all* matching positions, so `.item()` raised
# as soon as an NSA value occurred more than once; this form is tie-safe and vectorised.
top_nsa_values = nodewise_nsa[max_edge_changes]
positions = torch.searchsorted(sorted_values, top_nsa_values)
percentiles = (torch.true_divide(positions, len(nodewise_nsa)) * 100).tolist()

# Print the percentiles for the corresponding indices
# for idx, percentile in zip(max_edge_changes, percentiles):
#     print(f"Node {idx} is in the {percentile} percentile.")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# The 50 most-perturbed nodes: edge change (bars) against their nodewise NSA (line).
values = edge_change[max_edge_changes]
# Stored under its own name: this used to overwrite the `percentiles` list from the
# previous cell with raw NSA values, and the axis was labelled "Percentiles" accordingly.
top_nsa = nodewise_nsa[max_edge_changes]

# Create a figure and a set of subplots
fig, ax1 = plt.subplots(figsize=(20,10))


# Bars: edge change per node (left axis)
color = 'tab:blue'
ax1.set_xlabel('Node rank (by edge change)')
ax1.set_ylabel('Edge change', color=color)
ax1.bar(range(len(values)), values, color=color)
ax1.tick_params(axis='y', labelcolor=color)

# Line: nodewise NSA of the same nodes (right axis)
ax2 = ax1.twinx()
color = 'tab:red'
ax2.set_ylabel('Nodewise NSA', color=color)
ax2.plot(range(len(top_nsa)), top_nsa, color=color)
ax2.tick_params(axis='y', labelcolor=color)

# Show the plot
plt.show()

In [None]:
#svd_percentile  = deepcopy(percentiles)

In [None]:
nodewise_nsa_prognn = deepcopy(nodewise_nsa[max_edge_changes])

In [None]:
nodewise_nsa_prognn

### Class Analysis

In [None]:
thresholds = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.1]
#thresholds = [0.2]
# Per-node class probabilities from the stored 'conv1' outputs of the clean and the
# evasion forward pass (presumably the output-layer logits -- TODO confirm).
probabilities_clean = F.softmax(clean_vals2['conv1'],dim=-1).numpy()
probabilities_evasion = F.softmax(evasion_vals2['conv1'],dim=-1).numpy()

# Confidence margin = top-1 minus top-2 class probability. It does not depend on the
# threshold, so it is computed once instead of re-sorting inside the loop.
sorted_probs_clean = np.sort(probabilities_clean, axis=1)
sorted_probs_evasion = np.sort(probabilities_evasion, axis=1)
margin_clean = sorted_probs_clean[:, -1] - sorted_probs_clean[:, -2]
margin_evasion = sorted_probs_evasion[:, -1] - sorted_probs_evasion[:, -2]

predictions_clean = np.argmax(probabilities_clean, axis=-1)
predictions_evasion = np.argmax(probabilities_evasion, axis=-1)
y_np = y.cpu().numpy()

# Cumulative statistics: for each threshold, how many nodes have a margin below it
# and how many of those nodes are misclassified.
clean_counts = []
evasion_counts = []

clean_misclassify = []
evasion_misclassify = []
for threshold in thresholds:
    boundary_indices_clean = np.where(margin_clean < threshold)[0]
    boundary_indices_evasion = np.where(margin_evasion < threshold)[0]
    clean_counts.append(boundary_indices_clean.shape[0])
    evasion_counts.append(boundary_indices_evasion.shape[0])

    if threshold==0:
        clean_misclassify.append(0)
        evasion_misclassify.append(0)
        continue
    #Calculate misclassified nodes at this threshold
    misclassify_count_clean = np.count_nonzero(
        predictions_clean[boundary_indices_clean] != y_np[boundary_indices_clean])
    misclassify_count_evasion = np.count_nonzero(
        predictions_evasion[boundary_indices_evasion] != y_np[boundary_indices_evasion])
    clean_misclassify.append(misclassify_count_clean)
    evasion_misclassify.append(misclassify_count_evasion)

# Turn the cumulative series into per-bin values (one bin per pair of consecutive thresholds).
clean_counts=np.diff(clean_counts)
evasion_counts=np.diff(evasion_counts)
clean_misclassify = np.diff(clean_misclassify)
# Fixed: this previously differenced the (already differenced) clean series again.
evasion_misclassify = np.diff(evasion_misclassify)

In [None]:
import matplotlib.pyplot as plt
# Number of nodes per confidence-margin bin (first 7 bins only), clean vs. evasion outputs.
values1 = clean_counts[0:7]
values2 = evasion_counts[0:7]
plt.plot(values1, label='clean')
plt.plot(values2, label='evasion')
plt.xlabel('Confidence-margin bin index')
plt.ylabel('Number of nodes')
plt.legend()
plt.show()

In [None]:
threshold = 0.5

# Boundary nodes: the gap between the two largest class probabilities is below `threshold`.
boundary_indices_clean = np.flatnonzero(
    np.diff(np.sort(probabilities_clean, axis=1)[:, -2:], axis=1)[:, 0] < threshold)

boundary_indices_evasion = np.flatnonzero(
    np.diff(np.sort(probabilities_evasion, axis=1)[:, -2:], axis=1)[:, 0] < threshold)

In [None]:
# Row-wise mean of criterion_v2's output for the clean vs. evasion 'conv1' values
# (criterion_v2 is defined elsewhere; presumably an NSALoss_v2 instance -- TODO confirm).
nodewise_nsa = criterion_v2(clean_vals2['conv1'], evasion_vals2['conv1']).mean(dim=1).cpu()

In [None]:
#nodewise_nsa = nodewise_nsa[boundary_indices_evasion]

In [None]:
import torch

# Sort the values
sorted_values, sorted_indices = torch.sort(nodewise_nsa)

# Percentile rank of every evasion boundary node's NSA value among all nodes.
# searchsorted (left side) returns the first position of each value in the sorted tensor,
# i.e. the same position the former per-node equality scan picked, without scanning the
# whole tensor once per node.
boundary_nsa = nodewise_nsa[boundary_indices_evasion]
positions = torch.searchsorted(sorted_values, boundary_nsa)
percentiles = (torch.true_divide(positions, len(nodewise_nsa)) * 100).tolist()

# Print the percentiles for the corresponding indices
# for idx, percentile in zip(max_edge_changes, percentiles):
#     print(f"Node {idx} is in the {percentile} percentile.")

In [None]:
nodewise_nsa[boundary_indices_evasion].shape

In [None]:
# Number of boundary nodes (margin below `threshold`) for the clean and the evasion outputs.
print(boundary_indices_clean.shape)
print(boundary_indices_evasion.shape)
# Mean NSA percentile rank and mean NSA value over the evasion boundary nodes.
print(np.mean(percentiles))
print(torch.mean(nodewise_nsa[boundary_indices_evasion]))

In [None]:
#thresholds = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.1]
#thresholds = [0.2]
# Class probabilities and per-node confidence margin (largest minus second-largest probability).
probabilities_clean = F.softmax(clean_vals2['conv1'], dim=-1).numpy()
probabilities_evasion = F.softmax(evasion_vals2['conv1'], dim=-1).numpy()
clean_confidence = np.diff(np.sort(probabilities_clean, axis=1)[:, -2:], axis=1)[:, 0]
evasion_confidence = np.diff(np.sort(probabilities_evasion, axis=1)[:, -2:], axis=1)[:, 0]

In [None]:
prognn_ec = evasion_confidence

In [None]:
# Indices of the nodes whose predicted class matches the label, for clean and evasion outputs.
clean_correct_indices = np.flatnonzero(probabilities_clean.argmax(axis=-1) == y.cpu().numpy())
evasion_correct_indices = np.flatnonzero(probabilities_evasion.argmax(axis=-1) == y.cpu().numpy())

In [None]:
evasion_correct_indices.shape

In [None]:
threshold = 0.5
# Number of nodes whose confidence margin dropped by at least `threshold` under evasion,
# followed by the number whose margin changed by at least `threshold` in either direction.
print(np.count_nonzero((clean_confidence - evasion_confidence)>=threshold))
print(np.count_nonzero(np.abs(clean_confidence - evasion_confidence)>=threshold))
# The same two counts, restricted to nodes that are still classified correctly under evasion.
print(np.count_nonzero((clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices])>=threshold))
print(np.count_nonzero(np.abs(clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices])>=threshold))

In [None]:
# The 50 nodes (among those still classified correctly under evasion) with the largest
# drop in confidence margin. argsort yields positions *within* the evasion_correct_indices
# subset, so they are mapped back to node ids before being used to index per-node arrays.
confidence_drop = clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices]
worst_indices = evasion_correct_indices[np.argsort(confidence_drop)[::-1][:50]]

In [None]:
nodewise_nsa.cpu().numpy()[worst_indices]

In [None]:
# Keep the NSA values selected by worst_indices under a model-specific name; they are
# plotted against the other architectures in the "Plot all" section.
prognn_nodewise_nsa = nodewise_nsa.cpu().numpy()[worst_indices]
np.mean(prognn_nodewise_nsa)

## GRAND

In [None]:
# Hyper-parameters: model_kwargs1 goes to the inner MLP, model_kwargs2 to the GRAND wrapper.
model_kwargs1 = {"hidden_dims": [64], "dropout": 0.5}
model_kwargs2 = {"dropnode": 0.5, "mlp_input_dropout": 0.5, "order": 2}

def make_model():
    """Return a fresh GRAND model on the GPU wrapping an MLP built from ``model_kwargs1``."""
    mlp = MLP(n_feat=D, n_class=C, bias=True, **model_kwargs1)
    return GRAND(mlp, **model_kwargs2).cuda()

#aux_model = make_model()

In [None]:
aux_model = make_model()

In [None]:
# Train the reference GRAND model on the clean graph; fit_kwargs is reused by the
# poisoning loss and the final poisoned training run below.
fit_kwargs = dict(lr=0.1, weight_decay=1e-4)
aux_model.fit((A,X), y, train_nodes, val_nodes, progress=True, **fit_kwargs)

In [None]:
# Test accuracy of the clean-trained model, stored for the accuracy dump at the end of the notebook.
clean_accuracy = gb.metric.accuracy(aux_model(A,X)[test_nodes], y[test_nodes]).item()
accuracy_dict['GRAND']['clean']=clean_accuracy

print("Clean test acc:   ", clean_accuracy)

In [None]:
clean_vals = aux_model.mlp.feature_vals

In [None]:
# Replace every stored tensor in the dict by its CPU copy.
clean_vals.update({layer_name: tensor.cpu() for layer_name, tensor in clean_vals.items()})

In [None]:
# Independent snapshot of the clean values, so later changes to clean_vals cannot alter it.
clean_vals2 = {layer_name: deepcopy(tensor) for layer_name, tensor in clean_vals.items()}

In [None]:
clean_vals2

In [None]:
# import numpy as np
# np.savez(f'feature_vals/grand_clean_{ptb_value}.npz', **clean_vals)

### Poisoning global

In [None]:
def loss_fn(A_flip):
    """Meta-attack (poisoning) loss for GRAND.

    Builds the perturbed adjacency ``A + A_flip * (1 - 2 * A)``, trains a new model on it
    for at most 100 epochs and returns the mean tanh of the classification margin on the
    test nodes.
    """
    A_pert = A + A_flip * (1 - 2 * A)

    ############# Meta-Attack ############
    model = make_model()
    model.fit((A_pert, X), y, train_nodes, val_nodes, progress=False, max_epochs=100,
              differentiable=A_pert.requires_grad, **fit_kwargs)
    test_scores = model(A_pert, X)[test_nodes, :]
    ######################################

    return gb.metric.margin(test_scores, y[test_nodes]).tanh().mean()

In [None]:
def grad_fn(A_flip):
    """Return the gradient of ``loss_fn(A_flip)`` with respect to ``A_flip``."""
    loss = loss_fn(A_flip)
    (grad,) = torch.autograd.grad(loss, A_flip)
    return grad

In [None]:
# Run the projected-gradient-descent attack with the meta-attack loss/gradient defined above,
# limited to `budget` flips. `pert` presumably holds one row per flipped edge -- TODO confirm.
pert, _ = gb.attack.proj_grad_descent(A.shape, True, A.device, budget, grad_fn, loss_fn,\
                                      base_lr=0.1, grad_clip=0.1)
# Build the poisoned adjacency matrix from the selected flips.
A_pert = A + gb.pert.edge_diff_matrix(pert, A)

In [None]:
# Train a fresh GRAND model on the poisoned graph and record its test accuracy on that graph.
pois_model = make_model()
pois_model.fit((A_pert, X), y, train_nodes, val_nodes, progress=False, **fit_kwargs)
pois_accuracy = gb.metric.accuracy(pois_model(A_pert, X)[test_nodes], y[test_nodes]).item()
accuracy_dict['GRAND']['pois']=pois_accuracy

print("Poisoned test acc:", pois_accuracy)

In [None]:
# Values stored on the poisoned model's inner MLP (same dict object, not a copy); displayed for inspection.
pois_vals = pois_model.mlp.feature_vals
pois_vals

In [None]:
# Replace every stored tensor in the dict by its CPU copy.
pois_vals.update({layer_name: tensor.cpu() for layer_name, tensor in pois_vals.items()})

In [None]:
import numpy as np
# Save the poisoned model's stored values, one array per key, tagged with the perturbation rate.
# NOTE(review): assumes the 'feature_vals/' directory already exists -- np.savez fails
# otherwise; confirm it is created earlier in the notebook.
np.savez('feature_vals/grand_gp_'+ptb_value+'.npz', **pois_vals)

### Evasion global

In [None]:
def loss_fn(A_flip):
    """Evasion (aux-attack) loss for GRAND.

    Evaluates the already trained ``aux_model`` on the perturbed adjacency
    ``A + A_flip * (1 - 2 * A)`` and returns the mean tanh of the classification
    margin on the test nodes.
    """
    A_pert = A + A_flip * (1 - 2 * A)

    ############# Aux-Attack #############
    test_scores = aux_model(A_pert, X)[test_nodes, :]

    return gb.metric.margin(test_scores, y[test_nodes]).tanh().mean()

def grad_fn(A_flip):
    """Return the gradient of ``loss_fn(A_flip)`` with respect to ``A_flip``."""
    loss = loss_fn(A_flip)
    (grad,) = torch.autograd.grad(loss, A_flip)
    return grad

In [None]:
# Evasion attack against the already trained aux_model (no retraining inside the loss).
pert, _ = gb.attack.proj_grad_descent(A.shape, True, A.device, budget, grad_fn, loss_fn, base_lr=0.1)

# Build the perturbed adjacency and evaluate the clean-trained model on it.
A_pert = A + gb.pert.edge_diff_matrix(pert, A)
print("Adversarial edges:", pert.shape[0])
evas_accuracy = gb.metric.accuracy(aux_model(A_pert, X)[test_nodes], y[test_nodes]).item()
accuracy_dict['GRAND']['evas']=evas_accuracy

print("Evasion test acc: ", evas_accuracy)

In [None]:
# Run a forward pass on the perturbed graph and then read the inner MLP's stored values
# (presumably feature_vals is refreshed by each forward call -- TODO confirm).
aux_model(A_pert,X)
evasion_vals = aux_model.mlp.feature_vals

In [None]:
# Replace every stored tensor in the dict by its CPU copy.
evasion_vals.update({layer_name: tensor.cpu() for layer_name, tensor in evasion_vals.items()})

In [None]:
# import numpy as np
# np.savez('feature_vals/grand_ge_'+ptb_value+'.npz', **evasion_vals)

In [None]:
# Independent snapshot of the evasion values, so later changes to evasion_vals cannot alter it.
evasion_vals2 = {layer_name: deepcopy(tensor) for layer_name, tensor in evasion_vals.items()}

### Node Degree Analysis

In [None]:
criterion(clean_vals2['conv1'],evasion_vals2['conv1'])

In [None]:
# Row-wise mean of criterion_v2's output for the clean vs. evasion 'conv1' values
# (criterion_v2 is defined elsewhere; presumably an NSALoss_v2 instance -- TODO confirm).
nodewise_nsa = criterion_v2(clean_vals2['conv1'], evasion_vals2['conv1']).mean(dim=1).cpu()

In [None]:
# Per node: summed absolute difference between its row in A and in A_pert
# (for 0/1 adjacencies, the number of changed edges), then the total and the number of affected nodes.
edge_change = (A - A_pert).abs().sum(dim=1).cpu()
print(sum(edge_change))
print(torch.nonzero(edge_change).shape[0])

In [None]:
# Per node: absolute change of the row sum (degree) between A and A_pert,
# then the total and the number of nodes whose degree changed.
degree_change = (A.sum(dim=1) - A_pert.sum(dim=1)).abs().cpu()
print(sum(degree_change))
print(torch.nonzero(degree_change).shape[0])

In [None]:
from scipy.stats import pearsonr

# How strongly does the nodewise NSA track the amount of perturbation each node received?

# Nodewise NSA vs. per-node edge change
pearson_corr_edge = pearsonr(nodewise_nsa, edge_change)
print(f"Pearson Correlation (edge): {pearson_corr_edge}")

# Nodewise NSA vs. per-node absolute degree change
pearson_corr_degree = pearsonr(nodewise_nsa, degree_change)
print(f"Pearson Correlation (degree): {pearson_corr_degree}")

# Degree change vs. edge change (how the two perturbation measures relate to each other)
pearson_corr_high = pearsonr(degree_change, edge_change)
print(f"Pearson Correlation (high): {pearson_corr_high}")

In [None]:
# Indices of the 50 nodes with the largest edge_change, in descending order.
max_edge_changes = torch.sort(edge_change, descending=True).indices[:50]

In [None]:
edge_change[max_edge_changes]

In [None]:
import torch

# Sort the values
sorted_values, sorted_indices = torch.sort(nodewise_nsa)

# Percentile rank of the NSA value of each of the most-perturbed nodes among all nodes.
# searchsorted (left side) returns the first position of each value in the sorted tensor.
# The former per-node equality scan kept *all* matching positions, so `.item()` raised
# as soon as an NSA value occurred more than once; this form is tie-safe and vectorised.
top_nsa_values = nodewise_nsa[max_edge_changes]
positions = torch.searchsorted(sorted_values, top_nsa_values)
percentiles = (torch.true_divide(positions, len(nodewise_nsa)) * 100).tolist()

# Print the percentiles for the corresponding indices
# for idx, percentile in zip(max_edge_changes, percentiles):
#     print(f"Node {idx} is in the {percentile} percentile.")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# The 50 most-perturbed nodes: edge change (bars) against their nodewise NSA (line).
values = edge_change[max_edge_changes]
# Stored under its own name: this used to overwrite the `percentiles` list from the
# previous cell with raw NSA values, and the axis was labelled "Percentiles" accordingly.
top_nsa = nodewise_nsa[max_edge_changes]

# Create a figure and a set of subplots
fig, ax1 = plt.subplots(figsize=(20,10))


# Bars: edge change per node (left axis)
color = 'tab:blue'
ax1.set_xlabel('Node rank (by edge change)')
ax1.set_ylabel('Edge change', color=color)
ax1.bar(range(len(values)), values, color=color)
ax1.tick_params(axis='y', labelcolor=color)

# Line: nodewise NSA of the same nodes (right axis)
ax2 = ax1.twinx()
color = 'tab:red'
ax2.set_ylabel('Nodewise NSA', color=color)
ax2.plot(range(len(top_nsa)), top_nsa, color=color)
ax2.tick_params(axis='y', labelcolor=color)

# Show the plot
plt.show()

In [None]:
#svd_percentile  = deepcopy(percentiles)

In [None]:
nodewise_nsa_grand = deepcopy(nodewise_nsa[max_edge_changes])

### Class Analysis

In [None]:
thresholds = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.1]
#thresholds = [0.2]
# Per-node class probabilities from the stored 'conv1' outputs of the clean and the
# evasion forward pass (presumably the output-layer logits -- TODO confirm).
probabilities_clean = F.softmax(clean_vals2['conv1'],dim=-1).numpy()
probabilities_evasion = F.softmax(evasion_vals2['conv1'],dim=-1).numpy()

# Confidence margin = top-1 minus top-2 class probability. It does not depend on the
# threshold, so it is computed once instead of re-sorting inside the loop.
sorted_probs_clean = np.sort(probabilities_clean, axis=1)
sorted_probs_evasion = np.sort(probabilities_evasion, axis=1)
margin_clean = sorted_probs_clean[:, -1] - sorted_probs_clean[:, -2]
margin_evasion = sorted_probs_evasion[:, -1] - sorted_probs_evasion[:, -2]

predictions_clean = np.argmax(probabilities_clean, axis=-1)
predictions_evasion = np.argmax(probabilities_evasion, axis=-1)
y_np = y.cpu().numpy()

# Cumulative statistics: for each threshold, how many nodes have a margin below it
# and how many of those nodes are misclassified.
clean_counts = []
evasion_counts = []

clean_misclassify = []
evasion_misclassify = []
for threshold in thresholds:
    boundary_indices_clean = np.where(margin_clean < threshold)[0]
    boundary_indices_evasion = np.where(margin_evasion < threshold)[0]
    clean_counts.append(boundary_indices_clean.shape[0])
    evasion_counts.append(boundary_indices_evasion.shape[0])

    if threshold==0:
        clean_misclassify.append(0)
        evasion_misclassify.append(0)
        continue
    #Calculate misclassified nodes at this threshold
    misclassify_count_clean = np.count_nonzero(
        predictions_clean[boundary_indices_clean] != y_np[boundary_indices_clean])
    misclassify_count_evasion = np.count_nonzero(
        predictions_evasion[boundary_indices_evasion] != y_np[boundary_indices_evasion])
    clean_misclassify.append(misclassify_count_clean)
    evasion_misclassify.append(misclassify_count_evasion)

# Turn the cumulative series into per-bin values (one bin per pair of consecutive thresholds).
clean_counts=np.diff(clean_counts)
evasion_counts=np.diff(evasion_counts)
clean_misclassify = np.diff(clean_misclassify)
# Fixed: this previously differenced the (already differenced) clean series again.
evasion_misclassify = np.diff(evasion_misclassify)

In [None]:
import matplotlib.pyplot as plt
# Number of nodes per confidence-margin bin (first 7 bins only), clean vs. evasion outputs.
values1 = clean_counts[0:7]
values2 = evasion_counts[0:7]
plt.plot(values1, label='clean')
plt.plot(values2, label='evasion')
plt.xlabel('Confidence-margin bin index')
plt.ylabel('Number of nodes')
plt.legend()
plt.show()

In [None]:
threshold = 0.5

# Boundary nodes: the gap between the two largest class probabilities is below `threshold`.
boundary_indices_clean = np.flatnonzero(
    np.diff(np.sort(probabilities_clean, axis=1)[:, -2:], axis=1)[:, 0] < threshold)

boundary_indices_evasion = np.flatnonzero(
    np.diff(np.sort(probabilities_evasion, axis=1)[:, -2:], axis=1)[:, 0] < threshold)

In [None]:
# Row-wise mean of criterion_v2's output for the clean vs. evasion 'conv1' values
# (criterion_v2 is defined elsewhere; presumably an NSALoss_v2 instance -- TODO confirm).
nodewise_nsa = criterion_v2(clean_vals2['conv1'], evasion_vals2['conv1']).mean(dim=1).cpu()

In [None]:
#nodewise_nsa = nodewise_nsa[boundary_indices_evasion]

In [None]:
import torch

# Sort the values
sorted_values, sorted_indices = torch.sort(nodewise_nsa)

# Percentile rank of every evasion boundary node's NSA value among all nodes.
# searchsorted (left side) returns the first position of each value in the sorted tensor,
# i.e. the same position the former per-node equality scan picked, without scanning the
# whole tensor once per node.
boundary_nsa = nodewise_nsa[boundary_indices_evasion]
positions = torch.searchsorted(sorted_values, boundary_nsa)
percentiles = (torch.true_divide(positions, len(nodewise_nsa)) * 100).tolist()

# Print the percentiles for the corresponding indices
# for idx, percentile in zip(max_edge_changes, percentiles):
#     print(f"Node {idx} is in the {percentile} percentile.")

In [None]:
nodewise_nsa[boundary_indices_evasion].shape

In [None]:
# Number of boundary nodes (margin below `threshold`) for the clean and the evasion outputs.
print(boundary_indices_clean.shape)
print(boundary_indices_evasion.shape)
# Mean NSA percentile rank and mean NSA value over the evasion boundary nodes.
print(np.mean(percentiles))
print(torch.mean(nodewise_nsa[boundary_indices_evasion]))

In [None]:
#thresholds = [0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.1]
#thresholds = [0.2]
# Class probabilities and per-node confidence margin (largest minus second-largest probability).
probabilities_clean = F.softmax(clean_vals2['conv1'], dim=-1).numpy()
probabilities_evasion = F.softmax(evasion_vals2['conv1'], dim=-1).numpy()
clean_confidence = np.diff(np.sort(probabilities_clean, axis=1)[:, -2:], axis=1)[:, 0]
evasion_confidence = np.diff(np.sort(probabilities_evasion, axis=1)[:, -2:], axis=1)[:, 0]

In [None]:
grand_ec = evasion_confidence

In [None]:
# Indices of the nodes whose predicted class matches the label, for clean and evasion outputs.
clean_correct_indices = np.flatnonzero(probabilities_clean.argmax(axis=-1) == y.cpu().numpy())
evasion_correct_indices = np.flatnonzero(probabilities_evasion.argmax(axis=-1) == y.cpu().numpy())

In [None]:
evasion_correct_indices.shape

In [None]:
threshold = 0.5
# Number of nodes whose confidence margin dropped by at least `threshold` under evasion,
# followed by the number whose margin changed by at least `threshold` in either direction.
print(np.count_nonzero((clean_confidence - evasion_confidence)>=threshold))
print(np.count_nonzero(np.abs(clean_confidence - evasion_confidence)>=threshold))
# The same two counts, restricted to nodes that are still classified correctly under evasion.
print(np.count_nonzero((clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices])>=threshold))
print(np.count_nonzero(np.abs(clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices])>=threshold))

In [None]:
# The 50 nodes (among those still classified correctly under evasion) with the largest
# drop in confidence margin. argsort yields positions *within* the evasion_correct_indices
# subset, so they are mapped back to node ids before being used to index per-node arrays.
confidence_drop = clean_confidence[evasion_correct_indices] - evasion_confidence[evasion_correct_indices]
worst_indices = evasion_correct_indices[np.argsort(confidence_drop)[::-1][:50]]

In [None]:
nodewise_nsa.cpu().numpy()[worst_indices]

In [None]:
# Keep the NSA values selected by worst_indices under a model-specific name; they are
# plotted against the other architectures in the "Plot all" section.
grand_nodewise_nsa = nodewise_nsa.cpu().numpy()[worst_indices]
np.mean(grand_nodewise_nsa)

In [None]:
# Recap: boundary-node counts (clean, evasion) and mean NSA percentile of the evasion boundary nodes.
print(boundary_indices_clean.shape)
print(boundary_indices_evasion.shape)
print(np.mean(percentiles))

## Plot all

In [None]:
# Count, per architecture, the evasion confidence margins that fall outside the
# 1.5 * IQR whiskers (the points a box plot draws as outliers).
data = [gcn_ec, svd_ec, guard_ec, prognn_ec, grand_ec]
outlier_counts = []
# The loop variable is deliberately not called `dataset`: that name holds the dataset
# identifier string set at the top of the notebook and must not be overwritten here.
for confidences in data:
    Q1 = np.percentile(confidences, 25)
    Q3 = np.percentile(confidences, 75)
    IQR = Q3 - Q1

    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    outliers = np.sum((confidences < lower_bound) | (confidences > upper_bound))
    outlier_counts.append(outliers)

In [None]:
outlier_counts

In [None]:
import seaborn as sns
# Box plot of the evasion confidence margins per architecture; each box is annotated
# with the outlier count from `outlier_counts`.
plt.figure(figsize=(10,10))
data = [gcn_ec, svd_ec, guard_ec, prognn_ec, grand_ec]
labels = ["GCN","SVD-GCN", "GNNGuard", "ProGNN", "GRAND"]
sns.boxplot(data)
plt.xticks(range(len(labels)),labels)
# Place the outlier count slightly to the right of each box, near the bottom of the axis.
for i in range(len(data)):
    plt.text(i+0.4, 0.05, f'{outlier_counts[i]}', ha='center', va='bottom', fontsize=20)

plt.ylabel('Classification Confidence', fontsize=20)
#plt.xlabel('GNN Architecture', fontsize=24)
plt.xticks(fontsize=20)
plt.yticks(fontsize=25)
#plt.title('Comparison of Confidence Levels Across Different Architectures')
# NOTE(review): the style is set after the figure has been drawn, so it presumably only
# affects figures created later -- confirm this is intended.
sns.set(style='whitegrid')
plt.savefig("classification_confidence_evasion.png", bbox_inches='tight')
#plt.show()

In [None]:
#321,354, 0.0383 | 670, 877, 0.0605 | 341, 343, 0.0169 | 464, 486, 0.0355 | 0.030
# Hand-entered summary numbers, one entry per architecture in the order used by the plot
# labels (GCN, SVD-GCN, GNNGuard, ProGNN, GRAND). The triples above presumably are
# (clean boundary count, evasion boundary count, mean boundary NSA) copied from the printed
# cell outputs -- TODO confirm.
# NOTE(review): the ProGNN NSA reads 0.0355 in the line above but 0.035 in nsa_values, and
# GRAND has no boundary counts (its difference is hard-coded to 0) -- verify these values.
differences = [354-321,877-670,343-341,486-464,0]
nsa_values = [0.0383, 0.0605, 0.0169, 0.035, 0.030]

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Per-architecture increase in boundary nodes (bars) against the NSA values (line).
# Note that this re-binds `values` and `percentiles` to the hand-entered lists.
values = differences
percentiles = nsa_values
labels = ["GCN","SVD-GCN", "GNNGuard", "ProGNN", "GRAND"]

# Create a figure and a set of subplots
fig, ax1 = plt.subplots(figsize=(10,10))

# Plotting the column chart
color = 'tab:blue'
#ax1.set_xlabel('Architecture', fontsize=27)
ax1.set_ylabel('Boundary Node Increase', color=color, fontsize=24)
ax1.bar(labels, values, color=color)
ax1.tick_params(axis='y', labelcolor=color)
plt.yticks(fontsize=24)
plt.xticks(fontsize=20)

# Create a second y-axis for the NSA values
ax2 = ax1.twinx()  
color = 'tab:red'
ax2.set_ylabel('NSA of Boundary Nodes', color=color, fontsize=24)
ax2.plot(range(len(percentiles)), percentiles, color=color)
ax2.grid(False)
#ax1.grid(False)
ax2.tick_params(axis='y', labelcolor=color)

plt.yticks(fontsize=24)
plt.savefig("boundary_node_increase.png", bbox_inches='tight')
# Show the plot
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Nodewise NSA of the nodes selected by `worst_indices` in each architecture's section,
# one line per architecture. The x-axis is the rank within each list, not a shared node id.
plt.figure(figsize=(20,10))
node_indices = range(len(grand_nodewise_nsa))

# Create a line plot for each list
plt.plot(node_indices, grand_nodewise_nsa, label='GRAND')
plt.plot(node_indices, prognn_nodewise_nsa, label='ProGNN')
plt.plot(node_indices, guard_nodewise_nsa, label='GNNGuard')
plt.plot(node_indices, svd_nodewise_nsa, label='SVD-GCN')
plt.plot(node_indices, gcn_nodewise_nsa, label='GCN')

# Adding labels and title
#plt.xlabel('Node Index',fontsize=17)
plt.ylabel('Nodewise NSA Value',fontsize=27)
#plt.title('Nodewise NSA values for the top 50 nodes with the greatest decline in Classification Confidence')
plt.xticks(fontsize=24)
plt.yticks(fontsize=27)
# Add a legend
plt.legend(fontsize=20)
plt.savefig("nodewise_confidence.png", bbox_inches='tight')
# Show the plot
#plt.show()


In [None]:
import matplotlib.pyplot as plt

# Nodewise NSA of each architecture's 50 nodes with the largest edge_change
# (the nodewise_nsa_* tensors are nodewise_nsa[max_edge_changes] in the ProGNN and GRAND
# sections; presumably the same for the other architectures -- TODO confirm).
# NOTE(review): the title says "highest degree variance" although the nodes were
# selected by edge_change -- confirm which wording is intended.
plt.figure(figsize=(20,10))
node_indices = range(len(nodewise_nsa_grand))

# Create a line plot for each list
plt.plot(node_indices, nodewise_nsa_grand, label='GRAND')
plt.plot(node_indices, nodewise_nsa_prognn, label='ProGNN')
plt.plot(node_indices, nodewise_nsa_guard, label='GNNGuard')
plt.plot(node_indices, nodewise_nsa_svd, label='SVD-GCN')
plt.plot(node_indices, nodewise_nsa_gcn, label='GCN')

# Adding labels and title
plt.xlabel('Node Index')
plt.ylabel('Nodewise NSA Value')
plt.title('Nodewise NSA values for the top 50 nodes with the highest degree variance')

# Add a legend
plt.legend()

# Show the plot
plt.show()


## Save the accuracy

In [None]:
accuracy_dict

In [None]:
import numpy as np
import os
import pickle

# Create the output directory from Python instead of the `!mkdir` shell escape: the escape
# only works under IPython and prints an error when the directory already exists, whereas
# makedirs(..., exist_ok=True) is safe to re-run.
os.makedirs('accuracy_vals', exist_ok=True)

# Persist the collected clean/poisoned/evasion accuracies, tagged with the perturbation rate.
save_path = 'accuracy_vals/'+ptb_value+'.pkl'
with open(save_path, 'wb') as file:
    pickle.dump(accuracy_dict, file)