In [1]:
import torch
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from deeprobust.graph.defense import GCN
from deeprobust.graph.targeted_attack import FGA
from deeprobust.graph.utils import *
from deeprobust.graph.data import Dataset
from tqdm import tqdm
import argparse
from experiments import split_dataset
from DistributedDefense import TwoPartyCNGCN
import networkx as nx
from scipy.sparse import csr_matrix
import Mahsa_backdoor_V0 as backdoor
from sklearn.model_selection import train_test_split
#from torch.utils.data import random_split

In [10]:
####################### Data loading and preprocessing #######################
# Seed the random number generators up front so that "Restart & Run All" yields
# the same dataset split and the same trained models on every run.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
# NOTE(review): split_dataset is project code that is not visible here; if it
# draws from Python's `random` module rather than NumPy it needs its own seed.

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load data. root='.' stores the dataset in the current directory, which also
# works on Windows; on Unix-like systems root='/tmp/' is the usual alternative.
dataset = "polblogs"
data = Dataset(root='.', name=dataset, seed=SEED)

adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

# Randomly split idx_test into two disjoint parts. train_test_split returns
# (remainder, held-out part), so `test_size` is the share assigned to
# idx_test_attack and everything else goes to idx_test_clean.
test_size = 0.1  # 10% for test_attack, 90% for test_clean
idx_test_clean, idx_test_attack = train_test_split(idx_test, test_size=test_size, random_state=SEED)
assert len(idx_test_clean) + len(idx_test_attack) == len(idx_test), "test split must partition idx_test"

print("idx_test_attack", idx_test_attack)
print("idx_test_clean", idx_test_clean)
print("len idx attack", len(idx_test_attack))
print("len idx clean", len(idx_test_clean))
print("len idx test", len(idx_test))

# Split the graph into two graphs (adj1, adj2); the second argument is the
# proportion of links the two graphs have in common.
proportion_of_common_links = 0.5
adj1, adj2 = split_dataset(adj, proportion_of_common_links)


Loading polblogs dataset...
Selecting 1 largest connected components
idx_test_attack [1086  319  681 1218   82  416  376  754  996   39  605  758 1054  463
 1212   15 1060  264  830  222  304  396  244  910 1103  773  101  895
  172 1059   44 1071  563  789    9  310  812  134  138  756  495  215
  717  791  384 1145 1116  619  926  547 1101  980  358  592   92  680
  457  642 1036  927  837  768   80  237 1191  810   26  122  663  999
  106  594  746  116  488    0  288 1038  303  815  139 1081  616  595
  591  951  309  399  822  879  891  799 1029  579 1057  960  528  850]
idx_test_clean [ 671   83  373  221  567  662  354  629  972  526  962  718  933  695
  737 1123  130  440  819  355  965  111  627 1194  381  573  145  419
 1198  688 1189 1142  278  360  834   98  472  733  363  301  677  248
 1138  554  283  164  395  750 1091  510   89 1126  336  970  299  644
  423  588  798  589  913 1018  586  398   25  190  794  897  448  741
  280 1035  902  180  223  242 1164  227  274  

In [4]:
############################ train model initially and test accuracy ###########################
# Baseline: train a plain GCN on the unmodified graph and record its accuracy
# before any attack.
# NOTE(review): this baseline is fitted on the full `adj`, whereas the
# post-attack GCN is fitted on `modified_adj1` (built from `adj1`); confirm that
# comparing the two is the intended experiment.

# Train GCN model
model = GCN(nfeat=features.shape[1], nclass=labels.max().item()+1,
            nhid=16, device=device, dropout=0.5)
model = model.to(device)
model.fit(features, adj, labels, idx_train, idx_val=idx_val, patience=30, train_iters=200)
model.eval()

# Every model.test() call runs a full evaluation pass and prints its own
# "Test set results" line, so each index set is evaluated exactly once.
accuracies = model.test(idx_test)
print("Test accuracy: ", accuracies)

accuracy_test_attack_1 = model.test(idx_test_attack)
accuracy_test_clean_1 = model.test(idx_test_clean)

# Older names for the same three values, kept so that any cell still reading
# them keeps working.
output = accuracies
output_1 = accuracy_test_attack_1
output_2 = accuracy_test_clean_1

print("Test accuracy on attack set: ", accuracy_test_attack_1)
print("Test accuracy on clean set: ", accuracy_test_clean_1)



  return torch.sparse.FloatTensor(sparseconcat.t(),sparsedata,torch.Size(sparse_mx.shape))


Test set results: loss= 0.1964 accuracy= 0.9448
Test set results: loss= 0.1964 accuracy= 0.9448
Test accuracy:  0.9447852760736196
Test set results: loss= 0.1961 accuracy= 0.9455
Test set results: loss= 0.1991 accuracy= 0.9388
Test set results: loss= 0.1961 accuracy= 0.9455
Test set results: loss= 0.1991 accuracy= 0.9388
Test accuracy on attack set:  0.9454545454545454
Test accuracy on clean set:  0.9387755102040817


In [None]:
####### evaluation before attack ############ NOT NEEDED #####
# Baseline evaluation before the attack.
# Evaluating through TwoPartyCNGCN applies the Crypto'Graph defense at the same
# time, which is why this cell is marked as not needed.

threshold = 2               # threshold for dropping dissimilar edges
metric = "neighbors"        # metric for dropping dissimilar edges (neighbors, jaccard, cosine)
# Object for defense (links, features). Deliberately not called `object`:
# a global with that name would shadow the Python builtin for the whole kernel.
defense_object = "links"

model = TwoPartyCNGCN(dataset=dataset, nfeat=features.shape[1], nhid=16, nclass=labels.max().item() + 1,
                      device=device)
model.fit(adj1.copy(), adj2.copy(), features, features, labels, idx_train, threshold, metric=metric,
          object=defense_object, train_iters=200, initialize=True, verbose=False, idx_val=idx_val)
model.eval()
accuracies = model.test(idx_test)  # accuracy of the defended model on all test nodes
accuracies_ASR_before = model.test(idx_test_attack)
accuracies_clean_before = model.test(idx_test_clean)
print("accuracy before attack on test: ", accuracies_ASR_before)
print("accuracy before attack (clean): ", accuracies_clean_before)



In [None]:
####### Mahsa attack ######### NOT NEEDED  #############
# Perform the attack
# version 0: attack on the whole graph - and on just one node
#
# Legacy single-target variant, superseded by the cell that loops over
# idx_test_attack.
# NOTE(review): find_non_neighbor_opposit_label is called with three arguments
# here but with two in the multi-target cell; the helper's signature is not
# visible from this notebook, so confirm this call still matches it before
# running the cell.

# Work on a copy so that adj1 itself is left untouched.
modified_adj1 =  adj1.copy()

# Create a NetworkX graph from the adjacency matrix
graph = nx.from_scipy_sparse_array(modified_adj1)

# Add labels to the graph: node i receives labels[i] as its 'label' attribute.
for node_id, label in enumerate(labels):
    graph.nodes[node_id]['label'] = label
print(f"graph edges : {graph.number_of_edges()}")
print(f"graph nodes : {graph.number_of_nodes()}")


# The helper picks the target node and also returns its label and the budget.
target_node, target_label, budget = backdoor.target(graph)


print(f"target node : {target_node}, target label : {target_label}, budget : {budget}")
# Candidate selection and edge insertion are delegated to the project-local
# `backdoor` module; each call feeds its result into the next one.
non_neighbor_opposit= backdoor.find_non_neighbor_opposit_label(graph, target_node, target_label)
max_same_min_opposit_label_neighbors= backdoor.find_max_same_min_opposit_label_neighbors(graph, non_neighbor_opposit)
nodes_for_attack = backdoor.nodes_for_attack(graph, target_node, max_same_min_opposit_label_neighbors, budget)
attacked_graph = backdoor.insert_edge(graph, target_node, nodes_for_attack)
backdoor.evaluate_graph(attacked_graph, nodes_for_attack, target_node, budget)
# Rebind modified_adj1 to the helper's conversion of the attacked graph
# (presumably back to an adjacency matrix -- confirm against the helper).
modified_adj1 =backdoor.convert(attacked_graph)


print(attacked_graph)



In [6]:
################################ Mahsa attack ##############################
# Perform the attack
# version 0: attack on the whole graph - on idx_test_attack nodes
#
# Every node in idx_test_attack is attacked in turn, and the resulting graph is
# converted into `modified_adj1`, which later cells pass to GCN.fit and
# TwoPartyCNGCN.fit.

# Work on a copy so that adj1 itself is left untouched.
modified_adj1 =  adj1.copy()

# Create a NetworkX graph from the adjacency matrix
graph = nx.from_scipy_sparse_array(modified_adj1)

# Add labels to the graph: node i receives labels[i] as its 'label' attribute.
for node_id, label in enumerate(labels):
    graph.nodes[node_id]['label'] = label
print(f"graph edges : {graph.number_of_edges()}")
print(f"graph nodes : {graph.number_of_nodes()}")

# Passed to nodes_for_attack and evaluate_graph for every target; presumably
# the number of edges inserted per target node (the recorded output reports 5
# inserted edges per node) -- confirm against the helper.
budget = 5

# attacked_graph starts out as `graph` and is rebound after every insertion, so
# each attack is performed on the graph resulting from the previous attacks.
# NOTE(review): this is a plain assignment, not a copy. Whether insert_edge
# mutates its argument or returns a new graph is not visible here; if it
# mutates in place, `graph` also carries the inserted edges afterwards.
attacked_graph = graph
for target_node in idx_test_attack:
    non_neighbor_opposit = backdoor.find_non_neighbor_opposit_label(attacked_graph, target_node)
    max_same_min_opposit_label_neighbors = backdoor.find_max_same_min_opposit_label_neighbors(attacked_graph, non_neighbor_opposit)
    nodes_for_attack = backdoor.nodes_for_attack(attacked_graph, target_node, max_same_min_opposit_label_neighbors, budget)
    attacked_graph = backdoor.insert_edge(attacked_graph, target_node, nodes_for_attack)
    backdoor.evaluate_graph(attacked_graph, nodes_for_attack, target_node, budget)


# Rebind modified_adj1 to the helper's conversion of the attacked graph
# (presumably back to an adjacency matrix -- confirm against the helper).
modified_adj1 =backdoor.convert(attacked_graph)


print(attacked_graph)

graph edges : 16714
graph nodes : 1222
Edge insertion done and 5 edges has been inserted between 494 and : [(906, 0.014925373134328358, 8), (1177, 0.7788944723618091, 7), (1029, 0.8833333333333333, 5), (638, 0.7525773195876289, 5), (921, 0.933649289099526, 4)]
Edge insertion done and 5 edges has been inserted between 792 and : [(300, 0.9057971014492754, 3), (565, 0.7981651376146789, 3), (363, 0.9594594594594594, 2), (277, 0.7777777777777778, 2), (524, 0.7432432432432432, 2)]
Edge insertion done and 5 edges has been inserted between 67 and : [(672, 0.8703703703703703, 3), (837, 0.826797385620915, 3), (1177, 0.775, 3), (1199, 0.7413793103448276, 3), (1078, 0.9210526315789473, 2)]
Edge insertion done and 5 edges has been inserted between 1210 and : [(47, 0.9205776173285198, 9), (118, 0.9923076923076923, 8), (83, 0.9849624060150376, 8), (499, 0.9421487603305785, 8), (126, 0.8689458689458689, 8)]
Edge insertion done and 5 edges has been inserted between 299 and : [(672, 0.8623853211009175, 

In [8]:
################################ evaluation after attack ##############################
# Accuracy after the attack: train a fresh GCN on `modified_adj1` (the attacked
# version of `adj1`) and evaluate it on the same index sets as the baseline.
model = GCN(nfeat=features.shape[1], nclass=labels.max().item()+1,
            nhid=16, device=device, dropout=0.5)
model = model.to(device)
model.fit(features, modified_adj1, labels, idx_train, idx_val=idx_val, patience=30, train_iters=200)
model.eval()

# Every model.test() call runs a full evaluation pass and prints its own
# "Test set results" line, so each index set is evaluated exactly once.
output = model.test(idx_test)

accuracy_test_attack_2 = model.test(idx_test_attack)
accuracy_test_clean_2 = model.test(idx_test_clean)

# Older names for the same two values, kept so that any cell still reading
# them keeps working.
output_1 = accuracy_test_attack_2
output_2 = accuracy_test_clean_2

print("Test accuracy on attack set: ", accuracy_test_attack_2)
print("Test accuracy on clean set: ", accuracy_test_clean_2)


Test set results: loss= 0.5463 accuracy= 0.7495
Test set results: loss= 0.5664 accuracy= 0.7307
Test set results: loss= 0.3659 accuracy= 0.9184
Test set results: loss= 0.5664 accuracy= 0.7307
Test set results: loss= 0.3659 accuracy= 0.9184
Test accuracy on attack set:  0.7306818181818182
Test accuracy on clean set:  0.9183673469387755


In [9]:
############################ Crypto'Graph defense ###########################
# Perform Crypto'Graph distributed defense: fit the two-party model on the
# attacked graph `modified_adj1` together with the untouched `adj2`.

threshold = 2               # threshold for dropping dissimilar edges
metric = "neighbors"        # metric for dropping dissimilar edges (neighbors, jaccard, cosine)
# Object for defense (links, features). Deliberately not called `object`:
# a global with that name would shadow the Python builtin for the whole kernel.
defense_object = "links"

model = TwoPartyCNGCN(dataset=dataset, nfeat=features.shape[1], nhid=16, nclass=labels.max().item() + 1,
                      device=device)
model.fit(modified_adj1.copy(), adj2.copy(), features, features, labels, idx_train, threshold, metric=metric,
          object=defense_object, train_iters=200, initialize=True, verbose=False, idx_val=idx_val)
model.eval()
# The recorded output shows each test() result as a pair of accuracies, one per
# graph ("polblogs 1" / "polblogs 2").
accuracies = model.test(idx_test)  # accuracy of the defended model on all test nodes
accuracy_test_attack_3 = model.test(idx_test_attack)
accuracy_test_clean_3 = model.test(idx_test_clean)
print("Test accuracy on attack set: ", accuracy_test_attack_3)
print("Test accuracy on clean set: ", accuracy_test_clean_3)

Dropping dissimilar edges using metric :  neighbors  on links
removed 1361 edges in polblogs 1
removed 720 edges in polblogs 2
*** polblogs 1 ***
Test set results: loss= 0.4840 accuracy= 0.7853
*** polblogs 2 ***
Test set results: loss= 0.2734 accuracy= 0.9039
*** polblogs 1 ***
Test set results: loss= 0.4864 accuracy= 0.7830
*** polblogs 2 ***
Test set results: loss= 0.2652 accuracy= 0.9136
*** polblogs 1 ***
Test set results: loss= 0.4617 accuracy= 0.8061
*** polblogs 2 ***
Test set results: loss= 0.3465 accuracy= 0.8163
accuracy after attack on test:  (0.7829545454545455, 0.9136363636363637)
accuracy after attack (clean):  (0.8061224489795918, 0.8163265306122449)


: 

In [None]:
######## evaluation after attack ################
# Attack success rate, taken here as the accuracy drop on the attacked test
# nodes: accuracy of the baseline GCN (accuracy_test_attack_1) minus accuracy
# of the GCN retrained on the attacked graph (accuracy_test_attack_2).
# NOTE(review): this definition of ASR is an assumption -- confirm it is the
# metric the experiment is meant to report.
ASR = accuracy_test_attack_1 - accuracy_test_attack_2
print("ASR (accuracy drop on attack set): ", ASR)

In [None]:
#### Mahsa Test after CryptoGraph ####
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

# Get the model's predictions.
# NOTE(review): TwoPartyCNGCN.forward is project code that is not visible here;
# confirm that it accepts the raw feature matrix and returns one row of class
# scores per node before relying on this cell.
outputs = model.forward(features)

# Index of the highest score in each row = predicted class of each node.
predicted = outputs.max(dim=1)[1]
predicted_classes = predicted  # older name for the same tensor
# Move to host memory before converting, so this also works when the model
# runs on a GPU.
predicted_np = predicted.detach().cpu().numpy()

# Colour every node by its predicted label and draw the graph.
color_map = predicted_np
nx.draw(graph, node_color=color_map, with_labels=True)
plt.show()


# Evaluate the accuracy of the trained model on the test nodes.
# `labels` is taken straight from the Dataset object and is never converted to
# a tensor in this notebook, so it cannot be assumed to offer .cpu(); both the
# tensor and the array case are handled.
labels_np = labels.cpu().numpy() if torch.is_tensor(labels) else np.asarray(labels)
true_labels = labels_np[idx_test]
predicted_labels = predicted_np[idx_test]

# Stored as `test_accuracy` rather than `accuracy` so that no function of that
# name that the star import may have provided gets overwritten by a float.
test_accuracy = accuracy_score(true_labels, predicted_labels)
print(f'Accuracy: {test_accuracy}')



In [6]:
################################# Evaluation ###############################
# When the notebook is run top to bottom, `accuracies` was last assigned by the
# Crypto'Graph defense cell: TwoPartyCNGCN.test(idx_test) on the model fitted on
# the attacked graph. Both entries are therefore measured after attack AND
# defense, one per graph of the two-party setting.
# NOTE(review): the order (graph 1, graph 2) is inferred from the recorded
# "polblogs 1" / "polblogs 2" output -- confirm against TwoPartyCNGCN.test.
print(f"Test accuracy after attack and defense (graph 1): {accuracies[0]:.2f}")
print(f"Test accuracy after attack and defense (graph 2): {accuracies[1]:.2f}")



Test accuracy: 0.80
Test accuracy after attack: 0.80
