In [2]:
import torch
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from deeprobust.graph.defense import GCN
from deeprobust.graph.targeted_attack import FGA
from deeprobust.graph.utils import *
from deeprobust.graph.data import Dataset
from tqdm import tqdm
import argparse
from experiments import split_dataset
from DistributedDefense import TwoPartyCNGCN
import networkx as nx
from scipy.sparse import csr_matrix
import Mahsa_backdoor_V0 as backdoor
#from torch.utils.data import random_split

In [8]:
####################### Data loading and preprocessing #######################
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load data
dataset = "polblogs"
#data = Dataset(root='/tmp/', name=dataset)  : this is for unix-based systems

# Use the current directory for windows
data = Dataset(root='.', name=dataset)
#data = Dataset(root='/tmp/', name=dataset)

adj, features, labels = data.adj, data.features, data.labels
idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

###########
#split idx_test into two parts randomly
from sklearn.model_selection import train_test_split

test_size = 0.2  # 20% for test_attack, 80% for test_clean
idx_test_attack, idx_test_clean = train_test_split(idx_test, test_size=test_size, random_state=42)


# Split graph into two graphs 
proportion_of_common_links = 0.5
adj1, adj2 = split_dataset(adj, proportion_of_common_links) 


Loading polblogs dataset...
Selecting 1 largest connected components


In [17]:
############################ evaluation before attack ###########################
# Perform evaluation before attack to find the baseline accuracy

threshold = 2               # threshold for dropping dissimilar edges
metric = "neighbors"        # metric for dropping dissimilar edges (neighbors, jaccard, cosine)
object = "links"            # object for defense (links, features)

model = TwoPartyCNGCN(dataset=dataset, nfeat=features.shape[1], nhid=16, nclass=labels.max().item() + 1,
                          device=device)
model.fit(adj1.copy(), adj2.copy(), features, features, labels, idx_train, threshold, metric=metric, object=object,
            train_iters=200, initialize=True, verbose=False, idx_val=idx_val)
model.eval()
accuracies = model.test(idx_test)  #accuracy of the model after the defense on all the test data - (all the nodes)
accuracies_ASR_before = model.test(idx_test_attack)
accuracies_clean_before = model.test(idx_test_clean)
print("accuracy before attack on test: ", accuracies_ASR_before)
print("accuracy before attack (clean): ", accuracies_clean_before)


# output = model.predict()
# target_node_label = output[target_node].max(0)[1]
# print("Predicted label for target node: ", target_node_label)
# print("True label for target node: ", target_label)



Dropping dissimilar edges using metric :  neighbors  on links
removed 1669 edges in polblogs 1
removed 1659 edges in polblogs 2
*** polblogs 1 ***
Test set results: loss= 0.3366 accuracy= 0.8057
*** polblogs 2 ***
Test set results: loss= 0.3359 accuracy= 0.8047
*** polblogs 1 ***
Test set results: loss= 0.3362 accuracy= 0.8082
*** polblogs 2 ***
Test set results: loss= 0.3377 accuracy= 0.8043
*** polblogs 1 ***
Test set results: loss= 0.3379 accuracy= 0.7959
*** polblogs 2 ***
Test set results: loss= 0.3283 accuracy= 0.8061
accuracy before attack on test:  (0.8081841432225064, 0.8043478260869565)
accuracy before attack (clean):  (0.7959183673469388, 0.8061224489795918)


AttributeError: 'TwoPartyCNGCN' object has no attribute 'predict'

In [13]:
################################ Mahsa attack ###############################
# Perform the attack
# 
modified_adj1 =  adj1.copy()

# Create a NetworkX graph from the adjacency matrix
graph = nx.from_scipy_sparse_array(modified_adj1)

# Add labels to the graph
for node_id, label in enumerate(labels):
    graph.nodes[node_id]['label'] = label
# print(f"lenght of the labels : {len(labels)}") 
print(f"graph edges : {graph.number_of_edges()}")
print(f"graph nodes : {graph.number_of_nodes()}")


target_node, target_label, budget = backdoor.target(graph)   

print(f"target node : {target_node}, target label : {target_label}, budget : {budget}")
non_neighbor_opposit= backdoor.find_non_neighbor_opposit_label(graph, target_node, target_label)
max_same_min_opposit_label_neighbors= backdoor.find_max_same_min_opposit_label_neighbors(graph, non_neighbor_opposit)
nodes_for_attack = backdoor.nodes_for_attack(graph, target_node, max_same_min_opposit_label_neighbors, budget)
attacked_graph = backdoor.insert_edge(graph, target_node, nodes_for_attack)
backdoor.evaluate_graph(attacked_graph, nodes_for_attack, target_node, budget)
modified_adj1 =backdoor.convert(attacked_graph)


print(attacked_graph)



graph edges : 16714
graph nodes : 1222
Target node is 6 with label 0 and budget 200
target node : 6, target label : 0, budget : 200
Edge insertion is successful and 200 edges has been inserted between 6 and : [(672, 0.8703703703703703, 6), (591, 0.8133333333333334, 5), (1167, 0.8390804597701149, 4), (741, 0.8365384615384616, 4), (734, 0.8214285714285714, 4), (1077, 0.8142857142857143, 4), (1179, 0.5217391304347826, 4), (677, 0.25, 4), (906, 0.014925373134328358, 4), (881, 0.9647058823529412, 3), (921, 0.933649289099526, 3), (967, 0.8620689655172413, 3), (1134, 0.8571428571428571, 3), (1136, 0.8275862068965517, 3), (837, 0.826797385620915, 3), (977, 0.7692307692307693, 3), (638, 0.7525773195876289, 3), (867, 0.6666666666666666, 3), (742, 0.6538461538461539, 3), (894, 0.5, 3), (930, 0.9555555555555556, 2), (691, 0.9453125, 2), (931, 0.9411764705882353, 2), (829, 0.9175824175824175, 2), (1165, 0.9166666666666666, 2), (1006, 0.9078947368421053, 2), (708, 0.9032258064516129, 2), (597, 0.9, 

AttributeError: 'TwoPartyCNGCN' object has no attribute 'predict'

In [7]:
############################ Crypto'Graph defense ###########################
# Perform Crypto'Graph distributed defense

threshold = 2               # threshold for dropping dissimilar edges
metric = "neighbors"        # metric for dropping dissimilar edges (neighbors, jaccard, cosine)
object = "links"            # object for defense (links, features)

model = TwoPartyCNGCN(dataset=dataset, nfeat=features.shape[1], nhid=16, nclass=labels.max().item() + 1,
                          device=device)
model.fit(modified_adj1.copy(), adj2.copy(), features, features, labels, idx_train, threshold, metric=metric, object=object,
            train_iters=200, initialize=True, verbose=False, idx_val=idx_val)
model.eval()
accuracies = model.test(idx_test)  #accuracy of the model after the defense on all the test data - (all the nodes)
accuracies_ASR = model.test(idx_test_attack)
accuracies_clean = model.test(idx_test_clean)
print("accuracy after attack on test: ", accuracies_ASR)
print("accuracy after attack (clean): ", accuracies_clean)

Dropping dissimilar edges using metric :  neighbors  on links
removed 1636 edges in polblogs 1
removed 1577 edges in polblogs 2
*** polblogs 1 ***
Test set results: loss= 0.3695 accuracy= 0.7975
*** polblogs 2 ***
Test set results: loss= 0.3578 accuracy= 0.8016
*** polblogs 1 ***
Test set results: loss= 0.3604 accuracy= 0.8005
*** polblogs 2 ***
Test set results: loss= 0.3589 accuracy= 0.8018
*** polblogs 1 ***
Test set results: loss= 0.4060 accuracy= 0.7857
*** polblogs 2 ***
Test set results: loss= 0.3534 accuracy= 0.8010
accuracy after attack on test:  (0.8005115089514067, 0.8017902813299232)
accuracy after attack (clean):  (0.7857142857142857, 0.8010204081632653)


In [None]:
########evaluation after attack################
ASR = accuracies_ASR - 

In [12]:
#### Mahsa Test after CryptoGraph ####
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

# Get the model's predictions
# outputs = model(features) 

# Assume that 'new_features' is your new node features matrix
outputs1 = model.gcn1.forward(adj1, features)
outputs2 = model.gcn2.forward(features)

outputs = model.forward(features)

_ , predicted_classes = outputs.max(dim=1) # get the predicted classes for each node
_, predicted = outputs.max(1)

# Create a color map based on the predicted labels
color_map = predicted.numpy()  # convert to numpy array for indexing
# Draw the graph
nx.draw(graph, node_color=color_map, with_labels=True)
plt.show()


#evaluate accuracy of trained GCN model
# Get the true labels and predicted labels for the test nodes
true_labels = labels[idx_test].cpu().numpy()
predicted_labels = predicted[idx_test].cpu().numpy()

# Calculate the accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f'Accuracy: {accuracy}')



AttributeError: 'numpy.ndarray' object has no attribute 'is_sparse'

In [6]:
################################# Evaluation ###############################
print(f"Test accuracy: {accuracies[0]:.2f}")
print(f"Test accuracy after attack: {accuracies[1]:.2f}")
#print(f"Test accuracy after defense: {accuracies[2]:.2f}")
#print(f"Test accuracy after attack and defense: {accuracies[3]:.2f}")



Test accuracy: 0.80
Test accuracy after attack: 0.80
