## GNN+WT-AWP for poisoning robustness

In [1]:
import numpy as np
from deeprobust.graph.data import Dataset
from deeprobust.graph.global_attack import Metattack,PGDAttack,DICE
from deeprobust.graph.utils import preprocess
import os
import torch
from deeprobust.graph.data import Dataset, PtbDataset,PrePtbDataset
from deeprobust.graph.defense import GCN, GCNJaccard, AdvTraining, SimPGCN, RGCN, ProGNN, GCNSVD
from scipy import sparse
# Pick the compute device used by the training cells.
# The experiments were pinned to GPU index 3; keep that choice when such a GPU
# exists, otherwise fall back to the first GPU, or to the CPU when CUDA is not
# available, so this cell no longer crashes on machines with a different setup.
_PREFERRED_GPU = 3
if torch.cuda.is_available():
    torch.cuda.set_device(_PREFERRED_GPU if _PREFERRED_GPU < torch.cuda.device_count() else 0)
    # A bare "cuda" device resolves to the current device selected just above.
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

#### Generate perturbed graph

In [2]:
def get_perturbed_graph(attack_method,data_name,seed_data,ptb_rate=0.05):
    """Run a global structure attack on a dataset and return the perturbed adjacency.

    Parameters
    ----------
    attack_method : str
        One of 'dice', 'pgd' or 'metattack'.
    data_name : str
        Dataset name forwarded to ``Dataset(name=...)``.
    seed_data : int
        Seed forwarded to ``Dataset(seed=...)``.
    ptb_rate : float, optional
        Fraction of the edge count (``adj.sum() // 2``) used as the
        perturbation budget. Defaults to 0.05, the value previously hard-coded.

    Returns
    -------
    The ``modified_adj`` attribute of the attack model after ``attack()`` ran.
    NOTE(review): its concrete type depends on the attack implementation
    (presumably a scipy matrix for DICE and a torch tensor for PGD/Metattack)
    -- confirm against the DeepRobust version in use.

    Raises
    ------
    ValueError
        If ``attack_method`` is not one of the supported names.
    """
    supported_attacks = ('dice', 'pgd', 'metattack')
    # Fail fast: previously an unknown name fell through every branch and
    # surfaced as an UnboundLocalError on the return statement.
    if attack_method not in supported_attacks:
        raise ValueError(
            "Unknown attack_method {!r}; expected one of {}".format(attack_method, supported_attacks))

    data = Dataset(root='/tmp/', name=data_name,setting = 'nettack',seed=seed_data)
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

    # Compute the budget on the original adjacency with plain integer
    # arithmetic, before any tensor conversion; `//` on a torch tensor triggers
    # the floor_divide deprecation warning.
    perturbations = int(ptb_rate * (int(adj.sum()) // 2))

    if attack_method == 'dice':
        model = DICE()
        model.attack(adj, labels, n_perturbations=perturbations)
        return model.modified_adj

    # PGD and Metattack both operate on tensors.
    adj, features, labels = preprocess(adj, features, labels, preprocess_adj=False) # convert to tensor
    n_classes = labels.max().item()+1

    if attack_method == 'pgd':
        # Setup Victim Model
        victim_model = GCN(nfeat=features.shape[1], nclass=n_classes,
                            nhid=16, dropout=0.5, weight_decay=5e-4, device='cpu').to('cpu')
        victim_model.fit(features, adj, labels, idx_train)
        # Setup Attack Model
        model = PGDAttack(model=victim_model, nnodes=adj.shape[0], loss_type='CE', device='cpu').to('cpu')
        model.attack(features, adj, labels, idx_train, n_perturbations=perturbations)
    else:  # 'metattack'
        print(len(idx_train),len(idx_val),len(idx_test))
        idx_unlabeled = np.union1d(idx_val, idx_test)
        # Setup Surrogate model
        surrogate = GCN(nfeat=features.shape[1], nclass=n_classes,
                    nhid=16, dropout=0, with_relu=False, with_bias=False, device='cuda').to('cuda')
        surrogate.fit(features, adj, labels, idx_train, idx_val,)
        # Setup Attack Model
        model = Metattack(surrogate, nnodes=adj.shape[0], feature_shape=features.shape,
                attack_structure=True, attack_features=False, device='cuda', lambda_=0).to('cuda')
        # Attack
        model.attack(features, adj, labels, idx_train, idx_unlabeled, n_perturbations=perturbations, ll_constraint=True)
    return model.modified_adj

In [3]:
# Poison Cora with the PGD structure attack; the result is reused by the training cells.
modified_adj = get_perturbed_graph(
    attack_method='pgd',
    data_name='cora',
    seed_data=0,
)

Loading cora dataset...
Selecting 1 largest connected components


To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448278899/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)
100%|█████████████████████████████████████████| 200/200 [00:36<00:00,  5.53it/s]


#### Train a GNN+WT-AWP model 
Note: the `data_name` and `seed_data` passed to `train_wtawp()` and `train_normal()` must be the same as those passed to `get_perturbed_graph()`.

In [4]:
def train_wtawp(data_name,model_name,seed_data,seed,weight,gamma,modified_adj):
    """Train a GNN with WT-AWP on a perturbed graph and evaluate it on the test split.

    Parameters
    ----------
    data_name : str
        Dataset name forwarded to ``Dataset(name=...)``.
    model_name : str
        One of "gcn", "gcnjaccard", "simpgcn" or "gcnsvd".
    seed_data : int
        Seed forwarded to ``Dataset(seed=...)``.
    seed : int
        Seed forwarded to ``fit_mp``.
    weight, gamma :
        Forwarded to ``fit_mp`` as ``awp_weight`` and ``awp_gamma``.
    modified_adj :
        Perturbed adjacency; anything ``scipy.sparse.csr_matrix`` accepts, or a
        torch tensor on any device.

    Returns
    -------
    Whatever ``model.test(idx_test)`` returns.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    supported_models = ("gcn", "gcnjaccard", "simpgcn", "gcnsvd")
    # Fail fast: previously an unknown name skipped every branch and surfaced
    # as an UnboundLocalError on the return statement.
    if model_name not in supported_models:
        raise ValueError(
            "Unknown model_name {!r}; expected one of {}".format(model_name, supported_models))

    data = Dataset(root='/tmp/', name=data_name,setting = 'nettack',seed = seed_data)
    features, labels = data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

    # A tensor living on the GPU cannot be turned into a scipy matrix directly;
    # bring it to host memory first.
    if torch.is_tensor(modified_adj):
        modified_adj = modified_adj.detach().cpu().numpy()
    modified_adj = sparse.csr_matrix(modified_adj)

    model_classes = {"gcn": GCN, "gcnjaccard": GCNJaccard, "simpgcn": SimPGCN, "gcnsvd": GCNSVD}
    model_cls = model_classes[model_name]
    model_kwargs = dict(nfeat=features.shape[1], nclass=labels.max()+1, nhid=16, device=device)
    if model_name == "simpgcn":
        # SimPGCN is the only architecture here that is also given the node count.
        model_kwargs["nnodes"] = features.shape[0]

    # The proxy is a second network built with the same arguments as the model.
    model = model_cls(**model_kwargs).to(device)
    proxy = model_cls(**model_kwargs).to(device)
    model.fit_mp(features, modified_adj, labels, idx_train,proxy=proxy,idx_val=idx_val,verbose=False,
                 awp_weight = weight, awp_gamma = gamma,awp_step = 1,train_iters = 200,idx_test = idx_test,seed=seed)
    model.eval()
    return model.test(idx_test)

In [5]:
# GCN trained with WT-AWP on the poisoned graph; the cell shows the returned test result.
train_wtawp(
    data_name='cora',
    model_name='gcn',
    seed_data=0,
    seed=1,
    weight=0.5,
    gamma=0.7,
    modified_adj=modified_adj,
)

Loading cora dataset...
Selecting 1 largest connected components
Test set results: loss= 0.6178 accuracy= 0.8219


0.8219315895372235

#### Train a vanilla GNN model

In [6]:
def train_normal(data_name,model_name,seed_data,seed,modified_adj):
    """Train a GNN with its regular ``fit`` on a perturbed graph and evaluate it.

    Parameters
    ----------
    data_name : str
        Dataset name forwarded to ``Dataset(name=...)``.
    model_name : str
        One of "gcn", "gcnjaccard", "simpgcn" or "gcnsvd".
    seed_data : int
        Seed forwarded to ``Dataset(seed=...)``.
    seed : int
        Seed forwarded to ``fit``.
    modified_adj :
        Perturbed adjacency; anything ``scipy.sparse.csr_matrix`` accepts, or a
        torch tensor on any device.

    Returns
    -------
    Whatever ``model.test(idx_test)`` returns.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    supported_models = ("gcn", "gcnjaccard", "simpgcn", "gcnsvd")
    # Fail fast: previously an unknown name skipped every branch and surfaced
    # as an UnboundLocalError on the return statement.
    if model_name not in supported_models:
        raise ValueError(
            "Unknown model_name {!r}; expected one of {}".format(model_name, supported_models))

    data = Dataset(root='/tmp/', name=data_name,setting = 'nettack',seed = seed_data)
    features, labels = data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test

    # A tensor living on the GPU cannot be turned into a scipy matrix directly;
    # bring it to host memory first.
    if torch.is_tensor(modified_adj):
        modified_adj = modified_adj.detach().cpu().numpy()
    modified_adj = sparse.csr_matrix(modified_adj)

    model_classes = {"gcn": GCN, "gcnjaccard": GCNJaccard, "simpgcn": SimPGCN, "gcnsvd": GCNSVD}
    model_kwargs = dict(nfeat=features.shape[1], nclass=labels.max()+1, nhid=16, device=device)
    if model_name == "simpgcn":
        # SimPGCN is the only architecture here that is also given the node count.
        model_kwargs["nnodes"] = features.shape[0]

    model = model_classes[model_name](**model_kwargs).to(device)
    model.fit(features, modified_adj, labels, idx_train,idx_val,verbose=False,seed = seed,)
    model.eval()
    return model.test(idx_test)

In [7]:
# Baseline: the same GCN trained without WT-AWP on the same poisoned graph.
train_normal(
    data_name='cora',
    model_name='gcn',
    seed_data=0,
    seed=1,
    modified_adj=modified_adj,
)

Loading cora dataset...
Selecting 1 largest connected components
Test set results: loss= 0.6635 accuracy= 0.8099


0.8098591549295775