In [1]:
from __future__ import division
from __future__ import print_function

import time
import torch
import numpy as np
import torch.nn.functional as F
from pygcn.gcnio.data import dataio
from pygcn.gcnio.util import utils
from pygcn.gcn import GCN
import scipy.sparse
import json
from sklearn.preprocessing import StandardScaler
import glog as log
import torch.optim as optim


In [2]:
cuda = torch.cuda.is_available()
print('cuda: %s' % cuda)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#device = 'cpu'

cuda: True


In [3]:
def load_data(prefix, normalize=True):
    adj_full = scipy.sparse.load_npz('./{}/adj_full.npz'.format(prefix))
    adj_train = scipy.sparse.load_npz('./{}/adj_train.npz'.format(prefix))
    role = json.load(open('./{}/role.json'.format(prefix)))
    feats = np.load('./{}/feats.npy'.format(prefix))
    class_map = json.load(open('./{}/class_map.json'.format(prefix)))
    class_map = {int(k):v for k,v in class_map.items()}
    assert len(class_map) == feats.shape[0]
    # ---- normalize feats ----
    train_nodes = np.array(list(set(adj_train.nonzero()[0])))
    train_feats = feats[train_nodes]
    scaler = StandardScaler()
    scaler.fit(train_feats)
    feats = scaler.transform(feats)
    # -------------------------
    return adj_full, adj_train, feats, class_map, role


def process_graph_data(adj_full, adj_train, feats, class_map, role):
    """
    setup vertex property map for output classes, train/val/test masks, and feats
    INPUT:
        G           graph-tool graph, full graph including training,val,testing
        feats       ndarray of shape |V|xf
        class_map   dictionary {vertex_id: class_id}
        val_nodes   index of validation nodes
        test_nodes  index of testing nodes
    OUTPUT:
        G           graph-tool graph unchanged
        role        array of size |V|, indicating 'train'/'val'/'test'
        class_arr   array of |V|x|C|, converted by class_map
        feats       array of features unchanged
    """
    num_vertices = adj_full.shape[0]
    if isinstance(list(class_map.values())[0],list):
        print("labels are list")
        num_classes = len(list(class_map.values())[0])
        class_arr = np.zeros((num_vertices, num_classes))
        for k,v in class_map.items():
            class_arr[k] = v
    else:
        num_classes = max(class_map.values()) - min(class_map.values()) + 1
        class_arr = np.zeros((num_vertices, 1))
        for k,v in class_map.items():
            class_arr[k] = v
    return adj_full, adj_train, feats, class_arr.astype(int), role

In [15]:
# make sure you use the same data splits as you generated attacks
seed = 15
np.random.seed(seed)
torch.manual_seed(seed)
if cuda:
    torch.cuda.manual_seed(seed)

# load original dataset (to get clean features and labels)
SMALL = True
if SMALL:
    dataset = 'polblogs'
    data = dataio.Dataset(root='/tmp/', name=dataset)
    adj, features, labels = data.adj, data.features, data.labels
    idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    
    log.info(type(adj))
    log.info(adj.shape)
    log.info(type(features))
    log.info(features.shape)
    log.info(type(labels))
    log.info(labels.shape)
    log.info(type(idx_train))
    log.info(idx_train.shape)
    log.info(type(idx_val))
    log.info(idx_val.shape)
    log.info(type(idx_test))
    log.info(idx_test.shape)
else:
    data_prefix = './data/flickr'
    temp_data = load_data(data_prefix)
    train_data = process_graph_data(*temp_data)
    adj,adj_train,features,labels,role = train_data
    features = scipy.sparse.csr_matrix(features)
    idx_train = np.array(role['tr'])
    idx_val = np.array(role['va'])
    idx_test = np.array(role['te'])
    log.info(type(adj))
    log.info(adj.shape)
    log.info(type(adj_train))
    log.info(adj_train.shape)
    log.info(type(features))
    log.info(features.shape)
    log.info(type(labels))
    log.info(labels.shape)
    log.info(type(labels[0]))
    log.info(type(idx_train))
    log.info(idx_train.shape)
    log.info(type(idx_val))
    log.info(idx_val.shape)
    log.info(type(idx_test))
    log.info(idx_test.shape)

I0228 19:50:19.497399 864 <ipython-input-15-c1f0a11b40fd>:37] <class 'scipy.sparse.csr.csr_matrix'>
I0228 19:50:19.498293 864 <ipython-input-15-c1f0a11b40fd>:38] (89250, 89250)
I0228 19:50:19.498900 864 <ipython-input-15-c1f0a11b40fd>:39] <class 'scipy.sparse.csr.csr_matrix'>
I0228 19:50:19.499469 864 <ipython-input-15-c1f0a11b40fd>:40] (89250, 89250)
I0228 19:50:19.500033 864 <ipython-input-15-c1f0a11b40fd>:41] <class 'scipy.sparse.csr.csr_matrix'>
I0228 19:50:19.500581 864 <ipython-input-15-c1f0a11b40fd>:42] (89250, 500)
I0228 19:50:19.501123 864 <ipython-input-15-c1f0a11b40fd>:43] <class 'numpy.ndarray'>
I0228 19:50:19.501703 864 <ipython-input-15-c1f0a11b40fd>:44] (89250, 1)
I0228 19:50:19.502253 864 <ipython-input-15-c1f0a11b40fd>:45] <class 'numpy.ndarray'>
I0228 19:50:19.502799 864 <ipython-input-15-c1f0a11b40fd>:46] <class 'numpy.ndarray'>
I0228 19:50:19.503351 864 <ipython-input-15-c1f0a11b40fd>:47] (44625,)
I0228 19:50:19.503894 864 <ipython-input-15-c1f0a11b40fd>:48] <class 

In [16]:
print(labels.max())

# Model and optimizer
if len(labels.shape)>1:
    model = GCN(nfeat=features.shape[1], nhid=32, nclass=len(labels[0]), device=device)
else:
    model = GCN(nfeat=features.shape[1], nhid=32, nclass=labels.max()+1, device=device)
    
    
optimizer = optim.Adam(model.parameters(),
                       lr=0.01, weight_decay=5e-4)

6


In [17]:
model = model.to(device)

model.fit(features, adj, labels, idx_train, train_iters=200, verbose=True)
# # using validation to pick model
# model.fit(features, perturbed_adj, labels, idx_train, idx_val, train_iters=200, verbose=True)
model.eval()
# You can use the inner function of model to test
model.test(idx_test)

RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15