In [1]:
from __future__ import division
from __future__ import print_function

import os
import time
import argparse
import numpy as np

import torch
import torch.nn.functional as F
import torch.optim as optim
import scipy.sparse as sp # To test function load_data in utils.py

from pygcn.utils import load_data, accuracy
from pygcn.models import GCN

# Training settings
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False,
                    help='Validate during training pass.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=200,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.01,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=16,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')
#parser.add_argument("-f", "--fff", help="a dummy argument to fool ipython", default="1") ## Solution 1: To fix the argparse issue of Jupyter Notebook
#args = parser.parse_args() ## Solution 2:  to fix the argparse issue of Jupyter Notebook -- Replaced by args, unknown = parser.parse_known_args()
args, unknown = parser.parse_known_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load data
adj, features, labels, idx_train, idx_val, idx_test = load_data()
print(adj)



Loading cora dataset...
tensor(indices=tensor([[   0,    8,   14,  ..., 1389, 2344, 2707],
                       [   0,    0,    0,  ..., 2707, 2707, 2707]]),
       values=tensor([0.1667, 0.1667, 0.0500,  ..., 0.2000, 0.5000, 0.2500]),
       size=(2708, 2708), nnz=13264, layout=torch.sparse_coo)


In [4]:
# Test load_data function in utils.py
def encode_onehot(labels):
    classes = set(labels)
    classes_dict = {c: np.identity(len(classes))[i, :] for i, c in
                    enumerate(classes)}
    labels_onehot = np.array(list(map(classes_dict.get, labels)),
                             dtype=np.int32)
    return labels_onehot
path = os.path.join(os.getcwd(),'pygcn/data/cora/')
dataset="cora"

    ## Replace path = "../data/cora/" by the absolute path: 
    ## path = os.path.join(os.path.dirname(os.path.abspath(__file__)),'data/cora/')
###    """Load citation network dataset (cora only for now)"""
print('Loading {} dataset...'.format(dataset))
    
idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset), dtype=np.dtype(str))
features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
labels = encode_onehot(idx_features_labels[:, -1])

    # build graph
idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
idx_map = {j: i for i, j in enumerate(idx)}
edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset), dtype=np.int32)
edges = np.array(list(map(idx_map.get, edges_unordered.flatten())), dtype=np.int32).reshape(edges_unordered.shape)
adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]),
                        dtype=np.float32)

    # build symmetric adjacency matrix
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
adj = sp.csr_matrix(adj)
adj2 = sp.csr_matrix.multiply(adj,adj)
print(adj)
print(adj2)

Loading cora dataset...
  (0, 8)	1.0
  (0, 14)	1.0
  (0, 258)	1.0
  (0, 435)	1.0
  (0, 544)	1.0
  (1, 344)	1.0
  (2, 410)	1.0
  (2, 471)	1.0
  (2, 552)	1.0
  (2, 565)	1.0
  (3, 197)	1.0
  (3, 463)	1.0
  (3, 601)	1.0
  (4, 170)	1.0
  (5, 490)	1.0
  (5, 2164)	1.0
  (6, 251)	1.0
  (6, 490)	1.0
  (7, 258)	1.0
  (8, 0)	1.0
  (8, 14)	1.0
  (8, 258)	1.0
  (8, 435)	1.0
  (8, 751)	1.0
  (9, 308)	1.0
  :	:
  (2698, 2697)	1.0
  (2698, 2700)	1.0
  (2699, 2153)	1.0
  (2700, 2697)	1.0
  (2700, 2698)	1.0
  (2701, 2247)	1.0
  (2701, 2263)	1.0
  (2702, 881)	1.0
  (2702, 2624)	1.0
  (2703, 1221)	1.0
  (2703, 1409)	1.0
  (2703, 2200)	1.0
  (2704, 209)	1.0
  (2704, 2407)	1.0
  (2705, 1784)	1.0
  (2705, 1839)	1.0
  (2705, 1840)	1.0
  (2705, 2216)	1.0
  (2706, 1046)	1.0
  (2706, 1138)	1.0
  (2706, 1640)	1.0
  (2706, 1752)	1.0
  (2707, 774)	1.0
  (2707, 1389)	1.0
  (2707, 2344)	1.0
  (0, 8)	1.0
  (0, 14)	1.0
  (0, 258)	1.0
  (0, 435)	1.0
  (0, 544)	1.0
  (1, 344)	1.0
  (2, 410)	1.0
  (2, 471)	1.0
  (2, 552)	

In [16]:
def sp_square(adj):
    '''Perform square operation on COO-sparsed matrix'''
    adj_csr = sp.csr_matrix(adj)
    return sp.csr_matrix.dot(adj_csr, adj_csr)

In [17]:
from scipy.sparse import coo_matrix
row = np.array([0, 0, 3, 4])
col = np.array([0, 2, 2, 1])
data = np.array([4, 5, 7, 9])

In [30]:
#test
a = coo_matrix((data, (row, col)), shape = (5, 5))
a.toarray()

array([[4, 0, 5, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 7, 0, 0],
       [0, 9, 0, 0, 0]])

In [37]:
b = sp_square(a)
print(b.toarray())
print(np.dot(a.toarray(), a.toarray()) - b.toarray())
assert((np.dot(a.toarray(), a.toarray()) == b.toarray()).all())
print('OK!')

[[16  0 20  0  0]
 [ 0  0  0  0  0]
 [ 0  0  0  0  0]
 [ 0  0  0  0  0]
 [ 0  0  0  0  0]]
[[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
OK!


In [75]:
def sp_second_tied(adj):
    '''Input: adj in COO format; Output: adj second_tied in COO format'''
    rows, cols = a.nonzero()
    N = a.shape[0]
    adj = [set() for i in range(N)] #adjacency list initialization
    for i in range(len(rows)):
        adj[rows[i]].add(cols[i])
        adj[cols[i]].add(rows[i])
    print(adj)
    new_adj = adj.copy()
    #new adjacency matrix: if (i, j) then new neighbors of i = old neighbors union neighbors of j
    for i in range(len(adj)):
        for j in adj[i]:
            new_adj[i] = new_adj[i].union(adj[j])
    #construct resulting matrix from new adjacency list
    new_rows = []
    new_cols = []
    for i in range(len(new_adj)):
        for j in new_adj[i]:
            new_rows.append(i)
            new_cols.append(j)
    data = [1 for i in range(len(new_cols))]
    b = coo_matrix((data, (new_rows, new_cols)), shape = (N, N)) 
    print(b.toarray())
    return b
        

In [44]:
a.nonzero()

(array([0, 0, 3, 4]), array([0, 2, 2, 1]))

In [45]:
rows, cols = a.nonzero()

In [46]:
rows

array([0, 0, 3, 4])

In [47]:
a.shape

(5, 5)

In [67]:
row = np.array([0, 0, 3, 4])
col = np.array([0, 2, 2, 1])
data = np.array([1, 1, 1, 1])
a = coo_matrix((data, (row, col)), shape = (5, 5))

In [76]:
#test
sp_second_tied(a)

[{0, 2}, {4}, {0, 3}, {2}, {1}]
[[1 0 1 1 0]
 [0 1 0 0 1]
 [1 0 1 1 0]
 [1 0 1 1 0]
 [0 1 0 0 1]]


<5x5 sparse matrix of type '<class 'numpy.int32'>'
	with 13 stored elements in COOrdinate format>