In [2]:
import os

import numpy as np
import scipy.sparse as sp
import torch

In [3]:
def encode_onehot(labels):
    """Encode a sequence of class labels as one-hot rows.

    The distinct labels are sorted before columns are assigned, so a given
    class always maps to the same column. Iterating a bare ``set`` does not
    guarantee that: for string labels the order can differ from one Python
    process to the next.

    Args:
        labels: Iterable of hashable, mutually comparable labels.

    Returns:
        ``np.int32`` array of shape ``(n_labels, n_classes)``; row ``i`` holds
        a single 1 in the column of the class of the ``i``-th label.
    """
    labels = list(labels)  # allow one-shot iterables; we need two passes
    classes = sorted(set(labels))
    column_of = {c: i for i, c in enumerate(classes)}
    columns = np.fromiter((column_of[label] for label in labels),
                          dtype=np.intp, count=len(labels))
    # Build the identity once and select rows from it, rather than
    # constructing a fresh identity matrix for every class.
    return np.identity(len(classes), dtype=np.int32)[columns]

def normalize(mx):
    """Row-normalize sparse matrix.

    Every row is scaled by the reciprocal of its sum, so rows with a non-zero
    sum add up to 1. Rows whose sum is zero are scaled by 0 and stay all-zero.

    Args:
        mx: Matrix exposing ``sum(axis=1)`` and usable as the right operand of
            a sparse ``dot`` (a scipy sparse matrix in this notebook).

    Returns:
        The product ``diag(1 / rowsum) . mx`` as returned by scipy.
    """
    row_sums = np.asarray(mx.sum(axis=1)).ravel()
    # Integer sums cannot hold reciprocals (and NumPy rejects integer ** -1),
    # so promote them; floating inputs keep their own precision.
    if not np.issubdtype(row_sums.dtype, np.inexact):
        row_sums = row_sums.astype(np.float64)
    inverse = np.zeros_like(row_sums)
    # Divide only where the sum is non-zero: zero-sum rows keep a 0 scale
    # factor and no divide-by-zero warning is raised.
    np.divide(1.0, row_sums, out=inverse, where=row_sums != 0)
    return sp.diags(inverse).dot(mx)

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor.

    Args:
        sparse_mx: scipy sparse matrix (anything providing ``tocoo()``).

    Returns:
        A float32 torch sparse COO tensor with the same shape and the same
        stored entries as ``sparse_mx``.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # torch expects a 2 x nnz int64 index tensor: row indices on top,
    # column indices below.
    indices = torch.from_numpy(
        np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    # torch.sparse.FloatTensor is a deprecated legacy constructor;
    # torch.sparse_coo_tensor is the supported way to build the same tensor.
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))

def load_data(path="../data/cora/", dataset="cora"):
    """Load citation network dataset (cora only for now).

    Reads ``<dataset>.content`` (one row per node: id, feature columns, class
    label) and ``<dataset>.cites`` (one row per edge: two node ids), then
    builds the tensors used for training.

    Args:
        path: Directory containing the dataset files, with or without a
            trailing separator. If it is not an existing directory it is used
            as a literal prefix of the file names, as before.
        dataset: Base name of the ``.content`` / ``.cites`` files.

    Returns:
        Tuple ``(adj, features, labels, idx_train, idx_val, idx_test)``:
        ``adj`` is the symmetrized, self-looped, row-normalized adjacency as a
        torch sparse tensor; ``features`` is a dense float tensor of
        row-normalized features; ``labels`` is a long tensor of class indices;
        the three ``idx_*`` values are long tensors holding the fixed node
        ranges 0-139, 200-499 and 500-1499.

    Note:
        The split ranges are hard-coded, so indexing with them presumably
        requires a dataset of at least 1500 nodes (true for cora) - confirm
        before using another dataset.
    """
    print('Loading {} dataset...'.format(dataset))

    # Plain string concatenation silently builds a wrong file name when the
    # directory is given without a trailing slash ("./cora" + "cora").
    if os.path.isdir(path):
        prefix = os.path.join(path, dataset)
    else:
        prefix = "{}{}".format(path, dataset)

    idx_features_labels = np.genfromtxt("{}.content".format(prefix),
                                        dtype=np.dtype(str))
    # First column is the node id, last column the label, the rest features.
    features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
    labels = encode_onehot(idx_features_labels[:, -1])

    # build graph: translate the ids used in the files to row positions
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    idx_map = {j: i for i, j in enumerate(idx)}
    edges_unordered = np.genfromtxt("{}.cites".format(prefix),
                                    dtype=np.int32)
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]),
                        dtype=np.float32)

    # build symmetric adjacency matrix: wherever the transposed entry is
    # larger, replace the entry with it (element-wise max of adj and adj.T)
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    features = normalize(features)
    # add self-loops before normalizing the adjacency rows
    adj = normalize(adj + sp.eye(adj.shape[0]))

    idx_train = range(140)
    idx_val = range(200, 500)
    idx_test = range(500, 1500)

    features = torch.FloatTensor(np.array(features.todense()))
    # column index of the 1 in each one-hot row == class index
    labels = torch.LongTensor(np.where(labels)[1])
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    return adj, features, labels, idx_train, idx_val, idx_test

# Load the graph tensors from the dataset files under ./cora/.
(adj, features, labels,
 idx_train, idx_val, idx_test) = load_data(path="./cora/", dataset="cora")

Loading cora dataset...


In [4]:
from GCNN import NodeClassificationGCNN

# Seed torch's RNG so parameter initialisation (and any later torch
# randomness) is the same on every Restart & Run All.
torch.manual_seed(42)

# Labels are integer class indices, so the class count is max + 1. Take it
# straight from the tensor as a plain Python int instead of round-tripping
# through NumPy and passing a NumPy scalar to the constructor.
num_classes = int(labels.max()) + 1

# NOTE(review): arguments are presumably (input size, hidden size, number of
# classes) - confirm against GCNN.NodeClassificationGCNN.
model = NodeClassificationGCNN(features.shape[1], 256, num_classes)

In [5]:
def accuracy(out, label):
    """Return the fraction of rows of `out` whose arg-max column equals `label`.

    `out` holds one row of per-class scores per sample and `label` the
    expected class index of each row. The result is a 0-dim double tensor.
    """
    predicted = out.max(dim=1).indices.type_as(label)
    n_correct = predicted.eq(label).double().sum()
    return n_correct / len(label)

In [7]:
import torch.optim as optim
import torch.nn.functional as F

epochs = 100
optimizer = optim.Adam(model.parameters(), lr=0.01)

# The label slices are the same in every epoch, so take them once up front.
train_labels = labels[idx_train]
val_labels = labels[idx_val]

for epoch in range(epochs):
    # Training step: full forward pass, loss on the training nodes only.
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    # NOTE(review): nll_loss expects log-probabilities; presumably the model
    # ends in log_softmax - confirm in GCNN.
    loss = F.nll_loss(output[idx_train], train_labels)
    print(f"Training epoch {epoch} ; accuracy: {accuracy(output[idx_train],train_labels)}; loss: {loss.item()}")
    loss.backward()
    optimizer.step()

    # Validation: nothing is back-propagated here, so run the forward pass
    # without recording an autograd graph.
    model.eval()
    with torch.no_grad():
        output = model(features, adj)
        loss = F.nll_loss(output[idx_val], val_labels)
    print(f"Validation epoch {epoch} ; accuracy: {accuracy(output[idx_val],val_labels)}; loss: {loss.item()}")
    

Training epoch 0 ; accuracy: 0.85; loss: 1.035738229751587
Validation epoch 0 ; accuracy: 0.7833333333333333; loss: 1.151475191116333
Training epoch 1 ; accuracy: 0.8928571428571429; loss: 0.9011008143424988
Validation epoch 1 ; accuracy: 0.82; loss: 1.0657345056533813
Training epoch 2 ; accuracy: 0.9428571428571428; loss: 0.7846040725708008
Validation epoch 2 ; accuracy: 0.84; loss: 0.9868790507316589
Training epoch 3 ; accuracy: 0.9428571428571428; loss: 0.6801876425743103
Validation epoch 3 ; accuracy: 0.8433333333333334; loss: 0.9153319001197815
Training epoch 4 ; accuracy: 0.9428571428571428; loss: 0.5876632332801819
Validation epoch 4 ; accuracy: 0.85; loss: 0.8517352342605591
Training epoch 5 ; accuracy: 0.9642857142857143; loss: 0.49887800216674805
Validation epoch 5 ; accuracy: 0.8433333333333334; loss: 0.7961025238037109
Training epoch 6 ; accuracy: 0.9642857142857143; loss: 0.43331900238990784
Validation epoch 6 ; accuracy: 0.83; loss: 0.7486196756362915
Training epoch 7 ; a

In [None]:
# Final evaluation on the held-out test nodes. `F` is torch.nn.functional,
# imported in the training cell.
model.eval()
test_labels = labels[idx_test]
# Evaluation only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    output = model(features, adj)
    loss = F.nll_loss(output[idx_test], test_labels)
print(f"Test set ; accuracy: {accuracy(output[idx_test],test_labels)}; loss: {loss.item()}")