In [None]:
from __future__ import division
from __future__ import print_function

import numpy as np
import scipy.sparse as sp
import bayesiancoresets as bc
import matplotlib.pyplot as plt

import torch
import torch.nn.functional as F
import torch.optim as optim

from pygcn.utils import accuracy, load_data, normalize, normalize_adj
from pygcn.models import GCN

import matlab
import matlab.engine

class argument:
    """Bare attribute container: settings are attached to an instance as plain attributes."""

# Selection algorithms

In [None]:
def selection(A, K, algorithm):
    """Pick at least ``K`` points using a coreset-construction algorithm.

    The algorithm is built from ``A``, run with a budget of ``K`` and its
    weight vector inspected. While fewer than ``K`` weights are nonzero, the
    budget is raised by the shortfall and the algorithm is run again.

    Args:
        A: Data handed unchanged to ``algorithm``.
        K: Minimum number of points to select.
        algorithm: Callable such that ``algorithm(A)`` returns an object with
            ``run(budget)`` and ``weights()`` methods; ``weights()`` must
            return an array supporting ``nonzero()``.

    Returns:
        list: Indices of the nonzero weights. Contains at least ``K`` entries;
        NOTE(review): it may contain more than ``K`` if a rerun with a larger
        budget activates more points than were missing -- confirm whether
        callers need exactly ``K``.

    Raises:
        ValueError: If ``K`` is larger than the number of weights, since the
            request could never be met and the retry loop would spin forever.
    """
    alg = algorithm(A)
    alg.run(K)
    wts = alg.weights()

    # The nonzero count is bounded by the number of weights; without this
    # guard an oversized K makes the loop below run indefinitely.
    available = np.size(wts)
    if K > available:
        raise ValueError(
            "cannot select K=%d points: only %d candidates available" % (K, available)
        )

    chosen = wts.nonzero()[0]
    budget = K
    while len(chosen) < K:
        # Grow the budget by exactly the number of points still missing.
        budget += K - len(chosen)
        alg.run(budget)
        chosen = alg.weights().nonzero()[0]
    return chosen.tolist()

def selection_SP(A, K):
    """Select ``K`` indices by calling the MATLAB function ``SP``.

    A MATLAB engine is started for the duration of the call. ``A`` is
    converted to a MATLAB double matrix, transposed inside MATLAB, and passed
    to ``SP`` together with ``K``.

    Args:
        A: Tensor exposing ``.numpy()`` (so it must live on the CPU).
        K: Number of points requested from ``SP``.

    Returns:
        list of int: First row of the ``SP`` result with 1 subtracted from
        every entry (presumably converting MATLAB's 1-based indices to
        0-based ones -- confirm against the ``SP`` implementation).
    """
    eng = matlab.engine.start_matlab()
    try:
        M_mat = matlab.double([K])
        f_mat = matlab.double(A.numpy().tolist())
        s = eng.SP(eng.transpose(f_mat), M_mat)
        return [int(j) - 1 for j in s[0]]
    finally:
        # Always shut the engine down, even when the MATLAB call fails;
        # otherwise every failed call leaves a MATLAB process behind.
        eng.quit()

# Train and test functions

In [None]:
def train(epoch):
    """Perform one full-batch gradient step on the current model.

    Operates on the module-level names ``model``, ``optimizer``, ``features``,
    ``adj``, ``labels`` and ``idx_train``, which must be bound before calling.
    The NLL loss is evaluated only on the rows selected by ``idx_train``.

    Args:
        epoch: Index of the current epoch. Not used by the body; kept so
            existing ``train(epoch)`` calls continue to work.
    """
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)

    # The training accuracy was previously computed here on every step but
    # never used; it has been dropped to avoid the wasted work.
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()


def test():
    """Evaluate the current model on the held-out nodes.

    Operates on the module-level names ``model``, ``features``, ``adj``,
    ``labels`` and ``idx_test``.

    Returns:
        tuple: ``(loss, accuracy)`` as Python scalars, both computed on the
        rows selected by ``idx_test``.
    """
    model.eval()
    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # saves memory and time without changing the computed values.
    with torch.no_grad():
        output = model(features, adj)
        loss_test = F.nll_loss(output[idx_test], labels[idx_test])
        acc_test = accuracy(output[idx_test], labels[idx_test])
    return loss_test.item(), acc_test.item()

# Load the dataset

In [None]:
# Read everything load_data returns for the dataset stored under data/cora/.
(adj, adj_flip, features, labels,
 idx_train, idx_val, idx_test) = load_data(path="data/cora/")

# Dense NumPy copy of the adjacency matrix, as consumed by selection().
adj_np = adj.to_dense().numpy()

# Train models and test the accuracy

In [None]:
# Experiment settings.
args = argument()
args.cuda = torch.cuda.is_available()  # use the GPU when present, otherwise run on CPU
args.fastmode = True
args.epochs = 200
args.lr = 1e-2
args.weight_decay = 5e-4
args.hidden = 16
args.dropout = 0
args.repeat = 100  # number of independent trainings averaged per setting

# x = np.linspace(10, 50, 5).astype(int)
x = [20, 25, 30, 35, 40, 45, 50]  # numbers of training nodes to evaluate

# NOTE: no RNG seed is set, so the reported numbers vary between runs.


def _split_indices(selected, num_nodes, use_cuda):
    """Return ``(train, test)`` index tensors: ``selected`` vs. every other node."""
    train_ids = np.asarray(selected, dtype=np.int64).tolist()
    test_ids = sorted(set(range(num_nodes)) - set(train_ids))
    train_idx = torch.LongTensor(train_ids)
    test_idx = torch.LongTensor(test_ids)
    if use_cuda:
        train_idx, test_idx = train_idx.cuda(), test_idx.cuda()
    return train_idx, test_idx


# Move the shared tensors to the GPU once instead of once per trial.
if args.cuda:
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()

num_nodes = features.size(0)
num_classes = labels.max().item() + 1

# (label, selector, resample on every repeat). The position in this list is
# the column in `accumulator`: 0 = GIGA, 1 = FW, 2 = KSP, 3 = RND.
methods = [
    ("GIGA", lambda k: selection(adj_np, k, bc.GIGA), False),
    ("FW", lambda k: selection(adj_np, k, bc.FrankWolfe), False),
    ("KSP", lambda k: selection_SP(adj_flip, k), False),
    # replace=False: draw distinct nodes. Sampling with replacement can pick
    # the same node twice and silently train on fewer than k points.
    ("RND", lambda k: np.random.choice(num_nodes, k, replace=False), True),
]

accumulator = np.zeros((len(x), len(methods)))

for i, length in enumerate(x):
    print("length:", length)

    for col, (name, select, resample) in enumerate(methods):
        if not resample:
            # Deterministic selectors: choose the split once per length.
            idx_train, idx_test = _split_indices(select(length), num_nodes, args.cuda)

        for r in range(args.repeat):
            if resample:
                # Random baseline: draw a fresh split for every trial.
                idx_train, idx_test = _split_indices(select(length), num_nodes, args.cuda)

            # train() and test() read `model`, `optimizer`, `idx_train` and
            # `idx_test` from module scope, so they are rebound here.
            model = GCN(nfeat=features.shape[1],
                        nhid=args.hidden,
                        nclass=num_classes,
                        dropout=args.dropout)
            if args.cuda:
                # Move parameters before the optimizer is built around them.
                model.cuda()
            optimizer = optim.Adam(model.parameters(),
                                   lr=args.lr, weight_decay=args.weight_decay)

            for epoch in range(args.epochs):
                train(epoch)

            accumulator[i, col] += test()[1]
        print(name + ':', accumulator[i, col] / args.repeat)
    print("")

# Turn summed accuracies into means over the repeats.
accumulator /= args.repeat

# Plot the results

In [None]:
import matplotlib.font_manager as font_manager

tfont = {'fontname':'Liberation Serif'}

# (accumulator column, legend label, extra line properties), in drawing order.
# Columns follow the experiment cell: 0 = GIGA, 1 = FW, 2 = KSP, 3 = RND.
curves = [
    (2, "KSP", {"color": "k"}),
    (0, "GIGA", {}),
    (1, "FW", {}),
    (3, "RND", {"color": 'tab:red'}),
]
for column, label, extra in curves:
    plt.plot(x, accumulator[:, column], "-*", label=label, linewidth=2, **extra)

plt.legend(prop=font_manager.FontProperties(family=tfont['fontname'], size=13))
plt.xlabel("Number of Selected Points", size=14, **tfont)
plt.ylabel("Test Accuracy", size=14, **tfont)
# Place ticks at the evaluated sizes themselves so they stay in sync with `x`.
plt.xticks(x, size=14, **tfont)
plt.yticks(size=14, **tfont)
plt.grid()
plt.show()