In [1]:
import os
import time
import torch
import pickle
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from utils import *

# Report the installed PyTorch version and whether a CUDA device is usable.
print(torch.__version__)
print(torch.cuda.is_available())

1.7.1+cpu
False


In [2]:
class SparseDropout(nn.Module):
    """Dropout applied to the stored entries of a sparse COO tensor.

    Every stored value is kept with probability ``1 - dprob``; kept values
    are multiplied by ``1 / (1 - dprob)``. The mask is re-sampled on every
    call and does not consult ``self.training``.

    Args:
        dprob: fraction of stored entries to drop.
    """

    def __init__(self, dprob=0.5):
        super(SparseDropout, self).__init__()
        # dprob is the drop ratio; keep the complementary keep-probability.
        self.kprob = 1 - dprob

    def forward(self, x):
        """Return a sparse tensor of the same shape as ``x`` with entries dropped.

        Args:
            x: sparse COO tensor.

        Returns:
            Sparse COO tensor with ``x``'s shape holding the surviving,
            rescaled entries.
        """
        values = x._values()
        # Boolean mask (uint8 masks are deprecated for indexing), sampled on
        # the same device as the values so CUDA inputs work too.
        mask = torch.rand(values.size(), device=values.device) < self.kprob
        # With kprob == 0 nothing survives; avoid dividing by zero.
        scale = 1.0 / self.kprob if self.kprob > 0 else 0.0
        rc = x._indices()[:, mask]
        val = values[mask] * scale
        # Pass the size explicitly: without it the shape would be inferred
        # from the surviving indices and could shrink below x's shape.
        return torch.sparse_coo_tensor(rc, val, x.shape)

In [3]:
import math
import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module


class GraphConvolution(Module):
    """
    Single graph-convolution layer (cf. https://arxiv.org/abs/1609.02907).

    The forward pass multiplies the input by a learned weight matrix,
    multiplies the result by the adjacency matrix and optionally adds a bias.
    """
    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features, self.out_features = in_features, out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if not bias:
            # Register the name so `self.bias` exists and is None.
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.FloatTensor(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill weight (then bias, if present) uniformly in [-b, b], b = 1/sqrt(out_features)."""
        bound = 1. / math.sqrt(self.weight.size(1))
        for param in (self.weight, self.bias):
            if param is not None:
                param.data.uniform_(-bound, bound)

    def forward(self, input, adj):
        """Return ``adj @ (input @ weight)`` plus the bias when one is configured."""
        propagated = torch.spmm(adj, torch.mm(input, self.weight))
        return propagated if self.bias is None else propagated + self.bias

    def __repr__(self):
        return '{} ({} -> {})'.format(
            self.__class__.__name__, self.in_features, self.out_features)

In [4]:
class GCN(nn.Module):
    """Two stacked GraphConvolution layers with a log-softmax output."""

    def __init__(self, nfeat, nhid, nclass, dropout):
        super().__init__()

        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.sparse_dropout = SparseDropout(dropout)
        self.dropout = dropout
        # Parameter names of each layer, exposed through param_names().
        self.out1_names = [name for name, _ in self.gc1.named_parameters()]
        self.out2_names = [name for name, _ in self.gc2.named_parameters()]

    def forward(self, x, adj, dropout=False):
        """Return row-wise log-probabilities.

        When ``dropout`` is true the adjacency is first passed through
        ``self.sparse_dropout`` and that result is used by both layers.
        """
        graph = self.sparse_dropout(adj) if dropout else adj
        activated = F.relu(self.gc1(x, graph))
        activated = F.dropout(activated, self.dropout, training=self.training)
        return F.log_softmax(self.gc2(activated, graph), dim=1)

    def hidden(self, x, adj):
        """Return the raw outputs of the first and second layers (feature dropout disabled)."""
        first = self.gc1(x, adj)
        second = self.gc2(
            F.dropout(F.relu(first), self.dropout, training=False), adj)
        return first, second

    def param_names(self):
        """Return the parameter-name lists recorded for gc1 and gc2."""
        return self.out1_names, self.out2_names

In [5]:
# Load adjacency matrices, features, labels and split ids from the project loader.
(sparse_adj, sparse_adj_train, sparse_adj_train_all,
 features, train_feature, train_feature_all,
 labels, train_labels,
 id_train, id_valid, id_test, num_labels) = Origin_load_ether_data(cuda=False)

# Put 1.0 on the diagonal of every adjacency matrix (self-loops).
sparse_adj.setdiag(1.0)
sparse_adj_train.setdiag(1.0)
sparse_adj_train_all.setdiag(1.0)

# Convert the full adjacency matrix to a torch sparse COO tensor.
coo = sparse_adj.tocoo()
values = coo.data
# Build the indices directly as int64, the dtype torch.LongTensor holds.
indices = np.vstack((coo.row, coo.col)).astype(np.int64)
_i = torch.LongTensor(indices)
_v = torch.FloatTensor(values)
shape = coo.shape
adj = torch.sparse_coo_tensor(_i, _v, torch.Size(shape))

# dataset splits: one boolean mask per split, True at the ids of that split.
# `np.bool` was removed from NumPy (deprecated in 1.20); the builtin `bool`
# is the supported spelling of the same dtype.
num_nodes = features.shape[0]
id_train = np.array(id_train)
id_valid = np.array(id_valid)
id_test = np.array(id_test)

idx_train = np.zeros(num_nodes, dtype=bool)
idx_val = np.zeros(num_nodes, dtype=bool)
idx_test = np.zeros(num_nodes, dtype=bool)
idx_train[id_train] = True
idx_val[id_valid] = True
idx_test[id_test] = True

idx_train = torch.tensor(idx_train)
idx_val = torch.tensor(idx_val)
idx_test = torch.tensor(idx_test)

features = (1402220, 12) type = <class 'numpy.ndarray'>
train_index 572
val_index 82
test_index 162
adj torch.Size([1402220, 1402220])
features torch.Size([1402220, 12])


  self._set_arrayXarray(i, j, x)


In [6]:
def accuracy(output, labels):
    """Fraction of rows in ``output`` whose highest-scoring column equals the label.

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of class ids.

    Returns:
        0-dim double tensor: number of matches divided by ``len(labels)``.
    """
    _, predicted = output.max(1)
    hits = predicted.type_as(labels).eq(labels).double().sum()
    return hits / len(labels)

In [None]:
def gcn(features, adj, labels, idx_train, idx_val, idx_test,
        lr=0.01, weight_decay=0, epochs=2000, nhid=8, dropout=0.,
        nclass=None, cuda=False, output_path="gcn.pkl"):
    """Train a GCN with Adam, evaluate it on the test split and return the test accuracy.

    Args:
        features: node feature tensor passed to the model.
        adj: adjacency tensor passed to the model.
        labels: class-id tensor indexed by the split masks.
        idx_train, idx_val, idx_test: index/mask tensors selecting each split.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        epochs: number of training epochs.
        nhid: hidden width of the GCN.
        dropout: dropout ratio handed to the GCN.
        nclass: number of output classes; defaults to the notebook-level
            ``num_labels`` when omitted.
        cuda: move the model and all tensors to the GPU when true.
        output_path: file the test-time model output is pickled to.

    Returns:
        Test accuracy as a NumPy scalar array.
    """
    if nclass is None:
        nclass = num_labels
    model = GCN(nfeat=features.shape[1],
                nhid=nhid,
                nclass=nclass,
                dropout=dropout)
    optimizer = optim.Adam(model.parameters(),
                           lr=lr,
                           weight_decay=weight_decay)

    if cuda:
        model.cuda()
        features = features.cuda()
        adj = adj.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        idx_val = idx_val.cuda()
        idx_test = idx_test.cuda()

    def train(epoch):
        """Run one optimisation step, then report train and validation metrics."""
        t = time.time()
        model.train()
        optimizer.zero_grad()
        output = model(features, adj)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        acc_train = accuracy(output[idx_train], labels[idx_train])
        loss_train.backward()
        optimizer.step()

        # Validation needs no gradients; skipping the autograd graph saves
        # memory and time on every epoch.
        model.eval()
        with torch.no_grad():
            output = model(features, adj)
            loss_val = F.nll_loss(output[idx_val], labels[idx_val])
            acc_val = accuracy(output[idx_val], labels[idx_val])
        print('Epoch: {:04d}'.format(epoch+1),
              'loss_train: {:.4f}'.format(loss_train.item()),
              'acc_train: {:.4f}'.format(acc_train.item()),
              'loss_val: {:.4f}'.format(loss_val.item()),
              'acc_val: {:.4f}'.format(acc_val.item()),
              'time: {:.4f}s'.format(time.time() - t))
        return loss_val

    def test():
        """Evaluate on the test split, pickle the model output and return the accuracy."""
        model.eval()
        with torch.no_grad():
            output = model(features, adj)
            loss_test = F.nll_loss(output[idx_test], labels[idx_test])
            acc_test = accuracy(output[idx_test], labels[idx_test])
        print("Test set results:",
              "loss= {:.4f}".format(loss_test.item()),
              "accuracy= {:.4f}".format(acc_test.item()))
        with open(output_path, "wb") as f:
            # .cpu() is required before .numpy() when the model ran on the GPU.
            pickle.dump(output.detach().cpu().numpy(), f)
        return acc_test.detach().cpu().numpy()

    # Train model
    t_total = time.time()
    for epoch in range(epochs):
        train(epoch)
    print("Optimization Finished!")
    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    # Testing
    return test()

# Run the train/evaluate pipeline and collect the test accuracy of each run.
acc_test_list = []
for i in range(1):  # a single repetition
    acc_test = gcn(features, adj, labels, idx_train, idx_val, idx_test)
    acc_test_list.append(acc_test)
# Report the mean test accuracy over the collected runs.
print("result: {}".format(np.mean(acc_test_list)))

Epoch: 0001 loss_train: 5375.3721 acc_train: 0.0734 loss_val: 4146.2910 acc_val: 0.0976 time: 2.6852s
Epoch: 0002 loss_train: 4019.2634 acc_train: 0.0857 loss_val: 3081.1636 acc_val: 0.1098 time: 2.9342s
Epoch: 0003 loss_train: 2890.0652 acc_train: 0.0997 loss_val: 2339.6265 acc_val: 0.1463 time: 2.8572s
Epoch: 0004 loss_train: 1968.0265 acc_train: 0.1294 loss_val: 1781.5703 acc_val: 0.1463 time: 2.6572s
Epoch: 0005 loss_train: 1224.3937 acc_train: 0.1626 loss_val: 1356.0747 acc_val: 0.1463 time: 2.7452s
Epoch: 0006 loss_train: 687.6473 acc_train: 0.2063 loss_val: 1212.9945 acc_val: 0.1463 time: 2.6842s
Epoch: 0007 loss_train: 615.3702 acc_train: 0.2115 loss_val: 1125.9908 acc_val: 0.1707 time: 2.7011s
Epoch: 0008 loss_train: 678.8439 acc_train: 0.2185 loss_val: 1075.7802 acc_val: 0.1707 time: 2.9672s
Epoch: 0009 loss_train: 725.3268 acc_train: 0.2150 loss_val: 1052.7775 acc_val: 0.2683 time: 3.0352s
Epoch: 0010 loss_train: 744.7923 acc_train: 0.2815 loss_val: 1101.5941 acc_val: 0.2927

Epoch: 0083 loss_train: 15.9488 acc_train: 0.4930 loss_val: 128.7782 acc_val: 0.4512 time: 2.5082s
Epoch: 0084 loss_train: 14.9682 acc_train: 0.4930 loss_val: 127.0585 acc_val: 0.4634 time: 2.5282s
Epoch: 0085 loss_train: 14.7474 acc_train: 0.4878 loss_val: 123.0071 acc_val: 0.4756 time: 2.6712s
Epoch: 0086 loss_train: 14.8296 acc_train: 0.4948 loss_val: 116.5164 acc_val: 0.4634 time: 2.8732s
Epoch: 0087 loss_train: 14.0276 acc_train: 0.4913 loss_val: 110.1985 acc_val: 0.4756 time: 2.4412s
Epoch: 0088 loss_train: 14.1522 acc_train: 0.4948 loss_val: 105.3570 acc_val: 0.4756 time: 2.4112s
Epoch: 0089 loss_train: 14.1652 acc_train: 0.4930 loss_val: 101.8155 acc_val: 0.4756 time: 2.4252s
Epoch: 0090 loss_train: 13.8681 acc_train: 0.4948 loss_val: 99.4776 acc_val: 0.4634 time: 2.5022s
Epoch: 0091 loss_train: 13.3455 acc_train: 0.4895 loss_val: 97.8988 acc_val: 0.4634 time: 2.5052s
Epoch: 0092 loss_train: 12.9885 acc_train: 0.4913 loss_val: 95.9035 acc_val: 0.4756 time: 2.6542s
Epoch: 0093 l

Epoch: 0167 loss_train: 5.2791 acc_train: 0.5699 loss_val: 11.8499 acc_val: 0.4878 time: 2.4982s
Epoch: 0168 loss_train: 5.2151 acc_train: 0.5699 loss_val: 10.3137 acc_val: 0.4756 time: 2.4782s
Epoch: 0169 loss_train: 5.2198 acc_train: 0.5647 loss_val: 10.0247 acc_val: 0.4878 time: 2.6082s
Epoch: 0170 loss_train: 5.1910 acc_train: 0.5717 loss_val: 8.4425 acc_val: 0.5000 time: 2.5912s
Epoch: 0171 loss_train: 5.0988 acc_train: 0.5734 loss_val: 9.1432 acc_val: 0.5122 time: 2.8392s
Epoch: 0172 loss_train: 5.2546 acc_train: 0.5717 loss_val: 8.1476 acc_val: 0.5000 time: 2.4242s
Epoch: 0173 loss_train: 5.0424 acc_train: 0.5752 loss_val: 8.0426 acc_val: 0.5122 time: 2.4422s
Epoch: 0174 loss_train: 5.0285 acc_train: 0.5787 loss_val: 9.4104 acc_val: 0.5122 time: 2.4772s
Epoch: 0175 loss_train: 5.3754 acc_train: 0.5699 loss_val: 8.9871 acc_val: 0.5122 time: 2.6412s
Epoch: 0176 loss_train: 5.0598 acc_train: 0.5734 loss_val: 7.0843 acc_val: 0.5122 time: 2.7392s
Epoch: 0177 loss_train: 5.9642 acc_tr

Epoch: 0252 loss_train: 4.5341 acc_train: 0.5787 loss_val: 15.9744 acc_val: 0.4878 time: 2.5452s
Epoch: 0253 loss_train: 5.0489 acc_train: 0.5857 loss_val: 25.6492 acc_val: 0.5000 time: 2.5742s
Epoch: 0254 loss_train: 4.1314 acc_train: 0.6014 loss_val: 36.7653 acc_val: 0.4390 time: 2.7162s
Epoch: 0255 loss_train: 5.2435 acc_train: 0.5857 loss_val: 38.0584 acc_val: 0.4634 time: 2.7262s
Epoch: 0256 loss_train: 3.7735 acc_train: 0.5892 loss_val: 41.4134 acc_val: 0.4512 time: 2.6802s
Epoch: 0257 loss_train: 4.1466 acc_train: 0.5822 loss_val: 46.9496 acc_val: 0.4756 time: 2.6172s
Epoch: 0258 loss_train: 3.7233 acc_train: 0.5997 loss_val: 51.3347 acc_val: 0.5000 time: 2.5182s
Epoch: 0259 loss_train: 4.0041 acc_train: 0.5944 loss_val: 50.4716 acc_val: 0.5122 time: 2.4092s
Epoch: 0260 loss_train: 4.0500 acc_train: 0.5909 loss_val: 52.5312 acc_val: 0.5000 time: 2.4972s
Epoch: 0261 loss_train: 3.9601 acc_train: 0.5927 loss_val: 57.2779 acc_val: 0.4878 time: 2.7012s
Epoch: 0262 loss_train: 4.0300

Epoch: 0336 loss_train: 2.8389 acc_train: 0.6119 loss_val: 6.5793 acc_val: 0.4756 time: 2.4602s
Epoch: 0337 loss_train: 4.5660 acc_train: 0.5962 loss_val: 10.8023 acc_val: 0.4878 time: 2.5432s
Epoch: 0338 loss_train: 3.5391 acc_train: 0.5647 loss_val: 12.4403 acc_val: 0.4878 time: 2.5832s
Epoch: 0339 loss_train: 4.8560 acc_train: 0.5664 loss_val: 11.3912 acc_val: 0.5122 time: 2.6352s
Epoch: 0340 loss_train: 3.9615 acc_train: 0.5979 loss_val: 8.2792 acc_val: 0.4878 time: 2.7062s
Epoch: 0341 loss_train: 2.6501 acc_train: 0.6084 loss_val: 5.2201 acc_val: 0.5000 time: 2.7832s
Epoch: 0342 loss_train: 7.9220 acc_train: 0.5839 loss_val: 10.1634 acc_val: 0.5122 time: 2.5042s
Epoch: 0343 loss_train: 3.2451 acc_train: 0.5857 loss_val: 13.1245 acc_val: 0.4756 time: 2.4302s
Epoch: 0344 loss_train: 5.3592 acc_train: 0.5682 loss_val: 13.5191 acc_val: 0.4634 time: 2.5122s
Epoch: 0345 loss_train: 5.5659 acc_train: 0.5752 loss_val: 11.5777 acc_val: 0.5122 time: 2.3972s
Epoch: 0346 loss_train: 4.1284 ac

Epoch: 0420 loss_train: 2.3841 acc_train: 0.6014 loss_val: 36.9275 acc_val: 0.5366 time: 2.7792s
Epoch: 0421 loss_train: 2.3652 acc_train: 0.6066 loss_val: 36.6624 acc_val: 0.5122 time: 2.4652s
Epoch: 0422 loss_train: 2.2974 acc_train: 0.6101 loss_val: 34.5044 acc_val: 0.5000 time: 2.4122s
Epoch: 0423 loss_train: 2.2273 acc_train: 0.6084 loss_val: 33.0212 acc_val: 0.4878 time: 2.4942s
Epoch: 0424 loss_train: 2.2136 acc_train: 0.6084 loss_val: 32.1325 acc_val: 0.5000 time: 2.3952s
Epoch: 0425 loss_train: 2.2710 acc_train: 0.6154 loss_val: 29.2694 acc_val: 0.5488 time: 2.5102s
Epoch: 0426 loss_train: 2.2782 acc_train: 0.6101 loss_val: 27.9029 acc_val: 0.5000 time: 2.5812s
Epoch: 0427 loss_train: 2.2735 acc_train: 0.6084 loss_val: 27.7412 acc_val: 0.4878 time: 3.1342s
Epoch: 0428 loss_train: 2.1691 acc_train: 0.6119 loss_val: 27.7285 acc_val: 0.4878 time: 2.7642s
Epoch: 0429 loss_train: 2.3766 acc_train: 0.6136 loss_val: 25.3108 acc_val: 0.5122 time: 2.4582s
Epoch: 0430 loss_train: 2.1515