In [1]:
import numpy as np
import pandas as pd
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
from scipy.sparse.linalg.eigen.arpack import eigsh
import sys
import torch
import torch.nn as nn
from dgl.data import citation_graph as citegrh
from dgl import DGLGraph

from sklearn.metrics import f1_score

from models import DGI, LogReg
import process

In [2]:
def load_cora_data():
    """Load the Cora citation dataset and prepare tensors for training.

    Reads the module-level ``sparse`` flag (set in the configuration cell),
    so that cell must have been executed before this function is called.

    Returns:
        tuple: ``(adj, sp_adj, features, labels, mask)`` where

        * ``adj`` is the output of ``process.normalize_adj`` applied to the
          adjacency matrix with self-loops added. When ``sparse`` is false the
          identity is added once more and the result is densified.
        * ``sp_adj`` is ``adj`` converted by
          ``process.sparse_mx_to_torch_sparse_tensor`` when ``sparse`` is
          true, otherwise ``None``.
        * ``features`` is a ``torch.FloatTensor`` built from ``data.features``.
        * ``labels`` is a ``torch.LongTensor`` of one-hot rows with a leading
          axis of size 1 (hence ``len(labels)`` prints 1).
        * ``mask`` is a ``torch.ByteTensor`` built from ``data.train_mask``.
    """
    data = citegrh.load_cora()
    features = torch.FloatTensor(data.features)
    print('len(features):', len(features))
    # One-hot encode the class ids, then prepend an axis of size 1.
    labels = pd.get_dummies(data.labels).values
    labels = torch.LongTensor(labels[np.newaxis])
    print('len(labels):', len(labels))
    mask = torch.ByteTensor(data.train_mask)
    g = nx.adjacency_matrix(data.graph)
    adj = process.normalize_adj(g + sp.eye(g.shape[0]))
    # Always bind sp_adj so the return statement cannot raise
    # UnboundLocalError when the dense path is selected.
    sp_adj = None
    if sparse:
        sp_adj = process.sparse_mx_to_torch_sparse_tensor(adj)
    else:
        # NOTE(review): the identity is added a second time here, after
        # normalisation; kept as in the original -- confirm it is intended.
        adj = (adj + sp.eye(adj.shape[0])).todense()
    return adj, sp_adj, features, labels, mask

In [8]:
# Which dataset this run targets.
dataset = 'cora'

# Encoder settings.
hid_units = 256
nonlinearity = 'prelu' # special name to separate parameters
drop_prob = 0.0
sparse = True  # use the torch sparse adjacency instead of a dense matrix

# Optimisation settings.
lr = 1e-3
l2_coef = 0.0
batch_size = 1
nb_epochs = 10000
patience = 50  # epochs without improvement before early stopping

In [4]:
# Load the adjacency (scipy and torch-sparse forms), node features, one-hot
# labels and training mask. load_cora_data() reads the global `sparse` flag,
# so the configuration cell must be run before this one.
adj, sp_adj, features, labels, mask = load_cora_data()

len(features): 2708
len(labels): 1


In [5]:
# Sanity check: labels carry a leading axis of size 1, then one one-hot row
# per node, i.e. (1, number of nodes, number of classes).
labels.shape

torch.Size([1, 2708, 7])

In [6]:
# Problem dimensions. `features` is (nodes, feature_dim); `labels` is
# (1, nodes, classes), so the class count lives on the LAST axis --
# labels.shape[1] would return the number of nodes instead.
nb_nodes = features.shape[0]
ft_size = features.shape[1]
nb_classes = labels.shape[-1]

# Fixed node-index splits: first 140 nodes for training, the next 500 for
# validation, and the last 1000 nodes of the graph for testing.
idx_train = torch.LongTensor(range(0, 140))
idx_val = torch.LongTensor(range(140, 640))
idx_test = torch.LongTensor(list(range(nb_nodes - 1000, nb_nodes)))

In [9]:
%%time

model = DGI(ft_size, hid_units, nonlinearity)
optimiser = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_coef)

if torch.cuda.is_available():
    print('Using CUDA')
    model.cuda()
    features = features.cuda()
    if sparse:
        sp_adj = sp_adj.cuda()
    else:
        adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()

b_xent = nn.BCEWithLogitsLoss()
xent = nn.CrossEntropyLoss()
cnt_wait = 0
best = 1e9
best_t = 0

for epoch in range(nb_epochs):
    model.train()
    optimiser.zero_grad()

    idx = np.random.permutation(nb_nodes)
    shuf_fts = features[idx, :] #add first dimension = 1

    lbl_1 = torch.ones(batch_size, nb_nodes)
    lbl_2 = torch.zeros(batch_size, nb_nodes)
    lbl = torch.cat((lbl_1, lbl_2), 1)

    if torch.cuda.is_available():
        shuf_fts = shuf_fts.cuda()
        lbl = lbl.cuda()
    
    logits = model(features, shuf_fts, sp_adj if sparse else adj, sparse, None, None, None) 

    loss = b_xent(logits, lbl)

    print('Loss:', loss)

    if loss < best:
        best = loss
        best_t = epoch
        cnt_wait = 0
        torch.save(model.state_dict(), 'best_dgi_1.pkl')
    else:
        cnt_wait += 1

    if cnt_wait == patience:
        print('Early stopping!')
        break

    loss.backward()
    optimiser.step()

print('Loading {}th epoch'.format(best_t))
model.load_state_dict(torch.load('best_dgi_1.pkl'))

embeds, _ = model.embed(features, sp_adj if sparse else adj, sparse, None)
train_embs = embeds[0, idx_train]
val_embs = embeds[0, idx_val]
test_embs = embeds[0, idx_test]

train_lbls = torch.argmax(labels[0, idx_train], dim=1)
val_lbls = torch.argmax(labels[0, idx_val], dim=1)
test_lbls = torch.argmax(labels[0, idx_test], dim=1)

tot = torch.zeros(1)
tot = tot.cuda()


Micro_f1 = []

for _ in range(50):
    log = LogReg(hid_units, nb_classes)
    opt = torch.optim.Adam(log.parameters(), lr=0.01, weight_decay=0.0)
    log.cuda()

    pat_steps = 0
    best_acc = torch.zeros(1)
    best_acc = best_acc.cuda()
    for _ in range(100):
        log.train()
        opt.zero_grad()

        logits = log(train_embs)
        loss = xent(logits, train_lbls)
        
        loss.backward()
        opt.step()

    logits = log(test_embs)
    preds = torch.argmax(logits, dim=1)

    micro_f1 = f1_score(test_lbls.data.cpu().numpy(), preds.data.cpu().numpy(), average="micro")
    Micro_f1.append(micro_f1)
    print(micro_f1)
    tot += micro_f1

print('Average micro_f1:', tot / 50)





Using CUDA
Loss: tensor(0.6932, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6921, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6909, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6894, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6875, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6853, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6828, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6799, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6771, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6732, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6690, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6647, device='cuda:0', grad_

Loss: tensor(0.1500, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1594, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1506, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1527, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1483, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1449, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1438, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1579, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1445, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1504, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1490, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1431, device='cuda:0', grad_fn=<BinaryC

Loss: tensor(0.0778, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0856, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0809, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0838, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0864, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0964, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0823, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0785, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0828, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0952, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0882, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0825, device='cuda:0', grad_fn=<BinaryC

Loss: tensor(0.0633, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0570, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0609, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0606, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0670, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0660, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0662, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0691, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0684, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0731, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0616, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0661, device='cuda:0', grad_fn=<BinaryC

Loss: tensor(0.0486, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0521, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0586, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0492, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0558, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0485, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0458, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0499, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0596, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0515, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0525, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0526, device='cuda:0', grad_fn=<BinaryC