In [1]:
import numpy as np
import pandas as pd
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
from scipy.sparse.linalg.eigen.arpack import eigsh
import sys
import torch
import torch.nn as nn
from dgl.data import citation_graph as citegrh
from dgl import DGLGraph

from sklearn.metrics import f1_score

from models import DGI, LogReg
import process

In [2]:
def load_cora_data():
    """Load the Cora citation graph through DGL and build the notebook's inputs.

    The adjacency representation is chosen by the module-level ``sparse`` flag
    (defined in the training-params cell), so that cell must have been run
    before this function is called.

    Returns:
        adj: result of ``process.normalize_adj`` applied to the adjacency
            matrix plus identity. Left as returned by ``normalize_adj`` when
            ``sparse`` is true; otherwise densified (with identity added once
            more, as in the original code) and converted to a 2-D float
            ``torch.Tensor``.
        sp_adj: torch sparse tensor built from ``adj`` when ``sparse`` is
            true, otherwise ``None``.
        features: ``torch.FloatTensor`` built from ``data.features``.
        labels: one-hot ``torch.LongTensor`` with a leading axis of size 1.
        mask: ``torch.ByteTensor`` built from ``data.train_mask``.
    """
    data = citegrh.load_cora()
    features = torch.FloatTensor(data.features)
    print('len(features):', len(features))
    # One-hot encode, then prepend an axis of size 1 (so len(labels) is 1).
    labels = pd.get_dummies(data.labels).values
    labels = torch.LongTensor(labels[np.newaxis])
    print('len(labels):', len(labels))
    mask = torch.ByteTensor(data.train_mask)
    g = nx.adjacency_matrix(data.graph)
    adj = process.normalize_adj(g + sp.eye(g.shape[0]))

    # Always bind sp_adj: previously the dense path reached `return` with the
    # name unassigned and raised UnboundLocalError.
    sp_adj = None
    if sparse:
        sp_adj = process.sparse_mx_to_torch_sparse_tensor(adj)
    else:
        dense_adj = (adj + sp.eye(adj.shape[0])).todense()
        # The training cell calls `adj.cuda()` on this path, so hand back a
        # tensor rather than a NumPy matrix.
        # NOTE(review): kept 2-D to match the 2-D `features`; confirm that the
        # DGI model does not expect a leading batch axis on dense adjacency.
        adj = torch.from_numpy(np.asarray(dense_adj)).float()
    return adj, sp_adj, features, labels, mask

In [16]:
# ---- experiment configuration ----
dataset = 'cora'

# model-side settings
hid_units = 256
nonlinearity = 'prelu' # special name to separate parameters
drop_prob = 0.0
sparse = True  # selects the torch sparse adjacency (sp_adj) over the dense one

# optimisation settings
lr = 1e-3
l2_coef = 0.0
batch_size = 1
nb_epochs = 10000
patience = 50  # epochs without a new best loss before early stopping

In [17]:
# Build the graph inputs. load_cora_data() reads the module-level `sparse`
# flag, so the training-params cell has to be executed before this one.
adj, sp_adj, features, labels, mask = load_cora_data()

len(features): 2708
len(labels): 1


In [18]:
# Sanity check: labels is one-hot with a leading axis added via np.newaxis
# in load_cora_data, so the class count sits on the last axis.
labels.shape

torch.Size([1, 2708, 7])

In [19]:
# Problem dimensions, read off the loaded tensors.
nb_nodes = features.shape[0]
ft_size = features.shape[1]
# `labels` is one-hot with a leading axis (its shape is [1, nodes, classes]),
# so the class count is the LAST axis. Using shape[1] here yields the node
# count and sizes the downstream classifier with one output per node.
nb_classes = labels.shape[-1]

# Fixed node-index splits: first 140 for training, next 500 for validation,
# and the final 1000 nodes for testing.
idx_train = torch.LongTensor(range(0, 140))
idx_val = torch.LongTensor(range(140, 640))
# Anchor the test range on the node count explicitly; it previously relied on
# nb_classes accidentally holding that value.
idx_test = torch.LongTensor(list(range(nb_nodes - 1000, nb_nodes)))

In [20]:
%%time

# Encoder and the optimiser that trains it.
model = DGI(ft_size, hid_units, nonlinearity)
optimiser = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_coef)

# When a GPU is present, move the model and every tensor it consumes there.
if torch.cuda.is_available():
    print('Using CUDA')
    model.cuda()
    features, labels = features.cuda(), labels.cuda()
    # Only the adjacency representation actually in use is transferred.
    if not sparse:
        adj = adj.cuda()
    else:
        sp_adj = sp_adj.cuda()
    idx_train, idx_val, idx_test = (
        split.cuda() for split in (idx_train, idx_val, idx_test)
    )

# Loss functions: binary cross-entropy for the DGI objective, categorical
# cross-entropy for the downstream classifier.
xent = nn.CrossEntropyLoss()
b_xent = nn.BCEWithLogitsLoss()

# Early-stopping state: best loss so far, its epoch, epochs since improvement.
best, best_t, cnt_wait = 1e9, 0, 0

# Targets for the binary objective: ones for the first nb_nodes logits, zeros
# for the remaining nb_nodes. They never change, so build them once, directly
# on the device the features live on, instead of once per epoch.
# NOTE(review): presumably the first half of the logits scores the real
# features and the second half the shuffled ones — confirm in models.DGI.
lbl_1 = torch.ones(batch_size, nb_nodes)
lbl_2 = torch.zeros(batch_size, nb_nodes)
lbl = torch.cat((lbl_1, lbl_2), 1).to(features.device)

for epoch in range(nb_epochs):
    model.train()
    optimiser.zero_grad()

    # Corrupted input: the same feature rows in a random node order. Indexing
    # keeps the result on the same device as `features`.
    idx = np.random.permutation(nb_nodes)
    shuf_fts = features[idx, :]

    logits = model(features, shuf_fts, sp_adj if sparse else adj, sparse, None, None, None)

    loss = b_xent(logits, lbl)

    # Track and log the plain Python float: keeping the tensor in `best`
    # would hold on to that epoch's autograd graph, and printing it dumps
    # device/grad_fn noise on every line.
    loss_value = loss.item()
    print('Loss:', loss_value)

    if loss_value < best:
        best = loss_value
        best_t = epoch
        cnt_wait = 0
        # Checkpoint the weights that produced the best loss so far.
        torch.save(model.state_dict(), 'best_dgi.pkl')
    else:
        cnt_wait += 1

    # Stop after `patience` consecutive epochs without a new best loss.
    if cnt_wait == patience:
        print('Early stopping!')
        break

    loss.backward()
    optimiser.step()

# Restore the checkpoint written at the best training epoch.
print('Loading {}th epoch'.format(best_t))
model.load_state_dict(torch.load('best_dgi.pkl'))

# Embed the whole graph with the restored encoder; the second return value
# is not used here.
embeds, _ = model.embed(features, sp_adj if sparse else adj, sparse, None)

# Slice embeddings for each split (index 0 selects the leading axis).
train_embs, val_embs, test_embs = (
    embeds[0, split] for split in (idx_train, idx_val, idx_test)
)

# Turn the one-hot label rows of each split into integer class ids.
train_lbls, val_lbls, test_lbls = (
    torch.argmax(labels[0, split], dim=1)
    for split in (idx_train, idx_val, idx_test)
)

# Evaluate the frozen embeddings with a freshly initialised logistic-regression
# classifier, repeated several times, and report micro-F1 on test and val.

# Run on whatever device the embeddings already live on, so this cell also
# works on CPU-only machines (the unconditional .cuda() calls did not).
device = train_embs.device

# Not used in this cell; kept as two independent accumulators in case a later
# cell expects them (previously tot1 was just an alias of tot).
tot = torch.zeros(1, device=device)
tot1 = torch.zeros(1, device=device)

n_runs = 50           # independent classifier initialisations
n_probe_epochs = 100  # full-batch training steps per classifier

Micro_f1 = []
Micro_f1_val = []

# Named loop variables: `_` is IPython's "last output" and should not be
# clobbered by loop counters.
for run in range(n_runs):
    log = LogReg(hid_units, nb_classes)
    opt = torch.optim.Adam(log.parameters(), lr=0.01, weight_decay=0.0)
    log.to(device)

    for step in range(n_probe_epochs):
        log.train()
        opt.zero_grad()

        logits = log(train_embs)
        loss = xent(logits, train_lbls)

        loss.backward()
        opt.step()

    # Inference only: switch to eval mode and skip autograd bookkeeping.
    log.eval()
    with torch.no_grad():
        logits = log(test_embs)
        preds = torch.argmax(logits, dim=1)

        logits1 = log(val_embs)
        preds1 = torch.argmax(logits1, dim=1)

    micro_f1 = f1_score(test_lbls.cpu().numpy(), preds.cpu().numpy(), average="micro")
    Micro_f1.append(micro_f1)
    print(micro_f1)

    micro_f1_val = f1_score(val_lbls.cpu().numpy(), preds1.cpu().numpy(), average="micro")
    Micro_f1_val.append(micro_f1_val)
    print(micro_f1_val)

print('Average micro_f1_test:', np.mean(Micro_f1))
print('Average micro_f1_val:', np.mean(Micro_f1_val))


Using CUDA
Loss: tensor(0.6931, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6919, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6907, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6892, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6872, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6849, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6820, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6794, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6763, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6729, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6678, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.6634, device='cuda:0', grad_

Loss: tensor(0.1491, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1551, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1513, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1546, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1432, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1420, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1496, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1408, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1514, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1447, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1462, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.1352, device='cuda:0', grad_fn=<BinaryC

Loss: tensor(0.0840, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0950, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0846, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0934, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0882, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0908, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0906, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0820, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0914, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0928, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0852, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0911, device='cuda:0', grad_fn=<BinaryC

Loss: tensor(0.0646, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0777, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0690, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0658, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0658, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0688, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0734, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0679, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0645, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0636, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0614, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0608, device='cuda:0', grad_fn=<BinaryC

Loss: tensor(0.0555, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0559, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0581, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0554, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0510, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0511, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0524, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0536, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0515, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0527, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0515, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0604, device='cuda:0', grad_fn=<BinaryC

Loss: tensor(0.0429, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0471, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0414, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0410, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0407, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0427, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0423, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0463, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0394, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0395, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0452, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
Loss: tensor(0.0409, device='cuda:0', grad_fn=<BinaryC