In [28]:
from model import CCA_SSG, LogReg
from aug2 import gae_aug
from dataset import load

import numpy as np
import torch as th
import torch.nn as nn

from GAE import GraphAutoencoder
import torch.nn.functional as F

import warnings

warnings.filterwarnings('ignore')

from sklearn.metrics import roc_auc_score, average_precision_score
from util import mask_test_edges_dgl
import dgl
import torch.nn.functional as F
import pdb

In [29]:
# parser.add_argument('--gpu', type=int, default=0, help='GPU index.')
# parser.add_argument('--use_mlp', action='store_true', default=False, help='Use MLP instead of GNN')

In [30]:
def compute_loss_para(adj):
    pos_weight = ((adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum())
    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
    weight_mask = adj.view(-1) == 1
    weight_tensor = th.ones(weight_mask.size(0))
    weight_tensor[weight_mask] = pos_weight
    return weight_tensor, norm

In [31]:
graph, feat, labels, num_class, train_idx, val_idx, test_idx = load('cora')
adj = graph.adj().to_dense()

weight_tensor, norm = compute_loss_para(adj)
    
in_dim = feat.shape[1]
z_dim = 16 
h_dim = [32]

sampler = GraphAutoencoder([in_dim, z_dim, h_dim])

loss_fn = F.binary_cross_entropy
optimizer = th.optim.Adam(sampler.parameters(), lr=1e-2) # , weight_decay=5e-4

for epoch in range(200):
    sampler.train()
    optimizer.zero_grad()
    
    reconstruction = sampler(graph, feat)

    loss = norm*loss_fn(reconstruction.view(-1), adj.view(-1), weight = weight_tensor)

    loss.backward()
    optimizer.step()

    print('Epoch={:03d}, loss={:.4f}'.format(epoch, loss.item()))

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Epoch=000, loss=0.6931
Epoch=001, loss=0.6924
Epoch=002, loss=0.6904
Epoch=003, loss=0.6869
Epoch=004, loss=0.6828
Epoch=005, loss=0.6799
Epoch=006, loss=0.6788
Epoch=007, loss=0.6765
Epoch=008, loss=0.6723
Epoch=009, loss=0.6687
Epoch=010, loss=0.6657
Epoch=011, loss=0.6623
Epoch=012, loss=0.6580
Epoch=013, loss=0.6528
Epoch=014, loss=0.6476
Epoch=015, loss=0.6424
Epoch=016, loss=0.6367
Epoch=017, loss=0.6298
Epoch=018, loss=0.6221
Epoch=019, loss=0.6142
Epoch=020, loss=0.6058
Epoch=021, loss=0.5966
Epoch=022, loss=0.5870
Epoch=023, loss=0.5776
Epoch=024, loss=0.5692
Epoch=025, loss=0.5619
Epoch=026, loss=0.5563
Epoch=027, loss=0.5525
Epoch=028, loss=0.5501
Epoch=029, loss=0.5478
Epoch=030, loss=0.5450
Epoch=031, loss=0.5409
Epoch=032, loss=0.5357
Epoch=033, loss=0.5301
Epoch=034, loss=0.5248
Epoch=035, lo

In [32]:
reconstruction
rec = reconstruction.detach()
rec

tensor([[0.9163, 0.5949, 0.8117,  ..., 0.6599, 0.8985, 0.8761],
        [0.5949, 0.8745, 0.8659,  ..., 0.4400, 0.6388, 0.5919],
        [0.8117, 0.8659, 0.9165,  ..., 0.4713, 0.8278, 0.7814],
        ...,
        [0.6599, 0.4400, 0.4713,  ..., 0.7444, 0.5994, 0.5999],
        [0.8985, 0.6388, 0.8278,  ..., 0.5994, 0.8950, 0.8712],
        [0.8761, 0.5919, 0.7814,  ..., 0.5999, 0.8712, 0.8485]])

In [33]:
def get_scores(edges_pos, edges_neg, adj_rec):
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    adj_rec = adj_rec.cpu()
    # Predict on test set of edges
    preds = []
    for e in edges_pos:
        preds.append(sigmoid(adj_rec[e[0], e[1]].item()))

    preds_neg = []
    for e in edges_neg:
        preds_neg.append(sigmoid(adj_rec[e[0], e[1]].data))

    preds_all = np.hstack([preds, preds_neg])
    labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
    roc_score = roc_auc_score(labels_all, preds_all)
    ap_score = average_precision_score(labels_all, preds_all)

    return roc_score, ap_score

In [34]:
graph, feat, labels, num_class, train_idx, val_idx, test_idx = load('cora')
adj_orig = graph.adj().to_dense()
train_edge_idx, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges_dgl(graph, adj_orig)

# create train graph
train_edge_idx = th.tensor(train_edge_idx)
train_graph = dgl.edge_subgraph(graph, train_edge_idx, preserve_nodes=True)

# add self loop
#train_graph = dgl.remove_self_loop(train_graph)
#train_graph = dgl.add_self_loop(train_graph)
#n_edges = train_graph.number_of_edges()
#adj = train_graph.adjacency_matrix().to_dense()

# normalization
#degs = train_graph.in_degrees().float()
#norm = th.pow(degs, -0.5)
#norm[th.isinf(norm)] = 0
#train_graph.ndata['norm'] = norm.unsqueeze(1)

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [38]:
in_dim = feat.shape[1]

hid_dim = 512
out_dim = 512
n_layers = 2

model = CCA_SSG(in_dim, hid_dim, out_dim, n_layers, use_mlp=False)
lr1 = 1e-3 
wd1 = 0
optimizer = th.optim.Adam(model.parameters(), lr=lr1, weight_decay=wd1)

N = graph.number_of_nodes()

for epoch in range(50):
    model.train()
    optimizer.zero_grad()
    
    dfr = 0.2
    der = 0.2

    #graph1, feat1 = random_aug(graph, feat, 0.2, 0.2)
    #graph2, feat2 = random_aug(graph, feat, 0.2, 0.2)
    
    graph1, graph2, feat1, feat2 = gae_aug(rec, graph, feat, 0.20, 0.20)
    graph1 = graph1.remove_self_loop().add_self_loop()
    graph2 = graph2.remove_self_loop().add_self_loop()

    graph1 = graph1.add_self_loop()
    graph2 = graph2.add_self_loop()

    z1, z2 = model(graph1, feat1, graph2, feat2)

    c = th.mm(z1.T, z2)
    c1 = th.mm(z1.T, z1)
    c2 = th.mm(z2.T, z2)

    c = c / N
    c1 = c1 / N
    c2 = c2 / N

    loss_inv = -th.diagonal(c).sum()
    iden = th.tensor(np.eye(c.shape[0]))
    loss_dec1 = (iden - c1).pow(2).sum()
    loss_dec2 = (iden - c2).pow(2).sum()
    
    lambd = 1e-3
    
    loss = loss_inv + lambd * (loss_dec1 + loss_dec2)

    loss.backward()
    optimizer.step()

    print('Epoch={:03d}, loss={:.4f}'.format(epoch, loss.item()))


Epoch=000, loss=-307.7884
Epoch=001, loss=-357.1543
Epoch=002, loss=-363.2691
Epoch=003, loss=-364.5531
Epoch=004, loss=-377.0336
Epoch=005, loss=-388.0518
Epoch=006, loss=-398.3110
Epoch=007, loss=-391.5375
Epoch=008, loss=-411.2771
Epoch=009, loss=-422.5592
Epoch=010, loss=-409.8762
Epoch=011, loss=-415.7951
Epoch=012, loss=-410.1704
Epoch=013, loss=-420.2153
Epoch=014, loss=-419.0777
Epoch=015, loss=-429.3650
Epoch=016, loss=-420.8496
Epoch=017, loss=-418.1948
Epoch=018, loss=-424.1335
Epoch=019, loss=-428.0111
Epoch=020, loss=-429.7409
Epoch=021, loss=-436.2377
Epoch=022, loss=-430.6186
Epoch=023, loss=-429.4095
Epoch=024, loss=-435.1780
Epoch=025, loss=-441.9705
Epoch=026, loss=-437.6365
Epoch=027, loss=-434.6889
Epoch=028, loss=-442.6764
Epoch=029, loss=-439.3217
Epoch=030, loss=-438.4502
Epoch=031, loss=-448.4251
Epoch=032, loss=-439.7124
Epoch=033, loss=-445.2492
Epoch=034, loss=-446.3977
Epoch=035, loss=-448.8382
Epoch=036, loss=-449.9593
Epoch=037, loss=-449.0884
Epoch=038, l

In [39]:
print("=== Evaluation ===")
graph = train_graph.remove_self_loop().add_self_loop()
adj = graph.adj().to_dense()

weight_tensor, norm = compute_loss_para(adj)

embeds = model.get_embedding(graph, feat)

loss_fn = F.binary_cross_entropy
output_activation = nn.Sigmoid()
logreg = LogReg(embeds.shape[1], adj.shape[1])

logits_temp = logreg(embeds)
logits = output_activation(th.mm(logits_temp, logits_temp.t()))

val_roc, val_ap = get_scores(val_edges, val_edges_false, logits)
test_roc, test_ap = get_scores(test_edges, test_edges_false, logits)
print(test_roc, test_ap)

=== Evaluation ===
0.9554196895846904 0.9548354451996761


In [40]:
print("=== Evaluation ===")
graph = train_graph.remove_self_loop().add_self_loop()
adj = graph.adj().to_dense()

weight_tensor, norm = compute_loss_para(adj)

embeds = model.get_embedding(graph, feat)

''' Linear Evaluation '''
logreg = LogReg(embeds.shape[1], adj.shape[1])
lr2 = 1e-2
wd2 = 1e-4
opt = th.optim.Adam(logreg.parameters(), lr=lr2, weight_decay=wd2)

loss_fn = F.binary_cross_entropy
output_activation = nn.Sigmoid()

best_val_roc = 0
eval_roc = 0
best_val_ap = 0
eval_ap = 0
    
for epoch in range(2000):
    logreg.train()
    opt.zero_grad()
    logits_temp = logreg(embeds)
    logits = output_activation(th.mm(logits_temp, logits_temp.t()))
    
    # pdb.set_trace()
    loss = norm*loss_fn(logits.view(-1), adj.view(-1), weight = weight_tensor)

    loss.backward()
    opt.step()

    logreg.eval()
    with th.no_grad():
        val_roc, val_ap = get_scores(val_edges, val_edges_false, logits)
        test_roc, test_ap = get_scores(test_edges, test_edges_false, logits)

        if val_roc >= best_val_roc:
            best_val_roc = val_roc
            if test_roc > eval_roc:
                eval_roc = test_roc
        
        if val_ap >= best_val_ap:
            best_val_ap = val_ap
            if test_ap > eval_ap:
                eval_ap = test_ap

    print('Epoch:{}, val_ap:{:.4f}, val_roc:{:4f}, test_ap:{:4f}, test_roc:{:4f}'.format(epoch, val_ap, val_roc, test_ap, test_roc))
    print('Linear evaluation AP:{:.4f}'.format(eval_ap))
    print('Linear evaluation ROC:{:.4f}'.format(eval_roc))

=== Evaluation ===
Epoch:0, val_ap:0.9559, val_roc:0.956155, test_ap:0.952427, test_roc:0.953064
Linear evaluation AP:0.9524
Linear evaluation ROC:0.9531
Epoch:1, val_ap:0.7976, val_roc:0.768706, test_ap:0.781770, test_roc:0.765575
Linear evaluation AP:0.9524
Linear evaluation ROC:0.9531
Epoch:2, val_ap:0.7846, val_roc:0.751272, test_ap:0.771366, test_roc:0.751892
Linear evaluation AP:0.9524
Linear evaluation ROC:0.9531
Epoch:3, val_ap:0.7962, val_roc:0.765318, test_ap:0.781534, test_roc:0.763747
Linear evaluation AP:0.9524
Linear evaluation ROC:0.9531
Epoch:4, val_ap:0.8258, val_roc:0.800593, test_ap:0.807046, test_roc:0.794086
Linear evaluation AP:0.9524
Linear evaluation ROC:0.9531
Epoch:5, val_ap:0.8840, val_roc:0.868310, test_ap:0.863820, test_roc:0.859137
Linear evaluation AP:0.9524
Linear evaluation ROC:0.9531
Epoch:6, val_ap:0.9365, val_roc:0.930637, test_ap:0.918339, test_roc:0.916968
Linear evaluation AP:0.9524
Linear evaluation ROC:0.9531
Epoch:7, val_ap:0.9451, val_roc:0.94

Epoch:61, val_ap:0.9609, val_roc:0.960206, test_ap:0.954199, test_roc:0.957630
Linear evaluation AP:0.9541
Linear evaluation ROC:0.9576
Epoch:62, val_ap:0.9608, val_roc:0.960141, test_ap:0.954131, test_roc:0.957597
Linear evaluation AP:0.9541
Linear evaluation ROC:0.9576
Epoch:63, val_ap:0.9609, val_roc:0.960256, test_ap:0.954109, test_roc:0.957614
Linear evaluation AP:0.9541
Linear evaluation ROC:0.9576
Epoch:64, val_ap:0.9609, val_roc:0.960292, test_ap:0.954038, test_roc:0.957579
Linear evaluation AP:0.9541
Linear evaluation ROC:0.9576
Epoch:65, val_ap:0.9610, val_roc:0.960332, test_ap:0.954015, test_roc:0.957572
Linear evaluation AP:0.9541
Linear evaluation ROC:0.9576
Epoch:66, val_ap:0.9610, val_roc:0.960379, test_ap:0.953989, test_roc:0.957562
Linear evaluation AP:0.9541
Linear evaluation ROC:0.9576
Epoch:67, val_ap:0.9610, val_roc:0.960404, test_ap:0.953907, test_roc:0.957518
Linear evaluation AP:0.9541
Linear evaluation ROC:0.9576
Epoch:68, val_ap:0.9610, val_roc:0.960418, test_

KeyboardInterrupt: 