In [1]:
from utils.ArticlesHandler import ArticlesHandler
from utils import solve, embedding_matrix_2_kNN, get_rate, accuracy, precision, recall, f1_score
from utils import Config
import time
import numpy as np
import scipy.sparse as sp
from postprocessing.SelectLabelsPostprocessor import SelectLabelsPostprocessor
from pygcn.utils import encode_onehot, load_from_features, accuracy
from pygcn.models import GCN
import torch
import torch.nn.functional as F
import torch.optim as optim


Import config file and check some values

In [2]:
config = Config(file='config')

assert (config.num_fake_articles + config.num_real_articles > 
        config.num_nearest_neighbours), "Can't have more neighbours than nodes!"

print("Method of decomposition:", config.method_decomposition_embedding)

Method of decomposition: parafac


Import the articles and decompose the tensor.

In [3]:
print("Loading dataset", config.dataset_name)
articles = ArticlesHandler(config)

print("Performing decomposition...")
C = articles.get_tensor()

Loading dataset Random Poltical News Dataset
Performing decomposition...




Get the labels

In [4]:
config.set("num_unknown_labels", 195)

In [5]:
labels = articles.articles.labels
all_labels = articles.articles.labels_untouched

In [6]:
adj, features, all_labels = load_from_features(C, all_labels, config)
_, _, labels = load_from_features(C, labels, config)

In [7]:
# idx_train = range(150)
# idx_val = range(150, 175)
# idx_test = range(175, 200)
print(labels)
idx_train = np.where(labels)[0]
idx_val = np.where(1 - abs(labels))[0][:90]
idx_test = np.where(1 - abs(labels))[0][90:]

print(len(idx_train))

idx_train = torch.LongTensor(idx_train)
idx_val = torch.LongTensor(idx_val)
idx_test = torch.LongTensor(idx_test)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2,
        2, 0, 0, 2, 0, 0, 2, 0, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1,
        1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1,
        2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1,
        1, 2, 2, 2, 1, 2])
75


In [10]:
cuda = False
hidden = 16
dropout = 0.5
lr = 0.01
weight_decay = 5e-4
fastmode = False
epochs = 430

# Model and optimizer
model = GCN(nfeat=features.shape[1],
            nhid=hidden,
            nclass=labels.max().item() + 1,
            dropout=dropout)
optimizer = optim.Adam(model.parameters(),
                       lr=lr, weight_decay=weight_decay)

if cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()


def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.nll_loss(output[idx_train], all_labels[idx_train])
    acc_train = accuracy(output[idx_train], all_labels[idx_train])
    loss_train.backward()
    optimizer.step()

    if not fastmode:
        # Evaluate validation set performance separately,
        # deactivates dropout during validation run.
        model.eval()
        output = model(features, adj)

    loss_val = F.nll_loss(output[idx_val], all_labels[idx_val])
    acc_val = accuracy(output[idx_val], all_labels[idx_val])
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))


def test():
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], all_labels[idx_test])
    acc_test = accuracy(output[idx_test], all_labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))


# Train model
t_total = time.time()
for epoch in range(epochs):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
test()

Epoch: 0001 loss_train: 1.5240 acc_train: 0.4267 loss_val: 0.9701 acc_val: 0.5333 time: 0.0042s
Epoch: 0002 loss_train: 1.0529 acc_train: 0.5467 loss_val: 0.9428 acc_val: 0.5333 time: 0.0051s
Epoch: 0003 loss_train: 1.1073 acc_train: 0.5733 loss_val: 0.9221 acc_val: 0.5333 time: 0.0066s
Epoch: 0004 loss_train: 0.8769 acc_train: 0.5867 loss_val: 0.9013 acc_val: 0.5333 time: 0.0055s
Epoch: 0005 loss_train: 0.8511 acc_train: 0.5733 loss_val: 0.8857 acc_val: 0.5444 time: 0.0052s
Epoch: 0006 loss_train: 0.8511 acc_train: 0.5600 loss_val: 0.8675 acc_val: 0.5444 time: 0.0076s
Epoch: 0007 loss_train: 0.8288 acc_train: 0.5733 loss_val: 0.8500 acc_val: 0.5556 time: 0.0052s
Epoch: 0008 loss_train: 0.8886 acc_train: 0.6000 loss_val: 0.8333 acc_val: 0.5667 time: 0.0062s
Epoch: 0009 loss_train: 0.9678 acc_train: 0.5467 loss_val: 0.8222 acc_val: 0.5667 time: 0.0052s
Epoch: 0010 loss_train: 0.7250 acc_train: 0.5867 loss_val: 0.8113 acc_val: 0.5667 time: 0.0035s
Epoch: 0011 loss_train: 0.7796 acc_train

Epoch: 0101 loss_train: 0.4237 acc_train: 0.7867 loss_val: 0.5659 acc_val: 0.7667 time: 0.0033s
Epoch: 0102 loss_train: 0.3521 acc_train: 0.8800 loss_val: 0.5653 acc_val: 0.7667 time: 0.0043s
Epoch: 0103 loss_train: 0.4065 acc_train: 0.8267 loss_val: 0.5664 acc_val: 0.7556 time: 0.0039s
Epoch: 0104 loss_train: 0.3808 acc_train: 0.8267 loss_val: 0.5678 acc_val: 0.7556 time: 0.0031s
Epoch: 0105 loss_train: 0.3440 acc_train: 0.8400 loss_val: 0.5692 acc_val: 0.7556 time: 0.0029s
Epoch: 0106 loss_train: 0.4217 acc_train: 0.8133 loss_val: 0.5689 acc_val: 0.7556 time: 0.0028s
Epoch: 0107 loss_train: 0.4270 acc_train: 0.8133 loss_val: 0.5701 acc_val: 0.7556 time: 0.0039s
Epoch: 0108 loss_train: 0.3631 acc_train: 0.8267 loss_val: 0.5706 acc_val: 0.7556 time: 0.0059s
Epoch: 0109 loss_train: 0.3906 acc_train: 0.8267 loss_val: 0.5710 acc_val: 0.7556 time: 0.0059s
Epoch: 0110 loss_train: 0.4640 acc_train: 0.7467 loss_val: 0.5689 acc_val: 0.7556 time: 0.0054s
Epoch: 0111 loss_train: 0.3800 acc_train

Epoch: 0199 loss_train: 0.2934 acc_train: 0.8933 loss_val: 0.5348 acc_val: 0.7667 time: 0.0054s
Epoch: 0200 loss_train: 0.3460 acc_train: 0.8533 loss_val: 0.5354 acc_val: 0.7667 time: 0.0041s
Epoch: 0201 loss_train: 0.3837 acc_train: 0.8267 loss_val: 0.5357 acc_val: 0.7667 time: 0.0050s
Epoch: 0202 loss_train: 0.3708 acc_train: 0.7867 loss_val: 0.5359 acc_val: 0.7667 time: 0.0083s
Epoch: 0203 loss_train: 0.3515 acc_train: 0.8400 loss_val: 0.5346 acc_val: 0.7667 time: 0.0047s
Epoch: 0204 loss_train: 0.3782 acc_train: 0.8133 loss_val: 0.5383 acc_val: 0.7778 time: 0.0077s
Epoch: 0205 loss_train: 0.7697 acc_train: 0.7600 loss_val: 0.5351 acc_val: 0.7778 time: 0.0108s
Epoch: 0206 loss_train: 0.3344 acc_train: 0.8533 loss_val: 0.5371 acc_val: 0.7889 time: 0.0087s
Epoch: 0207 loss_train: 0.3571 acc_train: 0.8667 loss_val: 0.5398 acc_val: 0.7889 time: 0.0117s
Epoch: 0208 loss_train: 0.3344 acc_train: 0.8267 loss_val: 0.5425 acc_val: 0.8000 time: 0.0047s
Epoch: 0209 loss_train: 0.2997 acc_train

Epoch: 0312 loss_train: 0.3141 acc_train: 0.8533 loss_val: 0.5124 acc_val: 0.7889 time: 0.0047s
Epoch: 0313 loss_train: 0.2806 acc_train: 0.8667 loss_val: 0.5121 acc_val: 0.7889 time: 0.0037s
Epoch: 0314 loss_train: 0.3337 acc_train: 0.8667 loss_val: 0.5118 acc_val: 0.7889 time: 0.0046s
Epoch: 0315 loss_train: 0.3038 acc_train: 0.8533 loss_val: 0.5113 acc_val: 0.7889 time: 0.0039s
Epoch: 0316 loss_train: 0.3075 acc_train: 0.8667 loss_val: 0.5104 acc_val: 0.7889 time: 0.0041s
Epoch: 0317 loss_train: 0.2898 acc_train: 0.8800 loss_val: 0.5099 acc_val: 0.7889 time: 0.0029s
Epoch: 0318 loss_train: 0.3197 acc_train: 0.8667 loss_val: 0.5098 acc_val: 0.7889 time: 0.0053s
Epoch: 0319 loss_train: 0.3836 acc_train: 0.8400 loss_val: 0.5089 acc_val: 0.7667 time: 0.0049s
Epoch: 0320 loss_train: 0.2975 acc_train: 0.8800 loss_val: 0.5083 acc_val: 0.7667 time: 0.0037s
Epoch: 0321 loss_train: 0.2877 acc_train: 0.8933 loss_val: 0.5078 acc_val: 0.7667 time: 0.0031s
Epoch: 0322 loss_train: 0.3210 acc_train

Epoch: 0425 loss_train: 0.3305 acc_train: 0.8533 loss_val: 0.5103 acc_val: 0.7556 time: 0.0046s
Epoch: 0426 loss_train: 0.3136 acc_train: 0.8400 loss_val: 0.5111 acc_val: 0.7556 time: 0.0043s
Epoch: 0427 loss_train: 0.3356 acc_train: 0.8267 loss_val: 0.5120 acc_val: 0.7556 time: 0.0052s
Epoch: 0428 loss_train: 0.3063 acc_train: 0.8667 loss_val: 0.5132 acc_val: 0.7778 time: 0.0053s
Epoch: 0429 loss_train: 0.2976 acc_train: 0.8400 loss_val: 0.5146 acc_val: 0.7778 time: 0.0063s
Epoch: 0430 loss_train: 0.3513 acc_train: 0.8267 loss_val: 0.5154 acc_val: 0.7778 time: 0.0037s
Optimization Finished!
Total time elapsed: 2.0845s
Test set results: loss= 0.2259 accuracy= 0.8696
