In [1]:
from utils.ArticlesHandler import ArticlesHandler
# NOTE: `accuracy` is imported here from `utils` and again from `pygcn.utils`
# further down; the later import rebinds the name, so every `accuracy(...)`
# call in this notebook resolves to the `pygcn.utils` version.
from utils import solve, embedding_matrix_2_kNN, get_rate, accuracy, precision, recall, f1_score
from utils import Config
import time
import numpy as np
import scipy.sparse as sp
from postprocessing.SelectLabelsPostprocessor import SelectLabelsPostprocessor
from pygcn.utils import encode_onehot, load_from_features, accuracy
from pygcn.models import GCN
import torch
import torch.nn.functional as F
import torch.optim as optim


Load the config file and sanity-check some of its values.

In [2]:
# Build the run configuration from the file named 'config'.
config = Config(file='config')

# The requested neighbour count must stay strictly below the total number of
# articles (fake + real), otherwise the check below fails.
assert (config.num_nearest_neighbours <
        config.num_fake_articles + config.num_real_articles), "Can't have more neighbours than nodes!"

print("Method of decomposition:", config.method_decomposition_embedding)

Method of decomposition: GloVe


Import the articles and decompose the tensor.

In [3]:
# Construct the article handler from the current config; the dataset name is
# printed first so the log shows which dataset is being loaded.
print("Loading dataset", config.dataset_name)
articles = ArticlesHandler(config)

# Ask the handler for the tensor C that the later cells pass to
# load_from_features(). The recorded output shows the GloVe loading messages
# appearing after the message printed here, i.e. during get_tensor().
print("Performing decomposition...")
C = articles.get_tensor()

Loading dataset Random Poltical News Dataset
Performing decomposition...
Loading Glove Model
Done. 400000  words loaded!


Get the labels, build the graph inputs, and split the nodes into train/validation/test indices.

In [4]:
# Override the config entry "num_unknown_labels" with 195.
# NOTE(review): this runs after ArticlesHandler(config) was constructed in an
# earlier cell -- confirm the handler actually picks up the new value.
config.set("num_unknown_labels", 195)

In [5]:
# Pull both label vectors off the handler in one step:
#   labels     -> articles.articles.labels
#   all_labels -> articles.articles.labels_untouched
labels, all_labels = (
    articles.articles.labels,
    articles.articles.labels_untouched,
)

In [6]:
# Build the graph inputs (adjacency, features) and the converted label vectors.
#
# The raw label vectors are read straight from the handler instead of from the
# `labels` / `all_labels` names that this cell itself rebinds. That keeps the
# cell idempotent: re-running it always converts the raw labels, rather than
# feeding already-converted values back into load_from_features().
adj, features, all_labels = load_from_features(
    C, articles.articles.labels_untouched, config)
# Only the converted labels are needed from the second call; the adjacency and
# features it returns are discarded.
_, _, labels = load_from_features(C, articles.articles.labels, config)

In [7]:
# Split node indices into train / validation / test sets.
#
# In `labels`, 0 marks a node whose label is unknown and any non-zero value is
# a known class (the printed tensor contains the values 0, 1 and 2).
# Known nodes are used for training; unknown nodes are divided between
# validation and test.
print(labels)
labels_np = np.asarray(labels)

# The previous mask `1 - abs(labels)` is non-zero for label 2 (1 - 2 = -1), so
# training nodes labelled 2 leaked into the validation/test pool. Comparing
# against 0 selects exactly the unknown nodes.
known_idx = np.flatnonzero(labels_np != 0)
unknown_idx = np.flatnonzero(labels_np == 0)

# Number of unknown nodes reserved for validation; the rest form the test set.
num_val = 90
idx_train = known_idx
idx_val = unknown_idx[:num_val]
idx_test = unknown_idx[num_val:]

# The training set must not overlap the evaluation sets.
assert np.intersect1d(idx_train, unknown_idx).size == 0, "train/eval index overlap"

print(len(idx_train))

idx_train = torch.LongTensor(idx_train)
idx_val = torch.LongTensor(idx_val)
idx_test = torch.LongTensor(idx_test)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 2, 2,
        2, 2, 2, 1, 1, 1])
10


In [10]:
# Hyperparameters and run options.
cuda = False          # move model and tensors to the GPU when True
hidden = 16           # passed to GCN as nhid
dropout = 0.5         # passed to GCN as dropout
lr = 0.01             # Adam learning rate
weight_decay = 5e-4   # Adam weight decay
fastmode = False      # when True, train() skips the separate eval-mode forward pass
epochs = 500          # number of train() calls in the training loop
seed = 42             # RNG seed so that a re-run reproduces the same numbers

# Seed the RNGs before the model is built so that weight initialisation and
# dropout are repeatable across "Restart & Run All".
np.random.seed(seed)
torch.manual_seed(seed)

# Model and optimizer
# NOTE(review): nclass is derived from `labels`, while the losses below are
# computed against `all_labels` -- confirm both use the same class range.
model = GCN(nfeat=features.shape[1],
            nhid=hidden,
            nclass=labels.max().item() + 1,
            dropout=dropout)
optimizer = optim.Adam(model.parameters(),
                       lr=lr, weight_decay=weight_decay)

if cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    # all_labels is the target of every loss/accuracy call in train() and
    # test(); it must live on the same device as the model output.
    all_labels = all_labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()


def train(epoch):
    """Run one optimisation step and print training/validation metrics.

    Uses the notebook-level ``model``, ``optimizer``, ``features``, ``adj``,
    ``all_labels``, ``idx_train``, ``idx_val`` and ``fastmode``.

    Args:
        epoch: Zero-based epoch index; it is printed as ``epoch + 1``.
    """
    t = time.time()

    # Training step on the nodes in idx_train.
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = F.nll_loss(output[idx_train], all_labels[idx_train])
    acc_train = accuracy(output[idx_train], all_labels[idx_train])
    loss_train.backward()
    optimizer.step()

    # Validation metrics are never back-propagated, so compute them without
    # gradient tracking.
    with torch.no_grad():
        if not fastmode:
            # Evaluate validation set performance separately,
            # deactivates dropout during validation run.
            model.eval()
            output = model(features, adj)

        # In fastmode the training-mode output from above is reused here.
        loss_val = F.nll_loss(output[idx_val], all_labels[idx_val])
        acc_val = accuracy(output[idx_val], all_labels[idx_val])

    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))


def test():
    """Evaluate the model on the nodes in ``idx_test`` and print loss and accuracy.

    Uses the notebook-level ``model``, ``features``, ``adj``, ``all_labels``
    and ``idx_test``. The model is switched to eval mode and left in it.
    """
    model.eval()
    # Pure evaluation: no gradients are needed, so skip building the graph.
    with torch.no_grad():
        output = model(features, adj)
        loss_test = F.nll_loss(output[idx_test], all_labels[idx_test])
        acc_test = accuracy(output[idx_test], all_labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))


# Train model
# Call train() once per epoch and time the whole loop; each call prints its
# own per-epoch metrics line.
t_total = time.time()
for epoch in range(epochs):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
# One final evaluation on the test indices after all epochs have run.
test()

Epoch: 0001 loss_train: 0.9865 acc_train: 0.5000 loss_val: 0.9175 acc_val: 0.5556 time: 0.0048s
Epoch: 0002 loss_train: 0.9603 acc_train: 0.5000 loss_val: 0.8732 acc_val: 0.5333 time: 0.0050s
Epoch: 0003 loss_train: 0.8525 acc_train: 0.6000 loss_val: 0.8406 acc_val: 0.5111 time: 0.0043s
Epoch: 0004 loss_train: 0.8005 acc_train: 0.7000 loss_val: 0.8147 acc_val: 0.4556 time: 0.0070s
Epoch: 0005 loss_train: 0.6382 acc_train: 0.9000 loss_val: 0.7976 acc_val: 0.4444 time: 0.0065s
Epoch: 0006 loss_train: 0.7644 acc_train: 0.4000 loss_val: 0.7790 acc_val: 0.4444 time: 0.0067s
Epoch: 0007 loss_train: 0.8114 acc_train: 0.4000 loss_val: 0.7519 acc_val: 0.4444 time: 0.0039s
Epoch: 0008 loss_train: 0.6737 acc_train: 0.6000 loss_val: 0.7263 acc_val: 0.5000 time: 0.0073s
Epoch: 0009 loss_train: 0.8677 acc_train: 0.4000 loss_val: 0.7116 acc_val: 0.5333 time: 0.0055s
Epoch: 0010 loss_train: 0.7122 acc_train: 0.7000 loss_val: 0.7050 acc_val: 0.5333 time: 0.0037s
Epoch: 0011 loss_train: 0.7205 acc_train

Epoch: 0109 loss_train: 0.4445 acc_train: 0.8000 loss_val: 0.6723 acc_val: 0.7000 time: 0.0065s
Epoch: 0110 loss_train: 0.4212 acc_train: 0.7000 loss_val: 0.6685 acc_val: 0.7111 time: 0.0036s
Epoch: 0111 loss_train: 0.4622 acc_train: 0.7000 loss_val: 0.6688 acc_val: 0.7222 time: 0.0035s
Epoch: 0112 loss_train: 0.3875 acc_train: 0.8000 loss_val: 0.6734 acc_val: 0.6889 time: 0.0055s
Epoch: 0113 loss_train: 0.4406 acc_train: 0.8000 loss_val: 0.6862 acc_val: 0.6889 time: 0.0037s
Epoch: 0114 loss_train: 0.4060 acc_train: 0.6000 loss_val: 0.7129 acc_val: 0.6444 time: 0.0041s
Epoch: 0115 loss_train: 0.3611 acc_train: 0.7000 loss_val: 0.7379 acc_val: 0.6000 time: 0.0060s
Epoch: 0116 loss_train: 0.4826 acc_train: 0.8000 loss_val: 0.7373 acc_val: 0.6111 time: 0.0053s
Epoch: 0117 loss_train: 0.4164 acc_train: 0.9000 loss_val: 0.7196 acc_val: 0.6444 time: 0.0046s
Epoch: 0118 loss_train: 0.4488 acc_train: 0.6000 loss_val: 0.6898 acc_val: 0.6778 time: 0.0038s
Epoch: 0119 loss_train: 0.4247 acc_train

Epoch: 0213 loss_train: 0.2959 acc_train: 0.9000 loss_val: 0.7775 acc_val: 0.6333 time: 0.0090s
Epoch: 0214 loss_train: 0.2901 acc_train: 0.9000 loss_val: 0.7736 acc_val: 0.6889 time: 0.0037s
Epoch: 0215 loss_train: 0.3096 acc_train: 0.8000 loss_val: 0.7765 acc_val: 0.6444 time: 0.0029s
Epoch: 0216 loss_train: 0.3209 acc_train: 0.9000 loss_val: 0.7914 acc_val: 0.6556 time: 0.0032s
Epoch: 0217 loss_train: 0.2773 acc_train: 0.9000 loss_val: 0.8208 acc_val: 0.6222 time: 0.0038s
Epoch: 0218 loss_train: 0.3267 acc_train: 0.9000 loss_val: 0.8322 acc_val: 0.6222 time: 0.0059s
Epoch: 0219 loss_train: 0.2564 acc_train: 0.9000 loss_val: 0.8320 acc_val: 0.6222 time: 0.0065s
Epoch: 0220 loss_train: 0.3076 acc_train: 0.8000 loss_val: 0.7997 acc_val: 0.6556 time: 0.0072s
Epoch: 0221 loss_train: 0.3128 acc_train: 0.8000 loss_val: 0.7787 acc_val: 0.6778 time: 0.0050s
Epoch: 0222 loss_train: 0.3459 acc_train: 0.8000 loss_val: 0.7822 acc_val: 0.6889 time: 0.0060s
Epoch: 0223 loss_train: 0.2734 acc_train

Epoch: 0345 loss_train: 0.2328 acc_train: 1.0000 loss_val: 0.8701 acc_val: 0.6222 time: 0.0050s
Epoch: 0346 loss_train: 0.1666 acc_train: 0.9000 loss_val: 0.9231 acc_val: 0.6111 time: 0.0042s
Epoch: 0347 loss_train: 0.2358 acc_train: 0.9000 loss_val: 0.9567 acc_val: 0.6222 time: 0.0043s
Epoch: 0348 loss_train: 0.2942 acc_train: 0.9000 loss_val: 0.9414 acc_val: 0.6111 time: 0.0031s
Epoch: 0349 loss_train: 0.2129 acc_train: 0.9000 loss_val: 0.9149 acc_val: 0.6222 time: 0.0033s
Epoch: 0350 loss_train: 0.2175 acc_train: 0.9000 loss_val: 0.8986 acc_val: 0.6667 time: 0.0064s
Epoch: 0351 loss_train: 0.2157 acc_train: 1.0000 loss_val: 0.9006 acc_val: 0.6556 time: 0.0049s
Epoch: 0352 loss_train: 0.1112 acc_train: 1.0000 loss_val: 0.9125 acc_val: 0.6556 time: 0.0038s
Epoch: 0353 loss_train: 0.2218 acc_train: 1.0000 loss_val: 0.9168 acc_val: 0.6556 time: 0.0036s
Epoch: 0354 loss_train: 0.2322 acc_train: 0.9000 loss_val: 0.9221 acc_val: 0.6667 time: 0.0074s
Epoch: 0355 loss_train: 0.1521 acc_train

Epoch: 0440 loss_train: 0.2463 acc_train: 0.9000 loss_val: 1.0771 acc_val: 0.6333 time: 0.0095s
Epoch: 0441 loss_train: 0.1492 acc_train: 1.0000 loss_val: 1.0733 acc_val: 0.6444 time: 0.0053s
Epoch: 0442 loss_train: 0.1477 acc_train: 1.0000 loss_val: 1.0691 acc_val: 0.6444 time: 0.0059s
Epoch: 0443 loss_train: 0.1955 acc_train: 0.9000 loss_val: 1.0654 acc_val: 0.6444 time: 0.0069s
Epoch: 0444 loss_train: 0.2013 acc_train: 0.9000 loss_val: 1.0630 acc_val: 0.6444 time: 0.0047s
Epoch: 0445 loss_train: 0.1572 acc_train: 1.0000 loss_val: 1.0724 acc_val: 0.6444 time: 0.0063s
Epoch: 0446 loss_train: 0.1260 acc_train: 1.0000 loss_val: 1.0760 acc_val: 0.6333 time: 0.0044s
Epoch: 0447 loss_train: 0.1327 acc_train: 0.9000 loss_val: 1.0611 acc_val: 0.6556 time: 0.0036s
Epoch: 0448 loss_train: 0.1564 acc_train: 1.0000 loss_val: 1.0545 acc_val: 0.6556 time: 0.0051s
Epoch: 0449 loss_train: 0.0845 acc_train: 1.0000 loss_val: 1.0532 acc_val: 0.6556 time: 0.0056s
Epoch: 0450 loss_train: 0.1607 acc_train