### Applying AGNN From PyTorch Geometric to Fake News Dataset (BuzzFeed Political News Dataset)

In [101]:
from utils.ArticlesHandler import ArticlesHandler
from utils import solve, embedding_matrix_2_kNN, get_rate, accuracy, precision, recall, f1_score
from utils import Config
import time
import numpy as np
import scipy.sparse as sp
from postprocessing.SelectLabelsPostprocessor import SelectLabelsPostprocessor
from pygcn.utils import encode_onehot, accuracy, load_from_features
import torch
from torch_geometric.data import Data
from torch_geometric.utils import sparse as S
#from model import AGNN

In [40]:
config = Config(file='config')

assert (config.num_fake_articles + config.num_real_articles > 
        config.num_nearest_neighbours), "Can't have more neighbours than nodes!"

print("Method of decomposition:", config.method_decomposition_embedding)

Method of decomposition: GloVe


In [41]:
print("Loading dataset", config.dataset_name)
articles = ArticlesHandler(config)

print("Performing decomposition...")
C = articles.get_tensor()

Loading dataset Buzzfeed Political News Dataset
Performing decomposition...
Loading Glove Model
Done. 400000  words loaded!


In [43]:
config.set("num_unknown_labels", 195)

In [44]:
labels = articles.articles.labels
all_labels = articles.articles.labels_untouched

In [45]:
adj, features, all_labels = load_from_features(C, all_labels, config)
_, _, labels = load_from_features(C, labels, config)

In [161]:
#print(labels)
idx_train = np.where(labels)[0]
idx_val = np.where(1 - abs(labels))[0][:90]
idx_test = np.where(1 - abs(labels))[0][90:]

#print(len(idx_train))

idx_train = torch.LongTensor(idx_train)
idx_val = torch.LongTensor(idx_val)
idx_test = torch.LongTensor(idx_test)

num_features = features.shape[1]
num_classes = 2

#--------Converting Adjacency Matrix into the form that is compatible with PyTorch Geometric ------------------------------
adj_dense = adj.to_dense()
index, value = S.dense_to_sparse(adj_dense)

#print(index.shape)
#print(num_features)

data = Data(x=features, edge_index=index)

#print(data.x.shape)

### PyTorch_Geometric AGNN

In [162]:
import torch.optim as optim
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_geometric.nn import AGNNConv
import torch.nn as nn
from torch.nn.parameter import Parameter

In [163]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.lin1 = torch.nn.Linear(num_features, 16)
            
        self.prop1 = AGNNConv(requires_grad=False)
        self.prop2 = AGNNConv(requires_grad=True)
        self.lin2 = torch.nn.Linear(16, num_classes)

        self.prop1_mat_weight = torch.tensor(0) 
        self.prop2_mat_weight = torch.tensor(0)

        self.edge_index_with_self_lopp = torch.tensor(0)

    def forward(self):
        x = F.dropout(data.x,0.5,training=self.training)
        x = F.relu(self.lin1(data.x))

        self.edge_index_with_self_loop , self.prop1_mat_weight = self.prop1.propagation_matrix(x, data.edge_index)
        x = self.prop1(x, data.edge_index)

        _ , self.prop2_mat_weight = self.prop2.propagation_matrix(x, data.edge_index)
        x = self.prop2(x, data.edge_index)
        
        x = F.dropout(x,0.5,training=self.training)
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

In [164]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = 'cpu'
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)


def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model()
    
    loss_train = F.nll_loss(output[idx_train], all_labels[idx_train])
    acc_train = accuracy(output[idx_train], all_labels[idx_train])
    loss_train.backward()
    optimizer.step()
    
    loss_val = F.nll_loss(output[idx_val], all_labels[idx_val])
    acc_val = accuracy(output[idx_val], all_labels[idx_val])
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))


def test():
    model.eval()
    output = model()
    loss_test = F.nll_loss(output[idx_test], all_labels[idx_test])
    acc_test = accuracy(output[idx_test], all_labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))

### Training on FakeNews Dataset

In [165]:
num_epochs = 400

#Train the model
t_total = time.time()
for epoch in range(0, num_epochs+1):
    train(epoch)
        
print("\nDone with Training\n")

print("\nTotal time elapsed: {:.4f}s\n".format(time.time() - t_total))

#Test the model
test()

Epoch: 0001 loss_train: 0.6939 acc_train: 0.5000 loss_val: 0.6913 acc_val: 0.5000 time: 0.0064s
Epoch: 0002 loss_train: 0.6991 acc_train: 0.4000 loss_val: 0.6967 acc_val: 0.4667 time: 0.0074s
Epoch: 0003 loss_train: 0.6960 acc_train: 0.4500 loss_val: 0.7003 acc_val: 0.4556 time: 0.0077s
Epoch: 0004 loss_train: 0.6940 acc_train: 0.5000 loss_val: 0.6963 acc_val: 0.4667 time: 0.0106s
Epoch: 0005 loss_train: 0.7012 acc_train: 0.4500 loss_val: 0.7036 acc_val: 0.4111 time: 0.0066s
Epoch: 0006 loss_train: 0.6916 acc_train: 0.5250 loss_val: 0.7054 acc_val: 0.3778 time: 0.0063s
Epoch: 0007 loss_train: 0.6973 acc_train: 0.4750 loss_val: 0.6909 acc_val: 0.5444 time: 0.0071s
Epoch: 0008 loss_train: 0.6901 acc_train: 0.6000 loss_val: 0.6910 acc_val: 0.5222 time: 0.0068s
Epoch: 0009 loss_train: 0.6979 acc_train: 0.4250 loss_val: 0.6925 acc_val: 0.4778 time: 0.0066s
Epoch: 0010 loss_train: 0.6894 acc_train: 0.5500 loss_val: 0.6924 acc_val: 0.5000 time: 0.0067s
Epoch: 0011 loss_train: 0.6870 acc_train

Epoch: 0088 loss_train: 0.6483 acc_train: 0.6250 loss_val: 0.7515 acc_val: 0.4333 time: 0.0101s
Epoch: 0089 loss_train: 0.6702 acc_train: 0.5500 loss_val: 0.6842 acc_val: 0.5444 time: 0.0113s
Epoch: 0090 loss_train: 0.6673 acc_train: 0.5250 loss_val: 0.7050 acc_val: 0.5444 time: 0.0070s
Epoch: 0091 loss_train: 0.7229 acc_train: 0.4750 loss_val: 0.7302 acc_val: 0.4556 time: 0.0066s
Epoch: 0092 loss_train: 0.7466 acc_train: 0.4500 loss_val: 0.6828 acc_val: 0.5222 time: 0.0068s
Epoch: 0093 loss_train: 0.6565 acc_train: 0.6750 loss_val: 0.6942 acc_val: 0.5000 time: 0.0066s
Epoch: 0094 loss_train: 0.7141 acc_train: 0.5000 loss_val: 0.7402 acc_val: 0.4889 time: 0.0066s
Epoch: 0095 loss_train: 0.6676 acc_train: 0.6250 loss_val: 0.7074 acc_val: 0.5333 time: 0.0067s
Epoch: 0096 loss_train: 0.7233 acc_train: 0.4250 loss_val: 0.6627 acc_val: 0.6111 time: 0.0072s
Epoch: 0097 loss_train: 0.6886 acc_train: 0.4750 loss_val: 0.6747 acc_val: 0.6000 time: 0.0069s
Epoch: 0098 loss_train: 0.6744 acc_train

Epoch: 0196 loss_train: 0.6254 acc_train: 0.7250 loss_val: 0.7059 acc_val: 0.4889 time: 0.0076s
Epoch: 0197 loss_train: 0.6073 acc_train: 0.7750 loss_val: 0.6770 acc_val: 0.5778 time: 0.0068s
Epoch: 0198 loss_train: 0.6511 acc_train: 0.6250 loss_val: 0.6764 acc_val: 0.5000 time: 0.0067s
Epoch: 0199 loss_train: 0.6848 acc_train: 0.5250 loss_val: 0.6836 acc_val: 0.5222 time: 0.0079s
Epoch: 0200 loss_train: 0.6662 acc_train: 0.5250 loss_val: 0.6789 acc_val: 0.5333 time: 0.0069s
Epoch: 0201 loss_train: 0.6742 acc_train: 0.5500 loss_val: 0.6672 acc_val: 0.5556 time: 0.0069s
Epoch: 0202 loss_train: 0.6889 acc_train: 0.4500 loss_val: 0.7032 acc_val: 0.4667 time: 0.0067s
Epoch: 0203 loss_train: 0.6538 acc_train: 0.6500 loss_val: 0.6590 acc_val: 0.5778 time: 0.0071s
Epoch: 0204 loss_train: 0.6439 acc_train: 0.6500 loss_val: 0.6855 acc_val: 0.5000 time: 0.0070s
Epoch: 0205 loss_train: 0.6435 acc_train: 0.6750 loss_val: 0.7037 acc_val: 0.4889 time: 0.0072s
Epoch: 0206 loss_train: 0.6222 acc_train

Epoch: 0303 loss_train: 0.6597 acc_train: 0.6000 loss_val: 0.6792 acc_val: 0.5444 time: 0.0073s
Epoch: 0304 loss_train: 0.6111 acc_train: 0.7250 loss_val: 0.7106 acc_val: 0.4778 time: 0.0118s
Epoch: 0305 loss_train: 0.6672 acc_train: 0.5750 loss_val: 0.6573 acc_val: 0.6333 time: 0.0066s
Epoch: 0306 loss_train: 0.6962 acc_train: 0.5250 loss_val: 0.7026 acc_val: 0.4667 time: 0.0087s
Epoch: 0307 loss_train: 0.6403 acc_train: 0.6500 loss_val: 0.6961 acc_val: 0.4778 time: 0.0070s
Epoch: 0308 loss_train: 0.6404 acc_train: 0.6250 loss_val: 0.6703 acc_val: 0.5333 time: 0.0095s
Epoch: 0309 loss_train: 0.6142 acc_train: 0.6750 loss_val: 0.6525 acc_val: 0.6000 time: 0.0075s
Epoch: 0310 loss_train: 0.6289 acc_train: 0.6750 loss_val: 0.6516 acc_val: 0.6222 time: 0.0114s
Epoch: 0311 loss_train: 0.6655 acc_train: 0.6000 loss_val: 0.7175 acc_val: 0.4556 time: 0.0079s
Epoch: 0312 loss_train: 0.6819 acc_train: 0.5750 loss_val: 0.6894 acc_val: 0.4778 time: 0.0063s
Epoch: 0313 loss_train: 0.6117 acc_train

In [24]:
#print('----Edge Index With Self Loop-------')
#print(model.edge_index_with_self_loop)

#print('----Edge Index With Self Loop shape---')
#print(model.edge_index_with_self_loop.shape)


#----Printing the Shapes of the Propagation Matrices for corresponding Propagation Layer

#print('Propagation Matrix Weights Shape')

#print(model.prop1_mat_weight.shape)
#print(model.prop2_mat_weight.shape)
#print(model.prop3_mat_weight.shape)

#-----------Printing the Propagation Weights for 3 Propagation Layers ----------------------

#print('Propagation Matrix Weight')

#print(model.prop1_mat_weight)
#print(model.prop2_mat_weight)
#print(model.prop3_mat_weight)