In [14]:
from collections import namedtuple
from itertools import product

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from torch_geometric.nn import global_mean_pool
from torch_geometric.datasets import UPFD
from torch_geometric.loader import DataLoader
from tqdm.notebook import tqdm
from box import Box
from sklearn.metrics import accuracy_score

from layers import PARMAImproved

In [22]:
class PARMAGC(nn.Module):
    """Graph-level classifier built from one ``PARMAImproved`` convolution.

    Pipeline: ``PARMAImproved`` over the nodes -> ``global_mean_pool`` per
    graph -> ``Linear`` + ReLU -> ``Linear`` -> ``log_softmax``.

    Args:
        in_dim: Input feature size, forwarded to ``PARMAImproved``.
        hidden_dim: Output size of the convolution and width of the hidden
            linear layer.
        out_dim: Number of scores produced per graph.
        period: Forwarded unchanged to ``PARMAImproved``.
        timestamps: Forwarded unchanged to ``PARMAImproved``.
        dropout: Forwarded to ``PARMAImproved`` and stored on
            ``self.dropout``; this class does not apply dropout itself.
        lin_dim: Accepted for call-site compatibility but currently unused.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, period, timestamps,
                 dropout=0.2, lin_dim=16):
        super().__init__()
        self.dropout = dropout
        # Single convolution followed by a two-layer classification head.
        self.conv1 = PARMAImproved(in_dim, hidden_dim, period, timestamps, dropout=dropout)
        self.linear1 = nn.Linear(hidden_dim, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, out_dim)

    def reset_parameters(self):
        """Re-initialise the convolution and both linear layers, in that order."""
        for layer in (self.conv1, self.linear1, self.linear2):
            layer.reset_parameters()

    def forward(self, data):
        """Return per-graph log-probabilities for a batch object.

        ``data`` must expose ``x``, ``edge_index`` and ``batch`` attributes;
        ``batch`` is handed to ``global_mean_pool`` to group nodes by graph.
        """
        node_features, edges, graph_ids = data.x, data.edge_index, data.batch
        node_embeddings = self.conv1(node_features, edges)
        graph_embeddings = global_mean_pool(node_embeddings, graph_ids)

        hidden = F.relu(self.linear1(graph_embeddings))
        scores = self.linear2(hidden)
        return F.log_softmax(scores, dim=-1)

In [3]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
backend = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(backend)
device

device(type='cuda')

In [4]:
# Hyperparameters shared by every run in the sweep.
config = Box(dict(
    batch_size=128,   # graphs per mini-batch in all three loaders
    epochs=35,        # training epochs per combination
    lr=1e-2,          # learning rate handed to Adam
    l2=1e-2,          # weight_decay handed to Adam
    hidden_dim=128,   # hidden width of the model
    dropout=0.2,      # dropout rate handed to the model
))

In [5]:
# Summary record for one (dataset, feature, conv layer) training run.
Result = namedtuple(
    'Result',
    [
        'name', 'feature', 'conv_layer',   # which combination was trained
        'accuracy',                        # accuracy on the test split
        'train_mean', 'train_std',         # statistics of the per-epoch training losses
        'val_mean', 'val_std',             # statistics of the per-epoch validation losses
    ],
)

In [25]:
def train(model: nn.Module,
          optimizer: torch.optim.Optimizer,
          train_loader: DataLoader,
          device: torch.device):
    """Run one optimisation pass over ``train_loader``.

    The model is switched to training mode and, for every batch, its output
    is scored with ``F.nll_loss`` against ``batch.y`` and one optimizer step
    is taken.

    Returns:
        The sum of the per-batch losses divided by the number of batches
        (``len(train_loader)``). An empty loader raises ``ZeroDivisionError``.
    """
    model.train()

    running_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        log_probs = model(batch.to(device))
        targets = batch.y.to(device)
        batch_loss = F.nll_loss(log_probs, targets)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    n_batches = len(train_loader)
    return running_loss / n_batches

def validate(model: nn.Module,
             val_loader: DataLoader,
             device: torch.device):
    """Compute the average ``F.nll_loss`` of ``model`` over ``val_loader``.

    The model is switched to evaluation mode and every forward pass runs
    under ``torch.inference_mode()``, so no gradients are tracked and no
    parameters are updated.

    Returns:
        The sum of the per-batch losses divided by the number of batches
        (``len(val_loader)``). An empty loader raises ``ZeroDivisionError``.
    """
    model.eval()

    running_loss = 0
    with torch.inference_mode():
        for batch in val_loader:
            log_probs = model(batch.to(device))
            targets = batch.y.to(device)
            running_loss += F.nll_loss(log_probs, targets).item()

    n_batches = len(val_loader)
    return running_loss / n_batches

def test(model: nn.Module,
         test_loader: DataLoader,
         device: torch.device):
    pred = []
    true = []
    model.eval()
    with torch.inference_mode():
        for data in test_loader:
            out = model(data.to(device))
            preds = torch.argmax(F.softmax(out, dim=1), dim=-1)
            pred.append(preds.detach().cpu().numpy())
            true.append(data.y.detach().cpu().numpy())

    pred = np.concatenate(pred)
    true = np.concatenate(true)
    acc = accuracy_score(true, pred)

    return acc

In [6]:
# Dataset names and node-feature variants swept by the experiment loop.
names = ['politifact', 'gossipcop']
features = ['content', 'bert', 'profile', 'spacy']
# Passed as ``root`` to the UPFD dataset constructor.
root = './datasets/UPFD'

conv_layers = ['parma']
# Full Cartesian product: names vary slowest, conv layers fastest.
combinations = [
    (name, feature, conv_layer)
    for name in names
    for feature in features
    for conv_layer in conv_layers
]

In [26]:
results = []

# One independent training run per (dataset, feature, conv layer) combination.
for name, feature, conv_layer in tqdm(combinations, desc="Combination"):
    # Load the train / val / test splits, in that order.
    train_dataset, val_dataset, test_dataset = (
        UPFD(root=root, name=name, feature=feature, split=split)
        for split in ('train', 'val', 'test')
    )

    # Only the training loader shuffles; evaluation loaders keep dataset order.
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    val_loader, test_loader = (
        DataLoader(dataset, batch_size=config.batch_size, shuffle=False)
        for dataset in (val_dataset, test_dataset)
    )

    # A fresh model and optimizer for every combination.
    model = PARMAGC(
        in_dim=train_dataset.num_features,
        hidden_dim=config.hidden_dim,
        out_dim=train_dataset.num_classes,
        period=3,
        timestamps=2,
        dropout=config.dropout,
    )
    model = model.to(device)
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=config.lr,
        weight_decay=config.l2,
    )

    # Per-epoch loss history for this combination.
    train_losses, val_losses = [], []

    for epoch in tqdm(range(config.epochs), desc="Training"):
        train_loss = train(model, optimizer, train_loader, device)
        train_losses.append(train_loss)

        val_loss = validate(model, val_loader, device)
        val_losses.append(val_loss)

    # Test accuracy is measured once, with the weights left by the final epoch.
    acc = test(model, test_loader, device)
    # Mean and standard deviation are taken across epochs.
    train_mean, val_mean = np.mean(train_losses), np.mean(val_losses)
    train_std, val_std = np.std(train_losses), np.std(val_losses)

    curr_result = Result(
        name=name,
        feature=feature,
        conv_layer=conv_layer,
        accuracy=acc,
        train_mean=train_mean,
        train_std=train_std,
        val_mean=val_mean,
        val_std=val_std,
    )
    results.append(curr_result)

Combination:   0%|          | 0/8 [00:00<?, ?it/s]

Training:   0%|          | 0/35 [00:00<?, ?it/s]

Training:   0%|          | 0/35 [00:00<?, ?it/s]

Training:   0%|          | 0/35 [00:00<?, ?it/s]

Training:   0%|          | 0/35 [00:00<?, ?it/s]

Training:   0%|          | 0/35 [00:00<?, ?it/s]

Training:   0%|          | 0/35 [00:00<?, ?it/s]

Training:   0%|          | 0/35 [00:00<?, ?it/s]

Training:   0%|          | 0/35 [00:00<?, ?it/s]

In [30]:
# Rank every run from highest to lowest test accuracy.
sorted(
    results,
    key=lambda result: result.accuracy,
    reverse=True,
)

[Result(name='gossipcop', feature='spacy', conv_layer='parma', accuracy=0.5007841087297439, train_mean=0.7499298750408112, train_std=0.23374992296715227, val_mean=0.7048322732107982, val_std=0.023982452855680927),
 Result(name='gossipcop', feature='content', conv_layer='parma', accuracy=0.4992158912702561, train_mean=1.0138953063223097, train_std=1.5245220278875373, val_mean=0.8672856555666242, val_std=1.0007127697280696),
 Result(name='gossipcop', feature='bert', conv_layer='parma', accuracy=0.4992158912702561, train_mean=1.543632871007162, train_std=3.6744304037774884, val_mean=0.8064835575648717, val_std=0.5909600238280133),
 Result(name='gossipcop', feature='profile', conv_layer='parma', accuracy=0.4992158912702561, train_mean=0.7557739526506454, train_std=0.3145598440609304, val_mean=0.6975631373269218, val_std=0.010847165674549426),
 Result(name='politifact', feature='content', conv_layer='parma', accuracy=0.48868778280542985, train_mean=7.162691800934928, train_std=15.5147903530