# Graph Deep Learning to Study Fake News

In [1]:
!pip install dgl

Collecting dgl
  Using cached dgl-2.1.0-cp312-cp312-manylinux1_x86_64.whl.metadata (553 bytes)
Collecting scipy>=1.1.0 (from dgl)
  Using cached scipy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting networkx>=2.1 (from dgl)
  Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting requests>=2.19.0 (from dgl)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting tqdm (from dgl)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting torchdata>=0.5.0 (from dgl)
  Using cached torchdata-0.9.0-cp312-cp312-manylinux1_x86_64.whl.metadata (5.5 kB)
Collecting charset-normalizer<4,>=2 (from requests>=2.19.0->dgl)
  Using cached charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (34 kB)
Collecting idna<4,>=2.5 (from requests>=2.19.0->dgl)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests>=2.19.0->dgl)

In [3]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
Collecting aiohttp (from torch_geometric)
  Downloading aiohttp-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->torch_geometric)
  Downloading aiohappyeyeballs-2.4.3-py3-none-any.whl.metadata (6.1 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->torch_geometric)
  Downloading aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)
Collecting attrs>=17.3.0 (from aiohttp->torch_geometric)
  Downloading attrs-24.2.0-py3-none-any.whl.metadata (11 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->torch_geometric)
  Downloading frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting multidict<7.0,>=4.5 (from aiohttp->torch_geometric)
  Downloading multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 

In [50]:
from torch_geometric.datasets import UPFD
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_max_pool
from torch.nn.modules import Linear
from torch_geometric.transforms import ToUndirected
from torch.functional import F
import torch
import argparse
import os.path as osp


The dataset is initialized below. Note that:
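- `feature` selects the node features and can be one of `profile` (10-dimensional user profile info such as number of tweets, followers, and join date), `spacy` (300-dimensional embedding of the user's historical tweets produced by a simple spaCy NLP model), `bert` (768-dimensional embedding of the same tweets produced by a BERT transformer), or `content` (310 dimensions: the `spacy` embedding concatenated with the `profile` vector).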
- `dataset` is either `politifact` or `gossipcop`

In [58]:
# Anchor for the data directory. realpath('..') is the parent of the current
# working directory; dirname() plus the extra '..' below climb two more levels
# before descending into data/UPFD.
file = '..'

dataset = 'gossipcop'  # or 'politifact'
feature = 'spacy'
# Name of the architecture. Kept out of `model` on purpose: that name is bound
# to the Net instance in a later cell, so re-running this cell would otherwise
# replace the network with a plain string.
model_name = 'GCN'

path = osp.join(osp.dirname(osp.realpath(file)), '..', 'data', 'UPFD')

# One UPFD object per split. The recorded run downloaded and processed the raw
# data under `path` at this point.
# NOTE(review): ToUndirected is imported at the top of the notebook but no
# transform is passed here — confirm that leaving the edges as stored is intended.
train_dataset = UPFD(path, dataset, feature, 'train')
val_dataset = UPFD(path, dataset, feature, 'val')
test_dataset = UPFD(path, dataset, feature, 'test')

# Total number of graphs across the three splits.
print(len(train_dataset) + len(val_dataset) + len(test_dataset))

# Only the training loader shuffles; the evaluation loaders keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

Downloading https://drive.usercontent.google.com/download?id=1VskhAQ92PrT4sWEKQ2v2-AJhEcpp4A81&confirm=t
Extracting /home/crvr/Ulm/data/UPFD/gossipcop/raw/data.zip
Processing...
Done!


5464


### Model

In [59]:
class Net(torch.nn.Module):
    """Graph classifier: one GCN layer, per-graph max pooling, linear head.

    With ``concat=True`` the pooled graph embedding is fused with a projection
    of the input features of the first node of every graph before the final
    classification layer.

    Args:
        in_channels: size of each input node feature vector.
        hidden_channels: width of the hidden representations.
        out_channels: number of output classes.
        concat: whether to fuse first-node features with the pooled embedding.
    """

    def __init__(self, in_channels, hidden_channels, out_channels,
                 concat=False):
        super().__init__()
        self.concat = concat
        # lin0 and lin1 are always created, but forward() only uses them when
        # ``concat`` is enabled.
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.lin0 = Linear(in_channels, hidden_channels)
        self.lin1 = Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = Linear(hidden_channels, out_channels)

    @staticmethod
    def _first_node_index(batch):
        """Return the position of the first node of every graph in ``batch``.

        A new graph starts wherever two consecutive entries of the
        node-to-graph assignment vector differ; position 0 always starts one.
        """
        boundaries = torch.nonzero(batch[1:] != batch[:-1],
                                   as_tuple=False).view(-1)
        start = torch.zeros(1, dtype=boundaries.dtype,
                            device=boundaries.device)
        return torch.cat([start, boundaries + 1], dim=0)

    def forward(self, x, edge_index, batch):
        """Return per-graph log-probabilities over the output classes.

        Args:
            x: node feature matrix.
            edge_index: graph connectivity passed to the GCN layer.
            batch: vector assigning every node to its graph in the mini-batch.
        """
        node_emb = torch.relu(self.conv1(x, edge_index))
        graph_emb = global_max_pool(node_emb, batch)
        if self.concat:
            # Project the root-node features and merge them with the pooled
            # embedding of the same graph.
            root_feat = torch.relu(self.lin0(x[self._first_node_index(batch)]))
            fused = torch.cat([root_feat, graph_emb], dim=-1)
            graph_emb = torch.relu(self.lin1(fused))
        logits = self.lin2(graph_emb)
        return torch.log_softmax(logits, dim=-1)

In [60]:
# Seed the global RNG before any weights are drawn so that parameter
# initialisation is repeatable across runs. A DataLoader created with
# shuffle=True and no explicit generator also draws from this RNG by default.
torch.manual_seed(0)

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 128 hidden channels; concat=True also feeds the first node's features of
# every graph into the classifier (see Net.forward).
model = Net(train_dataset.num_features, 128,
            train_dataset.num_classes, concat=True).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)

In [61]:
def train():
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Operates on the notebook-level ``model``, ``optimizer``, ``train_loader``
    and ``device``.

    Returns:
        The NLL loss of every batch weighted by its number of graphs, summed
        and divided by the size of the training dataset.
    """
    model.train()
    running = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        log_probs = model(batch.x, batch.edge_index, batch.batch)
        batch_loss = F.nll_loss(log_probs, batch.y)
        batch_loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the mean.
        running += batch_loss.item() * batch.num_graphs

    n_graphs = len(train_loader.dataset)
    return running / n_graphs


# NOTE(review): this call trains the model for one full epoch as part of the
# defining cell, and again every time the cell is re-run. Consider moving it
# to its own cell or dropping it.
train()



0.6882300553741035

In [62]:
@torch.no_grad()
def test(loader):
    """Return the classification accuracy of ``model`` over ``loader``.

    Puts the notebook-level ``model`` into eval mode and runs without gradient
    tracking.

    Args:
        loader: iterable of mini-batches exposing ``x``, ``edge_index``,
            ``batch``, ``y`` and ``num_graphs``.

    Returns:
        Number of correctly classified graphs divided by the number of graphs
        seen.
    """
    model.eval()

    n_hits, n_graphs = 0, 0
    for batch in loader:
        batch = batch.to(device)
        log_probs = model(batch.x, batch.edge_index, batch.batch)
        predicted = log_probs.argmax(dim=-1)
        n_hits += int(predicted.eq(batch.y).sum())
        n_graphs += batch.num_graphs

    return n_hits / n_graphs

In [63]:
# Train for 60 epochs, reporting accuracy on all three splits after each one.
# NOTE(review): the cell that defines train() already ran it once, so
# "Epoch: 01" below is the model's second pass over the training data.
for epoch in range(1, 61):
    loss = train()
    # Evaluated in the order train, val, test.
    train_acc, val_acc, test_acc = map(
        test, (train_loader, val_loader, test_loader))
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, '
          f'Val: {val_acc:.4f}, Test: {test_acc:.4f}')

Epoch: 01, Loss: 0.6811, Train: 0.7766, Val: 0.8059, Test: 0.7878
Epoch: 02, Loss: 0.6739, Train: 0.6612, Val: 0.6502, Test: 0.6398
Epoch: 03, Loss: 0.6494, Train: 0.8068, Val: 0.8370, Test: 0.8092
Epoch: 04, Loss: 0.6045, Train: 0.8571, Val: 0.8645, Test: 0.8544
Epoch: 05, Loss: 0.5239, Train: 0.8718, Val: 0.8919, Test: 0.8756
Epoch: 06, Loss: 0.4105, Train: 0.9002, Val: 0.9194, Test: 0.8999
Epoch: 07, Loss: 0.3053, Train: 0.9185, Val: 0.9286, Test: 0.9187
Epoch: 08, Loss: 0.2522, Train: 0.9240, Val: 0.9322, Test: 0.9205
Epoch: 09, Loss: 0.2342, Train: 0.9185, Val: 0.9231, Test: 0.9166
Epoch: 10, Loss: 0.2206, Train: 0.9203, Val: 0.9231, Test: 0.9179
Epoch: 11, Loss: 0.2294, Train: 0.9359, Val: 0.9414, Test: 0.9341
Epoch: 12, Loss: 0.2154, Train: 0.9386, Val: 0.9432, Test: 0.9362
Epoch: 13, Loss: 0.2043, Train: 0.9423, Val: 0.9487, Test: 0.9357
Epoch: 14, Loss: 0.2040, Train: 0.9405, Val: 0.9414, Test: 0.9360
Epoch: 15, Loss: 0.1959, Train: 0.9451, Val: 0.9469, Test: 0.9381
Epoch: 16,