# Graph Deep Learning to Study Fake News

In [1]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# Pinned to the version shown in this cell's recorded output.
# NOTE(review): dgl is not imported by any cell visible in this notebook - confirm it is still required.
%pip install dgl==2.1.0

Collecting dgl
  Using cached dgl-2.1.0-cp312-cp312-manylinux1_x86_64.whl.metadata (553 bytes)
Collecting scipy>=1.1.0 (from dgl)
  Using cached scipy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting networkx>=2.1 (from dgl)
  Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting requests>=2.19.0 (from dgl)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting tqdm (from dgl)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting torchdata>=0.5.0 (from dgl)
  Using cached torchdata-0.9.0-cp312-cp312-manylinux1_x86_64.whl.metadata (5.5 kB)
Collecting charset-normalizer<4,>=2 (from requests>=2.19.0->dgl)
  Using cached charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (34 kB)
Collecting idna<4,>=2.5 (from requests>=2.19.0->dgl)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests>=2.19.0->dgl)

In [3]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# Pinned to the version shown in this cell's recorded output.
%pip install torch_geometric==2.6.1

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
Collecting aiohttp (from torch_geometric)
  Downloading aiohttp-3.11.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp->torch_geometric)
  Downloading aiohappyeyeballs-2.4.3-py3-none-any.whl.metadata (6.1 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->torch_geometric)
  Downloading aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)
Collecting attrs>=17.3.0 (from aiohttp->torch_geometric)
  Downloading attrs-24.2.0-py3-none-any.whl.metadata (11 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->torch_geometric)
  Downloading frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting multidict<7.0,>=4.5 (from aiohttp->torch_geometric)
  Downloading multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 

In [90]:
from torch_geometric.datasets import UPFD
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool
from torch.nn.modules import Linear
from torch_geometric.transforms import ToUndirected
from torch.functional import F
import torch
import argparse
import os.path as osp


The dataset is initialized below. Note that:
- `feature` selects the node features: `profile` (user profile information such as number of tweets, followers, and join date), `spacy` (text encoded by a simple spaCy NLP model), `bert` (text encoded by a BERT transformer), or `content` (the `spacy` text embedding combined with the `profile` attributes).
- `dataset` is either `politifact` or `gossipcop`.

In [139]:
# Presumably a stand-in for `__file__`; the data directory below is resolved
# relative to this location.
file = '..'

dataset = 'gossipcop'  # alternative: 'politifact'
feature = 'bert'
# Architecture label only. It is called `model_name` so that it is not confused
# with the network instance that a later cell binds to `model`.
model_name = 'GCN'

# Number of graphs per mini-batch, shared by the three loaders.
BATCH_SIZE = 128

# Root directory handed to UPFD for all three splits.
path = osp.join(osp.dirname(osp.realpath(file)), '..', 'data', 'UPFD')
train_dataset = UPFD(path, dataset, feature, 'train')
val_dataset = UPFD(path, dataset, feature, 'val')
test_dataset = UPFD(path, dataset, feature, 'test')

# Total number of graphs across the three splits.
print(len(train_dataset) + len(val_dataset) + len(test_dataset))

# Only the training split is shuffled; validation and test keep their order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Processing...
Done!


5464


### Model

In [140]:
class Net(torch.nn.Module):
    """Graph-level classifier: two GCN layers, mean pooling and a two-layer head.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Output width of both graph convolutions; the first
            linear layer widens it to ``2 * hidden_channels``.
        out_channels: Number of classes, i.e. the width of the final layer.
        concat: Stored on the instance but not read by ``forward``; kept so
            callers that already pass it keep working.
    """

    def __init__(self, in_channels, hidden_channels, out_channels,
                 concat=False):
        super().__init__()
        self.concat = concat
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        # The article uses max pooling, but that did not work well here.
        self.mean_pooling = global_mean_pool
        self.lin1 = Linear(hidden_channels, 2 * hidden_channels)
        # Sized from out_channels (previously hard-coded to 2, which silently
        # ignored the constructor argument).
        self.lin2 = Linear(2 * hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        """Compute per-graph log-probabilities.

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity passed to both GCN layers.
            batch: Assignment of nodes to graphs, consumed by the pooling step.

        Returns:
            Log-softmax over the last dimension of the final linear output.
        """
        activation = torch.nn.functional.selu

        # Two rounds of message passing, each followed by SELU.
        x = activation(self.conv1(x, edge_index))
        x = activation(self.conv2(x, edge_index))

        # Collapse node embeddings into graph embeddings, then classify.
        x = activation(self.mean_pooling(x, batch))
        x = activation(self.lin1(x))
        x = self.lin2(x)
        return x.log_softmax(dim=-1)

In [145]:
# Seed torch's global RNG before anything random happens, so that weight
# initialisation (and shuffling that draws from the global RNG) is repeatable
# after Restart & Run All. Some GPU kernels may still be non-deterministic.
SEED = 42
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 128 is the hidden width. `concat=True` is accepted by Net but is not used in
# its forward pass.
model = Net(train_dataset.num_features, 128,
            train_dataset.num_classes, concat=True).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.003, weight_decay=0.001)

In [146]:
def train():
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer`` and ``device``.

    Returns:
        The summed per-batch loss (each weighted by its number of graphs)
        divided by the size of the training dataset.
    """
    model.train()
    running_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        log_probs = model(batch.x, batch.edge_index, batch.batch)
        batch_loss = F.nll_loss(log_probs, batch.y)
        batch_loss.backward()
        optimizer.step()
        # Weight the batch loss by its graph count so the final division
        # yields a per-graph average even when the last batch is smaller.
        running_loss += batch_loss.item() * batch.num_graphs

    return running_loss / len(train_loader.dataset)


# One pass as a smoke test; note that this already updates the model weights.
train()

1.3816660378442143

In [147]:
@torch.no_grad()
def test(loader):
    """Return the fraction of graphs in ``loader`` that ``model`` classifies correctly.

    Uses the notebook-level ``model`` and ``device``; gradients are disabled
    by the decorator.
    """
    model.eval()

    n_correct = 0
    n_seen = 0
    for batch in loader:
        batch = batch.to(device)
        log_probs = model(batch.x, batch.edge_index, batch.batch)
        # The predicted class is the index of the largest log-probability.
        predicted = log_probs.argmax(dim=-1)
        n_correct += int((predicted == batch.y).sum())
        n_seen += batch.num_graphs

    return n_correct / n_seen

In [None]:
# Train for epochs 1..199, reporting accuracy on every split after each epoch.
for epoch in range(1, 200):
    loss = train()
    # Evaluated in the order train -> val -> test.
    train_acc, val_acc, test_acc = (
        test(split_loader)
        for split_loader in (train_loader, val_loader, test_loader)
    )
    print(
        f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, '
        f'Val: {val_acc:.4f}, Test: {test_acc:.4f}'
    )

Epoch: 01, Loss: 0.7167, Train: 0.5183, Val: 0.4908, Test: 0.5086
Epoch: 02, Loss: 0.6242, Train: 0.7280, Val: 0.7381, Test: 0.7222
Epoch: 03, Loss: 0.5364, Train: 0.7262, Val: 0.7271, Test: 0.7138
Epoch: 04, Loss: 0.5321, Train: 0.7756, Val: 0.7784, Test: 0.7493
Epoch: 05, Loss: 0.5165, Train: 0.7253, Val: 0.7271, Test: 0.7023
Epoch: 06, Loss: 0.4650, Train: 0.8059, Val: 0.7930, Test: 0.7595
Epoch: 07, Loss: 0.4498, Train: 0.7344, Val: 0.7106, Test: 0.6997
Epoch: 08, Loss: 0.4492, Train: 0.7811, Val: 0.7637, Test: 0.7457
Epoch: 09, Loss: 0.4153, Train: 0.8013, Val: 0.7619, Test: 0.7475
Epoch: 10, Loss: 0.4232, Train: 0.7830, Val: 0.7454, Test: 0.7344
Epoch: 11, Loss: 0.3780, Train: 0.8471, Val: 0.7857, Test: 0.7778
Epoch: 12, Loss: 0.3313, Train: 0.8581, Val: 0.7967, Test: 0.7778
Epoch: 13, Loss: 0.3319, Train: 0.8416, Val: 0.7967, Test: 0.7640
Epoch: 14, Loss: 0.3701, Train: 0.8700, Val: 0.7747, Test: 0.7781
Epoch: 15, Loss: 0.3155, Train: 0.8864, Val: 0.7821, Test: 0.7880
Epoch: 16,