# Graph Deep Learning on Graph to study Fake News

In [156]:
!pip install dgl



In [157]:
!pip install torch_geometric



In [178]:
from torch_geometric.datasets import UPFD
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool
from torch.nn.modules import Linear
from torch_geometric.transforms import ToUndirected
from torch.functional import F
import torch
import argparse
import os.path as osp


The dataset is initialized there. Note that:
- `feature` can be `content` (raw content of the tweet), `bert` (content transformed by a transformer), `profile` (user profile info such as number of tweets, followers, and join date), `spacy` (content transformed by a simple NLP model)
- `dataset` is either `politifact` or `gossipcop`

In [179]:
from torch_geometric.data import DataLoader
from torch_geometric.datasets import UPFD
import os.path as osp
import torch

# Paths and settings
_file_ = '..'
file = _file_
dataset = 'gossipcop'  # or 'politifact'
path = osp.join(osp.dirname(osp.realpath(file)), '..', 'data', 'UPFD')

# Load datasets with bert and profile features separately
train_dataset_bert = UPFD(path, dataset, 'bert', 'train')
train_dataset_profile = UPFD(path, dataset, 'profile', 'train')

val_dataset_bert = UPFD(path, dataset, 'bert', 'val')
val_dataset_profile = UPFD(path, dataset, 'profile', 'val')

test_dataset_bert = UPFD(path, dataset, 'bert', 'test')
test_dataset_profile = UPFD(path, dataset, 'profile', 'test')

# Check that both datasets are aligned
assert len(train_dataset_bert) == len(train_dataset_profile)
assert len(val_dataset_bert) == len(val_dataset_profile)
assert len(test_dataset_bert) == len(test_dataset_profile)

# Function to combine features
def combine_features(dataset_bert, dataset_profile):
    combined_data = []
    for data_bert, data_profile in zip(dataset_bert, dataset_profile):
        data_bert.x = torch.cat([data_bert.x, data_profile.x], dim=-1)  # Concatenate features
        combined_data.append(data_bert)
    return combined_data

# Combine features for train, val, and test datasets
train_dataset = combine_features(train_dataset_bert, train_dataset_profile)
val_dataset = combine_features(val_dataset_bert, val_dataset_profile)
test_dataset = combine_features(test_dataset_bert, test_dataset_profile)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
print(len(train_dataset) + len(val_dataset) + len(test_dataset))

5464


### Model

In [185]:
class Net(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels,
                 concat=False):
        super().__init__()
        self.concat = concat
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.max_pooling = global_mean_pool # They use Max_pool in the article but that doesn't work pretty well.
        self.lin1 = Linear(hidden_channels, 2 * hidden_channels)
        self.lin2 = Linear(2*hidden_channels,2)

    def forward(self, x, edge_index, batch):
        x = self.conv1(x, edge_index)
        #print(f"Après 1ere couche: {x.shape}")
        x = torch.nn.functional.selu(x)
        
        x = self.conv2(x, edge_index)
        #print(f"Après 2eme couche: {x.shape}")
        x = torch.nn.functional.selu(x)
        x = self.max_pooling(x, batch)
        #print(f"Après 3eme couche: {x.shape}")
        x = torch.nn.functional.selu(x)
        x = self.lin1(x)
        #print(f"Après 4eme couche: {x.shape}")
        x = torch.nn.functional.selu(x)
        x = self.lin2(x)
        #print(f"Après 5eme couche: {x.shape}")
        return x.softmax(dim=-1)

In [186]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(778, 128,2, concat=True).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.003, weight_decay=0.01)

In [187]:
def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += float(loss) * data.num_graphs

    return total_loss / len(train_loader.dataset)
train()

-0.4858785642809047

In [188]:
@torch.no_grad()
def test(loader):
    model.eval()

    total_correct = total_examples = 0
    for data in loader:
        data = data.to(device)
        pred = model(data.x, data.edge_index, data.batch).argmax(dim=-1)
        total_correct += int((pred ==
         data.y).sum())
        total_examples += data.num_graphs

    return total_correct / total_examples

In [189]:
for epoch in range(1, 200):
    loss = train()
    train_acc = test(train_loader)
    val_acc = test(val_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, 'f'Val: {val_acc:.4f}, Test: {test_acc:.4f}')

Epoch: 01, Loss: -0.4900, Train: 0.4899, Val: 0.5147, Test: 0.5008
Epoch: 02, Loss: -0.4862, Train: 0.4899, Val: 0.5147, Test: 0.5008
Epoch: 03, Loss: -0.5095, Train: 0.6310, Val: 0.6447, Test: 0.6388
Epoch: 04, Loss: -0.6253, Train: 0.6090, Val: 0.6154, Test: 0.6066
Epoch: 05, Loss: -0.6984, Train: 0.7445, Val: 0.7418, Test: 0.7162
Epoch: 06, Loss: -0.7333, Train: 0.7454, Val: 0.7216, Test: 0.7073
Epoch: 07, Loss: -0.7419, Train: 0.7637, Val: 0.7363, Test: 0.7201
Epoch: 08, Loss: -0.7722, Train: 0.8004, Val: 0.7601, Test: 0.7520
Epoch: 09, Loss: -0.7600, Train: 0.6822, Val: 0.6813, Test: 0.6683
Epoch: 10, Loss: -0.6970, Train: 0.7582, Val: 0.7418, Test: 0.7227
Epoch: 11, Loss: -0.7238, Train: 0.7866, Val: 0.7656, Test: 0.7470
Epoch: 12, Loss: -0.7583, Train: 0.7317, Val: 0.7051, Test: 0.7049
Epoch: 13, Loss: -0.7571, Train: 0.7225, Val: 0.7033, Test: 0.6942
Epoch: 14, Loss: -0.7213, Train: 0.7509, Val: 0.7234, Test: 0.7185
Epoch: 15, Loss: -0.7827, Train: 0.7940, Val: 0.7711, Test: 0.

KeyboardInterrupt: 