In [3]:
!pip install datasets
!pip install ogb

Collecting ogb
  Downloading ogb-1.3.6-py3-none-any.whl.metadata (6.2 kB)
Collecting outdated>=0.2.0 (from ogb)
  Downloading outdated-0.2.2-py2.py3-none-any.whl.metadata (4.7 kB)
Collecting littleutils (from outdated>=0.2.0->ogb)
  Downloading littleutils-0.2.4-py3-none-any.whl.metadata (679 bytes)
Downloading ogb-1.3.6-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading outdated-0.2.2-py2.py3-none-any.whl (7.5 kB)
Downloading littleutils-0.2.4-py3-none-any.whl (8.1 kB)
Installing collected packages: littleutils, outdated, ogb
Successfully installed littleutils-0.2.4 ogb-1.3.6 outdated-0.2.2


In [8]:
!pip install torch-geometric

Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.1/1.1 MB[0m [31m43.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-geometric
Successfully installed torch-geometric-2.6.1


In [20]:
!pip install torch-geometric
!pip install ogb




In [59]:
import torch
import torch.optim as optim
from torch.nn.functional import softmax
import torch.nn as nn
import torch.nn.functional as F
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch_geometric.utils import to_dense_adj

# Charger le dataset OGB-MOLHIV
dataset = PygGraphPropPredDataset(name="ogbg-molhiv")
evaluator = Evaluator(name="ogbg-molhiv")
print(f"Dataset Features: {dataset.num_tasks}")
print(f"Number of Graphs: {len(dataset)}")
print(f"Task Type: {dataset.task_type}")

Dataset Features: 1
Number of Graphs: 41127
Task Type: binary classification


  self.data, self.slices = torch.load(self.processed_paths[0])


In [60]:
# Extraire les labels (classification binaire)
labels = [graph.y.item() for graph in dataset]
unique_labels, counts = np.unique(labels, return_counts=True)
print("Class Distribution:")
for label, count in zip(unique_labels, counts):
    print(f"Class {label}: {count} graphs")


Class Distribution:
Class 0: 39684 graphs
Class 1: 1443 graphs


In [61]:
# Calculer les poids des classes
class_weights = compute_class_weight(class_weight='balanced', classes=unique_labels, y=labels)
class_weights = torch.tensor(class_weights, dtype=torch.float)
print("Class Weights:", class_weights)

Class Weights: tensor([ 0.5182, 14.2505])


In [66]:
# Dataset synthétique
class OGBDataset(Dataset):
    def __init__(self, num_graphs=400, num_nodes=50, in_features=5):
        self.graphs = []
        for _ in range(num_graphs):
            node_features = torch.randn(num_nodes, in_features)
            adj_matrix = torch.randint(0, 2, (num_nodes, num_nodes)).float()
            graph_label = torch.randint(0, 2, (1,))
            self.graphs.append((node_features, adj_matrix, graph_label))

    def __len__(self):
        return len(self.graphs)

    def __getitem__(self, idx):
        return self.graphs[idx]

In [67]:
# Graph Attention Layer avec gestion des lots
class GraphAttentionLayer(nn.Module):
    def __init__(self, in_features, out_features, dropout, alpha):
        super(GraphAttentionLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.dropout = dropout
        self.alpha = alpha

        self.W = nn.Parameter(torch.empty(size=(in_features, out_features)))
        self.a = nn.Parameter(torch.empty(size=(2 * out_features, 1)))

        self.leakyrelu = nn.LeakyReLU(alpha)
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

    def forward(self, h, adj):
        Wh = torch.matmul(h, self.W)  # (batch_size, num_nodes, out_features)
        batch_size, num_nodes, _ = Wh.size()

        # Calcul des attentions
        a_input = torch.cat(
            [
                Wh.repeat(1, 1, num_nodes).view(batch_size, num_nodes * num_nodes, -1),
                Wh.repeat(1, num_nodes, 1)
            ],
            dim=-1
        ).view(batch_size, num_nodes, num_nodes, 2 * self.out_features)
        e = self.leakyrelu(torch.matmul(a_input, self.a).squeeze(-1))

        # Appliquer le masque
        zero_vec = -9e15 * torch.ones_like(e)
        attention = torch.where(adj > 0, e, zero_vec)
        attention = torch.softmax(attention, dim=-1)
        attention = torch.nn.functional.dropout(attention, p=self.dropout, training=self.training)

        h_prime = torch.matmul(attention, Wh)
        return h_prime


In [68]:
# GAT Model
class GAT(nn.Module):
    def __init__(self, n_features, hidden_dim, n_classes, dropout, alpha, n_heads):
        super(GAT, self).__init__()
        self.attentions = nn.ModuleList(
            [GraphAttentionLayer(n_features, hidden_dim, dropout, alpha) for _ in range(n_heads)]
        )
        self.out_att = GraphAttentionLayer(hidden_dim * n_heads, n_classes, dropout, alpha)

    def forward(self, x, adj):
        x = torch.cat([att(x, adj) for att in self.attentions], dim=-1)
        x = torch.nn.functional.elu(x)
        x = self.out_att(x, adj)
        return x.mean(dim=1)  # Réduction pour obtenir (batch_size, n_classes)


In [70]:
# Training
def train():
    dataset = OGBDataset()
    train_loader = DataLoader(dataset, batch_size=16, shuffle=True)

    model = GAT(n_features=5, hidden_dim=8, n_classes=1, dropout=0.6, alpha=0.2, n_heads=4)
    criterion = nn.BCEWithLogitsLoss(pos_weight=class_weights[1])
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)

    for epoch in range(200):
        model.train()
        epoch_loss = 0
        correct = 0  # Compteur des prédictions correctes
        total = 0  # Nombre total de prédictions

        for node_features, adj, graph_label in train_loader:
            optimizer.zero_grad()

            # Forward pass
            output = model(node_features, adj)
            graph_label = graph_label.float().squeeze()
            loss = criterion(output.squeeze(), graph_label)

            # Calcul de la précision
            predicted = (torch.sigmoid(output.squeeze()) > 0.5).float()  # Prédiction binaire
            correct += (predicted == graph_label).sum().item()  # Incrémenter les prédictions correctes
            total += graph_label.size(0)  # Incrémenter le nombre total de prédictions

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        accuracy = correct / total  # Calcul de la précision
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.4f}")


if __name__ == "__main__":
    train()

Epoch 1, Loss: 65.2929, Accuracy: 0.5300
Epoch 2, Loss: 48.9009, Accuracy: 0.5225
Epoch 3, Loss: 46.7120, Accuracy: 0.5225
Epoch 4, Loss: 46.2699, Accuracy: 0.5225
Epoch 5, Loss: 46.4562, Accuracy: 0.5225
Epoch 6, Loss: 46.9131, Accuracy: 0.5225
Epoch 7, Loss: 46.6503, Accuracy: 0.5225
Epoch 8, Loss: 45.9744, Accuracy: 0.5225
Epoch 9, Loss: 46.5244, Accuracy: 0.5225
Epoch 10, Loss: 45.8967, Accuracy: 0.5225
Epoch 11, Loss: 45.7316, Accuracy: 0.5225
Epoch 12, Loss: 46.0354, Accuracy: 0.5225
Epoch 13, Loss: 45.9202, Accuracy: 0.5225
Epoch 14, Loss: 45.9127, Accuracy: 0.5225
Epoch 15, Loss: 46.5570, Accuracy: 0.5225
Epoch 16, Loss: 45.6458, Accuracy: 0.5225
Epoch 17, Loss: 45.0143, Accuracy: 0.5225
Epoch 18, Loss: 45.7555, Accuracy: 0.5225
Epoch 19, Loss: 45.6593, Accuracy: 0.5225
Epoch 20, Loss: 45.9280, Accuracy: 0.5225
Epoch 21, Loss: 45.9663, Accuracy: 0.5225
Epoch 22, Loss: 45.2144, Accuracy: 0.5225
Epoch 23, Loss: 45.0930, Accuracy: 0.5225
Epoch 24, Loss: 45.7026, Accuracy: 0.5225
E