In [None]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.5.2-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.5.2


In [None]:
from torch_geometric.datasets import QM9

# Load the QM9 dataset through PyTorch Geometric. Files are stored under
# /tmp/QM9; on first use the archive is downloaded and extracted there
# (see the cell output below).
dataset = QM9(root='/tmp/QM9')

Downloading https://data.pyg.org/datasets/qm9_v3.zip
Extracting /tmp/QM9/raw/qm9_v3.zip
Processing...
Using a pre-processed version of the dataset. Please install 'rdkit' to alternatively process the raw data.
Done!


In [None]:
import torch
from torch_geometric.datasets import QM9
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import reverse_cuthill_mckee
import numpy as np

# Process each graph in two passes.
#
# Pass 1 reorders every adjacency matrix with reverse Cuthill-McKee and
# records the largest bandwidth over the WHOLE dataset. Pass 2 then truncates
# every matrix with that final value. Truncating inside the first loop would
# use a running maximum, making each graph's truncated size depend on the
# order in which the dataset happens to be iterated.
reordered_adj_matrices = []
max_bandwidth = 0

for data in dataset:
    # Convert PyG data to a scipy sparse matrix (one unit entry per edge).
    edge_index = data.edge_index.cpu().numpy()
    num_nodes = data.num_nodes
    adj_matrix = csr_matrix(
        (np.ones(edge_index.shape[1]), (edge_index[0], edge_index[1])),
        shape=(num_nodes, num_nodes),
    )

    # Apply the reverse Cuthill-McKee ordering to rows and columns alike.
    perm = reverse_cuthill_mckee(adj_matrix, symmetric_mode=True)
    adj_matrix_reordered = adj_matrix[perm, :][:, perm]

    # Bandwidth = largest |row - col| over the stored entries. A graph with
    # no edges has no entries, so treat its bandwidth as 0 instead of letting
    # the reduction fail on an empty array.
    rows, cols = adj_matrix_reordered.nonzero()
    bandwidth = int(np.abs(rows - cols).max()) if rows.size else 0
    max_bandwidth = max(max_bandwidth, bandwidth)

    reordered_adj_matrices.append(adj_matrix_reordered)

# Pass 2: keep the leading max_bandwidth x max_bandwidth corner of each
# reordered matrix as a dense float tensor.
# NOTE(review): graphs with more than max_bandwidth nodes lose their trailing
# nodes here, exactly as in the original design - confirm this is intended.
adj_matrices = [
    torch.tensor(
        reordered[:max_bandwidth, :max_bandwidth].toarray(),
        dtype=torch.float,
    )
    for reordered in reordered_adj_matrices
]

# Pad adjacency matrices
padded_adj_matrices = []
for adj_matrix in adj_matrices:
    # Zero-pad BOTH axes so every matrix really has shape
    # (max_bandwidth, max_bandwidth); padding rows only leaves matrices with
    # fewer than max_bandwidth columns non-square.
    row_pad = max_bandwidth - adj_matrix.shape[0]
    col_pad = max_bandwidth - adj_matrix.shape[1]
    padded_adj_matrix = np.pad(adj_matrix, ((0, row_pad), (0, col_pad)))
    # Convert array to tensor
    padded_adj_matrix = torch.tensor(padded_adj_matrix, dtype=torch.float)
    padded_adj_matrices.append(padded_adj_matrix)

In [None]:
# Zero-pad each truncated matrix on its bottom and right edges up to
# (max_bandwidth, max_bandwidth), then stack the results into one tensor
# with the graphs along the first dimension.
padded_adj_matrices = torch.stack([
    torch.tensor(
        np.pad(
            matrix,
            (
                (0, max_bandwidth - matrix.shape[0]),
                (0, max_bandwidth - matrix.shape[1]),
            ),
        ),
        dtype=torch.float,
    )
    for matrix in adj_matrices
])



In [None]:
from sklearn.model_selection import train_test_split

# Split the dataset into training set and test set (80% / 20%).
# A fixed random_state makes the shuffle - and therefore every number
# reported further down - reproducible across kernel restarts.
SPLIT_SEED = 42
train_adj_matrices, test_adj_matrices = train_test_split(
    padded_adj_matrices, test_size=0.2, random_state=SPLIT_SEED
)

# Use train_adj_matrices for training and test_adj_matrices for testing

# Define RNN model
class RNN(torch.nn.Module):
    """Single-step recurrent cell built from two linear layers.

    At each step the input row and the previous hidden state are concatenated
    along the feature dimension. ``i2h`` maps that vector to the next hidden
    state (no activation is applied) and ``i2o`` maps it to output scores,
    which are normalised with ``LogSoftmax`` over dimension 1.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Number of features in the hidden state.
        output_size: Number of output scores produced per step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.i2h = torch.nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = torch.nn.Linear(input_size + hidden_size, output_size)
        self.softmax = torch.nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run one recurrent step.

        Args:
            input: Tensor of shape ``(batch, input_size)``.
            hidden: Tensor of shape ``(batch, hidden_size)``.

        Returns:
            Tuple ``(output, hidden)``: log-probabilities of shape
            ``(batch, output_size)`` and the next hidden state of shape
            ``(batch, hidden_size)``.
        """
        combined = torch.cat((input, hidden), 1)
        # Both heads read the same concatenated vector; the new hidden state
        # does not feed into this step's output.
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self, batch_size=1):
        """Return an all-zero initial hidden state.

        Args:
            batch_size: Number of sequences processed in parallel. Defaults
                to 1, which reproduces the original ``(1, hidden_size)`` shape.

        Returns:
            Zero tensor of shape ``(batch_size, hidden_size)`` created with
            the same dtype and on the same device as the model parameters, so
            it can be concatenated with inputs after ``model.to(...)``.
        """
        weight = self.i2h.weight
        return torch.zeros(
            batch_size, self.hidden_size, dtype=weight.dtype, device=weight.device
        )

# Model dimensions. The RNN reads one adjacency row per step and scores the
# columns of the next row, so both its input and output widths are taken to
# be the maximum bandwidth.
hidden_size = 128
input_size = output_size = max_bandwidth



In [None]:
model = RNN(input_size, hidden_size, output_size)
# The model already outputs log-probabilities. CrossEntropyLoss applies
# log-softmax once more, which leaves log-probabilities unchanged, so the
# value equals what NLLLoss would give.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)
num_epochs = 2

# Training the RNN: one optimizer step per adjacency matrix. Row i is the
# input at step i and the arg-max column of row i+1 is the target class.
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    num_sequences = 0

    for adj_matrix in train_adj_matrices:
        # A sequence needs at least two rows to form one (input, target)
        # pair; otherwise `loss` would stay the int 0 and backward() fails.
        if len(adj_matrix) < 2:
            continue

        optimizer.zero_grad()
        hidden = model.initHidden()
        loss = 0

        # Targets do not depend on the model, so compute them once per matrix.
        targets = adj_matrix[1:].argmax(dim=1)

        for i in range(len(adj_matrix) - 1):
            output, hidden = model(adj_matrix[i].unsqueeze(0), hidden)  # add batch dimension
            loss += criterion(output, targets[i].unsqueeze(0))

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        num_sequences += 1

    # Report the mean per-sequence loss of the epoch; the loss of whichever
    # sequence happened to come last is not representative of the epoch.
    epoch_average_loss = epoch_loss / max(num_sequences, 1)
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {epoch_average_loss}')

Epoch [1/2], Loss: 10.84885311126709
Epoch [2/2], Loss: 10.7432279586792


In [None]:
# Evaluate the trained RNN on the held-out adjacency matrices.
model.eval()  # switch the module to evaluation mode
total_loss = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for adj_matrix in test_adj_matrices:
        hidden = model.initHidden()
        loss = 0

        # Pair every row with its successor: the model reads the current row
        # and is scored against the arg-max column of the next one.
        for current_row, next_row in zip(adj_matrix[:-1], adj_matrix[1:]):
            output, hidden = model(current_row.unsqueeze(0), hidden)
            target = torch.argmax(next_row)
            loss += criterion(output, target.unsqueeze(0))

        total_loss += loss.item()

# Mean of the summed per-matrix losses over the test set.
average_loss = total_loss / len(test_adj_matrices)
print(f'Average Loss on Test Set: {average_loss}')


Average Loss on Test Set: 13.28559131744772


In [None]:
# Display predictions for a handful of test graphs only. Printing one matrix
# per test sample floods the cell (the notebook front-end truncated the
# original output to its last 5000 lines) and hides everything else.
NUM_PREDICTIONS_TO_SHOW = 5

model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    for adj_matrix in test_adj_matrices[:NUM_PREDICTIONS_TO_SHOW]:
        hidden = model.initHidden()
        # Row 0 stays all-zero: the model only ever predicts row i+1 from row i.
        predicted_adj_matrix = np.zeros((max_bandwidth, max_bandwidth))

        for i in range(len(adj_matrix) - 1):
            # The true row i is fed at every step, not the model's own prediction.
            output, hidden = model(adj_matrix[i].unsqueeze(0), hidden)
            predicted_index = torch.argmax(output).item()
            predicted_adj_matrix[i+1, predicted_index] = 1  # mark the predicted column for row i+1

        print("Predicted Adjacency Matrix:")
        print(predicted_adj_matrix)
        print("\n")


[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


Predicted Adjacency Matrix:
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0.