In [115]:
import torch
import dgl
import dgl.nn.pytorch as dglnn
from torch import nn
import torch.nn.functional as F
import dgl.function as fn

In [131]:
# class EquivariantLayer(nn.Module):
#     def __init__(self, in_dim, out_dim):
#         super(EquivariantLayer, self).__init__()
#         self.layer = dglnn.GraphConv(in_dim, out_dim)  # Placeholder, should be SE(3) equivariant
        
#     def forward(self, g, features, coords):
#         h = self.layer(g, features)  # apply graph convolution
#         # Apply transformations on coordinates for equivariance if needed
#         return h, coords
    
class EquivariantLayer(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(EquivariantLayer, self).__init__()
        self.linear = nn.Linear(input_dim, hidden_dim)
        self.coord_linear = nn.Linear(input_dim, hidden_dim)

    def forward(self, g, features, coordinates):
        # Apply linear transformation to node features
        h = self.linear(features)
        
        # Calculate messages with equivariance: update coordinates based on node features
        g.ndata['h'] = h
        g.ndata['coord'] = coordinates
        g.update_all(fn.copy_u('coord', 'm'), fn.mean('m', 'h_coord'))
        
        # Update node coordinates
        coord_updates = self.coord_linear(g.ndata['h_coord'])
        new_coordinates = coordinates + coord_updates

        return h, new_coordinates

In [36]:
# Define the Denoising Model
# class EquivariantDiffusionModel(nn.Module):
#     def __init__(self, node_feat_dim, hidden_dim):
#         super(EquivariantDiffusionModel, self).__init__()
#         self.node_encoder = nn.Linear(node_feat_dim, hidden_dim)
#         self.equiv_layers = nn.ModuleList([EquivariantLayer(hidden_dim, hidden_dim) for _ in range(3)])
#         self.node_decoder = nn.Linear(hidden_dim, node_feat_dim)
    
#     def forward(self, g, node_features, coords):
#         # Encode node
#         h = F.relu(self.node_encoder(node_features))        
#         for equiv_layer in self.equiv_layers:
#             h, coords = equiv_layer(g, h, coords)
        
#         # Decode to original feature dimension
#         denoised_features = self.node_decoder(h)
#         return denoised_features, coords

In [152]:
class EquivariantDiffusionModel(nn.Module):
    def __init__(self, node_feat_dim, hidden_dim, num_layers=3):
        super(EquivariantDiffusionModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim # 128

        # Initial linear layer for node features
        self.input_layer = nn.Linear(node_feat_dim, hidden_dim) # 11, 128

        # Define multiple equivariant layers
        self.equiv_layers = nn.ModuleList([
            EquivariantLayer(hidden_dim, hidden_dim) for _ in range(num_layers)
        ])

        # Output layers for final features and coordinates
        self.output_layer = nn.Linear(hidden_dim, 1)  # For final prediction

    def forward(self, g, node_features, coordinates):
        print("this is the g, node_feature, coordinates shape")
        print(g)
        print(node_features.shape)
        print(coordinates.shape)
        # Initial feature transformation
        h = F.relu(self.input_layer(node_features))

        # Forward pass through equivariant layers
        for i in range(self.num_layers):
            h, coordinates = self.equiv_layers[i](g, h, coordinates)

        # Final prediction
        pred = self.output_layer(h)

        return pred, coordinates


In [153]:
# Sample Diffusion Process for 3D Denoising
class DiffusionProcess:
    def __init__(self, timesteps, beta_start=0.0001, beta_end=0.02):
        self.timesteps = timesteps
        self.beta = torch.linspace(beta_start, beta_end, timesteps)
    
    def forward_diffusion(self, coords, t):
        noise = torch.randn_like(coords)
        return coords * (1 - self.beta[t]).sqrt() + noise * self.beta[t].sqrt()
    
    def reverse_denoising(self, model, g, features, coords, t):
        denoised_features, denoised_coords = model(g, features, coords)
        # Implement denoising step based on learned prediction and noise schedule
        return denoised_coords

In [154]:
import torch
import dgl
from dgl.data import QM9

# Load QM9 dataset
qm9_data = QM9(label_keys=['mu', 'alpha', 'homo', 'lumo', 'gap', 'r2', 'zpve'])

# Preprocess dataset to get graphs, node features, edge features, and 3D coordinates
def process_qm9_data(data):
    graphs, node_features, coordinates = [], [], []
    for i in range(10):
        g = data[i][0]  # DGLGraph for the molecule
        coords = g.ndata['R']  # 3D coordinates of atoms
        
        # Append graph and features
        graphs.append(g)
        node_features.append(g.ndata['Z'])
        coordinates.append(coords)
    
    return graphs, node_features, coordinates

graphs, node_features, coordinates = process_qm9_data(qm9_data)
print(node_features[1])


tensor([7, 1, 1, 1])


In [155]:
print(graphs[0].ndata['Z'])
print(node_features[0])
print(coordinates[0])


tensor([6, 1, 1, 1, 1])
tensor([6, 1, 1, 1, 1])
tensor([[-1.2698e-02,  1.0858e+00,  8.0010e-03],
        [ 2.1504e-03, -6.0313e-03,  1.9761e-03],
        [ 1.0117e+00,  1.4638e+00,  2.7657e-04],
        [-5.4082e-01,  1.4475e+00, -8.7664e-01],
        [-5.2381e-01,  1.4379e+00,  9.0640e-01]])


In [157]:
from torch.utils.data import Dataset

class QM9Dataset(Dataset):
    def __init__(self, graphs, node_features, coordinates):
        self.graphs = graphs
        self.node_features = node_features
        self.coordinates = coordinates
    
    def __len__(self):
        return len(self.graphs)
    
    def __getitem__(self, idx):
        return (self.graphs[idx], self.node_features[idx], self.coordinates[idx])

# Initialize QM9 dataset and DataLoader
qm9_dataset = QM9Dataset(graphs, node_features, coordinates)

In [158]:
print(qm9_dataset[3][1])

tensor([6, 6, 1, 1])


In [159]:
import dgl
import torch

# Custom collate function to handle DGLGraphs in the DataLoader
def collate_fn(batch):
    graphs, node_features, coordinates = map(list, zip(*batch))
    print("before cat: ==========>")
    print(node_features)
    
    for i, (g, n_feat, coord) in enumerate(zip(graphs, node_features, coordinates)):
        print(f"Graph {i} - Num nodes: {g.number_of_nodes()}")
        print(f"Node feature shape: {n_feat.shape}")
        print(f"Coordinate shape: {coord.shape}")
    
    # Batch graphs
    batched_graph = dgl.batch(graphs)
    
    # Concatenate node and edge features along the batch dimension
    batched_node_features = torch.cat(node_features, dim=0).float()
    batched_coordinates = torch.cat(coordinates, dim=0).float()
    print("Hellooo ------>")
    print(batched_node_features)
    
    print("this is the batch_coordinates")
    print(batched_coordinates)
    
    
    return batched_graph, batched_node_features, batched_coordinates


In [160]:
from torch.utils.data import DataLoader

# Assuming `qm9_dataset` is your custom Dataset
dataloader = DataLoader(qm9_dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)

In [161]:
import torch
import torch.optim as optim
import torch.nn.functional as F

# Assuming the following are already defined:
# - `EquivariantDiffusionModel`: the model class for denoising.
# - `DiffusionProcess`: the diffusion process class with forward and reverse diffusion methods.
# - `molecule_dataset`: a dataset of molecular graphs, with nodes (atoms), edges (bonds), and coordinates.

# Hyperparameters
epochs = 100        # Number of epochs
batch_size = 16     # Batch size
learning_rate = 1e-4  # Learning rate

# Initialize model, diffusion process, and optimizer
model = EquivariantDiffusionModel(node_feat_dim=11, hidden_dim=128)
diffusion_process = DiffusionProcess(timesteps=1000)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    
    for batch in dataloader:
        g, node_features, true_coords = batch  # Unpack batch (graph, features, coordinates)

        # Sample a random timestep t
        t = torch.randint(0, diffusion_process.timesteps, (1,)).item()

        # Add noise to the true coordinates (forward diffusion process)
        noisy_coords = diffusion_process.forward_diffusion(true_coords, t)
        
        # Predict the denoised coordinates from the noisy ones
        denoised_features, denoised_coords = model(g, node_features, noisy_coords)
        
        # Calculate the loss as the mean squared error between predicted and actual noise
        noise = noisy_coords - true_coords  # Calculate actual noise added
        loss = F.mse_loss(denoised_coords, noisy_coords - noise)  # Denoising loss
        
        # Backpropagation and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
    
    # Print epoch summary
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(dataloader)}")

print("Training complete.")


[tensor([6, 6, 1, 1, 1, 1, 1, 1]), tensor([6, 6, 1, 1])]
Graph 0 - Num nodes: 8
Node feature shape: torch.Size([8])
Coordinate shape: torch.Size([8, 3])
Graph 1 - Num nodes: 4
Node feature shape: torch.Size([4])
Coordinate shape: torch.Size([4, 3])
Hellooo ------>
tensor([6., 6., 1., 1., 1., 1., 1., 1., 6., 6., 1., 1.])
this is the batch_coordinates
tensor([[-0.0187,  1.5256,  0.0104],
        [ 0.0021, -0.0039,  0.0020],
        [ 0.9949,  1.9397,  0.0029],
        [-0.5421,  1.9236, -0.8651],
        [-0.5252,  1.9142,  0.9000],
        [ 0.5255, -0.4019,  0.8775],
        [-1.0115, -0.4180,  0.0095],
        [ 0.5086, -0.3925, -0.8876],
        [ 0.5995,  0.0000,  1.0000],
        [-0.5995,  0.0000,  1.0000],
        [-1.6616,  0.0000,  1.0000],
        [ 1.6616,  0.0000,  1.0000]])
this is the g, node_feature, coordinates shape
Graph(num_nodes=12, num_edges=68,
      ndata_schemes={'R': Scheme(shape=(3,), dtype=torch.float32), 'Z': Scheme(shape=(), dtype=torch.int64)}
      edata_s

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x12 and 11x128)