In [2]:
import pandas as pd
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
import torch.nn.functional as F
from torch.optim import Adam
from sklearn.model_selection import train_test_split
from data import load_qm9, preprocess_qm9, get_chiral_molecules
from models import E3EquivariantGNN, SE3EquivariantGNN
from train_eval import train_model, evaluate_model
import numpy as np
from tqdm import tqdm
from torch_geometric.datasets import QM9

In [3]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
# Load QM9 through PyTorch Geometric's built-in `QM9` dataset class.
# `filepath` is the dataset root, given relative to the notebook's working
# directory. The recorded output of this cell shows the raw archive being
# downloaded and processed under that root, which took about two minutes.
filepath = "../datasets/"
qm9_dataset = QM9(root=filepath)

Downloading https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/molnet_publish/qm9.zip
Extracting ..\datasets\raw\qm9.zip
Downloading https://ndownloader.figshare.com/files/3195404
Processing...
100%|██████████| 133885/133885 [01:56<00:00, 1151.57it/s]
Done!


In [9]:
# Take the first entry of the dataset for a quick look at its contents.
data = qm9_dataset[0]

In [16]:
# Show the target tensor `y` of the sample held in `data`; as the last
# expression in the cell it is rendered as the cell output (the recorded
# output is a single row of 19 values).
data.y

tensor([[    0.0000,    13.2100,   -10.5499,     3.1865,    13.7363,    35.3641,
             1.2177, -1101.4878, -1101.4098, -1101.3840, -1102.0229,     6.4690,
           -17.1722,   -17.2868,   -17.3897,   -16.1519,   157.7118,   157.7100,
           157.7070]])

In [5]:
# Ask the project helper for the chiral molecules in QM9.
chiral_molecules = get_chiral_molecules(qm9_dataset)

# Report the total count, then preview the first five entries
# (the recorded output shows these are SMILES strings).
chiral_preview = chiral_molecules[:5]
print(f"Number of chiral molecules found: {len(chiral_molecules)}")
print("Sample chiral molecules (SMILES):", chiral_preview)

Number of chiral molecules found: 93980
Sample chiral molecules (SMILES): ['[H]C([H])([H])[C@]1([H])OC1([H])[H]', '[H]N([H])[C@@]([H])(C#N)C([H])([H])[H]', '[H]C#C[C@]([H])(O[H])C([H])([H])[H]', '[H]O[C@@]([H])(C#N)C([H])([H])[H]', '[H]O[C@@]([H])(C([H])=O)C([H])([H])[H]']


In [None]:
# Seed PyTorch's global RNG before any parameters are created so that a
# Restart & Run All reproduces the same torch-based random initialisation.
# 42 matches the `random_state` already used for the train/test split.
torch.manual_seed(42)

# Define the models.
# Both networks get the same irreps configuration (scalar input, one
# "1x1e" hidden irrep, scalar output) so the two are compared on equal terms.
e3_model = E3EquivariantGNN(input_irreps="1x0e", hidden_irreps="1x1e", output_irreps="1x0e").to(device)
se3_model = SE3EquivariantGNN(input_irreps="1x0e", hidden_irreps="1x1e", output_irreps="1x0e").to(device)

# One Adam optimizer per model, both with the same learning rate, so neither
# model's updates touch the other's parameters.
optimizer_e3 = Adam(e3_model.parameters(), lr=0.001)
optimizer_se3 = Adam(se3_model.parameters(), lr=0.001)

In [None]:
# Run the project's preprocessing over the full QM9 dataset.
processed_qm9 = preprocess_qm9(qm9_dataset)

print(f"Processed {len(processed_qm9)} molecules.")

# Fail with a clear message instead of a bare IndexError on the next line
# (or a confusing error in the split) if preprocessing produced nothing.
assert len(processed_qm9) > 0, "preprocess_qm9 returned no molecules"

# Show the first processed entry as a sanity check of its structure.
print(f"Example molecule:\n{processed_qm9[0]}")

In [None]:
# Hold out 20% of the processed molecules for testing. The fixed
# random_state keeps the partition identical from run to run.
train_data, test_data = train_test_split(
    processed_qm9,
    test_size=0.2,
    random_state=42,
)

# Wrap each partition in a DataLoader with batches of 32. Only the training
# loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(
    train_data,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    test_data,
    batch_size=32,
    shuffle=False,
)

In [None]:
# Train both models side by side and report their metrics after every epoch.
epochs = 10
for epoch in range(epochs):
    # One training pass over the training loader for each model, each with
    # its own optimizer.
    train_model(e3_model, train_loader, optimizer_e3)
    train_model(se3_model, train_loader, optimizer_se3)

    # Evaluate on both splits. The pair returned by evaluate_model is
    # unpacked as (loss, accuracy) -- NOTE(review): that order is taken from
    # the variable names here; confirm against train_eval.evaluate_model.
    train_loss_e3, train_acc_e3 = evaluate_model(e3_model, train_loader)
    train_loss_se3, train_acc_se3 = evaluate_model(se3_model, train_loader)
    test_loss_e3, test_acc_e3 = evaluate_model(e3_model, test_loader)
    test_loss_se3, test_acc_se3 = evaluate_model(se3_model, test_loader)

    print(f"[Epoch {epoch+1}] Train Acc (E3): {train_acc_e3:.4f}, Train Acc (SE3): {train_acc_se3:.4f}")
    print(f"[Epoch {epoch+1}] Test Acc (E3): {test_acc_e3:.4f}, Test Acc (SE3): {test_acc_se3:.4f}")
    # The losses were previously computed and then dropped; report them too
    # so convergence and over-fitting are visible alongside the accuracies.
    print(f"[Epoch {epoch+1}] Train Loss (E3): {train_loss_e3:.4f}, Train Loss (SE3): {train_loss_se3:.4f}")
    print(f"[Epoch {epoch+1}] Test Loss (E3): {test_loss_e3:.4f}, Test Loss (SE3): {test_loss_se3:.4f}")