<a href="https://colab.research.google.com/github/apoorvapu/data_science/blob/main/molecule_diffusion_NOTworking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torchvision diffusers transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Using cached nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Using cached nvidia_curand_cu12

In [2]:
!pip install rdkit pubchempy tqdm py3Dmol

Collecting rdkit
  Using cached rdkit-2024.9.6-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.0 kB)
Collecting pubchempy
  Using cached PubChemPy-1.0.4-py3-none-any.whl
Collecting py3Dmol
  Using cached py3Dmol-2.4.2-py2.py3-none-any.whl.metadata (1.9 kB)
Using cached rdkit-2024.9.6-cp311-cp311-manylinux_2_28_x86_64.whl (34.3 MB)
Using cached py3Dmol-2.4.2-py2.py3-none-any.whl (7.0 kB)
Installing collected packages: py3Dmol, pubchempy, rdkit
Successfully installed pubchempy-1.0.4 py3Dmol-2.4.2 rdkit-2024.9.6


## download 1000 molecules 3D .sdf files from pubchem and store in a molecules directory

In [3]:
import os
import requests
from tqdm import tqdm

# Make sure the folder exists
os.makedirs("Molecules", exist_ok=True)

downloaded = 0
target = 1000
cid = 1

while downloaded < target and cid < 10000:
    try:
        url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/SDF?record_type=3d"
        response = requests.get(url, timeout=10)
        if response.status_code == 200 and len(response.text) > 100:
            file_path = f"Molecules/mol_{cid}.sdf"
            with open(file_path, "w") as f:
                f.write(response.text)
            downloaded += 1
        cid += 1
    except Exception as e:
        print(f"CID {cid} failed: {e}")
        cid += 1


KeyboardInterrupt: 

In [4]:
ls Molecules/*.sdf | wc -l


934


In [6]:
import os
from rdkit import Chem
from rdkit.Chem import AllChem
import py3Dmol
from IPython.display import display

def view_mol(mol):
    # Generate 3D coordinates if missing
    if mol.GetNumConformers() == 0:
        AllChem.EmbedMolecule(mol)
    mb = Chem.MolToMolBlock(mol)
    viewer = py3Dmol.view(width=400, height=350)
    viewer.addModel(mb, 'mol')
    viewer.setStyle({'stick': {}})
    viewer.setBackgroundColor('white')
    viewer.zoomTo()
    return viewer

# List SDF files
sdf_dir = "Molecules"
sdf_files = sorted([f for f in os.listdir(sdf_dir) if f.endswith(".sdf")])

# Visualize first 5 molecules (change range as needed)
for sdf_file in sdf_files[:5]:
    sdf_path = os.path.join(sdf_dir, sdf_file)
    mol = Chem.MolFromMolFile(sdf_path, removeHs=False)
    if mol:
        print(f"Showing: {sdf_file}")
        view_mol(mol).show()


Showing: mol_1.sdf


Showing: mol_10.sdf


Showing: mol_1000.sdf


Showing: mol_1001.sdf


Showing: mol_1002.sdf


In [7]:
#!pip install torch-geometric torch_scatter

In [8]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
from torch.utils.data import Dataset, DataLoader
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import MolToFile
import py3Dmol
from tqdm import tqdm
from torch.nn import functional as F

# Define atom types and their properties
atom_types = ['C', 'N', 'O', 'F', 'S', 'Cl', 'P', 'Br', 'I']
atom_type_to_idx = {atom: idx for idx, atom in enumerate(atom_types)}
idx_to_atom_type = {idx: atom for atom, idx in atom_type_to_idx.items()}


MoleculeDataset Class

This class handles loading molecular data from SDF files:

It scans a directory for SDF files
For each molecule in the files, it:

Extracts atom types (C, N, O, etc.)

Gets 3D coordinates for each atom

Converts these to PyTorch tensors

Centers the molecule coordinates
(ensuring translation invariance)

Stores the processed molecule data

In [9]:
class MoleculeDataset(Dataset):
    def __init__(self, sdf_dir):
        self.sdf_dir = sdf_dir
        self.file_list = [f for f in os.listdir(sdf_dir) if f.endswith('.sdf')]
        self.molecules = []
        self.process_molecules()

    def process_molecules(self):
        print(f"Processing {len(self.file_list)} SDF files...")
        for file_name in tqdm(self.file_list):
            file_path = os.path.join(self.sdf_dir, file_name)
            suppl = Chem.SDMolSupplier(file_path)

            for mol in suppl:
                if mol is None:
                    continue

                # Get atom types and coordinates
                atom_types_list = []
                atom_coords = []

                for atom in mol.GetAtoms():
                    symbol = atom.GetSymbol()
                    if symbol in atom_type_to_idx:
                        atom_types_list.append(atom_type_to_idx[symbol])
                    else:
                        # Skip atoms not in our predefined types
                        continue

                # Get 3D coordinates (use conformers)
                conformer = mol.GetConformer()
                for i in range(mol.GetNumAtoms()):
                    pos = conformer.GetAtomPosition(i)
                    atom_coords.append([pos.x, pos.y, pos.z])

                # Skip if we don't have coordinates for all atoms
                if len(atom_coords) != len(atom_types_list):
                    continue

                # Convert to tensors
                atom_types_tensor = torch.tensor(atom_types_list, dtype=torch.long)
                atom_coords_tensor = torch.tensor(atom_coords, dtype=torch.float)

                # Center the molecule (translation invariance)
                center = atom_coords_tensor.mean(dim=0, keepdim=True)
                atom_coords_tensor = atom_coords_tensor - center

                self.molecules.append({
                    'atom_types': atom_types_tensor,
                    'atom_coords': atom_coords_tensor,
                    'mol': mol  # Keep the original molecule for visualization
                })

        print(f"Successfully processed {len(self.molecules)} molecules")

    def __len__(self):
        return len(self.molecules)

    def __getitem__(self, idx):
        return self.molecules[idx]



Edge Creation Function
create_edge_indices() determines which atoms are connected based on their physical proximity. This helps model molecular bonds by identifying atoms that are close enough to potentially share a bond.

In [10]:
# Function to create edge indices for each molecule
def create_edge_indices(coords, max_distance=4.0):
    """Create edge indices based on distance threshold"""
    n_atoms = coords.shape[0]
    edge_indices = []

    for i in range(n_atoms):
        for j in range(n_atoms):
            if i != j:  # Avoid self-loops
                # Calculate distance between atoms
                dist = torch.norm(coords[i] - coords[j])
                if dist < max_distance:
                    edge_indices.append([i, j])

    if not edge_indices:
        # If no edges, create at least one dummy edge to avoid errors
        edge_indices.append([0, 0])

    return torch.tensor(edge_indices, dtype=torch.long).t()  # Shape [2, num_edges]



Collate Function
collate_fn() handles batching of molecules with different numbers of atoms:

It pads tensors to the same size
Creates masks to identify which positions contain actual atoms vs. padding

In [11]:

# Collate function for batching
def collate_fn(batch):
    atom_types = [item['atom_types'] for item in batch]
    atom_coords = [item['atom_coords'] for item in batch]
    mols = [item['mol'] for item in batch]

    # Create masks for padding
    max_atoms = max(len(types) for types in atom_types)
    batch_size = len(batch)

    # Initialize padded tensors
    padded_types = torch.zeros((batch_size, max_atoms), dtype=torch.long)
    padded_coords = torch.zeros((batch_size, max_atoms, 3), dtype=torch.float)
    masks = torch.zeros((batch_size, max_atoms), dtype=torch.bool)

    # Fill in data with padding
    for i, (types, coords) in enumerate(zip(atom_types, atom_coords)):
        n_atoms = len(types)
        padded_types[i, :n_atoms] = types
        padded_coords[i, :n_atoms] = coords
        masks[i, :n_atoms] = 1  # Mark actual atoms as True

    return {
        'atom_types': padded_types,
        'atom_coords': padded_coords,
        'masks': masks,
        'mols': mols
    }


EquivariantLayer Class:
This is a neural network layer that respects the physical properties of 3D space:

It processes node features through MLPs (Multi-Layer Perceptrons)

Computes messages between nodes based on their features and distances

Updates node features based on aggregated messages

Ensures the operations respect physical invariances

In [12]:

# E(3) Equivariant Layer using simple custom message passing
class EquivariantLayer(nn.Module):
    def __init__(self, hidden_dim):
        super(EquivariantLayer, self).__init__()

        self.node_mlp = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, hidden_dim)
        )

        self.message_mlp = nn.Sequential(
            nn.Linear(hidden_dim * 2 + 1, hidden_dim),  # 2*node_feat + distance
            nn.SiLU(),
            nn.Linear(hidden_dim, hidden_dim)
        )

        self.update_mlp = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),  # original + messages
            nn.SiLU(),
            nn.Linear(hidden_dim, hidden_dim)
        )

        self.coord_mlp = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, 1, bias=False)
        )

    def forward(self, x, coords, mask=None):
        """
        Args:
            x: Node features [batch_size, n_nodes, hidden_dim]
            coords: Node coordinates [batch_size, n_nodes, 3]
            mask: Node mask [batch_size, n_nodes]
        """
        batch_size, n_nodes, _ = x.shape

        # Initial node transformation
        h = self.node_mlp(x)

        # Compute pairwise distances and messages
        messages = torch.zeros_like(h)

        for b in range(batch_size):
            valid_mask = mask[b] if mask is not None else torch.ones(n_nodes, dtype=torch.bool, device=x.device)
            valid_idx = torch.where(valid_mask)[0]
            n_valid = len(valid_idx)

            if n_valid <= 1:
                continue  # Skip if only one or no valid nodes

            # Only process valid nodes
            h_valid = h[b, valid_idx]
            coords_valid = coords[b, valid_idx]

            # Compute all pairwise messages between valid nodes
            for i in range(n_valid):
                for j in range(n_valid):
                    if i != j:  # Avoid self-loops
                        # Compute distance
                        rel_pos = coords_valid[i] - coords_valid[j]
                        dist = torch.norm(rel_pos)

                        # Prepare message inputs
                        edge_feat = torch.cat([h_valid[i], h_valid[j], dist.unsqueeze(0)])

                        # Compute message
                        msg = self.message_mlp(edge_feat)

                        # Aggregate message
                        messages[b, valid_idx[i]] += msg

        # Update node features
        x_new = self.update_mlp(torch.cat([x, messages], dim=-1))

        # Apply mask if provided
        if mask is not None:
            x_new = x_new * mask.unsqueeze(-1)

        return x_new, coords



DiffusionModel Class
The core neural network model that:

Embeds atom types into high-dimensional vectors

Embeds timesteps (for the diffusion process)

Processes features through equivariant layers

Outputs:

Predicted noise in atom coordinates

Predictions for atom types

In [13]:
# Diffusion Model
class DiffusionModel(nn.Module):
    def __init__(self, node_features=len(atom_types), hidden_dim=128, n_layers=4):
        super(DiffusionModel, self).__init__()

        # Embedding for atom types
        self.atom_embedding = nn.Embedding(node_features, hidden_dim)

        # Timestep embedding
        self.time_embedding = nn.Sequential(
            nn.Linear(1, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, hidden_dim)
        )

        # Equivariant layers
        self.layers = nn.ModuleList()
        for _ in range(n_layers):
            self.layers.append(EquivariantLayer(hidden_dim))

        # Output heads
        self.pos_head = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, 3)  # (x, y, z) coordinates
        )

        self.atom_type_head = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, node_features)  # Atom type logits
        )

    def forward(self, atom_types, atom_coords, t, masks=None):
        batch_size, n_atoms = atom_types.shape

        # Embed atom types
        x = self.atom_embedding(atom_types)  # [batch, n_atoms, hidden_dim]

        # Embed timestep
        time_emb = self.time_embedding(t.unsqueeze(-1))  # [batch, hidden_dim]

        # Add time embedding to node features
        x = x + time_emb.unsqueeze(1)  # [batch, n_atoms, hidden_dim]

        # Process through equivariant layers
        for layer in self.layers:
            x, _ = layer(x, atom_coords, masks)

        # Predict noise in coordinates
        pos_noise = self.pos_head(x)  # [batch, n_atoms, 3]

        # Predict atom types
        atom_logits = self.atom_type_head(x)  # [batch, n_atoms, n_atom_types]

        if masks is not None:
            # Apply mask to only predict for actual atoms
            pos_noise = pos_noise * masks.unsqueeze(-1)
            atom_logits = atom_logits * masks.unsqueeze(-1)

        return pos_noise, atom_logits



MoleculeDiffusion Class
Implements the diffusion process:

Forward diffusion: diffuse() gradually adds noise to molecule coordinates according to a schedule

Reverse diffusion: sample() generates new molecules by iteratively denoising random coordinates

In [20]:
# Simplified diffusion process
class MoleculeDiffusion:
    def __init__(self, model, n_steps=1000, beta_min=1e-4, beta_max=0.02):
        self.model = model
        self.n_steps = n_steps
        self.beta_min = beta_min
        self.beta_max = beta_max

        # Precompute noise schedule
        self.betas = torch.linspace(beta_min, beta_max, n_steps)
        self.alphas = 1 - self.betas
        self.alpha_cumprod = torch.cumprod(self.alphas, dim=0)

    def diffuse(self, atom_coords, t):
        """Add noise to coordinates according to diffusion schedule"""
        batch_size = atom_coords.shape[0]
        self.alpha_cumprod = self.alpha_cumprod.to(device)
        # Convert t to integer indices
        t_idx = (t * self.n_steps).long().clamp(0, self.n_steps - 1)
        t_idx =t_idx.to(device)
        # Get alpha_cumprod for the given timestep
        a_t = self.alpha_cumprod[t_idx].view(batch_size, 1, 1)

        # Sample noise
        noise = torch.randn_like(atom_coords)

        # Add noise to coordinates
        noisy_coords = torch.sqrt(a_t) * atom_coords + torch.sqrt(1 - a_t) * noise

        return noisy_coords, noise

    def sample(self, batch_size, n_atoms, device='cpu'):
        """Generate new molecules"""
        # Start from random noise
        atom_coords = torch.randn(batch_size, n_atoms, 3, device=device)
        atom_types = torch.zeros(batch_size, n_atoms, dtype=torch.long, device=device)

        # Generate masks (all True for simplicity)
        masks = torch.ones(batch_size, n_atoms, dtype=torch.bool, device=device)

        # Iteratively denoise
        for i in tqdm(range(self.n_steps - 1, 0, -1)):
            t = torch.ones(batch_size, device=device) * i / self.n_steps

            with torch.no_grad():
                # Predict noise
                pred_noise, atom_logits = self.model(atom_types, atom_coords, t, masks)

                # Update coordinates
                alpha = self.alphas[i]
                alpha_cumprod = self.alpha_cumprod[i]
                beta = self.betas[i]

                if i > 1:
                    noise = torch.randn_like(atom_coords)
                else:
                    noise = torch.zeros_like(atom_coords)

                # Update coordinates
                atom_coords = (atom_coords - beta * pred_noise / torch.sqrt(1 - alpha_cumprod)) / torch.sqrt(alpha)
                atom_coords = atom_coords + torch.sqrt(beta) * noise

                # Update atom types (use softmax to get probabilities)
                if i < self.n_steps // 2:  # Only update types in later steps
                    atom_probs = F.softmax(atom_logits, dim=-1)
                    atom_types = torch.argmax(atom_probs, dim=-1)

        return atom_coords, atom_types, masks


The train() function handles model training:

For each batch of molecules:

Adds noise to coordinates based on random timesteps

Passes through the model to predict the noise and atom types

Calculates losses (coordinate loss and atom type loss)

Updates the model parameters using backpropagation

In [15]:

# Training function
def train(model, diffusion, dataloader, optimizer, device, epochs=2):
    model.train()

    for epoch in range(epochs):
        total_loss = 0
        coord_loss = 0
        type_loss = 0

        for batch in tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}"):
            atom_types = batch['atom_types'].to(device)
            atom_coords = batch['atom_coords'].to(device)
            masks = batch['masks'].to(device)

            batch_size = atom_types.shape[0]

            # Sample random timesteps
            t = torch.rand(batch_size, device=device)

            # Add noise to coordinates
            noisy_coords, noise = diffusion.diffuse(atom_coords, t)

            # Predict noise
            pred_noise, atom_logits = model(atom_types, noisy_coords, t, masks)

            # Calculate coordinate loss (MSE on noise prediction)
            c_loss = F.mse_loss(pred_noise, noise, reduction='none')
            c_loss = (c_loss * masks.unsqueeze(-1)).sum() / (masks.sum() * 3 + 1e-8)

            # Calculate atom type loss (cross entropy) - FIXED SECTION
            atom_logits_flat = atom_logits.view(-1, len(atom_type_to_idx))  # Use the correct number of atom types
            atom_types_flat = atom_types.view(-1)
            mask_flat = masks.view(-1)

            # Only calculate loss for actual atoms (where mask is True)
            valid_indices = torch.where(mask_flat)[0]
            if len(valid_indices) > 0:
                # Make sure all indices are within bounds
                valid_indices = valid_indices[valid_indices < atom_logits_flat.shape[0]]
                if len(valid_indices) > 0:
                    valid_logits = atom_logits_flat[valid_indices]
                    valid_types = atom_types_flat[valid_indices]

                    # Ensure target values are within the valid range for the number of atom types
                    if torch.max(valid_types) >= len(atom_type_to_idx):
                        print(f"Warning: Found atom type index {torch.max(valid_types).item()} which is >= number of atom types {len(atom_type_to_idx)}")
                        valid_types = torch.clamp(valid_types, 0, len(atom_type_to_idx) - 1)

                    t_loss = F.cross_entropy(valid_logits, valid_types)
                else:
                    t_loss = torch.tensor(0.0, device=device)
            else:
                t_loss = torch.tensor(0.0, device=device)

            # Total loss
            loss = c_loss + t_loss

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            coord_loss += c_loss.item()
            type_loss += t_loss.item()

        # Print statistics
        avg_loss = total_loss / len(dataloader)
        avg_coord_loss = coord_loss / len(dataloader)
        avg_type_loss = type_loss / len(dataloader)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Coord Loss: {avg_coord_loss:.4f}, Type Loss: {avg_type_loss:.4f}")

    return model



In [30]:
# Generate and visualize molecules
def visualize_molecules(atom_coords, atom_types, n_mols=3):
    # Convert generated atom types and coordinates to RDKit molecules
    molecules = []

    for i in range(min(n_mols, len(atom_coords))):
        coords = atom_coords[i].detach().cpu().numpy()
        types = atom_types[i].detach().cpu().numpy()

        # Create empty molecule
        mol = Chem.RWMol()

        # Add atoms
        atom_objs = []
        for j, atom_type in enumerate(types):
            if atom_type < len(idx_to_atom_type):
                atom_symbol = idx_to_atom_type[atom_type.item()]
                atom = Chem.Atom(atom_symbol)
                atom_idx = mol.AddAtom(atom)
                atom_objs.append(atom_idx)

        # Guesstimate bonds based on distance
        conf = Chem.Conformer(mol.GetNumAtoms())
        for j in range(mol.GetNumAtoms()):
            #conf.SetAtomPosition(j, (coords[j]))
            x, y, z = coords[j]
            conf.SetAtomPosition(j, Chem.rdGeometry.Point3D(float(x), float(y), float(z)))


        mol.AddConformer(conf)

        # Add bonds based on distance for dimension 0 with size 243
        for j in range(mol.GetNumAtoms()):
            for k in range(j + 1, mol.GetNumAtoms()):
                dist = np.linalg.norm(coords[j] - coords[k])

                # Simplified bond creation based on distance
                if dist < 1.7:  # Typical bond length
                    mol.AddBond(j, k, Chem.BondType.SINGLE)

        # Convert to molecule
        try:
            molecule = mol.GetMol()
            # Try to sanitize and optimize
            Chem.SanitizeMol(molecule)
            molecules.append(molecule)
        except Exception as e:
            print(f"Failed to create molecule {i}: {str(e)}")

    return molecules

# Interactive 3D visualization with py3Dmol
def display_3d_molecules(molecules, size=(400, 400), style="stick"):
    if not molecules:
        print("No valid molecules to display")
        return

    for i, mol in enumerate(molecules):
        viewer = py3Dmol.view(width=size[0], height=size[1])

        # Convert to mol block
        mol_block = Chem.MolToMolBlock(mol)
        viewer.addModel(mol_block, "mol")

        # Set visualization style
        viewer.setStyle({style:{}})
        viewer.zoomTo()
        #viewer.show()
        view_mol(mol).show()
        print(f"Molecule {i+1}")



In [21]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load dataset
data_dir = "Molecules"
dataset = MoleculeDataset(data_dir)

    # Create dataloader
batch_size = 128
dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=4
)

    # Initialize model
model = DiffusionModel().to(device)

    # Initialize diffusion process
diffusion = MoleculeDiffusion(model)

    # Initialize optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)

    # Train model
model = train(model, diffusion, dataloader, optimizer, device, epochs=1)


Processing 934 SDF files...


100%|██████████| 934/934 [00:00<00:00, 1994.86it/s]


Successfully processed 929 molecules


Epoch 1/1: 100%|██████████| 8/8 [14:52<00:00, 111.53s/it]


Epoch 1/1, Loss: 2.8779, Coord Loss: 1.2072, Type Loss: 1.6707


In [23]:
    # Generate new molecules
print("Generating new molecules...")
atom_coords, atom_types, masks = diffusion.sample(
        batch_size=10,
        n_atoms=8,  # Average small molecule size
        device=device
)

Generating new molecules...


100%|██████████| 999/999 [11:48<00:00,  1.41it/s]


In [28]:
    # Visualize generated molecules
molecules = visualize_molecules(atom_coords, atom_types)

In [31]:
display_3d_molecules(molecules)

Molecule 1


Molecule 2


Molecule 3


In [34]:
size=(400, 400)
style="stick"
if not molecules:
        print("No valid molecules to display")

for i, mol in enumerate(molecules):
        viewer = py3Dmol.view(width=size[0], height=size[1])

        # Convert to mol block
        mol_block = Chem.MolToMolBlock(mol)
        viewer.addModel(mol_block, "mol")

        # Set visualization style
        viewer.setStyle({style:{}})
        viewer.zoomTo()
        viewer.show()
        #view_mol(mol).show()
        print(f"Molecule {i+1}")

Molecule 1


Molecule 2


Molecule 3


In [40]:
from rdkit.Chem import SDWriter

# Save molecules to SDF file
sdf_writer = SDWriter("generated_molecules.sdf")
for mol in molecules:
    sdf_writer.write(mol)
sdf_writer.close()

print("\nSaved molecules to generated_molecules.sdf")




Saved molecules to generated_molecules.sdf
