In [None]:
import openbabel

def convert_smiles_to_pdb(smiles_string, output_pdb_filename='output.pdb'):
    # Create an OBMol object
    mol = openbabel.OBMol()

    # Initialize the OpenBabel conversion
    conv = openbabel.OBConversion()
    conv.SetInAndOutFormats("smi", "pdb")

    # Read the SMILES string
    conv.ReadString(mol, smiles_string)

    # Write the PDB file
    conv.WriteFile(mol, output_pdb_filename)

# Example usage:
smiles_string = 'CSCCC1NC(=O)C(CCC(N)=O)NC(=O)C(CC(=O)O)NC(=O)C(N)CSSCC(C(=O)NC(CCC(=O)O)C(=O)N2CCCC2C(=O)NC(CC(C)C)C(=O)O)NC(=O)C(CCC(=O)O)NC1=O'
convert_smiles_to_pdb(smiles_string, 'output.pdb')


In [82]:
from rdkit import Chem
from rdkit.Chem import AllChem

def convert_smiles_to_pdb_with_minimization(smiles_string, output_pdb_filename='output_minimized.pdb'):
    # Convert SMILES to RDKit molecule
    mol = Chem.MolFromSmiles(smiles_string)

    # Generate 3D coordinates
    AllChem.EmbedMolecule(mol, randomSeed=42)

    # Perform force field minimization
    AllChem.MMFFOptimizeMolecule(mol)
    
    # Write the PDB file with minimized coordinates
    Chem.MolToPDBFile(mol, output_pdb_filename)

# Example usage:
smiles_string = 'CSCCC1NC(=O)C2CSSCC(NC(=O)C(CCC(=O)O)NC(=O)C3CCCN3C(=O)C(CCCCN)NC1=O)C(=O)N2'
convert_smiles_to_pdb_with_minimization(smiles_string, 'output_minimized.pdb')


[17:05:34] Molecule does not have explicit Hs. Consider calling AddHs()
[17:05:34] Molecule does not have explicit Hs. Consider calling AddHs()


In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
import openbabel

def convert_smiles_to_pdb_with_rdkit(smiles_string, output_pdb_filename='output_rdkit.pdb'):
    # Convert SMILES to RDKit molecule
    mol = Chem.MolFromSmiles(smiles_string)

    # Generate 3D coordinates
    AllChem.EmbedMolecule(mol, randomSeed=42)

    # Write the PDB file with 3D coordinates
    Chem.MolToPDBFile(mol, output_pdb_filename)

def convert_smiles_to_pdb_with_openbabel(smiles_string, output_pdb_filename='output_openbabel.pdb'):
    # Create an OBMol object
    mol = openbabel.OBMol()

    # Initialize the OpenBabel conversion
    conv = openbabel.OBConversion()
    conv.SetInAndOutFormats("smi", "pdb")

    # Read the SMILES string
    conv.ReadString(mol, smiles_string)

    # Write the PDB file
    conv.WriteFile(mol, output_pdb_filename)

# Example usage:
smiles_string = 'CSCCC1NC(=O)C(CCC(N)=O)NC(=O)C(CC(=O)O)NC(=O)C(N)CSSCC(C(=O)NC(CCC(=O)O)C(=O)N2CCCC2C(=O)NC(CC(C)C)C(=O)O)NC(=O)C(CCC(=O)O)NC1=O'

# Generate PDB files using RDKit and OpenBabel
convert_smiles_to_pdb_with_rdkit(smiles_string, 'output_rdkit.pdb')
convert_smiles_to_pdb_with_openbabel(smiles_string, 'output_openbabel.pdb')

# Read RDKit-generated PDB file (A)
with open('output_rdkit.pdb', 'r') as rdkit_file:
    lines_rdkit = []
    A = rdkit_file.readlines()
    for line in A:
        if line.startswith('HETATM'):
            lines_rdkit.append(line)


# Read OpenBabel-generated PDB file (B)
with open('output_openbabel.pdb', 'r') as openbabel_file:
    lines_openbabel = []
    B = openbabel_file.readlines()
    for line in B:
        if line.startswith('ATOM'):
            lines_openbabel.append(line)

# Combine information from A and B to create C
lines_combined = []
for line_openbabel in lines_openbabel:

            atom_info_rdkit = lines_rdkit.pop()[30:54]
            line_combined = line_openbabel[:30] + atom_info_rdkit + line_openbabel[54:]
            lines_combined.append(line_combined)
with open('output_combined.pdb', 'w') as output_file:
    output_file.writelines(lines_combined)
lines_combined


In [None]:
import pandas as pd

# Convert lines_combined to a DataFrame
columns = ['ATOM', 'serial', 'atom_name', 'res_name', 'chain_id', 'res_seq', 'x', 'y', 'z', 'occupancy', 'temp_factor', 'element']
data = [line.split() for line in lines_combined]
df = pd.DataFrame(data, columns=columns)
print(df)
# Sort DataFrame based on the sixth column (res_seq)
df['res_seq'] = df['res_seq'].astype(int)  # Convert 'res_seq' column to integers for proper sorting
df = df.sort_values(by='res_seq')
print(df)
# Rewrite the second column starting from 1
df['serial'] = range(1, len(df) + 1)

# Convert the DataFrame back to PDB format
output_pdb_filename = 'output_sorted.pdb'
with open(output_pdb_filename, 'w') as output_file:
    for _, row in df.iterrows():
        line = f"{row['ATOM']:<6}{row['serial']:>5}  {row['atom_name']:<4}{row['res_name']:>3} {row['chain_id']:<1}{row['res_seq']:>4}    {float(row['x']):>8.3f}{float(row['y']):>8.3f}{float(row['z']):>8.3f}{row['occupancy']:>6}{row['temp_factor']:>6}          {row['element']:<2}\n"
        output_file.write(line)


In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from openmm import app, unit, Platform, XmlSerializer
from openmm.app import PDBFile
from openmm import System, LangevinIntegrator, Context

def convert_smiles_to_pdb(smiles_string, output_pdb_filename='output.pdb'):
    # Convert SMILES to RDKit molecule
    mol = Chem.MolFromSmiles(smiles_string)

    # Generate 3D coordinates
    AllChem.EmbedMolecule(mol, randomSeed=42)
    
    AllChem.MMFFOptimizeMolecule(mol) # type: ignore
    # Write the PDB file with 3D coordinates using RDKit
    PDBFile.writeFile(mol, output_pdb_filename)

def calculate_potential_energy(pdb_file_path):
    # Load PDB file
    pdb = PDBFile(pdb_file_path)

    # Create a System object
    forcefield = app.ForceField('amber99sbildn.xml', 'tip3p.xml')
    system = forcefield.createSystem(pdb.topology, nonbondedMethod=app.PME, constraints=app.HBonds)

    # Set up integrator
    integrator = LangevinIntegrator(300*unit.kelvin, 1.0/unit.picoseconds, 2.0*unit.femtoseconds)
    platform = Platform.getPlatformByName('Reference')
    context = Context(system, integrator, platform)

    # Set positions
    context.setPositions(pdb.positions)

    # Minimize energy
    app.LocalEnergyMinimizer.minimize(context)

    # Get potential energy
    state = context.getState(getEnergy=True)
    potential_energy = state.getPotentialEnergy().value_in_unit(unit.kilocalories_per_mole)

    return potential_energy

# Example usage:
smiles_string = 'CSCCC1NC(=O)C2CSSCC(NC(=O)C(CCC(=O)O)NC(=O)C3CCCN3C(=O)C(CCCCN)NC1=O)C(=O)N2'
output_pdb_filename = 'output.pdb'

# Convert SMILES to PDB
convert_smiles_to_pdb(smiles_string, output_pdb_filename)

# Calculate potential energy using OpenMM
energy = calculate_potential_energy(output_pdb_filename)
print(f'Potential Energy: {energy} kcal/mol')
