In [None]:
import os
import rdkit
import mordred
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import SDMolSupplier
from mordred import Calculator, descriptors

In [1]:
#load, sanitize add H atoms and forcefield, saves the processed files in a specified output folder
def sanitize_add_hydrogens_and_force_field(mol_file, output_folder):
    try:
        # Load mol file
        mol = Chem.MolFromMolFile(mol_file, sanitize=False)
        # Sanitize molecule
        Chem.SanitizeMol(mol)
        # Add hydrogen atoms
        mol = Chem.AddHs(mol)
        
        # Add force field
        AllChem.EmbedMolecule(mol)
        AllChem.UFFOptimizeMolecule(mol)
        
        # Write the sanitized molecule with hydrogens and force field added
        output_file = os.path.splitext(os.path.basename(mol_file))[0] + '_sanitized.mol'
        output_path = os.path.join(output_folder, output_file)
        Chem.MolToMolFile(mol, output_path)
        
        print(f"Sanitized molecule with force field saved to {output_path}")
    except Exception as e:
        print(f"Error processing file {mol_file}: {e}")

def sanitize_folder(input_folder, output_folder):
    if not os.path.exists(input_folder):
        print(f"Folder {input_folder} does not exist.")
        return
    
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    for file in os.listdir(input_folder):
        if file.endswith(".mol"):
            file_path = os.path.join(input_folder, file)
            sanitize_add_hydrogens_and_force_field(file_path, output_folder)

if __name__ == "__main__":
    input_folder = input("Enter the path to the folder containing .mol files: ")
    output_folder = input("Enter the path to the folder where sanitized files should be saved: ")
    sanitize_folder(input_folder, output_folder)


Enter the path to the folder containing .mol files:  
Enter the path to the folder where sanitized files should be saved:  


NameError: name 'os' is not defined

In [None]:
## calculate 2D and 3D descriptors

def calculate_descriptors(mol_file):
    mol = Chem.MolFromMolFile(mol_file)
    if mol is None:
        return None
    descriptors = {}
    
    # Calculate 2D descriptors
    for desc_name, desc_func in Descriptors.descList:
        try:
            descriptors[desc_name] = desc_func(mol)
        except Exception as e:
            descriptors[desc_name] = None
    
    # Calculate 3D descriptors
    for desc_name, desc_func in Descriptors3D.descList:
        try:
            descriptors[desc_name] = desc_func(mol)
        except Exception as e:
            descriptors[desc_name] = None
            
    return descriptors

def main(folder_path):
    # Check if folder exists
    if not os.path.isdir(folder_path):
        print("Folder does not exist.")
        return

    # Get list of Mol files
    mol_files = [f for f in os.listdir(folder_path) if f.endswith('.mol')]

    # Initialize an empty list to store descriptors for each molecule
    descriptors_list = []

    # Calculate descriptors for each Mol file
    for mol_file in mol_files:
        mol_file_path = os.path.join(folder_path, mol_file)
        descriptors = calculate_descriptors(mol_file_path)
        if descriptors:
            # Remove "sanitized" from the filename
            filename = mol_file.replace('_sanitized', '')
            descriptors['Filename'] = filename  # Add filename to descriptors
            descriptors_list.append(descriptors)

    # Convert list of descriptors to DataFrame
    df = pd.DataFrame(descriptors_list)

    # Reorder columns to have Filename as the first column
    cols = df.columns.tolist()
    cols = ['Filename'] + [col for col in cols if col != 'Filename']
    df = df[cols]

    # Write DataFrame to Excel
    output_excel = os.path.join(folder_path, 'descriptors.xlsx')
    df.to_excel(output_excel, index=False)
    print(f"Descriptors saved to {output_excel}")

if __name__ == "__main__":
    folder_path = input("Enter the path to the folder containing .mol files: ")
    main(folder_path)


In [2]:
import os
import pandas as pd
from mordred import Calculator, descriptors
from rdkit import Chem
from rdkit.Chem import SDMolSupplier

# Define the folder containing .mol files
folder_path = '/Users/nora/Desktop/PhD_LMU/MDS_LNPs/Felix_Polymere_Project/mol_files/sanitised'

# Initialize the calculator
calc = Calculator(descriptors, ignore_3D=True)  # Ignore 3D descriptors for simplicity

# Create a DataFrame to store descriptors
data = []

# Iterate over each .mol file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.mol'):
        file_path = os.path.join(folder_path, filename)
        filename = mol_file.replace('_sanitized', '')
            descriptors['Filename'] = filename  # Add filename to descriptors
            descriptors_list.append(descriptors)
        
        # Load the molecule from the .mol file
        suppl = SDMolSupplier(file_path)
        
        # Iterate over each molecule in the .mol file
        for mol in suppl:
            if mol is not None:
                # Calculate descriptors for the molecule
                desc_values = calc(mol)
  
                # Append descriptors to data list with the filename as the first column
                data.append([filename] + list(desc_values.values()))

# Convert the list of descriptors to a DataFrame
columns = ['Filename'] + list(desc_values.keys())
df = pd.DataFrame(data, columns=columns)

# Save the DataFrame to an Excel file
output_file = '/Users/nora/Desktop/PhD_LMU/MDS_LNPs/Felix_Polymere_Project/mol_files/mordred_descriptors.xlsx'
df.to_excel(output_file, index=False)

print(f"Descriptors saved to {output_file}")


IndentationError: unexpected indent (1669471838.py, line 21)

In [3]:
import os
import pandas as pd
from mordred import Calculator, descriptors
from rdkit import Chem
from rdkit.Chem import SDMolSupplier

# Define the folder containing .mol files
folder_path = '/Users/nora/Desktop/PhD_LMU/MDS_LNPs/Felix_Polymere_Project/mol_files/sanitised'

# Initialize the calculator
calc = Calculator(descriptors, ignore_3D=True)  # Ignore 3D descriptors for simplicity

# Create a DataFrame to store descriptors
data = []

# Iterate over each .mol file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.mol'):
        file_path = os.path.join(folder_path, filename)
        # Modify filename to remove "_sanitized"
        filename = filename.replace('_sanitized', '')
        
        # Load the molecule from the .mol file
        suppl = SDMolSupplier(file_path)
        
        # Iterate over each molecule in the .mol file
        for mol in suppl:
            if mol is not None:
                # Calculate descriptors for the molecule
                desc_values = calc(mol)
  
                # Append descriptors to data list with the modified filename as the first column
                data.append([filename] + list(desc_values.values()))

# Convert the list of descriptors to a DataFrame
columns = ['Filename'] + list(desc_values.keys())
df = pd.DataFrame(data, columns=columns)

# Save the DataFrame to an Excel file
output_file = '/Users/nora/Desktop/PhD_LMU/MDS_LNPs/Felix_Polymere_Project/mol_files/mordred_descriptors.xlsx'
df.to_excel(output_file, index=False)

print(f"Descriptors saved to {output_file}")


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Descriptors saved to /Users/nora/Desktop/PhD_LMU/MDS_LNPs/Felix_Polymere_Project/mol_files/mordred_descriptors.xlsx
