In [18]:
import os
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem


# Input table and the folder that will receive one XYZ file per solvent
csv_file = "filtered_data_updated_final.csv"
output_folder = "xyz_solvents"

# Read the CSV file
df = pd.read_csv(csv_file)

# Validate every column this cell reads before doing any work:
# 'solvent_smiles' supplies the structure and 'solvent_name' names the output file.
required_columns = ('solvent_smiles', 'solvent_name')
missing_columns = [column for column in required_columns if column not in df.columns]
if missing_columns:
    raise ValueError(
        "CSV file must contain a column named "
        + " and ".join(f"'{column}'" for column in missing_columns)
    )

# Create output folder if it doesn't exist (only once the input is known to be usable)
os.makedirs(output_folder, exist_ok=True)

def smiles_to_xyz(solute_smiles, filename, random_seed=42):
    """Convert a SMILES string to a 3D structure and save it as an XYZ file.

    The molecule is parsed with RDKit, explicit hydrogens are added, a single
    conformer is embedded with ETKDG and relaxed with the UFF force field, and
    the resulting coordinates are written in XYZ format.

    Parameters
    ----------
    solute_smiles : str
        SMILES string of the molecule to convert.
    filename : str
        Path of the XYZ file to write. Its parent directory must already exist.
    random_seed : int or None, optional
        Seed for the ETKDG embedding so repeated runs give the same geometry.
        Pass ``None`` to keep RDKit's default (unseeded) behaviour.

    Returns
    -------
    None
        Nothing is returned. When the input is not a usable SMILES string, or
        no 3D conformer can be generated, a message is printed and no file is
        written.
    """
    # Reject non-string / blank input (e.g. NaN from an empty CSV cell) before
    # it reaches RDKit, which would raise on a non-string argument.
    if not isinstance(solute_smiles, str) or not solute_smiles.strip():
        print(f"Skipping invalid SMILES: {solute_smiles}")
        return

    mol = Chem.MolFromSmiles(solute_smiles)
    if mol is None:
        # Report the argument actually passed in, not a notebook-level global.
        print(f"Skipping invalid SMILES: {solute_smiles}")
        return

    # Add Hydrogens
    mol = Chem.AddHs(mol)

    # Generate 3D coordinates
    params = AllChem.ETKDG()
    if random_seed is not None:
        params.randomSeed = random_seed
    conf_id = AllChem.EmbedMolecule(mol, params)
    if conf_id < 0:
        # EmbedMolecule signals failure with -1; without this check the
        # GetConformer call below would raise because no conformer exists.
        print(f"Skipping SMILES with failed 3D embedding: {solute_smiles}")
        return
    AllChem.UFFOptimizeMolecule(mol, confId=conf_id)

    # Extract atom coordinates
    conf = mol.GetConformer(conf_id)
    num_atoms = mol.GetNumAtoms()

    # XYZ layout: atom count, one comment line, then "symbol x y z" per atom
    with open(filename, "w") as f:
        f.write(f"{num_atoms}\nGenerated by RDKit\n")
        for atom in mol.GetAtoms():
            pos = conf.GetAtomPosition(atom.GetIdx())
            f.write(f"{atom.GetSymbol()} {pos.x:.6f} {pos.y:.6f} {pos.z:.6f}\n")

# Process each distinct solvent once.
# Rows without a name or SMILES cannot be converted (a NaN name has no .replace),
# so they are reported and left out instead of aborting the whole run.
solvent_rows = df.dropna(subset=['solvent_smiles', 'solvent_name'])
skipped_rows = len(df) - len(solvent_rows)
if skipped_rows:
    print(f"Skipping {skipped_rows} row(s) with a missing solvent name or SMILES")

# Repeated (name, SMILES) pairs would only regenerate and overwrite the same file.
solvent_rows = solvent_rows.drop_duplicates(subset=['solvent_name', 'solvent_smiles'])

for solvent_smiles, solvent_name in zip(solvent_rows['solvent_smiles'], solvent_rows['solvent_name']):
    # Spaces in the solvent name become underscores in the file name
    output_file = os.path.join(output_folder, f"{str(solvent_name).replace(' ', '_')}.xyz")
    smiles_to_xyz(solvent_smiles, output_file)

print(f"XYZ files saved in '{output_folder}'")

XYZ files saved in 'xyz_solvents'


In [20]:
# Assign directories relative to the current working directory.
# os.path.join inserts the separator of the running OS, so the notebook is not
# tied to Windows-style backslashes.
output_dir = os.path.join(os.getcwd(), 'MOPAC_input_solvents')
xyz_dir = os.path.join(os.getcwd(), 'xyz_solvents')
csv_file = os.path.join(os.getcwd(), 'filtered_data_updated_final.csv')  # Update this to your CSV file path

# Create the new directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

print(xyz_dir)

# Read the table from the configured path so the setting above is actually used
filtered_data_updated = pd.read_csv(csv_file)

C:\Users\emmad\OneDrive - University of Leeds\leeds admin\chem\3650\coding\xyz_solvents


In [22]:
# Build one MNDO MOPAC input file for every row of the solvent table.
# Each input consists of the keyword line, the solvent name, a blank line and
# the atom lines copied from the matching XYZ file.
mopac_keywords = 'AUX LARGE OPT FORCE THERMO MNDO T=128H RECALC=5 GNORM=0.01 LET SCFCRT=0.0000001'

for raw_solvent_name in filtered_data_updated['solvent_name']:
    # File names use underscores where the solvent name has spaces
    solvent = raw_solvent_name.replace(" ", "_")

    full_xyz_filename = os.path.join(xyz_dir, f"{solvent}.xyz")
    full_mop_filename = os.path.join(output_dir, f"MNDO_{solvent}.mop")

    # Nothing to convert when the geometry was never generated
    if not os.path.exists(full_xyz_filename):
        print(f"Missing XYZ file: {full_xyz_filename}, skipping...")
        continue

    with open(full_xyz_filename, 'r') as xyz_file, open(full_mop_filename, 'w') as mop_file:
        mop_file.write(mopac_keywords + '\n')
        mop_file.write(f"{solvent}\n\n")

        # The first two XYZ lines (atom count and comment) are not coordinates
        mop_file.writelines(xyz_file.readlines()[2:])

    print(f"Created: {full_mop_filename}")

Created: C:\Users\emmad\OneDrive - University of Leeds\leeds admin\chem\3650\coding\MOPAC_input_solvents\MNDO_1-propanol.mop
Created: C:\Users\emmad\OneDrive - University of Leeds\leeds admin\chem\3650\coding\MOPAC_input_solvents\MNDO_2-propanol.mop
Created: C:\Users\emmad\OneDrive - University of Leeds\leeds admin\chem\3650\coding\MOPAC_input_solvents\MNDO_butyl_acetate.mop
Created: C:\Users\emmad\OneDrive - University of Leeds\leeds admin\chem\3650\coding\MOPAC_input_solvents\MNDO_ethanol.mop
Created: C:\Users\emmad\OneDrive - University of Leeds\leeds admin\chem\3650\coding\MOPAC_input_solvents\MNDO_ethyl_acetate.mop
Created: C:\Users\emmad\OneDrive - University of Leeds\leeds admin\chem\3650\coding\MOPAC_input_solvents\MNDO_methyl_acetate.mop
Created: C:\Users\emmad\OneDrive - University of Leeds\leeds admin\chem\3650\coding\MOPAC_input_solvents\MNDO_1-butanol.mop
Created: C:\Users\emmad\OneDrive - University of Leeds\leeds admin\chem\3650\coding\MOPAC_input_solvents\MNDO_water.mop