# Generate Gaussian input files (*.com) from the SMILES strings of molecules

In [28]:
import os
import pandas as pd
import numpy as np
import rdkit.Chem as Chem
from rdkit.Chem import AllChem

In [29]:
# Load only the SMILES column of the dataset. The path is built with
# os.path.join so the cell works on POSIX systems as well as on Windows.
df = pd.read_csv(os.path.join("Data", "123Benzothiadiazole_smiles.csv"), usecols=["SMILES"])
df.head()

Unnamed: 0,SMILES
0,Cc1c2c(cc3nsnc13)N(C)CCN2CCN(C)C
1,CCN1CCN(CCOC)c2c1cc1nsnc1c2C
2,CCNc1c2c(cc3nsnc13)N(C)CCN2C(C)C
3,CCCN1CCN(CC)c2cc3nsnc3c(NCC)c21
4,CCNc1c2c(cc3nsnc13)N(CC)CCN2C(C)C


In [30]:
def smiles2dotcom(list_of_smiles,method="wB97XD",basis="CEP-31G",Freq=" ",sol=" ",charge=0,mult=1,fileName="g16_input"):
    """
    Generate one Gaussian input file (.com) per SMILES string in the current folder.

    Each SMILES is converted to a 3D structure with RDKit (hydrogens added,
    embedded, then MMFF-optimized) and written out as an "opt" job.
    Files are named "<fileName>-<method>-<n>.com", with n counting from 1 in
    input order.

    Parameters
    ----------
    list_of_smiles : list of SMILES strings (or a Pandas series of SMILES).
        A single bare string is treated as a one-element list.
    method, basis : written to the route line as "<method>/<basis>"; the
        method is also part of the file name.
    Freq, sol : extra route-line text appended after "opt".
    charge, mult : values written on the charge/multiplicity line (and
        repeated on the title line).
    fileName : prefix of the generated file names.

    Raises
    ------
    ValueError : if a SMILES cannot be parsed or no 3D conformer can be embedded.

    For single SMILES string: smiles2dotcom(["CC"])
    """
    print("Saving files in: ", os.getcwd())

    # A bare string would otherwise be iterated character by character.
    if isinstance(list_of_smiles, str):
        list_of_smiles = [list_of_smiles]

    def get_atoms(mol):
        # Element symbols in atom-index order (same order as the coordinates).
        return [a.GetSymbol() for a in mol.GetAtoms()]

    def generate_structure_from_smiles(smiles):
        # Generate a 3D structure from smiles
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # MolFromSmiles returns None for an unparsable SMILES.
            raise ValueError("Could not parse SMILES: " + repr(smiles))
        mol = Chem.AddHs(mol)
        # EmbedMolecule returns -1 when no conformer could be generated.
        if AllChem.EmbedMolecule(mol) < 0:
            raise ValueError("Could not embed a 3D structure for SMILES: " + repr(smiles))
        # The optimizer's return status is not checked (as in the original code).
        AllChem.MMFFOptimizeMolecule(mol) #UFFOptimizeMolecule(mol)
        coordinates = np.array(mol.GetConformer().GetPositions())
        return get_atoms(mol), coordinates

    def mkGaussianInputScriptNeutral(comName,method,basis, fragment,atoms,coordinates,charge,mult):
        # Freq and sol come from the enclosing function's arguments.
        # The context manager closes the file even if a write fails.
        with open(comName+".com", 'w') as file:
            # Link-0 section: memory and checkpoint file.
            file.write("""%mem=16GB \n""")
            file.write("""%Chk="""+comName+""".chk \n""")
            # Route line, followed by the blank line that ends the section.
            file.write("""#p """+ method +"""/""" + basis+ " opt " + Freq + "   " + sol + """\n\n""")
            # Title line: the SMILES plus charge and multiplicity.
            file.write(fragment + " " + str(charge) + " " + str(mult)+"\n\n")
            file.write(str(charge)+""" """+str(mult)+"""\n""")
            # One line per atom: symbol and x, y, z coordinates.
            for atom, xyz in zip(atoms, coordinates):
                file.write(str(atom) + "\t"+str(xyz[0]) + "\t\t"+str(xyz[1]) + "\t\t"+str(xyz[2]) + "\n")
            # Trailing blank line after the geometry.
            file.write("\n")

    for i,smilesName in enumerate(list_of_smiles):
        atoms,coordinates=generate_structure_from_smiles(smilesName)
        fileNameNeutral = fileName + "-" + method +"-"+str(i+1)
        mkGaussianInputScriptNeutral(fileNameNeutral,method,basis,smilesName, atoms, coordinates,charge, mult)
    

## Input files for Neutral Molecules

In [31]:
# Take the first five SMILES strings from the dataframe as a plain Python list.
list_of_smiles = df["SMILES"].iloc[:5].to_list()
list_of_smiles

['Cc1c2c(cc3nsnc13)N(C)CCN2CCN(C)C',
 'CCN1CCN(CCOC)c2c1cc1nsnc1c2C',
 'CCNc1c2c(cc3nsnc13)N(C)CCN2C(C)C',
 'CCCN1CCN(CC)c2cc3nsnc3c(NCC)c21',
 'CCNc1c2c(cc3nsnc13)N(CC)CCN2C(C)C']

In [32]:
# Neutral species: charge 0, multiplicity 1, with the scrf(cpcm) acetonitrile keyword.
smiles2dotcom(
    list_of_smiles,
    method="B3LYP",
    basis="6-31+G(d,p)",
    Freq=" ",
    sol="scrf(cpcm,solvent=acetonitrile)",
    charge=0,
    mult=1,
    fileName="smi2com_neutral",
)

Saving files in:  c:\Users\jaina\Box\RDKit_Utility_Functions


## Input files for Reduced Molecules

In [33]:
# Reduced species: charge -1, multiplicity 2, with the scrf(cpcm) acetonitrile keyword.
smiles2dotcom(
    list_of_smiles,
    method="B3LYP",
    basis="6-31+G(d,p)",
    Freq=" ",
    sol="scrf(cpcm,solvent=acetonitrile)",
    charge=-1,
    mult=2,
    fileName="smi2com_reduced",
)

Saving files in:  c:\Users\jaina\Box\RDKit_Utility_Functions


# Delete files from the folder
- Comment out the following lines if you want to keep the files

In [34]:
# Remove every *.com file from the current working directory.
# Only regular files are removed, so a directory whose name happens to end in
# ".com" does not make os.remove raise.
for file in os.listdir(r'./'):
    if file.endswith(".com") and os.path.isfile(file):
        os.remove(file)