In [1]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import Descriptors
from rdkit.Chem import AllChem
import py3Dmol
from IPython.display import display
import os

# Function to validate SMILES string
def validate_smiles(smiles):
    try:
        Chem.MolFromSmiles(smiles)
        return True
    except:
        return False

# Function to get molecular formula
def get_molecular_formula(mol):
    return Chem.rdMolDescriptors.CalcMolFormula(mol)

# Function to process SMILES string
def process_smiles(smiles, df_dataset):
    mol = Chem.MolFromSmiles(smiles)

    if mol is not None:
        mol = Chem.AddHs(mol)
        mw = Descriptors.MolWt(mol)
        num_atoms = mol.GetNumAtoms()
        num_bonds = mol.GetNumBonds()

        AllChem.EmbedMolecule(mol)
        mol_xyz = Chem.MolToXYZBlock(mol)
        p = py3Dmol.view(width=400, height=400)
        p.addModel(mol_xyz, 'xyz')
        p.setStyle({'stick': {}})
        p.setBackgroundColor('white')
        p.zoomTo()
        p.show()

        mol_img = Draw.MolToImage(mol)
        display(mol_img)

        molecular_formula = get_molecular_formula(mol)

        row = df_dataset.loc[df_dataset['canonicalsmiles'] == smiles].iloc[0]
        iupac_name = row['iupacname']
        exact_mass = row['exactmass']
        monoisotopic_mass = row['monoisotopicmass']

        data = {
            'Property': ['Molecular Weight', 'Number of Atoms', 'Number of Bonds', 'Molecular Formula',
                         'IUPAC Name', 'Exact Mass', 'Monoisotopic Mass'],
            'Value': [mw, num_atoms, num_bonds, molecular_formula,
                      iupac_name, exact_mass, monoisotopic_mass]
        }

        df = pd.DataFrame(data)
        display(df)
    else:
        print("Invalid SMILES string")

def main():
    smiles = input("Enter the SMILES string of the molecule: ")

    if validate_smiles(smiles):
        # Get the directory of the current script
        current_dir = os.path.dirname(os.path.abspath(__file__))
        
        # Construct the dataset path
        dataset_filename = 'PubChem_compound_list_hMMhFlIeN6IAjL-VPe32vf3MSqzNijeITa0sxFa8PsVWpQI.numbers'
        dataset_path = os.path.join(current_dir, dataset_filename)

        # Load the dataset
        df_dataset = pd.read_csv(dataset_path)

        # Process the SMILES string
        process_smiles(smiles, df_dataset)
    else:
        print("Invalid SMILES string")

if __name__ == "__main__":
    main()


Enter the SMILES string of the molecule:  CCO


NameError: name '__file__' is not defined