In [9]:
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
from openbabel import openbabel
import pubchempy as pcp
import numpy as np
import py3Dmol
import os
import ipywidgets as widgets
from ipywidgets import interact, fixed, IntSlider, Text, Dropdown, ToggleButton, Button, FloatSlider, Checkbox
from IPython.display import display
obMol = openbabel.OBMol()
obConv = openbabel.OBConversion()
"""IMPORTANT: DO NOT USE ANY OTHER VARIABLES NAMED obMol OR obConv!!!"""

from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import rdCIPLabeler
from rdkit.Chem import rdAbbreviations
IPythonConsole.drawOptions.addAtomIndices = False
IPythonConsole.ipython_useSVG=True
IPythonConsole.molSize = 300,300


from pointgroup import PointGroup
import pymsym

In [10]:
class DataCache:
    def __init__(self):
        self.cache = {}

    def add_data(self, key, value):
        if key not in self.cache:
            self.cache[key] = [value]
        else:
            if value not in self.cache[key]:
                self.cache[key].append(value)

    def get_data(self, key):
        return self.cache.get(key)

    def print_data(self, target):
        if target == 'smiles':
            print(f"Data type: SMILES; Data: {self.cache['SMILES']}")
        elif target == 'cid':
            print(f"Data type: SMILES; Data: {self.cache['CID']}")
        elif target == 'inchi':
            print(f"Data type: SMILES; Data: {self.cache['InChi']}")
        elif target == 'inchikey':
            print(f"Data type: SMILES; Data: {self.cache['InChiKey']}")
        elif target == 'name':
            print(f"Data type: SMILES; Data: {self.cache['Name']}")
        elif target == 'all':
            for key, value in self.cache.items():
                print(f"Data type: {key}; Data: {value}")

In [62]:
class converter:
    def __init__(self, data: str, data_type: str, data_cache: DataCache):
        """Initializes the converter with the input data and its type"""
        self.data = data
        self.data_type = data_type.lower()
        self.data_cache = data_cache

    def convert(self, target_format: str):
        """Converts the input data to the target format"""
        target_format = target_format.lower()
        if self.data_type == 'name':
            self.data_cache.add_data('Name', self.data)
            smiles = self.name_to_smiles()
            sdf = self.name_to_sdf()
            if target_format == 'smiles':
                return smiles
            elif target_format == 'cid':
                return self.smiles_to_cid(smiles)
            elif target_format == 'sdf':
                return sdf
            elif target_format == 'inchi':
                return self.smiles_to_inchi(smiles)
            elif target_format == 'inchikey':
                return self.smiles_to_inchikey(smiles)
            elif target_format == 'xyz':
                return self.sdf_to_xyz(sdf, smiles)
            elif target_format == 'zmat':
                return self.sdf_to_zmat(sdf, smiles)
            elif target_format == 'name':
                return self.data
        elif self.data_type == 'smiles':
            self.data_cache.add_data('SMILES', self.data)
            smiles = self.data
            sdf = self.smiles_to_sdf()
            if target_format == 'smiles':
                return self.data
            elif target_format == 'cid':
                return self.smiles_to_cid(smiles)
            elif target_format == 'sdf':
                return sdf
            elif target_format == 'inchi':
                return self.smiles_to_inchi(smiles)
            elif target_format == 'inchikey':
                return self.smiles_to_inchikey(smiles)
            elif target_format == 'xyz':
                return self.sdf_to_xyz(sdf, smiles)
            elif target_format == 'zmat':
                return self.sdf_to_zmat(sdf, smiles)
            elif target_format == 'name':
                return self.smiles_to_name1()
        elif self.data_type == 'inchi':
            self.data_cache.add_data('InChi', self.data)
            smiles = self.inchi_to_smiles()
            sdf = self.inchi_to_sdf()
            if target_format == 'smiles':
                return smiles
            elif target_format == 'cid':
                return self.smiles_to_cid(smiles)
            elif target_format == 'sdf':
                return sdf
            elif target_format == 'inchi':
                return self.data
            elif target_format == 'inchikey':
                return self.smiles_to_inchikey(smiles)
            elif target_format == 'xyz':
                return self.sdf_to_xyz(sdf, smiles)
            elif target_format == 'zmat':
                return self.sdf_to_zmat(sdf, smiles)
            elif target_format == 'name':
                return self.smiles_to_name2(smiles)
        elif self.data_type == 'inchikey':
            self.data_cache.add_data('InChiKey', self.data)
            smiles = self.inchikey_to_smiles()
            sdf = self.inchikey_to_sdf()
            if target_format == 'smiles':
                return smiles
            elif target_format == 'cid':
                return self.smiles_to_cid(smiles)
            elif target_format == 'sdf':
                return sdf
            elif target_format == 'inchi':
                return self.smiles_to_inchi(smiles)
            elif target_format == 'inchikey':
                return self.data
            elif target_format == 'xyz':
                return self.sdf_to_xyz(sdf, smiles)
            elif target_format == 'zmat':
                return self.sdf_to_zmat(sdf, smiles)
            elif target_format == 'name':
                return self.smiles_to_name2(smiles)
        elif self.data_type == 'cid':
            self.data_cache.add_data('CID', self.data)
            smiles = self.cid_to_smiles()
            sdf = self.cid_to_sdf()
            if target_format == 'smiles':
                return smiles
            elif target_format == 'cid':
                return self.data
            elif target_format == 'sdf':
                return sdf
            elif target_format == 'inchi':
                return self.smiles_to_inchi(smiles)
            elif target_format == 'inchikey':
                return self.smiles_to_inchikey(smiles)
            elif target_format == 'xyz':
                return self.sdf_to_xyz(sdf, smiles)
            elif target_format == 'zmat':
                return self.sdf_to_zmat(sdf, smiles)
            elif target_format == 'name':
                return self.smiles_to_name2(smiles)

    def name_to_smiles(self):
        """Converts molecule name to SMILES"""
        try:
            c = pcp.get_compounds(self.data, 'name')
            smiles = c[0].isomeric_smiles
            self.data_cache.add_data('SMILES', smiles)
            return smiles
        except IndexError:
            return None

    def cid_to_smiles(self):
        """Converts CID to SMILES"""
        try:
            cid = pcp.get_compounds(self.data, 'cid')
            smiles = cid[0].isomeric_smiles
            self.data_cache.add_data('SMILES', smiles)
            return smiles
        except IndexError:
            return None

    def smiles_to_name1(self):
        """Converts SMILES to molecule name"""
        try:
            smi = pcp.get_compounds(self.data, 'smiles')
            name = smi[0].iupac_name
            self.data_cache.add_data('Name', name)
            return name
        except IndexError:
            return None

    def smiles_to_name2(self, smiles):
        """Converts SMILES to molecule name"""
        try:
            smi = pcp.get_compounds(smiles, 'smiles')
            name = smi[0].iupac_name
            self.data_cache.add_data('Name', name)
            return name
        except IndexError:
            return None

    def inchi_to_smiles(self):
        """Converts InChi to SMILES"""
        try:
            ic = pcp.get_compounds(self.data, 'inchi')
            smiles = ic[0].isomeric_smiles
            self.data_cache.add_data('SMILES', smiles)
            return smiles
        except IndexError:
            return None

    def inchikey_to_smiles(self):
        """Converts InChiKey to SMILES"""
        try:
            ick = pcp.get_compounds(self.data, 'inchikey')
            smiles = ick[0].isomeric_smiles
            self.data_cache.add_data('SMILES', smiles)
            return smiles
        except IndexError:
            return None

    def smiles_to_cid(self, smiles):
        """Converts SMILES to CID"""
        try:
            c = pcp.get_cids(smiles, 'smiles', list_return='flat')
            cid = c[0]
            self.data_cache.add_data('CID', cid)
            return cid
        except IndexError:
            return None
        
    def smiles_to_sdf(self):
        """Converts SMILES to SDF"""
        try:
            file_path = f'./3Dfiles/{self.data}.sdf'
            try:
                pcp.download('SDF', file_path, self.data, 'smiles', overwrite=True)
                with open(file_path, 'r') as f:
                    return f.read()
            except Exception as e:
                print("Error during download:", e)
                return None
        except IndexError:
            return None

    def name_to_sdf(self):
        """Converts name to SDF"""
        try:
            file_path = f'./3Dfiles/{self.data}.sdf'
            try:
                pcp.download('SDF', file_path, self.data, 'name', overwrite=True)
                with open(file_path, 'r') as f:
                    return f.read()
            except Exception as e:
                print("Error during download:", e)
                return None
        except IndexError:
            return None

    def inchi_to_sdf(self):
        """Converts inchi to SDF"""
        try:
            file_path = f'./3Dfiles/{self.data}.sdf'
            try:
                pcp.download('SDF', file_path, self.data, 'inchi', overwrite=True)
                with open(file_path, 'r') as f:
                    return f.read()
            except Exception as e:
                print("Error during download:", e)
                return None
        except IndexError:
            return None

    def inchikey_to_sdf(self):
        """Converts inchikey to SDF"""
        try:
            file_path = f'./3Dfiles/{self.data}.sdf'
            try:
                pcp.download('SDF', file_path, self.data, 'inchikey', overwrite=True)
                with open(file_path, 'r') as f:
                    return f.read()
            except Exception as e:
                print("Error during download:", e)
                return None
        except IndexError:
            return None

    def cid_to_sdf(self):
        """Converts name to SDF"""
        try:
            file_path = f'./3Dfiles/{self.data}.sdf'
            try:
                pcp.download('SDF', file_path, self.data, 'cid', overwrite=True)
                with open(file_path, 'r') as f:
                    return f.read()
            except Exception as e:
                print("Error during download:", e)
                return None
        except IndexError:
            return None

    def smiles_to_inchi(self, smiles):
        """Converts SMILES to InChI"""
        try:
            obMol = openbabel.OBMol()
            obConv = openbabel.OBConversion()
            obConv.SetInAndOutFormats("smiles", "inchi")
            obConv.ReadString(obMol, smiles)
            ic = obConv.WriteString(obMol)
            self.data_cache.add_data('InChi', ic)
            return ic
        except IndexError:
            return None

    def smiles_to_inchikey(self, smiles):
        """Converts SMILES to InChiKey"""
        try:
            obMol = openbabel.OBMol()
            obConv = openbabel.OBConversion()
            obConv.SetInAndOutFormats("smiles", "inchikey")
            obConv.ReadString(obMol, smiles)
            ick = obConv.WriteString(obMol)
            self.data_cache.add_data('InChiKey', ick)
            return ick
        except IndexError:
            return None

    def sdf_to_xyz(self, sdf, smiles):
        """Converts SDF to XYZ"""
        name = self.smiles_to_name2(smiles)
        directory = './3Dfiles/'
        try:
            obMol = openbabel.OBMol()
            obConv = openbabel.OBConversion()
            obConv.SetInAndOutFormats("sdf", "xyz")
            obConv.ReadString(obMol, sdf)
            xyz = obConv.WriteString(obMol)
            if not os.path.exists(directory):
                os.makedirs(directory)
            file_path = os.path.join('./3Dfiles/', f"{name}.xyz")
            with open(file_path, "w") as file:
                file.write(xyz)
            return xyz
        except IndexError:
            return None

    def sdf_to_zmat(self, sdf, smiles):
        """Converts SDF to zmat"""
        name = self.smiles_to_name2(smiles)
        directory = './3Dfiles/'
        try:
            obMol = openbabel.OBMol()
            obConv = openbabel.OBConversion()
            obConv.SetInAndOutFormats("sdf", "gzmat")
            obConv.ReadString(obMol, sdf)
            zmat = obConv.WriteString(obMol)
            if not os.path.exists(directory):
                os.makedirs(directory)
            file_path = os.path.join('./3Dfiles/', f"{name}.txt")
            with open(file_path, "w") as file:
                file.write(zmat)
            return zmat
        except IndexError:
            return None

In [63]:
cache = DataCache()
sdf_cache = {}

In [64]:
def get_sdf(identifier, identifier_type):
    directory = './3Dfiles/'
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Define the file path for the SDF file
    sdf_file = os.path.join(directory, f"{identifier}.sdf")

    # Check if the SDF file already exists
    if not os.path.exists(sdf_file):
        # If not, convert SMILES to SDF and save it
        molecule = converter(identifier, identifier_type, cache)
        sdf = molecule.convert('sdf')
        if not sdf:
            print("Failed to convert molecule to SDF.")
            return None
        
        with open(sdf_file, 'w') as f:
            f.write(sdf)
    else:
        # If the SDF file exists, read it
        with open(sdf_file, 'r') as f:
            sdf = f.read()

    return sdf

In [1]:
import pubchempy as pcp
from rdkit import Chem
from rdkit.Chem import Draw

def get_smiles_from_name(common_name):
    try:
        compounds = pcp.get_compounds(common_name, 'name')
        return compounds[0].isomeric_smiles
    except IndexError:
        return None

def is_valid_molecule(molecule_name):
    smiles = get_smiles_from_name(molecule_name) if not Chem.MolFromSmiles(molecule_name) else molecule_name
    molecule = Chem.MolFromSmiles(smiles)
    return molecule is not None

def visualize_molecule(molecule_name):
    smiles = get_smiles_from_name(molecule_name) if not Chem.MolFromSmiles(molecule_name) else molecule_name
    molecule = Chem.MolFromSmiles(smiles)
    if molecule:
        # Generate a 2D structure for the molecule
        Chem.rdDepictor.Compute2DCoords(molecule)
        # Draw the molecule
        img = Draw.MolToImage(molecule)
        return img
    else:
        print(f"The molecule name '{molecule_name}' is not valid.")
        return None

# Example usage:
common_name = "ethanol"
img = visualize_molecule(common_name)
img.show() if img else None


[15:09:35] SMILES Parse Error: syntax error while parsing: ethanol
[15:09:35] SMILES Parse Error: Failed parsing SMILES 'ethanol' for input: 'ethanol'


In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from pyscf import gto, symm

def get_point_group_from_smiles(smiles):
    # Create a molecule object from SMILES string
    molecule = Chem.MolFromSmiles(smiles)

    # Add hydrogens
    molecule = Chem.AddHs(molecule)

    # Generate 3D coordinates
    AllChem.EmbedMolecule(molecule)
    AllChem.UFFOptimizeMolecule(molecule)

    # Get atomic coordinates in the format required by PySCF
    conf = molecule.GetConformer()
    atom_coords = ''
    for atom in molecule.GetAtoms():
        pos = conf.GetAtomPosition(atom.GetIdx())
        atom_coords += f"{atom.GetSymbol()} {pos.x} {pos.y} {pos.z};"

    # Define the molecule in PySCF
    mol = gto.M(atom=atom_coords)

    # Detect and print the point group
    mol.build()
    point_group_info = symm.detect_symm(mol)
    point_group = point_group_info[0][0]  # Access the first element of the list
    return point_group

# Example usage:
smiles_string = 'CCO'  # Replace with your molecule's SMILES string
point_group = get_point_group_from_smiles(smiles_string)
print(f'The point group of the molecule is: {point_group}')


In [None]:
import streamlit as st
import py3Dmol
from stmol import showmol
st.sidebar.title('Show Proteins')
prot_str='1A2C,1BML,1D5M,1D5X,1D5Z,1D6E,1DEE,1E9F,1FC2,1FCC,1G4U,1GZS,1HE1,1HEZ,1HQR,1HXY,1IBX,1JBU,1JWM,1JWS'
prot_list=prot_str.split(',')
bcolor = st.sidebar.color_picker('Pick A Color', '#00f900')
protein=st.sidebar.selectbox('select protein',prot_list)
style = st.sidebar.selectbox('style',['line','cross','stick','sphere','cartoon','clicksphere'])
spin = st.sidebar.checkbox('Spin', value = False)
xyzview = py3Dmol.view(query='pdb:'+protein)
xyzview.setStyle({style:{'color':'spectrum'}})
xyzview.setBackgroundColor(bcolor)
if spin:
    xyzview.spin(True)
else:
    xyzview.spin(False)
xyzview.zoomTo()
showmol(xyzview,height=500,width=800)

In [None]:
import streamlit as st
from stmol import showmol
import py3Dmol

from rdkit import Chem
from rdkit.Chem import AllChem

st.title('RDKit + Py3DMOL 😀')

def check_smiles(smiles):
    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is not None:
            return mol
        else:
            return "Invalid SMILES"
    except:
        return "Invalid SMILES"

def makeblock(smi):
    mol = Chem.MolFromSmiles(smi)
    mol = Chem.AddHs(mol)
    AllChem.EmbedMolecule(mol)
    mblock = Chem.MolToMolBlock(mol)
    return mblock

def render_mol(xyz):
    xyzview = py3Dmol.view()#(width=400,height=400)
    xyzview.addModel(xyz,'mol')
    xyzview.setStyle({'stick':{}})
    xyzview.setBackgroundColor('white')
    xyzview.zoomTo()
    showmol(xyzview,height=500,width=500)

compound_smiles=st.text_input('SMILES please','CC')
check_smiles(smiles=compound_smiles)

blk=makeblock(compound_smiles)
render_mol(blk)

In [None]:
import pointgroup

# Define the coordinates of the atoms in the molecule
# For H2O, the structure can be defined as follows:
atoms = PointGroup(positions= [
    [0.0, 0.0, 0.0],
    [0.758, 0.586, 0.0],
    [-0.758, 0.586, 0.0]],
    symbols = ["O","H","H"])

# Get the point group
pg = atoms.get_point_group()
print(f"The point group of H2O is: {pg}")


In [None]:
from pointgroup import PointGroup


pg = PointGroup(positions= coordinates_list, 
                symbols=atom_symbols)

print('Point group: ', pg.get_point_group())

In [None]:
"SAME AS NEXT CELL, BUT THE OTHER ONE IS SLIGHTLY BETTER"
from rdkit import Chem
from rdkit.Chem import AllChem

# Your SMILES string
smiles = "O"

# Convert SMILES to a molecule object
mol = Chem.MolFromSmiles(smiles)

# Add hydrogens to the molecule
mol_with_h = Chem.AddHs(mol)

# Generate 3D coordinates
AllChem.EmbedMolecule(mol_with_h, AllChem.ETKDG())

x_coord = []
y_coord = []
z_coord = []
# Print the coordinates of each atom
for atom in mol_with_h.GetAtoms():
    pos = mol_with_h.GetConformer().GetAtomPosition(atom.GetIdx())
    print(f"Atom: {atom.GetSymbol()}, Coordinates: {pos.x}, {pos.y}, {pos.z}")
#    x_coord.append(pos.x)
#    y_coord.append(pos.y)
#    z_coord.append(pos.z)


#for item1, item2, item3 in  zip(x_coord, y_coord, z_coord):
#    print(item1)

In [None]:
import pubchempy as pcp

def get_molecule_name_from_smiles(smiles):
    compounds = pcp.get_compounds(smiles, 'smiles')
    if compounds:
        # Assuming the first compound is the one we want
        compound = compounds[0]
        return compound.iupac_name  # or compound.common_name for common name
    else:
        return "No compound found for the given SMILES."

In [77]:
from rdkit import Chem
from rdkit.Chem import AllChem
from pointgroup import PointGroup

def get_smiles_from_name_or_confirm_smiles(input_string):
    # This part checks if the input is already a valid SMILES.
    if Chem.MolFromSmiles(input_string) is not None:
        return input_string  # This will return the input SMILES.

    # If it's not, it will search to convert it to SMILES
    compounds = pcp.get_compounds(input_string, 'name')
    if compounds:
        compound = compounds[0]
        return compound.isomeric_smiles
    else:
        return None  # No valid compound was found for the input

def point_group_from_smiles(smiles):  

    # Convert SMILES to a molecule object
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print("Invalid SMILES input. Please provide a valid SMILES string.")
    else:
        # Add hydrogens to the molecule
        mol_with_h = Chem.AddHs(mol)

        # Generate 3D coordinates
        AllChem.EmbedMolecule(mol_with_h, AllChem.ETKDG())

        # Create a list to store coordinates
        coordinates_list = []
        atom_symbols = []
        
        # Iterate over atoms and get their coordinates
        for atom in mol_with_h.GetAtoms():
            pos = mol_with_h.GetConformer().GetAtomPosition(atom.GetIdx())
            coordinates_list.append([pos.x, pos.y, pos.z])
            atom_symbols.append(atom.GetAtomicNum())

    pg = pymsym.get_point_group(atomic_numbers = atom_symbols, positions= coordinates_list)

    return pg

if __name__ == "__main__":
    #input_string = input("Input SMILES or molecule name: ")
    input_string = "Methane"
    smiles_name = get_smiles_from_name_or_confirm_smiles(input_string)
    pg = point_group_from_smiles(smiles_name)
    mol_name = get_molecule_name_from_smiles(smiles_name)

    if pg:
        print(f"Point group of {mol_name} is {pg}")




[15:23:17] SMILES Parse Error: syntax error while parsing: Methane
[15:23:17] SMILES Parse Error: Failed parsing SMILES 'Methane' for input: 'Methane'


Point group of methane is C1


In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from pointgroup import PointGroup

"""def get_mol_from_sdf(sdf_file):
    # Read SDF file and return the first molecule
    suppl = Chem.SDMolSupplier(sdf_file)
    for mol in suppl:
        if mol is not None:
            return mol
    return None  # No valid molecule was found in the SDF file"""

def point_group_from_mol(data_cache, identifier, identifier_type):  
    # Check if the SDF file is already cached
    if identifier in sdf_cache:
        sdf = sdf_cache[identifier]
    else:
        # Fetch the SDF file for the new molecule identifier
        sdf = get_sdf(identifier, identifier_type)
        # Cache the loaded SDF file
        sdf_cache[identifier] = sdf

    # Add hydrogens to the molecule
    mol_with_h = Chem.AddHs(mol)

    # Generate 3D coordinates
    AllChem.EmbedMolecule(mol_with_h, AllChem.ETKDG())

    # Create a list to store coordinates
    coordinates_list = []
    atom_symbols = []
    
    # Iterate over atoms and get their coordinates
    for atom in mol_with_h.GetAtoms():
        pos = mol_with_h.GetConformer().GetAtomPosition(atom.GetIdx())
        coordinates_list.append([pos.x, pos.y, pos.z])
        atom_symbols.append(atom.GetSymbol())

    pg = PointGroup(positions=coordinates_list, symbols=atom_symbols)

    return pg.get_point_group()

if __name__ == "__main__":
    sdf_file = input("Input path to SDF file: ")
    mol = get_mol_from_sdf(sdf_file)
    pg = point_group_from_mol(mol)

    if pg:
        print(f"Point group is {pg}")


In [None]:
from rdkit import Chem

def point_group_from_sdf_old(sdf_file):
    sdf_file = get_sdf(identifier,identifier_type)
    suppl = Chem.SDMolSupplier(sdf_file)
    for mol in suppl:
        if mol is not None:  
            conf = mol.GetConformer()
            coordinates_list = []
            atom_symbols = []
            for atom in mol.GetAtoms():
                aid = atom.GetIdx()
                pos = conf.GetAtomPosition(aid)
                coordinates_list.append([pos.x, pos.y, pos.z])
                atom_symbols.append(atom.GetSymbol())

    pg = PointGroup(positions= coordinates_list, symbols=atom_symbols)
    return pg.get_point_group()


# Example 
#sdf_file_path = './3Dfiles/benzene.sdf'
if __name__ == "__main__":
    sdf_file = get_sdf("benzene", "Name")
    #identifier = input("Input name of molecule ") 
    #identifier_type = input("Input type of entry ('Name', 'SMILES', 'InChi', 'InChiKey', 'CID')")
    pg = point_group_from_sdf(sdf_file)
    if pg:
        print(f"Point group of {identifier} is {pg}")



In [130]:
from rdkit import Chem

def point_group_from_sdf(sdf_string):
    mol = Chem.MolFromMolBlock(sdf_string)
    
    if mol is not None:  
        conf = mol.GetConformer()
        coordinates_list = []
        atom_symbols = []
        for atom in mol.GetAtoms():
            aid = atom.GetIdx()
            pos = conf.GetAtomPosition(aid)
            coordinates_list.append([pos.x, pos.y, pos.z])
            atom_symbols.append(atom.GetSymbol())
    else:
        print("Invalid SDF input. Please provide a valid SDF string.")
        return None

    pg = PointGroup(positions= coordinates_list, symbols=atom_symbols)
    return pg.get_point_group()

if __name__ == "__main__":
    sdf_file = get_sdf("28557", "CID")
    #identifier = input("Input name of molecule ") 
    #identifier_type = input("Input type of entry ('Name', 'SMILES', 'InChi', 'InChiKey', 'CID')")
    mol_name = identify_chemical_identifier("28557")
    pg = point_group_from_sdf(sdf_file)
    if pg:
        print(f"Point group of {mol_name} is {pg}") # Ajouter un moyen de convertir dans le nom de la molécule


Point group of 1,4,7,10,13,16-hexaoxacyclooctadecane is C2h


In [158]:
from rdkit import Chem

def pg_from_sdf(sdf_string):
    mol = Chem.MolFromMolBlock(sdf_string)
    mol_name = identify_chemical_identifier(input_string=identifier)

    if mol is not None:  
        conf = mol.GetConformer()
        coordinates_list = []
        atomic_number = []
        for atom in mol.GetAtoms():
            aid = atom.GetIdx()
            pos = conf.GetAtomPosition(aid)
            coordinates_list.append([pos.x, pos.y, pos.z])
            atomic_number.append(atom.GetAtomicNum())
    else:
        print("Invalid SDF input. Please provide a valid SDF string.")
        return None

    pg = pymsym.get_point_group(atomic_numbers = atomic_number, positions=coordinates_list)
    return f"Point group of {mol_name} is {pg}"

if __name__ == "__main__":
    sdf_file = get_sdf("7628", "CID")
   #mol_name = identify_chemical_identifier(input_string=identifier)
    #identifier = input("Input name of molecule ") 
    #identifier_type = input("Input type of entry ('Name', 'SMILES', 'InChi', 'InChiKey', 'CID')")
    print(pg_from_sdf(sdf_file))
    #if pg:
        #print(f"Point group of {mol_name} is {pg}") # Ajouter un moyen de convertir dans le nom de la molécule)



Point group of hydrazine is D3h


In [65]:
import re

def identify_chemical_identifier(input_string):
    cid_pattern = r'^\d+$'
    smiles_pattern = r'^[CcNnOoPpSsFfClBrIi%0-9=\-\[\]\(\)\/\+\#\$:\.\,\\\/\@]+$'
    inchi_pattern = r'^InChI=1S?\/[0-9A-Za-z\.\/\-\(\),]+$'
    inchikey_pattern = r'^[A-Z]{14}-[A-Z]{10}-[A-Z]$'
    name_pattern = r'^[a-zA-Z0-9\s\-]+[a-zA-Z0-9\s\-]*$'

    if re.match(cid_pattern, input_string):
        mol_name_input = converter(input_string, "CID", cache)
        mol_name = mol_name_input.convert("name")
        return mol_name
    elif re.match(smiles_pattern, input_string):
        mol_name_input = converter(input_string, "SMILES", cache)
        mol_name = mol_name_input.convert("name")
        return mol_name
    elif re.match(inchi_pattern, input_string):
        mol_name_input = converter(input_string, "inchi", cache)
        mol_name = mol_name_input.convert("name")
        return mol_name
    elif re.match(inchikey_pattern, input_string):
        mol_name_input = converter(input_string, "inchikey", cache)
        mol_name = mol_name_input.convert("name")
        return mol_name
    elif re.match(name_pattern, input_string):
        if is_valid_molecule(input_string):
            return input_string
        else:
            return "Error"
    else:
        return "Unknown format"


In [68]:
my_input = converter("O=C=O", "SMILES", cache)
my_name = my_input.convert("name")
print(my_name)

my_input2 = converter("C1=CC=CC=C1", "SMILES", cache)
my_name2 = my_input2.convert("name")
print(my_name2)

my_input3 = converter("[C-]#[O+]", "SMILES", cache)
my_name3 = my_input3.convert("name")
print(my_name3)


#nn = identify_chemical_identifier("C(=O)=O")
nm = identify_chemical_identifier("CCO")
print(nm)


None
benzene
carbon monoxide
ethanol


In [159]:
from rdkit import Chem
import pymsym


def pg_from_sdf(sdf_string):
    mol = Chem.MolFromMolBlock(sdf_string)
    if mol is not None:
        conf = mol.GetConformer()
        coordinates_list = []
        atomic_numbers = []
        for atom in mol.GetAtoms():
            aid = atom.GetIdx()
            pos = conf.GetAtomPosition(aid)
            coordinates_list.append([pos.x, pos.y, pos.z])
            atomic_numbers.append(atom.GetAtomicNum())
        
        # Use the pymsym package to get the point group
        pg = pymsym.get_point_group(atomic_numbers=atomic_numbers, positions=coordinates_list)
        return pg
    else:
        print("Invalid SDF input. Please provide a valid SDF string.")
        return None

if __name__ == "__main__":
    # Example identifier and type
    identifier = "7628"  # Replace with your identifier
    identifier_type = "CID"  # Replace with your identifier type ('Name', 'SMILES', 'InChi', 'InChiKey', 'CID')
    mol_name = identify_chemical_identifier(input_string=identifier)

    sdf_string = get_sdf(identifier, identifier_type)
    pg = pg_from_sdf(sdf_string)
    
    if pg:
        print(f"Point group of {mol_name} is: {pg}")
    else:
        print("Point group could not be determined.")


Point group of boric acid is: D3h


In [18]:
def pg_from_sdf(identifier, identifier_type):
    sdf_string = get_sdf(identifier, identifier_type)
    mol = Chem.MolFromMolBlock(sdf_string)
    mol_name = identify_chemical_identifier(input_string=identifier)
    if mol is not None:
        conf = mol.GetConformer()
        coordinates_list = []
        atomic_numbers = []
        for atom in mol.GetAtoms():
            aid = atom.GetIdx()
            pos = conf.GetAtomPosition(aid)
            coordinates_list.append([pos.x, pos.y, pos.z])
            atomic_numbers.append(atom.GetAtomicNum())
        
        # Use the pymsym package to get the point group
        pg = pymsym.get_point_group(atomic_numbers=atomic_numbers, positions=coordinates_list)
        return f"Point group of {mol_name} is {pg}"
    else:
        print("Invalid SDF input. Please provide a valid SDF string.")
        return None

In [19]:
identifier = "7628"  # Replace with your identifier
identifier_type = "CID"  # Replace with your identifier type ('Name', 'SMILES', 'InChi', 'InChiKey', 'CID')
pg = pg_from_sdf(identifier="7628",identifier_type="CID")
print(pg)

Point group of boric acid is D3h
