# rdkit分子输出

In [1]:
from rdkit import Chem

## 输出SMILES/SMARTS
输出SMILES：`MolToSmiles(mol, isomericSmiles, kekuleSmiles, canonical, …)`               
`kekuleSmiles`：默认False，不使用kekule时：脂肪族碳用"C"表示（大写），芳香族用"c"表示（小写）        
`isomericSmiles`：默认True，区分同分异构体（`@`表示手性，`\`和`/`表示顺反异构）  
`canonical`：默认True，输出标准SMILES      


In [3]:
# IPython help query: the trailing `?` displays the docstring of Chem.MolToSmiles.
Chem.MolToSmiles?

Docstring:
MolToSmiles( (Mol)mol, (SmilesWriteParams)params) -> str :
    Returns the canonical SMILES string for a molecule

    C++ signature :
        std::string MolToSmiles(RDKit::ROMol,RDKit::SmilesWriteParams)

MolToSmiles( (Mol)mol [, (bool)isomericSmiles=True [, (bool)kekuleSmiles=False [, (int)rootedAtAtom=-1 [, (bool)canonical=True [, (bool)allBondsExplicit=False [, (bool)allHsExplicit=False [, (bool)doRandom=False]]]]]]]) -> str :
    Returns the canonical SMILES string for a molecule
      ARGUMENTS:
    
        - mol: the molecule
        - isomericSmiles: (optional) include information about stereochemistry in
          the SMILES.  Defaults to true.
        - kekuleSmiles: (optional) use the Kekule form (no aromatic bonds) in
          the SMILES.  Defaults to false.
        - rootedAtAtom: (optional) if non-negative, this forces the SMILES 
          to start at a particular atom. Defaults to -1.
        - canonical: (optional) if false no attempt will be m

In [2]:
# Example inputs: an 8-membered ring, a 6-membered ring and a 5-membered
# oxygen heterocycle, each written with explicit alternating double bonds.
smiles_inputs = ['C1=CC=CC=CC=C1', 'C1=CC=CC=C1', 'C1=COC=C1']

# m1, m2 and m3 are kept as individual names because later cells use them.
m1, m2, m3 = (Chem.MolFromSmiles(smi) for smi in smiles_inputs)
mols = [m1, m2, m3]

# Default SMILES output for each parsed molecule.
print([Chem.MolToSmiles(mol) for mol in mols])



['C1=CC=CC=CC=C1', 'c1ccccc1', 'c1ccoc1']


### 输出kekule式

kekule形式：对符合4N+2规则的芳香体系，用交替的单键、双键和大写原子符号来书写，而不是用小写的芳香原子符号  
4N+2规则：也叫Hückel规则，在闭环共轭体系中，当π电子数为4n+2时，才具有芳香性；例如上面的m1（环辛四烯，8个π电子）不满足该规则，所以默认输出中仍是大写加双键的形式  

In [6]:
# Kekulize every molecule in place, collecting its Kekule-form SMILES as we go.
# Note that Chem.Kekulize modifies the molecules held in `mols`.
kekule_smiles = []
for mol in mols:
    Chem.Kekulize(mol)
    kekule_smiles.append(Chem.MolToSmiles(mol, kekuleSmiles=True))
print(kekule_smiles)



['C1=CC=CC=CC=C1', 'C1=CC=CC=C1', 'C1=COC=C1']


### 设置立体参数
通过isomericSmiles控制

In [7]:
# A molecule with one stereocentre, marked by '@' in the input SMILES.
m = Chem.MolFromSmiles('C[C@H](O)c1ccccc1')

# Print the SMILES with stereo information (True is the documented default)
# and then without it.
for keep_stereo in (True, False):
    print(Chem.MolToSmiles(m, isomericSmiles=keep_stereo))



C[C@H](O)c1ccccc1
CC(O)c1ccccc1


## 批量输出SMILES
批量输出SMILES：`SmilesWriter(fileName, delimiter, includeHeader, nameHeader, isomericSmiles, kekuleSmiles)`
`fileName`：输出文件名            
`delimiter`：分隔符，默认为空格`' '`  
`includeHeader`：是否写入表头，默认True  
`nameHeader`：分子名一列的列名，默认`'Name'`  
`isomericSmiles`：立体信息，默认True             
`kekuleSmiles`：kekule形式，默认False           

In [8]:
# Write every molecule in `mols` to a tab-delimited SMILES file.
writer = Chem.SmilesWriter('data/batch.smi', delimiter='\t')
try:
    for mol in mols:
        writer.write(mol)
finally:
    # Close even if a write fails so the output file is flushed and released.
    writer.close()


### 批量输出SMILES和属性
批量输出SMILES及属性，通过以下函数进行操作：             
`mol.GetPropNames()`，查看分子属性列表            
`mol.GetProp(key)`，获取属性key对应的值  
`mol.SetProp(key, val)`，新增属性名key、对应属性值val              
`writer.SetProps()`，设置哪些属性要输出                    


In [9]:
from rdkit.Chem import Descriptors

# Write each molecule to a tab-delimited SMILES file whose name column is
# headed 'mol_id', together with the LOGP and MW properties.
writer = Chem.SmilesWriter('data/batch_smiles.smi', delimiter='\t', nameHeader='mol_id')
try:
    # Select which molecule properties are written alongside the SMILES.
    writer.SetProps(['LOGP', 'MW'])
    for i, mol in enumerate(mols):
        # NOTE(review): ExactMolWt is stored under the key 'MW'; confirm that
        # the exact mass (rather than Descriptors.MolWt) is what is intended.
        mw = Descriptors.ExactMolWt(mol)
        logp = Descriptors.MolLogP(mol)
        # Properties are stored as strings, formatted to two decimals.
        mol.SetProp('MW', '%.2f' % mw)
        mol.SetProp('LOGP', '%.2f' % logp)
        mol.SetProp('_Name', 'No_%s' % i)
        writer.write(mol)
finally:
    # Close even if a descriptor calculation or write fails, so the file
    # handle is always released.
    writer.close()
print('number of mols:', writer.NumMols())

# `mol` is the last molecule from the loop above; the recorded output lists
# only 'MW' and 'LOGP' here, not '_Name'.
print('mol properties:', list(mol.GetPropNames()))


number of mols: 3
mol properties: ['MW', 'LOGP']


## 输出SMARTS
输出SMARTS：`MolToSmarts()`

In [10]:
# Convert m3 (parsed earlier from 'C1=COC=C1') to a SMARTS string; as the last
# expression in the cell, the returned string is shown as the cell output.
Chem.MolToSmarts(m3, isomericSmiles=True)

'[#6]1:[#6]:[#8]:[#6]:[#6]:1'

## 输出`.sdf`
批量输出到文件：`SDWriter()`        
使用方法类似于SMILES的批量输出        
      

In [11]:
# Write each molecule to an SDF file together with the LOGP and MW properties.
writer = Chem.SDWriter('data/batch.sdf')
try:
    # Select which molecule properties are written for each record.
    writer.SetProps(['LOGP', 'MW'])
    for i, mol in enumerate(mols):
        # NOTE(review): ExactMolWt is stored under the key 'MW'; confirm that
        # the exact mass (rather than Descriptors.MolWt) is what is intended.
        mw = Descriptors.ExactMolWt(mol)
        logp = Descriptors.MolLogP(mol)
        # Properties are stored as strings, formatted to two decimals.
        mol.SetProp('MW', '%.2f' % mw)
        mol.SetProp('LOGP', '%.2f' % logp)
        mol.SetProp('_Name', 'No_%s' % i)
        writer.write(mol)
finally:
    # Close even if a descriptor calculation or write fails, so the file
    # handle is always released.
    writer.close()


## 批量输出到.gz


In [12]:
import gzip
# Stream the molecules as SDF text into a gzip-compressed file.
# 'wt' opens the archive for text-mode writing; the context manager closes the
# gzip stream even if writing fails.
with gzip.open('data/batch.sdf.gz', 'wt') as outf:
    writer = Chem.SDWriter(outf)
    try:
        for mol in mols:
            writer.write(mol)
    finally:
        # Close the writer before the gzip stream is closed so that any
        # pending output reaches the file first.
        writer.close()


## 输出.mol
直接输出：`MolToMolBlock()`


In [13]:
# Print m1 as MOL-format text.
# NOTE(review): the recorded output starts with 'No_0', which matches the
# '_Name' property assigned to the first molecule in the earlier writer cells,
# so the output depends on those cells having run first.
print(Chem.MolToMolBlock(m1))

No_0
     RDKit          2D

  8  8  0  0  0  0  0  0  0  0999 V2000
    1.9598    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.3858   -1.3858    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.0000   -1.9598    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.3858   -1.3858    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.9598    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.3858    1.3858    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    1.9598    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.3858    1.3858    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  2  0
  2  3  1  0
  3  4  2  0
  4  5  1  0
  5  6  2  0
  6  7  1  0
  7  8  2  0
  8  1  1  0
M  END



输出到文件：`MolToMolFile(mol, filename, includeStereo, …)`     
`mol`：mol对象          
`filename`：文件名         
`includeStereo`：立体信息，默认True         

In [14]:
# m1 was parsed from 'C1=CC=CC=CC=C1': an eight-membered carbon ring with four
# double bonds, i.e. cyclooctatetraene (the previous label 'cyclobutane' was wrong).
m1.SetProp('_Name', 'cyclooctatetraene')

# Write the single molecule to a .mol file.
Chem.MolToMolFile(m1, 'data/sig.mol')


## 输出其他格式:pdb, fasta, xyz…

```python
# PDB
Chem.MolToPDBBlock()
Chem.MolToPDBFile()
Chem.PDBWriter()
# FASTA
Chem.MolToFASTA()
# XYZ
Chem.MolToXYZBlock()
Chem.MolToXYZFile()

```