# 1. Getting started

## Begin by importing PubChemPy

In [1]:
from __future__ import annotations

import pubchempy as pcp

## Let's get the Compound with CID 5090

In [2]:
# Fetch the compound record for CID 5090. The CID is passed as an integer,
# matching the other from_cid() calls in this notebook.
c = pcp.Compound.from_cid(5090)

In [5]:
# Display the compound's molecular weight. In the run recorded here the value
# is shown as the string '314.4' rather than a float.
c.molecular_weight

'314.4'

## Now we have a Compound object called 'c'. We can get all the information we need from this object
### e.g. molecular formula, molecular weight, isomeric SMILES, InChI, XLogP, IUPAC name, synonyms, etc.

In [4]:
# Show several commonly used properties of the compound, one per line.
# sep='\n' puts each value on its own line; passing '\n' as a separate
# print() argument (with the default ' ' separator) would add stray spaces
# at the end of each line and the start of the next.
print(
    c.molecular_formula,
    c.molecular_weight,
    c.isomeric_smiles,
    c.inchi,
    c.xlogp,
    c.iupac_name,
    c.synonyms,
    sep='\n',
)

C17H14O4S 
 314.4 
 CS(=O)(=O)C1=CC=C(C=C1)C2=C(C(=O)OC2)C3=CC=CC=C3 
 InChI=1S/C17H14O4S/c1-22(19,20)14-9-7-12(8-10-14)15-11-21-17(18)16(15)13-5-3-2-4-6-13/h2-10H,11H2,1H3 
 2.3 
 3-(4-methylsulfonylphenyl)-4-phenyl-2H-furan-5-one 
 ['rofecoxib', '162011-90-7', 'Vioxx', 'Ceoxx', 'MK 966', '4-[4-(methylsulfonyl)phenyl]-3-phenylfuran-2(5H)-one', '4-(4-(Methylsulfonyl)phenyl)-3-phenylfuran-2(5H)-one', 'MK-966', 'MK 0966', 'MK-0966', '4-[4-(methylsulfonyl)phenyl]-3-phenyl-2(5H)-furanone', 'MK0966', '3-(4-methylsulfonylphenyl)-4-phenyl-2H-furan-5-one', 'UNII-0QTW8Z7MCR', 'Rofecoxib (Vioxx)', 'C17H14O4S', '3-phenyl-4-[4-(methylsulfonyl)phenyl]-2(5H)-furanone', '0QTW8Z7MCR', 'CHEMBL122', '4-(4-(Methylsulfonyl)phenyl)-3-phenyl-2(5H)-furanone', 'CHEBI:8887', 'TRM-201', '2(5H)-Furanone, 4-[4-(methylsulfonyl)phenyl]-3-phenyl-', 'refecoxib', 'NCGC00095118-01', '4-(4-methanesulfonylphenyl)-3-phenyl-2,5-dihydrofuran-2-one', 'Vioxx Dolor', '2(5H)-Furanone, 4-(4-(methylsulfonyl)phenyl)-3-phenyl-', 'V

In [2]:
# To call Compound and get_compounds directly (without the `pcp.` prefix),
# import the names explicitly as shown below.
import pubchempy as pcp
from pubchempy import Compound, get_compounds, get_cids # use this form of import.
#c = Compound.from_cid(1423)
#cs = get_compounds('Aspirin', 'name')

## Searching

### 내가 찾는 화합물의 CID를 모를 때? get_compounds()를 사용한다.

In [3]:
# Search PubChem by compound name; `results` is reused by the cells that follow.
# In the run recorded here the search returned a one-element list.
results = get_compounds('Glucose', 'name')
print(results)

[Compound(5793)]


In [8]:
# Walk over the search hits and print each one on its own line.
for compound in results:
    print(compound)

Compound(5793)


In [4]:
# Print the isomeric SMILES string of every hit from the name search.
for compound in results:
    print(compound.isomeric_smiles)

C([C@@H]1[C@H]([C@@H]([C@H](C(O1)O)O)O)O)O


### Retrieving the record for a SMILES string is just as easy

In [15]:
# Same search function, but the identifier is interpreted as a SMILES string
# because the namespace argument is 'smiles' instead of 'name'.
pcp.get_compounds('C1=CC2=C(C3=C(C=CC=N3)C=C2)N=C1', 'smiles')

[Compound(1318)]

# 2. Searching

## Searching 2D and 3D coordinates

In [21]:
# Look up aspirin by name and request its 3D record.
pcp.get_compounds('aspirin', 'name', record_type='3d')

[Compound(2244)]

## Getting a full results list for common compound names

### list_return='flat' 원래 Default 값은 SID와 CID가 같이 출력이되지만 list_return='flat' 을 지정해주면 해당되는 CID만 출력하게 함.

In [33]:
# Collect the CIDs associated with substances named 'glucose'.
# list_return='flat' asks get_cids for a single flat list of CIDs.
results = pcp.get_cids('glucose', 'name', 'substance', list_return='flat')

# Named `compounds` rather than `list` so the built-in `list` type is not
# shadowed for the rest of the kernel session.
compounds = []
for cid in results:
    # Build the list incrementally with append().
    compounds.append(pcp.Compound.from_cid(cid))
compounds

[Compound(206),
 Compound(5793),
 Compound(24749),
 Compound(64689),
 Compound(79025),
 Compound(107526),
 Compound(5282499)]

In [34]:
# Same lookup as the previous cell, written as a list comprehension:
# one Compound object per CID returned by get_cids().
results = pcp.get_cids('glucose', 'name', 'substance', list_return='flat')
[pcp.Compound.from_cid(cid) for cid in results]

[Compound(206),
 Compound(5793),
 Compound(24749),
 Compound(64689),
 Compound(79025),
 Compound(107526),
 Compound(5282499)]

# 3. Compound

In [36]:
# Rebind `c` to the compound with CID 6819 and display its repr.
c = pcp.Compound.from_cid(6819)
c

Compound(6819)

## Dictionary representation
### - 'Compound' has a 'record' property
### - The 'record' is a dictionary that contains all the information about the compound.
### - And all other properties are derived from this record.
### - Additionally, each 'Compound' provides a to_dict() method
### - to_dict() method returns PubChemPy's own dictionary representation of the Compound data.

In [37]:
# Rebind `c` to CID 962 and display its full dictionary representation.
# The recorded output is long (it includes atoms, bonds and a fingerprint string).
c = pcp.Compound.from_cid(962)
c.to_dict()

{'atom_stereo_count': 0,
 'atoms': [{'aid': 1, 'number': 8, 'element': 'O', 'x': 2.5369, 'y': -0.155},
  {'aid': 2, 'number': 1, 'element': 'H', 'x': 3.0739, 'y': 0.155},
  {'aid': 3, 'number': 1, 'element': 'H', 'x': 2, 'y': 0.155}],
 'bond_stereo_count': 0,
 'bonds': [{'aid1': 1, 'aid2': 2, 'order': 1},
  {'aid1': 1, 'aid2': 3, 'order': 1}],
 'cactvs_fingerprint': '000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000

In [44]:
# Restrict the dictionary to a chosen subset of properties.
wanted_properties = ['atoms', 'bonds', 'cid', 'inchi']
c.to_dict(properties=wanted_properties)

{'atoms': [{'aid': 1, 'number': 8, 'element': 'O', 'x': 2.5369, 'y': -0.155},
  {'aid': 2, 'number': 1, 'element': 'H', 'x': 3.0739, 'y': 0.155},
  {'aid': 3, 'number': 1, 'element': 'H', 'x': 2, 'y': 0.155}],
 'bonds': [{'aid1': 1, 'aid2': 2, 'order': 1},
  {'aid1': 1, 'aid2': 3, 'order': 1}],
 'cid': 962,
 'inchi': 'InChI=1S/H2O/h1H2'}

# 4. Substance

## Retrieving substances

In [49]:
# Search substance records by name. Note that this rebinds `results`, which
# earlier cells used for compound/CID search results.
results = pcp.get_substances('Coumarin 343', 'name')
print(results)

[Substance(57269654), Substance(85084977), Substance(126686397), Substance(143491255), Substance(152243230), Substance(162092514), Substance(162189467), Substance(186021999), Substance(206257050), Substance(252110292), Substance(253662386), Substance(254792418), Substance(273956259), Substance(310270503), Substance(312673838), Substance(318035922), Substance(328749455), Substance(341849648), Substance(347708970), Substance(347814069), Substance(347985590), Substance(374099395), Substance(375628387), Substance(375783268), Substance(384252054), Substance(385668079), Substance(385848977), Substance(386276812), Substance(386504630), Substance(402321720), Substance(402408734), Substance(438496721), Substance(439355660), Substance(439643149), Substance(441119732), Substance(441554824), Substance(446149268), Substance(446377648), Substance(438546303)]


## Retrieving substances example

In [52]:
# Load a single substance record by its SID and show a few of its attributes.
substance = pcp.Substance.from_sid(223766453)

# One labelled value per line. Each label/value pair is formatted into a single
# string and the pairs are joined with sep='\n', so no stray spaces appear at
# line boundaries (as happens when '\n' is passed as its own print() argument).
print(
    f'substance.synonyms: {substance.synonyms}',
    f'substance.source_id: {substance.source_id}',
    f'substance.standardized_cid: {substance.standardized_cid}',
    f'substance.standardized_compound: {substance.standardized_compound}',
    sep='\n',
)


substance.synonyms: ['2-(Acetyloxy)-benzoic acid', '2-(acetyloxy)benzoic acid', '2-acetoxy benzoic acid', '2-acetoxy-benzoic acid', '2-acetoxybenzoic acid', '2-acetyloxybenzoic acid', 'Acetylsalicyclic acid', 'BSYNRYMUTXBXSQ-UHFFFAOYSA-N', 'acetoxybenzoic acid', 'acetyl salicyclic acid', 'acetyl salicylic acid', 'acetyl-salicylic acid', 'acetylsalicylic acid', 'aspirin', 'o-acetoxybenzoic acid'] 
 substance.source_id: BSYNRYMUTXBXSQ-UHFFFAOYSA-N 
 substance.standardized_cid: 2244 
 substance.standardized_compound: Compound(2244)
