In [11]:
from chemdataextractor import Document
from chemdataextractor.model import Compound
from chemdataextractor.doc import Paragraph, Heading

In [12]:
d = Document(Heading(u'CH3NH3I was synthesized using the method described by Michael M. Lee, et al.'), 
             Paragraph(u'A concentrated aqueous solution of hydroiodic acid (HI) (15.0 ml, 57 wt% in water, Alfa Aesar) was reacted with methylamine (CH3NH2) (13.5 ml, 40 wt% in aqueous solution, Alfa Aesar) at 0 C for 2 h with constant stirring under a nitrogen atmosphere. Methylammonium iodide was crystallized through removing the solvent by a rotary evaporator. The generated white powder was washed with diethyl ether (Alfa Aesar) three times and dried under vacuum overnight.'))

In [13]:
d

In [14]:
d.records.serialize()

[{'names': ['CH3NH3I']}]

In [15]:
# Handling boiling properties

from chemdataextractor.model import BaseModel, StringType, ListType, ModelType

class BoilingPoint(BaseModel):
    value = StringType()
    units = StringType()
    
Compound.boiling_points = ListType(ModelType(BoilingPoint))

In [30]:
# Writing new parser
import re
from chemdataextractor.parse import R, I, W, Optional, merge

prefix = I(u'at\s')
print(type(prefix))
units = R(u'^[CFK]\.?$')(u'units')
print(type(units))
value = R(u'^\d+(\.\d+)?$')(u'value')
print(type(value))
bp = (prefix + value + units)(u'bp')
print(type(bp))


<class 'chemdataextractor.parse.elements.IWord'>
<class 'chemdataextractor.parse.elements.Regex'>
<class 'chemdataextractor.parse.elements.Regex'>
<class 'chemdataextractor.parse.elements.And'>


In [31]:
from chemdataextractor.parse.base import BaseParser
from chemdataextractor.utils import first

class BpParser(BaseParser):
    root = bp

    def interpret(self, result, start, end):
        compound = Compound(
            boiling_points=[
                BoilingPoint(
                    value=first(result.xpath('./value/text()')),
                    units=first(result.xpath('./units/text()'))
                )
            ]
        )
        yield compound


In [32]:
Paragraph.parsers = [BpParser()]

In [33]:
d.records.serialize()

[{'names': ['CH3NH3I']}]