In [None]:
units = {
    'i'    : 'input',
    'o'    : 'output',
    'dist' : 'distillation column',
    'cyc'  : 'solvent based azeotropic distillation', 
    'r'    : 'reactor',
    'sw'   : 'pressure swing distillation',
    'pms'  : 'polar molecule sieve based separation',
    'ms'   : 'molecular sieve based separation',
    'lmem' : 'liquid membrane based separation',
    'gmem' : 'gas membrane based separation',
    'crs'  : 'crystallization',
    'ab'   : 'absorption'
}

## Rules for Process Groups

In [6]:
from pyparsing import Literal, Word, Group, Forward
from pyparsing import Optional, OneOrMore, ZeroOrMore, nestedExpr
from pyparsing import alphas, nums

component = Word(alphas.upper(), exact=1)
mixture = Group(OneOrMore(component))

unit = Optional(Word(alphas.lower()), default='dist')
stream = Group(unit + mixture)

unit_ = Optional(Word(alphas.lower()), default='s')
stream_ = Group(unit_ + mixture)

lp = Literal("(")
rp = Literal(")")
sl = Literal("/")

processgroup = lp.suppress() + stream + ZeroOrMore(sl.suppress() + stream_) + rp.suppress()

processgroup.runTests("""\
    (A/BC)
    (ABC/DE)
    (cycA/B)
    (fABC/BCD)
    (rABC/nE/pABCD)
    (rABC/nE/pABCD)
    (swA/B)
    (pmsABC/D)
    (msABC/D)
    (lmemABC/D)
    (gmemABC/D)
    (crsABC/D)
    (abEAB/eF/EABF/EF)
    (iABCD)
    (oABD)
    (ABC/D)
    (rAB/pABD)
""")


(A/BC)
[['dist', ['A']], ['s', ['B', 'C']]]
[0]:
  ['dist', ['A']]
  [0]:
    dist
  [1]:
    ['A']
[1]:
  ['s', ['B', 'C']]
  [0]:
    s
  [1]:
    ['B', 'C']


(ABC/DE)
[['dist', ['A', 'B', 'C']], ['s', ['D', 'E']]]
[0]:
  ['dist', ['A', 'B', 'C']]
  [0]:
    dist
  [1]:
    ['A', 'B', 'C']
[1]:
  ['s', ['D', 'E']]
  [0]:
    s
  [1]:
    ['D', 'E']


(cycA/B)
[['cyc', ['A']], ['s', ['B']]]
[0]:
  ['cyc', ['A']]
  [0]:
    cyc
  [1]:
    ['A']
[1]:
  ['s', ['B']]
  [0]:
    s
  [1]:
    ['B']


(fABC/BCD)
[['f', ['A', 'B', 'C']], ['s', ['B', 'C', 'D']]]
[0]:
  ['f', ['A', 'B', 'C']]
  [0]:
    f
  [1]:
    ['A', 'B', 'C']
[1]:
  ['s', ['B', 'C', 'D']]
  [0]:
    s
  [1]:
    ['B', 'C', 'D']


(rABC/nE/pABCD)
[['r', ['A', 'B', 'C']], ['n', ['E']], ['p', ['A', 'B', 'C', 'D']]]
[0]:
  ['r', ['A', 'B', 'C']]
  [0]:
    r
  [1]:
    ['A', 'B', 'C']
[1]:
  ['n', ['E']]
  [0]:
    n
  [1]:
    ['E']
[2]:
  ['p', ['A', 'B', 'C', 'D']]
  [0]:
    p
  [1]:
    ['A', 'B', 'C', 'D']


(rABC/nE/

(True,
 [('(A/BC)',
   ([(['dist', (['A'], {})], {}), (['s', (['B', 'C'], {})], {})], {})),
  ('(ABC/DE)',
   ([(['dist', (['A', 'B', 'C'], {})], {}), (['s', (['D', 'E'], {})], {})], {})),
  ('(cycA/B)', ([(['cyc', (['A'], {})], {}), (['s', (['B'], {})], {})], {})),
  ('(fABC/BCD)',
   ([(['f', (['A', 'B', 'C'], {})], {}), (['s', (['B', 'C', 'D'], {})], {})], {})),
  ('(rABC/nE/pABCD)',
   ([(['r', (['A', 'B', 'C'], {})], {}), (['n', (['E'], {})], {}), (['p', (['A', 'B', 'C', 'D'], {})], {})], {})),
  ('(rABC/nE/pABCD)',
   ([(['r', (['A', 'B', 'C'], {})], {}), (['n', (['E'], {})], {}), (['p', (['A', 'B', 'C', 'D'], {})], {})], {})),
  ('(swA/B)', ([(['sw', (['A'], {})], {}), (['s', (['B'], {})], {})], {})),
  ('(pmsABC/D)',
   ([(['pms', (['A', 'B', 'C'], {})], {}), (['s', (['D'], {})], {})], {})),
  ('(msABC/D)',
   ([(['ms', (['A', 'B', 'C'], {})], {}), (['s', (['D'], {})], {})], {})),
  ('(lmemABC/D)',
   ([(['lmem', (['A', 'B', 'C'], {})], {}), (['s', (['D'], {})], {})], {})),
  (

## Rules for Describing Connections

In [7]:
gt = Literal('>')
lt = Literal('<')
connector = Optional( gt | lt, default=gt)

processgroupnum = Word(nums, exact=1)

connection = (processgroup + ZeroOrMore(connector + (processgroup | processgroupnum)))

connection.runTests("""\
    (iA)(rAB/pABCD)
    (iA)(oB)(cC)
    (iA)<(oB)>2
""")


(iA)(rAB/pABCD)
[['i', ['A']], ">", ['r', ['A', 'B']], ['p', ['A', 'B', 'C', 'D']]]
[0]:
  ['i', ['A']]
  [0]:
    i
  [1]:
    ['A']
[1]:
  ">"
[2]:
  ['r', ['A', 'B']]
  [0]:
    r
  [1]:
    ['A', 'B']
[3]:
  ['p', ['A', 'B', 'C', 'D']]
  [0]:
    p
  [1]:
    ['A', 'B', 'C', 'D']


(iA)(oB)(cC)
[['i', ['A']], ">", ['o', ['B']], ">", ['c', ['C']]]
[0]:
  ['i', ['A']]
  [0]:
    i
  [1]:
    ['A']
[1]:
  ">"
[2]:
  ['o', ['B']]
  [0]:
    o
  [1]:
    ['B']
[3]:
  ">"
[4]:
  ['c', ['C']]
  [0]:
    c
  [1]:
    ['C']


(iA)<(oB)>2
[['i', ['A']], '<', ['o', ['B']], '>', '2']
[0]:
  ['i', ['A']]
  [0]:
    i
  [1]:
    ['A']
[1]:
  <
[2]:
  ['o', ['B']]
  [0]:
    o
  [1]:
    ['B']
[3]:
  >
[4]:
  2



(True,
 [('(iA)(rAB/pABCD)',
   ([(['i', (['A'], {})], {}), ">", (['r', (['A', 'B'], {})], {}), (['p', (['A', 'B', 'C', 'D'], {})], {})], {})),
  ('(iA)(oB)(cC)',
   ([(['i', (['A'], {})], {}), ">", (['o', (['B'], {})], {}), ">", (['c', (['C'], {})], {})], {})),
  ('(iA)<(oB)>2',
   ([(['i', (['A'], {})], {}), '<', (['o', (['B'], {})], {}), '>', '2'], {}))])

## Rules for Describing Branches

In [9]:
#branch = Forward()

#branch = "[" + connector + OneOrMore(processgroup | processgroupnum | branch) + "]" 

branch = nestedExpr(opener="[", closer="]")

branch.runTests("""\
  #  [(oA)]
  #  [<(oD)]
  #  [<(A/BD)]
    [(A/BD)(BD/B)[(oA)]]
""")

#  [(oA)]
#  [<(oD)]
#  [<(A/BD)]
[(A/BD)(BD/B)[(oA)]]
[['(A/BD)(BD/B)', ['(oA)']]]
[0]:
  ['(A/BD)(BD/B)', ['(oA)']]
  [0]:
    (A/BD)(BD/B)
  [1]:
    ['(oA)']



(True,
 [('[(A/BD)(BD/B)[(oA)]]', ([(['(A/BD)(BD/B)', (['(oA)'], {})], {})], {}))])

In [None]:

processgroupnum = Word(nums, exact=1)

connection = (Literal('<') | Literal('>')) + Optional(processgroupnum)


branch = "[" + ZeroOrMore(connection) + ZeroOrMore(processgroup) + "]" 

sfiles = OneOrMore(processgroup) \
    + ZeroOrMore(ZeroOrMore(connection) + (branch | processgroup | processgroupnum))


sfiles.runTests("""\


    (iA)(rAB/pABCD)[(iB)]    
    (iA)(rAB/pABCD)[<(iB)]
    (iA)(rAB/pABCD)<1<2[<(iB)](mABC/D)[<(oD)](A/BC)1(cycB/C)2(oC)
    (iB)(rAB/pABCD)<1<2[<(iA)](mABC/D)[<(oD)](A/BC)1(cycB/C)2(oC)
    (iA)(rAB/pABCD)[<(iB)](mABC/D)[(oD)](A/BC)[(oA)](oBC)
    (iA)[(oB)(oC)[(oD)]]
    (iABCDE)(AB/CDE)[(A/B)[(oA)](oB)]
""");

