# SFILES Directed Graph

This notebook demonstrates the compilation of SFILES descriptors to a directed graph.

## Parsing Rules

The next cell develops a representation of the SFILES grammar using the `pyparsing` library. In this case, the `unit` match is not restricted to a specific list of unit operations. Instead, `unit` is any sequence of lower case alphabetic characters.

In [26]:
from pyparsing import Literal, Word, Group, Suppress
from pyparsing import Optional, OneOrMore, ZeroOrMore, oneOf, nestedExpr
from pyparsing import alphas, nums

LPAR  = Suppress("(")
RPAR  = Suppress(")")
LBRA  = Suppress("[")
RBRA  = Suppress("]")
SLASH = Suppress("/")
GT = Literal(">")
LT = Literal("<")

# components
component = Word(alphas.upper(), exact=1)

# mixtures
mixture = Group(OneOrMore(component))

# first unit and stream in a process group
unit = Optional(Word(alphas.lower()), default='dist')
stream = Group(unit + mixture)

# subsequent units and streams in a process group
unit_ = Optional(Word(alphas.lower()), default='s')
stream_ = Group(unit_ + mixture)

# process group
processgroup = Group(LPAR + stream + ZeroOrMore(SLASH + stream_) + RPAR)
           
# a process group sequence is comprised of connectors, process group, and recycles                                             
connector = Optional(GT | LT, default=GT)
recycle = Word(nums, exact=1)
sequence = Group(processgroup + ZeroOrMore(connector + (processgroup | recycle )))

# nested branches
branchsequence = OneOrMore(connector + (processgroup | recycle ))
branch = nestedExpr(opener=LBRA, closer=RBRA, content=branchsequence)

# sfiles expression start with sequence
sfiles = sequence + ZeroOrMore(branch | sequence)

# example
results = sfiles.parseString('(iA)(rAB/pABCD)<1<2[<(iB)](mABC/D)[<(oD)](A/BC)1(cycB/C)2(oC)')
results.asList()

[[[['i', ['A']]],
  ">",
  [['r', ['A', 'B']], ['p', ['A', 'B', 'C', 'D']]],
  '<',
  '1',
  '<',
  '2'],
 ['<', [['i', ['B']]]],
 [[['m', ['A', 'B', 'C']], ['s', ['D']]]],
 ['<', [['o', ['D']]]],
 [[['dist', ['A']], ['s', ['B', 'C']]],
  ">",
  '1',
  ">",
  [['cyc', ['B']], ['s', ['C']]],
  ">",
  '2',
  ">",
  [['o', ['C']]]]]

In [112]:
graph = {}
components = set()
units = set()

lastProcessgroup = ['']
lastConnector = ''
recycleA = {}
recycleB = {}

def componentAction(t):
    components.add(t[0])
    return t
component.setParseAction(componentAction)

def mixtureAction(t):
    return ''.join(t[0])
mixture.setParseAction(mixtureAction)

def unitAction(t):
    units.add(t[0])
    return t
unit.setParseAction(unitAction)

def streamAction(t):
    return ''.join(t[0])
stream.setParseAction(streamAction)
stream_.setParseAction(streamAction)

def processgroupAction(t):
    global lastConnector
    pg = '(' + '/'.join(t[0]) + ')'
    graph[pg] = set()
    print(pg, lastProcessgroup)
    if lastProcessgroup[-1] and ('>' in lastConnector):
        graph[lastProcessgroup[-1]].add(pg)
    elif lastProcessgroup[-1] and ('>' in lastConnector):
        graph[pg].add(lastProcessgroup[-1])
    lastProcessgroup[-1] = pg
    return pg
processgroup.setParseAction(processgroupAction)
    
def connectorAction(t):
    global lastConnector
    lastConnector = str(t[0])
    return t
connector.setParseAction(connectorAction)

def recycleAction(t):
    global lastProcessgroup, lastConnector
    if '<' in lastConnector:
        if t[0] in recycleA.keys():
            src = recycleA[t[0]]
            dst = lastProcessgroup[-1]
            graph[src].add(dst)
        else:
            recycleB[t[0]] = lastProcessgroup[-1]
    else:
        if t[0] in recycleB.keys():
            src = lastProcessgroup[-1]
            dst = recycleB[t[0]]
            graph[src].add(dst)
        else:
            recycleA[t[0]] = lastProcessgroup[-1]

def sequenceAction(t):
    global lastProcessgroup
    lastProcessgroup.append(t[0][-1])
    return t
sequence.setParseAction(sequenceAction)
branchsequence.setParseAction(sequenceAction)

def branchAction(t):
    global lastProcessgroup
    lastProcessgroup.pop(-1)
    lastProcessgroup.pop(-1)
    return t
branch.setParseAction(branchAction)

results = sfiles.parseString('(iA)(rAB/pABCD)<1<2[<(iB)](mABC/D)[<(oD)](A/BC)1(cycB/C)2(oC)')

print('Components:', components)
print('Units:', units)
print()
for u, s in graph.items():
    fmt = '{0:>12s}:'
    print(fmt.format(u), s)

(iA) ['']
(rAB/pABCD) ['(iA)']
(iB) ['(rAB/pABCD)', '2']
(mABC/sD) ['(rAB/pABCD)']
(oD) ['(mABC/sD)', '(mABC/sD)']
(distA/sBC) ['(mABC/sD)']
(cycB/sC) ['(distA/sBC)']
(oC) ['(cycB/sC)']
Components: {'D', 'C', 'B', 'A'}
Units: {'cyc', 'o', 'i', 'dist', 'r', 'm'}

        (iA): {'(rAB/pABCD)'}
 (rAB/pABCD): {'(mABC/sD)'}
        (iB): set()
   (mABC/sD): {'(distA/sBC)'}
        (oD): set()
 (distA/sBC): {'(cycB/sC)'}
   (cycB/sC): {'(oC)'}
        (oC): set()
