In [8]:
# Path of the patch file that the read cell opens.
file = '001.pd'

In [36]:
# Record keywords collected by the structure-building loop; records whose
# keyword is not listed here (e.g. msg, text) are skipped by that loop.
tags = ['canvas','obj','connect']

In [32]:
# Read the whole patch into memory as one string.
# The encoding is given explicitly so the result does not depend on the
# platform's default encoding (assumes the patch is UTF-8/ASCII -- TODO confirm
# for patches written by other tools).
with open(file, 'r', encoding='utf-8') as f:
    data = f.read()

In [23]:
# Display the raw file contents exactly as read.
data

'#N canvas 1837 346 591 300 10;\n#X obj 137 73 osc~ 440;\n#X obj 137 121 *~ 0.5;\n#X obj 137 178 dac~;\n#X msg 137 26 proviamo con la supercazzola prematurata con scappellamento\na sinistra;\n#X text 267 113 tastooma;\n#X connect 0 0 1 0;\n#X connect 1 0 2 0;\n#X connect 1 0 2 1;\n#X connect 3 0 0 0;\n'

In [33]:
import re

# Turn the raw patch text into a list of records without the leading "#X "-style
# prefix, e.g. 'obj 137 73 osc~ 440'.
#
# * Records are cut at *unescaped* semicolons only, so an escaped '\;' inside a
#   message stays part of its record instead of being deleted.
# * Records are no longer split on '#', so a '#' inside comment/message text does
#   not break a record in two.
# * Whitespace (including the line breaks inside long records) is collapsed to
#   single spaces, and empty chunks such as the text after the last ';' are dropped.
#
# NOTE: this cell rebinds `data` from str to list, so it must be run exactly once
# after the cell that reads the file.
data = [
    re.sub(r'^#\S*\s*', '', record)
    for record in (' '.join(chunk.split()) for chunk in re.split(r'(?<!\\);', data))
    if record
]

In [46]:
# Display the cleaned list of records.
data

['canvas 1837 346 591 300 10',
 'obj 137 73 osc~ 440',
 'obj 137 121 *~ 0.5',
 'obj 137 178 dac~',
 'msg 137 26 proviamo con la supercazzola prematurata con scappellamento a sinistra',
 'text 267 113 tastooma',
 'connect 0 0 1 0',
 'connect 1 0 2 0',
 'connect 1 0 2 1',
 'connect 3 0 0 0']

In [59]:
# Group the cleaned records by keyword:
#   structure['obj']     -> list of object records, each suffixed with its position
#                           among the 'obj' records
#   structure['connect'] -> list of connection records
#   structure[<tag>]     -> last record seen for any other keyword listed in `tags`
structure = {}
obj = []
connect = []

# One pass over the records. The keyword is the first space-separated word and
# must equal a tag exactly (the previous substring search in the first 8
# characters could also match a tag embedded in other text).
for s in data:
    keyword, _, value = s.partition(" ")
    if keyword not in tags:
        continue
    if keyword == 'obj':
        obj.append(value)
    elif keyword == 'connect':
        connect.append(value)
    else:
        structure[keyword] = value

# Built once, after all records have been seen (this used to be recomputed on
# every iteration of the tag loop).
# NOTE(review): the appended index counts only 'obj' records, while the sample's
# connect records reference index 3 -- confirm which numbering is intended.
structure['obj'] = [x+' '+str(n) for n,x in enumerate(obj)]
structure['connect'] = connect

In [60]:
# Display the grouped records.
structure

{'canvas': '1837 346 591 300 10',
 'obj': ['137 73 osc~ 440 0', '137 121 *~ 0.5 1', '137 178 dac~ 2'],
 'connect': ['0 0 1 0', '1 0 2 0', '1 0 2 1', '3 0 0 0']}

In [74]:
# calclex.py

from sly import Lexer

class CalcLexer(Lexer):
    # Tokenizer for the records of a patch file such as
    #   #X obj 137 73 osc~ 440;
    # It emits one keyword token per record type, FLOAT for numbers, LITERAL for
    # other words and END for the terminating ';'. The '#N' / '#X' prefixes,
    # blanks and newlines are skipped.
    # (Documented with comments rather than a class docstring to stay clear of
    # the string handling SLY applies to names assigned in a Lexer class body.)

    def __init__(self):
        # Not read or updated anywhere in this class yet.
        self.obj_counter = 0
    
    # Set of token names.   This is always required
    tokens = { CANVAS, OBJ, TEXT, CONNECT, DECLARE, FLOATATOM, MSG, FLOAT, END, LITERAL }

    # Characters ignored between tokens. `ignore` is a set of characters, not a
    # regex, so it must NOT be a raw string: r' \t' would skip a literal
    # backslash and the letter 't' at the start of a token (turning 'text' into
    # 'ext') while leaving real tab characters unhandled.
    ignore = ' \t'
    # Patterns (regexes) that are matched and discarded.
    ignore_newline = r'\s*\n\s*'
    ignore_prefix = r'#[A-Z]'

    # Regular expression rules for tokens. Order matters: the keyword rules are
    # listed before LITERAL so that they are tried first.
    CANVAS = r'canvas'
    OBJ = r'obj'
    TEXT = r'text'
    CONNECT = r'connect'
    DECLARE = r'declare'
    FLOATATOM = r'floatatom'
    MSG = r'msg'
    # Digits with at most one decimal point (the previous pattern, \d+\.*\d*,
    # accepted any number of dots, e.g. '1...2').
    FLOAT = r'\d+(?:\.\d*)?'
    LITERAL = r'[a-zA-Z_*~\(\)]+'
    
    @_(r';')
    def END(self, t):
        """Emit the record terminator and advance ``lineno`` by one.

        Newlines are skipped by the plain ``ignore_newline`` pattern, which does
        not update ``lineno``; the counter therefore advances once per ';', not
        once per physical line.
        """
        self.lineno += len(t.value)
        return t

if __name__ == '__main__':
    # Sample patch used to exercise the lexer: records are '#'-prefixed,
    # ';'-terminated, and the msg record is wrapped over two lines.
    data = """
    #N canvas 1837 346 591 300 10;
    #X obj 137 73 osc~ 440;
    #X obj 137 121 *~ 0.5;
    #X obj 137 178 dac~;
    #X msg 137 26 proviamo con la supercazzola prematurata con scappellamento
    a sinistra;
    #X text 267 113 tastooma;
    #X connect 0 0 1 0;
    #X connect 1 0 2 0;
    #X connect 1 0 2 1;
    #X connect 3 0 0 0;"""

    lexer = CalcLexer()

    # Print every token produced for the sample, one per line.
    for tok in lexer.tokenize(data):
        token_type, token_value = tok.type, tok.value
        print('type=%r, value=%r' % (token_type, token_value))


type='CANVAS', value='canvas'
type='FLOAT', value='1837'
type='FLOAT', value='346'
type='FLOAT', value='591'
type='FLOAT', value='300'
type='FLOAT', value='10'
type='END', value=';'
type='OBJ', value='obj'
type='FLOAT', value='137'
type='FLOAT', value='73'
type='LITERAL', value='osc~'
type='FLOAT', value='440'
type='END', value=';'
type='OBJ', value='obj'
type='FLOAT', value='137'
type='FLOAT', value='121'
type='LITERAL', value='*~'
type='FLOAT', value='0.5'
type='END', value=';'
type='OBJ', value='obj'
type='FLOAT', value='137'
type='FLOAT', value='178'
type='LITERAL', value='dac~'
type='END', value=';'
type='MSG', value='msg'
type='FLOAT', value='137'
type='FLOAT', value='26'
type='LITERAL', value='proviamo'
type='LITERAL', value='con'
type='LITERAL', value='la'
type='LITERAL', value='supercazzola'
type='LITERAL', value='prematurata'
type='LITERAL', value='con'
type='LITERAL', value='scappellamento'
type='LITERAL', value='a'
type='LITERAL', value='sinistra'
type='END', value=';'
type

In [150]:
from sly import Parser

class CalcParser(Parser):
    """Parser for a single patch record tokenized by ``CalcLexer``.

    Two record types are recognised: ``canvas`` and ``obj``. An ``obj`` record
    may carry zero or more numeric arguments after the object name.
    """

    # Get the token list from the lexer (required)
    tokens = CalcLexer.tokens

    # Grammar rules and actions
    @_('CANVAS FLOAT FLOAT FLOAT FLOAT FLOAT END')
    def expr(self, p):
        """Return the canvas keyword followed by the first four numbers as ints.

        NOTE(review): the rule matches five FLOAT tokens but the fifth one is
        not part of the returned tuple -- confirm this is intended.
        """
        return p[0],int(p[1]),int(p[2]),int(p[3]),int(p[4])

    # The second alternative accepts objects without arguments, such as
    # "obj 137 178 dac~;", which the first alternative alone rejects as a
    # syntax error because expr_args needs at least one FLOAT.
    @_('OBJ FLOAT FLOAT LITERAL expr_args END',
       'OBJ FLOAT FLOAT LITERAL END')
    def expr(self, p):
        """Return the production object for an ``obj`` record unchanged."""
        return p

    @_('FLOAT expr_args')
    def expr_args(self, p):
        """Return a pair: the first argument and the value of the remaining ones."""
        return p[0],p.expr_args

    @_('FLOAT')
    def expr_args(self, p):
        """Return the value of the last (or only) argument."""
        return p.FLOAT
    




In [151]:
if __name__ == '__main__':
    # Parse one hand-written obj record carrying several numeric arguments
    # and print whatever the parser returns for it.
    text = '#X obj 137 73 osc~ 440 12 7 19;'

    lexer = CalcLexer()
    parser = CalcParser()

    result = parser.parse(
        lexer.tokenize(text)
    )
    print(result)

('expr', 'obj', '137', '73', 'osc~', '19', ';')
