### Read Lexems file


In [249]:
fs = require('fs')

{
  appendFile: [Function: appendFile],
  appendFileSync: [Function: appendFileSync],
  access: [Function: access],
  accessSync: [Function: accessSync],
  chown: [Function: chown],
  chownSync: [Function: chownSync],
  chmod: [Function: chmod],
  chmodSync: [Function: chmodSync],
  close: [Function: close],
  closeSync: [Function: closeSync],
  copyFile: [Function: copyFile],
  copyFileSync: [Function: copyFileSync],
  createReadStream: [Function: createReadStream],
  createWriteStream: [Function: createWriteStream],
  exists: [Function: exists],
  existsSync: [Function: existsSync],
  fchown: [Function: fchown],
  fchownSync: [Function: fchownSync],
  fchmod: [Function: fchmod],
  fchmodSync: [Function: fchmodSync],
  fdatasync: [Function: fdatasync],
  fdatasyncSync: [Function: fdatasyncSync],
  fstat: [Function: fstat],
  fstatSync: [Function: fstatSync],
  fsync: [Function: fsync],
  fsyncSync: [Function: fsyncSync],
  ftruncate: [Function: ftruncate],
  ftruncateSync: [Function: 

In [267]:
lexer_str = fs.readFileSync('lexems.ho').toString()

'fin_phrase := \\.\r\n' +
  'fin_expression := \\,\r\n' +
  'left_par := \\(\r\n' +
  'right_par := \\)\r\n' +
  'liaison := \\+\r\n' +
  'appartient := _\r\n' +
  'deco_fort := \\*\\*\r\n' +
  'deco_plusieurs := \\*\r\n' +
  'mot := [A-Za-z]+\r\n' +
  'id := :\\d+\r\n' +
  'ref := @\\d+\r\n' +
  'present_droit := ->\r\n' +
  'passe_droit := \\\\>\r\n' +
  'futur_droit := />\r\n' +
  'present_gauche := <-\r\n' +
  'passe_gauche := </\r\n' +
  'futur_gauche := <\\\\'

### Parse the lexems

Create the tokenizer which each regex

In [276]:
tokenizer = {}
lexems = lexer_str.split('\r\n')
lexems.forEach(l => { 
    let parsed = l.trim().split(':=')
    regex = "^" + parsed[1].trim()
    tokenizer[parsed[0].trim()] = new RegExp(regex)
})

In [277]:
function toTokens(phrase){
    let tokens = []
    for(cursor = 0; cursor < phrase.length; ++cursor){
        // ignore spaces
        if(phrase[cursor] === " "){
            continue
        }
        let found = false
        for (let el of Object.entries(tokenizer)){
            let regex = el[1]
            let tokenName = el[0]
            let res = phrase.slice(cursor).match(regex)
            if(res){
                console.log("wow we found a " + tokenName + " with value " + res)
                tokens.push({type: tokenName, value: res[0]})
                found = true
                cursor += res[0].length - 1
                break
            }
        }
        if(!found){
            console.log("arf, could not tokenize character " + phrase[cursor])
        }
    }
    return tokens
}

In [269]:
input = "(Faisan + Multicolore):1 \\> Etre -> Oiseau, *Chasseurs* -> (Chasser + Beaucoup) -> @1"

'(Faisan + Multicolore):1 \\> Etre -> Oiseau, *Chasseurs* -> (Chasser + Beaucoup) -> @1'

In [274]:
tokens = toTokens(input)

wow we found a left_par with value (
wow we found a mot with value Faisan
wow we found a liaison with value +
wow we found a mot with value Multicolore
wow we found a right_par with value )
wow we found a id with value :1
wow we found a passe_droit with value \>
wow we found a mot with value Etre
wow we found a present_droit with value ->
wow we found a mot with value Oiseau
wow we found a fin_expression with value ,
wow we found a deco_plusieurs with value *
wow we found a mot with value Chasseurs
wow we found a deco_plusieurs with value *
wow we found a present_droit with value ->
wow we found a left_par with value (
wow we found a mot with value Chasser
wow we found a liaison with value +
wow we found a mot with value Beaucoup
wow we found a right_par with value )
wow we found a present_droit with value ->
wow we found a ref with value @1


[
  { type: 'left_par', value: '(' },
  { type: 'mot', value: 'Faisan' },
  { type: 'liaison', value: '+' },
  { type: 'mot', value: 'Multicolore' },
  { type: 'right_par', value: ')' },
  { type: 'id', value: ':1' },
  { type: 'passe_droit', value: '\\>' },
  { type: 'mot', value: 'Etre' },
  { type: 'present_droit', value: '->' },
  { type: 'mot', value: 'Oiseau' },
  { type: 'fin_expression', value: ',' },
  { type: 'deco_plusieurs', value: '*' },
  { type: 'mot', value: 'Chasseurs' },
  { type: 'deco_plusieurs', value: '*' },
  { type: 'present_droit', value: '->' },
  { type: 'left_par', value: '(' },
  { type: 'mot', value: 'Chasser' },
  { type: 'liaison', value: '+' },
  { type: 'mot', value: 'Beaucoup' },
  { type: 'right_par', value: ')' },
  { type: 'present_droit', value: '->' },
  { type: 'ref', value: '@1' }
]

In [275]:
grammar_str = fs.readFileSync('grammar.ho').toString()

'texte      := phrase fin_phrase | texte ;\r\n' +
  '\r\n' +
  'phrase     := expression fin_expression \r\n' +
  '           |  expression \r\n' +
  '           |  phrase ;\r\n' +
  '\r\n' +
  'expression := groupe action groupe action groupe \r\n' +
  '           |  groupe ;\r\n' +
  '\r\n' +
  'groupe      := groupe_mot | groupe_mot id ;\r\n' +
  '\r\n' +
  'groupe_mot := noeud_vide |\r\n' +
  '              noeud_mot |\r\n' +
  '              left_par noeud_mot right_par |\r\n' +
  '              left_par noeud_mot liaison_groupe_mot right_par ;\r\n' +
  '            \r\n' +
  'liaison_groupe_mot := liaison noeud_mot | liaison_groupe_mot ;\r\n' +
  '\r\n' +
  'link_mot   := liaison | appartient ;\r\n' +
  '\r\n' +
  'noeud_mot  := deco_mot mot deco_mot | mot | ref ;\r\n' +
  '\r\n' +
  'noeud_vide := left_par right_par ;\r\n' +
  '\r\n' +
  'deco_mot   := deco_plusieurs | deco_fort ;\r\n' +
  '\r\n' +
  'action     := present_gauche | present_droit | passe_gauche | passe_droit | fu

In [282]:
grammar = {}
rules = grammar_str.split(';')
console.log(rules)
rules.pop()
rules.forEach(r => { 
    let parsed = r.trim().split(':=')
    let tokens = parsed[1].split('|').map(tok => tok.trim())
    console.log(tokens)
})

[
  'texte      := phrase fin_phrase | texte ',
  '\r\n' +
    '\r\n' +
    'phrase     := expression fin_expression \r\n' +
    '           |  expression \r\n' +
    '           |  phrase ',
  '\r\n' +
    '\r\n' +
    'expression := groupe action groupe action groupe \r\n' +
    '           |  groupe ',
  '\r\n\r\ngroupe      := groupe_mot | groupe_mot id ',
  '\r\n' +
    '\r\n' +
    'groupe_mot := noeud_vide |\r\n' +
    '              noeud_mot |\r\n' +
    '              left_par noeud_mot right_par |\r\n' +
    '              left_par noeud_mot liaison_groupe_mot right_par ',
  '\r\n' +
    '            \r\n' +
    'liaison_groupe_mot := liaison noeud_mot | liaison_groupe_mot ',
  '\r\n\r\nlink_mot   := liaison | appartient ',
  '\r\n\r\nnoeud_mot  := deco_mot mot deco_mot | mot | ref ',
  '\r\n\r\nnoeud_vide := left_par right_par ',
  '\r\n\r\ndeco_mot   := deco_plusieurs | deco_fort ',
  '\r\n' +
    '\r\n' +
    'action     := present_gauche | present_droit | passe_gauche | 

TypeError: Cannot read property 'split' of undefined