### Read Lexems file


In [1]:
fs = require('fs')

{
  appendFile: [Function: appendFile],
  appendFileSync: [Function: appendFileSync],
  access: [Function: access],
  accessSync: [Function: accessSync],
  chown: [Function: chown],
  chownSync: [Function: chownSync],
  chmod: [Function: chmod],
  chmodSync: [Function: chmodSync],
  close: [Function: close],
  closeSync: [Function: closeSync],
  copyFile: [Function: copyFile],
  copyFileSync: [Function: copyFileSync],
  createReadStream: [Function: createReadStream],
  createWriteStream: [Function: createWriteStream],
  exists: [Function: exists],
  existsSync: [Function: existsSync],
  fchown: [Function: fchown],
  fchownSync: [Function: fchownSync],
  fchmod: [Function: fchmod],
  fchmodSync: [Function: fchmodSync],
  fdatasync: [Function: fdatasync],
  fdatasyncSync: [Function: fdatasyncSync],
  fstat: [Function: fstat],
  fstatSync: [Function: fstatSync],
  fsync: [Function: fsync],
  fsyncSync: [Function: fsyncSync],
  ftruncate: [Function: ftruncate],
  ftruncateSync: [Function: 

In [2]:
lexer_str = fs.readFileSync('lexems.ho').toString()

'fin_phrase := \\.\r\n' +
  'fin_expression := \\,\r\n' +
  'left_par := \\(\r\n' +
  'right_par := \\)\r\n' +
  'liaison := \\+\r\n' +
  'appartient := _\r\n' +
  'deco_fort := \\*\\*\r\n' +
  'deco_plusieurs := \\*\r\n' +
  'mot := [A-Za-z]+\r\n' +
  'id := :\\d+\r\n' +
  'ref := @\\d+\r\n' +
  'present_droit := ->\r\n' +
  'passe_droit := \\\\>\r\n' +
  'futur_droit := />\r\n' +
  'present_gauche := <-\r\n' +
  'passe_gauche := </\r\n' +
  'futur_gauche := <\\\\'

### Parse the lexems

Create the tokenizer which each regex

In [3]:
tokenizer = {}
lexems = lexer_str.split('\r\n')
lexems.forEach(l => { 
    let parsed = l.trim().split(':=')
    regex = "^" + parsed[1].trim()
    tokenizer[parsed[0].trim()] = new RegExp(regex)
})

In [4]:
function toTokens(phrase){
    let tokens = []
    for(cursor = 0; cursor < phrase.length; ++cursor){
        // ignore spaces
        if(phrase[cursor] === " "){
            continue
        }
        let found = false
        for (let el of Object.entries(tokenizer)){
            let regex = el[1]
            let tokenName = el[0]
            let res = phrase.slice(cursor).match(regex)
            if(res){
                console.log("wow we found a " + tokenName + " with value " + res)
                tokens.push({type: tokenName, value: res[0]})
                found = true
                cursor += res[0].length - 1
                break
            }
        }
        if(!found){
            console.log("arf, could not tokenize character " + phrase[cursor])
        }
    }
    return tokens
}

In [5]:
input = "(Faisan + Multicolore):1 \\> Etre -> Oiseau, *Chasseurs* -> (Chasser + Beaucoup) -> @1."

'(Faisan + Multicolore):1 \\> Etre -> Oiseau, *Chasseurs* -> (Chasser + Beaucoup) -> @1.'

In [6]:
tokens = toTokens(input)

wow we found a left_par with value (
wow we found a mot with value Faisan
wow we found a liaison with value +
wow we found a mot with value Multicolore
wow we found a right_par with value )
wow we found a id with value :1
wow we found a passe_droit with value \>
wow we found a mot with value Etre
wow we found a present_droit with value ->
wow we found a mot with value Oiseau
wow we found a fin_expression with value ,
wow we found a deco_plusieurs with value *
wow we found a mot with value Chasseurs
wow we found a deco_plusieurs with value *
wow we found a present_droit with value ->
wow we found a left_par with value (
wow we found a mot with value Chasser
wow we found a liaison with value +
wow we found a mot with value Beaucoup
wow we found a right_par with value )
wow we found a present_droit with value ->
wow we found a ref with value @1
wow we found a fin_phrase with value .


[
  { type: 'left_par', value: '(' },
  { type: 'mot', value: 'Faisan' },
  { type: 'liaison', value: '+' },
  { type: 'mot', value: 'Multicolore' },
  { type: 'right_par', value: ')' },
  { type: 'id', value: ':1' },
  { type: 'passe_droit', value: '\\>' },
  { type: 'mot', value: 'Etre' },
  { type: 'present_droit', value: '->' },
  { type: 'mot', value: 'Oiseau' },
  { type: 'fin_expression', value: ',' },
  { type: 'deco_plusieurs', value: '*' },
  { type: 'mot', value: 'Chasseurs' },
  { type: 'deco_plusieurs', value: '*' },
  { type: 'present_droit', value: '->' },
  { type: 'left_par', value: '(' },
  { type: 'mot', value: 'Chasser' },
  { type: 'liaison', value: '+' },
  { type: 'mot', value: 'Beaucoup' },
  { type: 'right_par', value: ')' },
  { type: 'present_droit', value: '->' },
  { type: 'ref', value: '@1' },
  { type: 'fin_phrase', value: '.' }
]

In [7]:
grammar_str = fs.readFileSync('grammar.ho').toString()

'texte      := phrase fin_phrase | phrase fin_phrase texte;\r\n' +
  '\r\n' +
  'phrase     := expression fin_expression \r\n' +
  '           |  expression \r\n' +
  '           |  expression fin_expression phrase ;\r\n' +
  '\r\n' +
  'expression := groupe action groupe action groupe \r\n' +
  '           |  groupe ;\r\n' +
  '\r\n' +
  'groupe      := groupe_mot | groupe_mot id ;\r\n' +
  '\r\n' +
  'groupe_mot := noeud_vide |\r\n' +
  '              noeud_mot |\r\n' +
  '              left_par noeud_mot right_par |\r\n' +
  '              left_par noeud_mot liaison_groupe_mot right_par ;\r\n' +
  '            \r\n' +
  'liaison_groupe_mot := liaison noeud_mot | liaison noeud_mot liaison_groupe_mot ;\r\n' +
  '\r\n' +
  'link_mot   := liaison | appartient ;\r\n' +
  '\r\n' +
  'noeud_mot  := deco_mot mot deco_mot | mot | ref ;\r\n' +
  '\r\n' +
  'noeud_vide := left_par right_par ;\r\n' +
  '\r\n' +
  'deco_mot   := deco_plusieurs | deco_fort ;\r\n' +
  '\r\n' +
  'action     := pre

In [8]:
grammar = {}
rules = grammar_str.split(';')
rules.pop()
rules.forEach(r => { 
    let parsed = r.split(':=')
    let tokens = parsed[1].split('|').map(tok => tok.trim().split(' '))
    grammar[parsed[0].trim()] = tokens
})
grammar

{
  texte: [ [ 'phrase', 'fin_phrase' ], [ 'phrase', 'fin_phrase', 'texte' ] ],
  phrase: [
    [ 'expression', 'fin_expression' ],
    [ 'expression' ],
    [ 'expression', 'fin_expression', 'phrase' ]
  ],
  expression: [
    [ 'groupe', 'action', 'groupe', 'action', 'groupe' ],
    [ 'groupe' ]
  ],
  groupe: [ [ 'groupe_mot' ], [ 'groupe_mot', 'id' ] ],
  groupe_mot: [
    [ 'noeud_vide' ],
    [ 'noeud_mot' ],
    [ 'left_par', 'noeud_mot', 'right_par' ],
    [ 'left_par', 'noeud_mot', 'liaison_groupe_mot', 'right_par' ]
  ],
  liaison_groupe_mot: [
    [ 'liaison', 'noeud_mot' ],
    [ 'liaison', 'noeud_mot', 'liaison_groupe_mot' ]
  ],
  link_mot: [ [ 'liaison' ], [ 'appartient' ] ],
  noeud_mot: [ [ 'deco_mot', 'mot', 'deco_mot' ], [ 'mot' ], [ 'ref' ] ],
  noeud_vide: [ [ 'left_par', 'right_par' ] ],
  deco_mot: [ [ 'deco_plusieurs' ], [ 'deco_fort' ] ],
  action: [
    [ 'present_gauche' ],
    [ 'present_droit' ],
    [ 'passe_gauche' ],
    [ 'passe_droit' ],
    [ 'futur_g

on prend un token
on prend toutes les regles qui commencent par ce token
pour chaque regle, tant qu'elles peuvent consommer des token on continue à prendre des tokens
( mot + mot )

In [9]:
function parse(tokens, rules){
    console.log("RULES")
    console.log(tokens, rules)
    if(tokens.length === 0){
        console.log("no more tokens")
        return
    }
    for(rule in rules){
        filter_rules = rules[rule].filter(r => r[0] == tokens[0].type)
        if(filter_rules.length > 0){
            console.log(tokens[0], filter_rules)
            newRules = {}
            newRules[rule] = filter_rules
            parse(tokens.slice(1, ), newRules)
        }
    }
}


In [10]:
console.log(tokens.slice(0,10).map(e => e.type))

[
  'left_par',
  'mot',
  'liaison',
  'mot',
  'right_par',
  'id',
  'passe_droit',
  'mot',
  'present_droit',
  'mot'
]


In [11]:
var input1 = ["left_par", "mot", "right_par"]
var input2 = ["left_par", "deco_fort", "mot", "deco_plusieurs", "right_par"]

var rules = {
    "groupe_mot": [
        "noeud_vide",
        "noeud_mot",
        ["left_par", "noeud_mot", "right_par"]
    ],
    "noeud_vide": [
        ["left_par", "right_par"]
    ],
    "noeud_mot": [
        ["deco_mot", "mot", "deco_mot"],
        "mot",
        "ref"
    ],
    "deco_mot": [
        "deco_plusieurs",
        "deco_fort"
    ]
}

function isToken(tokens, token, rules) {
    console.log("test token: " + token + " on: " + tokens + "\n")
    // console.log(token)
    // si jamais on cherche à vérifier plusieurs type de token
    if (Array.isArray(token)) {
        var total = 0
        // alors pour chacun des types
        for (var i = 0; i < token.length; i++) {
            // on relance la fonction avec un subset de tokens, le token à vérifier
            // on récupère la valeur de count de ces branches et on incrémente le total avec
            var count = isToken(tokens.slice(total), token[i], rules)
            if(count == 0) {
                return 0
            }
            total += count
        }
        return total
    }
    // si jamais il y a un seul token à vérifier et que c'est celui de token[0] on renvoie 1
    // i.e si c'est un symbole terminal
    if (tokens[0] == token){
        return 1
    }
    
    // sinon si il y a une regle associée au token que l'on cherche à trouver
    if (rules[token] !== undefined) {
        var max = 0
        rules[token].forEach(t => {
            var count = isToken(tokens, t, rules)
            if (count > max) {
                max = count
            }
        });
        return max
    }
    return 0

}

console.log(isToken(input2, "groupe_mot", rules))

test token: groupe_mot on: left_par,deco_fort,mot,deco_plusieurs,right_par

test token: noeud_vide on: left_par,deco_fort,mot,deco_plusieurs,right_par

test token: left_par,right_par on: left_par,deco_fort,mot,deco_plusieurs,right_par

test token: left_par on: left_par,deco_fort,mot,deco_plusieurs,right_par

test token: right_par on: deco_fort,mot,deco_plusieurs,right_par

test token: noeud_mot on: left_par,deco_fort,mot,deco_plusieurs,right_par

test token: deco_mot,mot,deco_mot on: left_par,deco_fort,mot,deco_plusieurs,right_par

test token: deco_mot on: left_par,deco_fort,mot,deco_plusieurs,right_par

test token: deco_plusieurs on: left_par,deco_fort,mot,deco_plusieurs,right_par

test token: deco_fort on: left_par,deco_fort,mot,deco_plusieurs,right_par

test token: mot on: left_par,deco_fort,mot,deco_plusieurs,right_par

test token: ref on: left_par,deco_fort,mot,deco_plusieurs,right_par

test token: left_par,noeud_mot,right_par on: left_par,deco_fort,mot,deco_plusieurs,right_par

t

In [13]:
isToken(["mot", "fin_phrase"], "texte", grammar)

test token: texte on: mot,fin_phrase

test token: phrase,fin_phrase on: mot,fin_phrase

test token: phrase on: mot,fin_phrase

test token: expression,fin_expression on: mot,fin_phrase

test token: expression on: mot,fin_phrase

test token: groupe,action,groupe,action,groupe on: mot,fin_phrase

test token: groupe on: mot,fin_phrase

test token: groupe_mot on: mot,fin_phrase

test token: groupe_mot on: mot,fin_phrase

test token: noeud_vide on: mot,fin_phrase

test token: noeud_vide on: mot,fin_phrase

test token: left_par,right_par on: mot,fin_phrase

test token: left_par on: mot,fin_phrase

test token: noeud_mot on: mot,fin_phrase

test token: noeud_mot on: mot,fin_phrase

test token: deco_mot,mot,deco_mot on: mot,fin_phrase

test token: deco_mot on: mot,fin_phrase

test token: deco_plusieurs on: mot,fin_phrase

test token: deco_plusieurs on: mot,fin_phrase

test token: deco_fort on: mot,fin_phrase

test token: deco_fort on: mot,fin_phrase

test token: mot on: mot,fin_phrase

test toke

test token: deco_mot on: mot,fin_phrase

test token: deco_plusieurs on: mot,fin_phrase

test token: deco_plusieurs on: mot,fin_phrase

test token: deco_fort on: mot,fin_phrase

test token: deco_fort on: mot,fin_phrase

test token: mot on: mot,fin_phrase

test token: mot on: mot,fin_phrase

test token: ref on: mot,fin_phrase

test token: ref on: mot,fin_phrase

test token: left_par,noeud_mot,right_par on: mot,fin_phrase

test token: left_par on: mot,fin_phrase

test token: left_par,noeud_mot,liaison_groupe_mot,right_par on: mot,fin_phrase

test token: left_par on: mot,fin_phrase

test token: groupe_mot,id on: mot,fin_phrase

test token: groupe_mot on: mot,fin_phrase

test token: noeud_vide on: mot,fin_phrase

test token: noeud_vide on: mot,fin_phrase

test token: left_par,right_par on: mot,fin_phrase

test token: left_par on: mot,fin_phrase

test token: noeud_mot on: mot,fin_phrase

test token: noeud_mot on: mot,fin_phrase

test token: deco_mot,mot,deco_mot on: mot,fin_phrase

test toke

test token: deco_mot on: mot,fin_phrase

test token: deco_plusieurs on: mot,fin_phrase

test token: deco_plusieurs on: mot,fin_phrase

test token: deco_fort on: mot,fin_phrase

test token: deco_fort on: mot,fin_phrase

test token: mot on: mot,fin_phrase

test token: mot on: mot,fin_phrase

test token: ref on: mot,fin_phrase

test token: ref on: mot,fin_phrase

test token: left_par,noeud_mot,right_par on: mot,fin_phrase

test token: left_par on: mot,fin_phrase

test token: left_par,noeud_mot,liaison_groupe_mot,right_par on: mot,fin_phrase

test token: left_par on: mot,fin_phrase

test token: id on: fin_phrase

test token: action on: fin_phrase

test token: present_gauche on: fin_phrase

test token: present_gauche on: fin_phrase

test token: present_droit on: fin_phrase

test token: present_droit on: fin_phrase

test token: passe_gauche on: fin_phrase

test token: passe_gauche on: fin_phrase

test token: passe_droit on: fin_phrase

test token: passe_droit on: fin_phrase

test token: futu

test token: groupe,action,groupe,action,groupe on: mot,fin_phrase

test token: groupe on: mot,fin_phrase

test token: groupe_mot on: mot,fin_phrase

test token: groupe_mot on: mot,fin_phrase

test token: noeud_vide on: mot,fin_phrase

test token: noeud_vide on: mot,fin_phrase

test token: left_par,right_par on: mot,fin_phrase

test token: left_par on: mot,fin_phrase

test token: noeud_mot on: mot,fin_phrase

test token: noeud_mot on: mot,fin_phrase

test token: deco_mot,mot,deco_mot on: mot,fin_phrase

test token: deco_mot on: mot,fin_phrase

test token: deco_plusieurs on: mot,fin_phrase

test token: deco_plusieurs on: mot,fin_phrase

test token: deco_fort on: mot,fin_phrase

test token: deco_fort on: mot,fin_phrase

test token: mot on: mot,fin_phrase

test token: mot on: mot,fin_phrase

test token: ref on: mot,fin_phrase

test token: ref on: mot,fin_phrase

test token: left_par,noeud_mot,right_par on: mot,fin_phrase

test token: left_par on: mot,fin_phrase

test token: left_par,noeud_

test token: deco_mot,mot,deco_mot on: 

test token: deco_mot on: 

test token: deco_plusieurs on: 

test token: deco_plusieurs on: 

test token: deco_fort on: 

test token: deco_fort on: 

test token: mot on: 

test token: mot on: 

test token: ref on: 

test token: ref on: 

test token: left_par,noeud_mot,right_par on: 

test token: left_par on: 

test token: left_par,noeud_mot,liaison_groupe_mot,right_par on: 

test token: left_par on: 

test token: groupe_mot,id on: 

test token: groupe_mot on: 

test token: noeud_vide on: 

test token: noeud_vide on: 

test token: left_par,right_par on: 

test token: left_par on: 

test token: noeud_mot on: 

test token: noeud_mot on: 

test token: deco_mot,mot,deco_mot on: 

test token: deco_mot on: 

test token: deco_plusieurs on: 

test token: deco_plusieurs on: 

test token: deco_fort on: 

test token: deco_fort on: 

test token: mot on: 

test token: mot on: 

test token: ref on: 

test token: ref on: 

test token: left_par,noeud_mot,right_par

test token: mot on: 

test token: mot on: 

test token: ref on: 

test token: ref on: 

test token: left_par,noeud_mot,right_par on: 

test token: left_par on: 

test token: left_par,noeud_mot,liaison_groupe_mot,right_par on: 

test token: left_par on: 

test token: expression on: 

test token: expression on: 

test token: groupe,action,groupe,action,groupe on: 

test token: groupe on: 

test token: groupe_mot on: 

test token: groupe_mot on: 

test token: noeud_vide on: 

test token: noeud_vide on: 

test token: left_par,right_par on: 

test token: left_par on: 

test token: noeud_mot on: 

test token: noeud_mot on: 

test token: deco_mot,mot,deco_mot on: 

test token: deco_mot on: 

test token: deco_plusieurs on: 

test token: deco_plusieurs on: 

test token: deco_fort on: 

test token: deco_fort on: 

test token: mot on: 

test token: mot on: 

test token: ref on: 

test token: ref on: 

test token: left_par,noeud_mot,right_par on: 

test token: left_par on: 

test token: left_par,n

2

In [14]:
fetch("http://localhost:8000/grammar.txt", {mode: 'no-cors'})
    .then(res => res.text())
    .then(text => console.log(text))

ReferenceError: fetch is not defined