Permalink
Browse files

moar grammars for the Zoo

git-svn-id: https://slps.svn.sourceforge.net/svnroot/slps@802 ab42f6e0-554d-0410-b580-99e487e6eeb2
  • Loading branch information...
grammarware committed Jun 28, 2010
1 parent 3ce6f62 commit 3207596e1a9cdd59277be2a702093eab2b4f8778
View
@@ -1,4 +1,4 @@
Copyright (c) 2008, Contributors to the SLPS project
Copyright (c) 2008–2010, Contributors to the SLPS project
All rights reserved.
Redistribution and use in source and binary forms, with or without
View
@@ -22,7 +22,8 @@ elif [ ! -r $4 ]; then
echo "Oops: $1 not found or not readable."
exit 1
else
python ${SLPS}/topics/extraction/bnf2bgf/pdf2lll.py $1 intermediate.lll $3 $4
python ${SLPS}/topics/extraction/bnf2bgf/pdf2lll.py $1 intermediate.lll $3 $4
python ${SLPS}/topics/extraction/bnf2bgf/lll2bgf.py intermediate.lll $2
#rm -f intermediate.lll
#rm -f intermediate.lll
fi
@@ -125,6 +125,7 @@ def serialiseExpression(ts,debug):
def serialiseFormula(name,tokens):
# Return one <bgf:production> element as a string: `name` is placed inside
# <nonterminal>...</nonterminal> and is followed by whatever
# serialiseExpression(tokens,True) returns.
# NOTE(review): `name` is concatenated as-is; no XML escaping happens here.
# Useful yet annoying
#print 'Processing',name,'...'
# Replace BREAKPOINT with the name of a nonterminal you like to debug
# (the token list of that nonterminal is then printed before serialisation).
if name=='BREAKPOINT':
print tokens
return '<bgf:production><nonterminal>'+name+'</nonterminal>'+serialiseExpression(tokens,True)+'</bgf:production>'
@@ -160,7 +160,7 @@ def readLines(f):
lines.append(line)
pdf.close()
def readGrammar(f):
def readGrammar(lines):
print 'Reading the PDF contents...'
for line in lines:
#print line
@@ -1,8 +1,11 @@
# Each target below runs the target of the same name in every grammar
# subdirectory (iso-14882-1998 and iso-n2723), using that directory's own
# Makefile.
build:
cd iso-14882-1998 && make build
cd iso-n2723 && make build
clean:
cd iso-14882-1998 && make clean
cd iso-n2723 && make clean
test:
cd iso-14882-1998 && make test
cd iso-n2723 && make test
@@ -0,0 +1,15 @@
# build: turn iso-is-annex.txt into iso-is-grammar.bgf and iso-is.bnf.
build:
# Keep only the lines that are not exactly 'c ' (-x: whole-line match, -v: invert).
grep -xv 'c ' iso-is-annex.txt > iso-is-annex-no-c.txt
# Replace every 'hand ler' with 'handler', editing the filtered file in place.
perl -pi -w -e 's/hand ler/handler/g;' iso-is-annex-no-c.txt
# Arguments: input text, output BGF, then banned-lines.lst and keywords.lst.
../../../../shared/tools/pdf2bgf iso-is-annex-no-c.txt iso-output.bgf banned-lines.lst keywords.lst
# NOTE(review): presumably validates iso-output.bgf as BGF XML - confirm.
../../../../shared/tools/checkxml bgf iso-output.bgf
# NOTE(review): presumably a normalisation step; reads iso-output.bgf and writes iso-is-grammar.bgf.
../../../../shared/tools/normbgf iso-output.bgf iso-is-grammar.bgf
../../../../shared/tools/bgf2bnf iso-is-grammar.bgf iso-is.bnf
# iso-output.bgf is only an intermediate file; remove it once the outputs exist.
rm -f iso-output.bgf
# clean: delete everything build writes, plus intermediate.lll.
# NOTE(review): intermediate.lll is presumably left behind by pdf2bgf - confirm.
clean:
rm -f iso-output.bgf iso-is-grammar.bgf iso-is.bnf iso-is-annex-no-c.txt intermediate.lll
# test: rebuild, then run gdt on the fresh grammar and extracted-grammar.bgf.
# NOTE(review): extracted-grammar.bgf is not produced by this Makefile; presumably a checked-in baseline.
test:
make build
../../../../shared/tools/gdt iso-is-grammar.bgf extracted-grammar.bgf
@@ -0,0 +1,21 @@
Doc No: N2723=08-0233
Date: 2008-08-25
Reply to: Pete Becker
Roundhouse Consulting, Ltd.
pete@versatilecoding.com
Working Draft, Standard for Programming
Language C++
Note: this is an early draft. It’s known to be incomplet and incorrekt, and it has lots of ba d
format ting.
© ISO/IEC
Fixes that were necessary for extraction:
'handler' is for some reason typeset as 'hand ler'
fixed by a perl regexp inline replace
'(c)' was copy-pasted as two lines
'c ' lines are removed by a negative grep
@@ -0,0 +1,3 @@
ISO/IEC N2723
§ A.
gram.
Oops, something went wrong.

0 comments on commit 3207596

Please sign in to comment.