Skip to content

Commit

Permalink
Added wordlist support in cli for bag of words. Added NL test files. …
Browse files Browse the repository at this point in the history
…Fixed a few things regarding BOW management.

git-svn-id: https://tinyap.googlecode.com/svn/trunk@135 26adf5cf-fd31-0410-b5e7-cb6bf36da140
  • Loading branch information
damien.leroux committed Apr 7, 2011
1 parent 1257a1b commit e94deb1
Show file tree
Hide file tree
Showing 13 changed files with 73 additions and 14 deletions.
6 changes: 3 additions & 3 deletions src/lr/loop_test.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash
# Round-trip check for the grammar printer, run against the debug build.
# Dump 1 is the grammar printed directly; dumps 2 and 3 feed that output
# back through one and two further tinyap-debug passes (presumably "-i -"
# reads the grammar from stdin -- confirm against the CLI help).
./tinyap-debug -pg > loop_test_1.txt
./tinyap-debug -pg|./tinyap-debug -i - -pag -pg > loop_test_2.txt
./tinyap-debug -pg|./tinyap-debug -i - -pag -pg|./tinyap-debug -i - -pag -pg > loop_test_3.txt

# The three dumps must be pairwise identical; the exit status is that of the chain.
diff loop_test_1.txt loop_test_2.txt && diff loop_test_1.txt loop_test_3.txt && diff loop_test_2.txt loop_test_3.txt && echo "print-grammar parsed successfully into itself"
exit $?
2 changes: 1 addition & 1 deletion src/lr/lr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ namespace item {
ret = cached = new token::Str(x&&Car(x)?Value(Car(x)):"", x&&Cdr(x)?Value(Car(Cdr(x))):"");
} else if(tag==STR_BOW) {
ast_node_t x = Cdr(n);
cached = new token::Bow(Value(Car(x)), !!Cdr(x));
ret = cached = new token::Bow(Value(Car(x)), !!Cdr(x));
} else if(tag==STR_AddToBag) {
ret = cached = NULL;
} else if(tag==STR_Prefix) {
Expand Down
4 changes: 1 addition & 3 deletions src/lr/lr_grammar.h
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,6 @@ namespace grammar {
bool keep() const { return keep_; }
virtual std::pair<ast_node_t, unsigned int> recognize(const char* source, unsigned int offset, unsigned int size) const {
unsigned long slen = trie_match_prefix(find(tag_), source+offset);
/*match_bow(pda, tag_);*/
if(slen>0) {
if(!keep_) {
return std::pair<ast_node_t, unsigned int>(PRODUCTION_OK_BUT_EMPTY, offset+slen);
Expand All @@ -488,10 +487,9 @@ namespace grammar {
}

/* Return the trie stored in `all` under `tag`, creating a new one with
 * trie_new() the first time the tag is requested. */
static trie_t find(const char*tag) {
	/* Bind to the slot itself so a newly created trie is written back
	 * into `all` without a second lookup/assignment. */
	trie_t& slot = all[tag];
	if(!slot) {
		slot = trie_new();
	}
	return slot;
}
Expand Down
14 changes: 14 additions & 0 deletions src/lr/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "config.h"
#include "tinyap.h"
#include "ast.h"
#include <fstream>

extern "C" {
#include "tinyape.h"
Expand Down Expand Up @@ -92,6 +93,19 @@ int do_args(int argc,char*argv[]) {
/*} else {*/
/*fprintf(stderr,"parse error at line %i, column %i\n%s\n",tinyap_get_error_row(parser),tinyap_get_error_col(parser),tinyap_get_error(parser));*/
}
} else if(cmp_param(2,"--wordlist","-wl")) {
trie_t bow;
i+=1;
const char* tag = argv[i];
bow = tinyap_get_bow(tag);
i+=1;
std::ifstream wl(argv[i]);
while(!wl.eof()) {
std::string word;
wl >> word;
trie_insert(bow, word.c_str());
std::clog << '~' << tag << '~' << ' ' << word << std::endl;
}
} else if(cmp_param(0,"--parse","-p")) {
tinyap_parse(parser, false);
if(tinyap_parsed_ok(parser)&&tinyap_get_output(parser)) {
Expand Down
5 changes: 5 additions & 0 deletions src/lr/nl/det.wl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
a
an
the
some
many
12 changes: 12 additions & 0 deletions src/lr/nl/n.wl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
I
you
he
she
we
us
him
her
they
man
park
telescope
8 changes: 8 additions & 0 deletions src/lr/nl/nl.gram
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
_start = sentence.
sentence ::= n_p v_p.
sentence ::= sentence p_p.
n_p ::= ~n!~.
n_p ::= ~det~ ~n!~.
n_p ::= n_p p_p.
p_p ::= ~prep!~ n_p.
v_p ::= ~v!~ n_p.
2 changes: 2 additions & 0 deletions src/lr/nl/prep.wl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
in
with
1 change: 1 addition & 0 deletions src/lr/nl/test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
I saw a man in the park with a telescope
2 changes: 2 additions & 0 deletions src/lr/nl/v.wl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
see
saw
24 changes: 20 additions & 4 deletions src/lr/tinyap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ struct _tinyap_t {
/*parse_context_t context;*/
lr::automaton* A;
grammar::Grammar* G;

ext::hash_map<const char*, trie_t> bows;

char*grammar_source;
ast_node_t grammar;
Expand All @@ -66,6 +68,14 @@ struct _tinyap_t {
float parse_time;

int error;

/* Default constructor: every listed scalar/pointer member is initialised
 * to 0 and `bows` is default-constructed. */
_tinyap_t()
	: A(0),
	  G(0),
	  bows(),
	  grammar_source(0),
	  grammar(0),
	  start(0),
	  output(0),
	  ws(0),
	  ws_source(0),
	  flags(0),
	  source_file(0),
	  source_buffer(0),
	  source_buffer_sz(0),
	  parse_time(0),
	  error(0)
{
}
};

int tinyap_verbose=0;
Expand All @@ -81,6 +91,11 @@ void node_pool_init();
void node_pool_term();


/* Look up the bag-of-words trie for `tag` by forwarding to
 * grammar::item::token::Bow::find(). */
trie_t tinyap_get_bow(const char* tag) {
	trie_t bag = grammar::item::token::Bow::find(tag);
	return bag;
}


/* Store `v` in the global tinyap_verbose flag. */
void tinyap_set_verbose(int v)
{
	tinyap_verbose = v;
}
Expand Down Expand Up @@ -154,14 +169,15 @@ void tinyap_delete(tinyap_t t) {
if(t->source_file) free(t->source_file);
if(t->source_buffer) free(t->source_buffer);

free(t);
/*free(t);*/
delete t;
// printf("after tinyap_delete : %li nodes (%i alloc'd so far)\n",node_pool_size(),_node_alloc_count);
}


tinyap_t tinyap_new() {
tinyap_t ret=(tinyap_t)malloc(sizeof(struct _tinyap_t));
memset(ret,0,sizeof(struct _tinyap_t));
tinyap_t ret=new _tinyap_t();
/*tinyap_t ret=(tinyap_t)malloc(sizeof(struct _tinyap_t));*/
/*memset(ret,0,sizeof(struct _tinyap_t));*/
tinyap_set_grammar(ret,"short");
ret->flags=0;
/*init_pilot_manager();*/
Expand Down
5 changes: 3 additions & 2 deletions src/lr/tinyap.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
/*!
* \mainpage tinyap
*
* This is not yet another parser.
* This is not yet another parser... generator!
*
* \section sec_intro Introduction
*
Expand Down Expand Up @@ -58,6 +58,7 @@ extern "C" {
#endif

#include "bootstrap.h"
#include "trie.h"

/*! \weakgroup api_parser Parser API */
/*@{*/
Expand Down Expand Up @@ -109,7 +110,7 @@ extern "C" {
/*! \brief set buffer as text input source */
void tinyap_set_source_buffer(tinyap_t,const char*,const unsigned int);

//void
trie_t tinyap_get_bow(const char*);

/*! \brief perform parsing of configured source with configured grammar. If the passed flag is true, a full parse is performed, otherwise a simple parse with preference for shifting over reducing is performed.
* \return 1 if parsing was successful, 0 otherwise
Expand Down
2 changes: 1 addition & 1 deletion src/lr/trie.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Tinya(J)P : this is not yet another (Java) parser.
/* tinyap : this is not yet another parser.
* Copyright (C) 2007 Damien Leroux
*
* This program is free software; you can redistribute it and/or
Expand Down

0 comments on commit e94deb1

Please sign in to comment.