Permalink
Find file
Fetching contributors…
Cannot retrieve contributors at this time
128 lines (121 sloc) 6.51 KB
% This file was created with JabRef 2.6.
% Encoding: ISO8859_1
@inproceedings{almeida2010bigorna,
  author       = {Almeida, J. J. and Santos, A. and Simoes, A.},
  editor       = {{Calzolari (Conference Chair)}, Nicoletta and Choukri, Khalid and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios and Rosner, Mike and Tapias, Daniel},
  title        = {{Bigorna} -- A Toolkit for Orthography Migration Challenges},
  booktitle    = {Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10)},
  year         = {2010},
  month        = may,
  date         = {19-21},
  address      = {Valletta, Malta},
  publisher    = {European Language Resources Association (ELRA)},
  isbn         = {2-9517408-6-7},
  language     = {english},
  abstract     = {Languages are born, evolve and, eventually, die. During this evolution
    their spelling rules (and sometimes the syntactic and semantic ones)
    change, putting old documents out of use. In Portugal, a pair of
    political agreements with Brazil forced relevant changes on the way
    the Portuguese language is written. In this article we will detail
    these two Orthographic Agreements (one in the thirties and the other
    more recently, in the nineties), and the challenges present on the
    automatic migration of old documents spelling to their actual one.
    We will reveal Bigorna, a toolkit for the classification of language
    variants, their comparison and the conversion of texts in different
    language versions. These tools will be explained together with examples
    of migration issues. As Bigorna relies on a set of conversion rules
    we will also discuss how to infer conversion rules from a set of
    documents (texts with different ages). The document concludes with
    a brief evaluation on the conversion and classification tool results
    and their relevance in the current Portuguese language scenario.},
  file         = {1005_bigorna_lrec10.pdf},
  sourcefolder = {/home/andrefs/Projects/natura/dicionarios/bigorna/artigos}
}
@inproceedings{rocha2010agile,
  author    = {Rocha, A. and Santos, A. and Rocha, D. and Silva, H. and Mendes, J.
    and Freitas, J. and Coelho, M. and Regedor, M. and da Cruz, D.
    and Henriques, P. R.},
  title     = {{AGile}, a structured editor, analyzer, metric evaluator, and transformer
    for {Attribute Grammars}},
  booktitle = {INForum 2010, Braga, Portugal},
  year      = {2010},
  abstract  = {As edit, analyze, measure or transform attribute grammars by hand
    is an exhaustive task, it would be great if it could be automatized,
    specially for those who work in Language Engineering. However, currently
    there are no editors oriented to grammar development that cover all
    our needs. In this paper we describe the architecture and the development
    stages of AGile, a structured editor, analyzer, metric calculator
    and transformer for attribute grammars. It is intended, with this
    tool, to fill the existing gap. An AnTLR based attribute grammar
    syntax was used to define the input for this system. As soon as the
    user types the grammar, the input is parsed and kept in an intermediate
    structure in memory which holds the important information about the
    input grammar. This intermediate structure can be used to calculate
    all the metrics or to transform the input grammar. This system can
    be a valorous tool for those who need to improve the performance
    or functionalities of their language processor, speeding up the difficult
    task of defining and managing a language. Features like highlighting,
    automatic indentation, on-the-fly error detection, etc., also adds
    efficiency.},
  file      = {1009_agile_inforum10.pdf}
}
@mastersthesis{santos2011,
  author    = {Santos, A.},
  title     = {Contributions for building a {Corpora-Flow} system},
  school    = {Departamento de Inform{\'a}tica, Escola de Engenharia, Universidade
    do Minho},
  year      = {2011},
  month     = oct,
  file      = {:/home/andrefs/Projects/publications/1110_mei_master_thesis.pdf:PDF},
  owner     = {andrefs},
  timestamp = {2012.04.30}
}
@inproceedings{santos2011survey,
  author       = {Santos, A.},
  title        = {A survey on parallel corpora alignment},
  booktitle    = {MI-Star 2011, Braga, Portugal},
  year         = {2011},
  abstract     = {A parallel text is the set formed by a text and its translation (in
    which case it is called a bitext) or translations. Parallel text
    alignment is the task of identifying correspondences between blocks
    or tokens in each half of a bitext. Aligned parallel corpora are
    used in several different areas of linguistic and computational linguistics
    research. In this paper, a survey on parallel text alignment is presented:
    the historical background is provided, and the main methods are described.
    A list of relevant tools and projects is presented as well.},
  file         = {1102_corpalignment_mistar.pdf},
  sourcefolder = {/home/andrefs/UM/MEI/UCE15/Paper}
}
@inproceedings{santos2010bookcleaner,
  author       = {Santos, A. and Almeida, J. J.},
  title        = {{Text::Perfide::BookCleaner}, a {Perl} module to clean and normalize
    plain text books},
  booktitle    = {Congreso Anual de la Sociedad Espa{\~n}ola para el Procesamiento del
    Lenguaje Natural (SEPLN2011), Huelva, Spain},
  year         = {2011},
  abstract     = {This paper presents Text::Perfide::BookCleaner, an application to
    pre-process plain text books and clean them for any further arbitrary
    use, e.g., text alignment, format conversion or information retrieval.
    Cleaning tasks include removing page breaks, page numbers, headers
    and footers; finding section titles and boundaries; removing footnotes
    and normalizing paragraph notation and Unicode characters. The process
    is guided with the help of declarative objects such as ontologies
    and configuration files. A comparative evaluation of alignments with
    and without Text::Perfide::BookCleaner was performed, and the results
    and conclusions are presented.},
  file         = {1109_bookcleaner_sepln11.pdf},
  sourcefolder = {/home/andrefs/Projects/natura_docs/articles/SEPLN2011/bookcleaner}
}
@inproceedings{santos2012structural,
  author    = {Santos, Andr{\'e} and Almeida, Jos{\'e} Jo{\~a}o and Carvalho, Nuno},
  editor    = {{Calzolari (Conference Chair)}, Nicoletta and Choukri, Khalid and Declerck, Thierry and Do{\u{g}}an, Mehmet U{\u{g}}ur and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios},
  title     = {Structural alignment of plain text books},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  year      = {2012},
  month     = may,
  date      = {23-25},
  address   = {Istanbul, Turkey},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {978-2-9517408-7-7},
  language  = {english}
}