Permalink
Browse files

v1.8.2

  • Loading branch information...
1 parent 95d7f5c commit 3a0199d77656c5b68a887c857315556684528456 @blackwinter committed Mar 6, 2012
Showing with 29 additions and 11 deletions.
  1. +18 −0 ChangeLog
  2. +1 −1 README
  3. +2 −4 Rakefile
  4. +2 −0 TODO
  5. +1 −1 lib/lingo/version.rb
  6. +5 −5 lingo.gemspec
View
@@ -1,5 +1,23 @@
= Revision history for Lingo
+== 1.8.2 [2012-04-19]
+
+* Performance improvements regarding Attendee::VectorFilter's (as well as
+ Attendee::NonewordFilter's) memory usage; set <tt>sort: false</tt> in the config.
+* Added Attendee::Stemmer (implementing Porter's algorithm for suffix stripping).
+* Added progress reporting to Attendee::TextReader; set <tt>progress: true</tt>
+ in the config.
+* Added directory and glob processing to Attendee::TextReader (new options
+ +glob+ and +recursive+).
+* Renamed Attendee::TextReader's option +lir-record-pattern+ to +records+.
+* Fixed Attendee::Debugger to forward all objects so it can be inserted
+ between any two attendees in the config.
+* Fixed regression introduced in 1.8.0 where Lingo would not use an existing
+ compiled dictionary when the source file is not present.
+* Fixed "invalid byte sequence in UTF-8" on Windows for SDBM store.
+* Enabled pluggable (compiled) dictionaries and storage backends.
+* Extensive internal refactoring and cleanup. (Finished for now.)
+
== 1.8.1 [2012-02-19]
* Introduced alternative storage backends, mainly to circumvent SDBM's record
View
2 README
@@ -25,7 +25,7 @@
== VERSION
-This documentation refers to Lingo version 1.8.1
+This documentation refers to Lingo version 1.8.2
== DESCRIPTION
View
@@ -39,10 +39,8 @@ The main functions of Lingo are:
of word classes
EOT
extra_files: FileList[
- 'lingo.rb', 'lingo{,-call}.cfg', 'lingo.opt', 'doc/**/*',
- '{de,en}.lang', '{de,en}/{lingo-*,user-dic}.txt', 'txt/artikel{,-en}.txt',
- 'info/gpl-hdr.txt', 'info/*.png', 'lir.cfg', 'txt/lir.txt', 'porter/*',
- '{de,en}/test_*.txt'
+ 'lingo.rb', 'lingo{,-call}.cfg', 'lir.cfg', '{de,en}.lang',
+ '{de,en}/{lingo-*,user-dic,test_*}.txt', 'txt/{artikel{,-en},lir}.txt'
].to_a,
required_ruby_version: '>= 1.9',
dependencies: [['ruby-nuggets', '>= 0.8.5'], 'unicode', 'highline'],
View
2 TODO
@@ -1,5 +1,7 @@
= ToDo list for Lingo
+* Configuration parameter validation.
+* Replace regex-based tokenizer with a (Racc/Ragel/ANTLR-based?) lexer.
* Update and translate old documentation.
* Allow for handling of documents in various encodings, not just the one the
dictionaries are encoded in.
View
@@ -4,7 +4,7 @@ module Version
MAJOR = 1
MINOR = 8
- TINY = 1
+ TINY = 2
class << self
View
@@ -2,18 +2,18 @@
Gem::Specification.new do |s|
s.name = "lingo"
- s.version = "1.8.1"
+ s.version = "1.8.2"
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["John Vorhauer", "Jens Wille"]
- s.date = "2012-02-19"
+ s.date = "2012-04-19"
s.description = "Lingo is an open source indexing system for research and teachings.\nThe main functions of Lingo are:\n\n* identification of (i.e. reduction to) basic word form by means of\n dictionaries and suffix lists\n* algorithmic decomposition\n* dictionary-based synonymisation and identification of phrases\n* generic identification of phrases/word sequences based on patterns\n of word classes\n"
s.email = ["lingo@vorhauer.de", "jens.wille@uni-koeln.de"]
- s.executables = ["lingo", "lingoctl"]
+ s.executables = ["lingoctl", "lingo"]
s.extra_rdoc_files = ["README", "COPYING", "ChangeLog"]
- s.files = ["lib/lingo/ctl.rb", "lib/lingo/database.rb", "lib/lingo/error.rb", "lib/lingo/version.rb", "lib/lingo/database/source.rb", "lib/lingo/database/libcdb_store.rb", "lib/lingo/database/sdbm_store.rb", "lib/lingo/database/show_progress.rb", "lib/lingo/database/crypter.rb", "lib/lingo/database/source/multi_key.rb", "lib/lingo/database/source/key_value.rb", "lib/lingo/database/source/single_word.rb", "lib/lingo/database/source/word_class.rb", "lib/lingo/database/source/multi_value.rb", "lib/lingo/database/gdbm_store.rb", "lib/lingo/database/hash_store.rb", "lib/lingo/cli.rb", "lib/lingo/cachable.rb", "lib/lingo/attendee/variator.rb", "lib/lingo/attendee/debugger.rb", "lib/lingo/attendee/object_filter.rb", "lib/lingo/attendee/synonymer.rb", "lib/lingo/attendee/text_writer.rb", "lib/lingo/attendee/multi_worder.rb", "lib/lingo/attendee/text_reader.rb", "lib/lingo/attendee/dehyphenizer.rb", "lib/lingo/attendee/tokenizer.rb", "lib/lingo/attendee/abbreviator.rb", "lib/lingo/attendee/formatter.rb", "lib/lingo/attendee/noneword_filter.rb", "lib/lingo/attendee/sequencer.rb", "lib/lingo/attendee/decomposer.rb", "lib/lingo/attendee/word_searcher.rb", "lib/lingo/attendee/vector_filter.rb", "lib/lingo/config.rb", "lib/lingo/core_ext.rb", "lib/lingo/agenda_item.rb", "lib/lingo/buffered_attendee.rb", "lib/lingo/reportable.rb", "lib/lingo/language.rb", "lib/lingo/language/dictionary.rb", "lib/lingo/language/word.rb", "lib/lingo/language/lexical.rb", "lib/lingo/language/word_form.rb", "lib/lingo/language/token.rb", "lib/lingo/language/grammar.rb", "lib/lingo/language/lexical_hash.rb", "lib/lingo/attendee.rb", "lib/lingo/call.rb", "lib/lingo.rb", "bin/lingo", "bin/lingoctl", "lingo.rb", "lingo.cfg", "lingo-all.cfg", "lingo-call.cfg", "de.lang", "en.lang", "de/lingo-syn.txt", "de/lingo-abk.txt", "de/lingo-dic.txt", "de/lingo-mul.txt", "de/user-dic.txt", "en/lingo-dic.txt", "en/lingo-mul.txt", "en/user-dic.txt", "txt/artikel.txt", "txt/artikel-en.txt", "info/gpl-hdr.txt", 
"info/kerze.png", "info/meeting.png", "info/lingo.png", "info/types.png", "info/logo.png", "info/language.png", "info/Typen.png", "info/Objekte.png", "info/download.png", "info/database.png", "info/db_small.png", "lir.cfg", "txt/lir.txt", "porter/stem.rb", "porter/stem.cfg", "test.cfg", "de/test_mul.txt", "de/test_singleword.txt", "de/test_mul2.txt", "de/test_syn.txt", "de/test_dic.txt", "de/test_syn2.txt", "TODO", "README", "ChangeLog", "COPYING", "Rakefile", "spec/spec_helper.rb", ".rspec", "test/lir.csv", "test/attendee/ts_abbreviator.rb", "test/attendee/ts_noneword_filter.rb", "test/attendee/ts_word_searcher.rb", "test/attendee/ts_object_filter.rb", "test/attendee/ts_vector_filter.rb", "test/attendee/ts_text_writer.rb", "test/attendee/ts_decomposer.rb", "test/attendee/ts_sequencer.rb", "test/attendee/ts_synonymer.rb", "test/attendee/ts_tokenizer.rb", "test/attendee/ts_variator.rb", "test/attendee/ts_text_reader.rb", "test/attendee/ts_multi_worder.rb", "test/mul.txt", "test/test_helper.rb", "test/ref/artikel.ven", "test/ref/lir.csv", "test/ref/artikel.vec", "test/ref/lir.mul", "test/ref/artikel.syn", "test/ref/lir.syn", "test/ref/artikel.mul", "test/ref/artikel.seq", "test/ref/lir.seq", "test/ref/artikel.non", "test/ref/artikel.ver", "test/ref/lir.non", "test/lir2.txt", "test/ts_database.rb", "test/lir.txt", "test/ts_language.rb"]
+ s.files = ["lib/lingo.rb", "lib/lingo/show_progress.rb", "lib/lingo/config.rb", "lib/lingo/database.rb", "lib/lingo/language/dictionary.rb", "lib/lingo/language/word_form.rb", "lib/lingo/language/lexical.rb", "lib/lingo/language/grammar.rb", "lib/lingo/language/lexical_hash.rb", "lib/lingo/language/token.rb", "lib/lingo/language/word.rb", "lib/lingo/attendee/stemmer/porter.rb", "lib/lingo/attendee/vector_filter.rb", "lib/lingo/attendee/noneword_filter.rb", "lib/lingo/attendee/object_filter.rb", "lib/lingo/attendee/variator.rb", "lib/lingo/attendee/multi_worder.rb", "lib/lingo/attendee/text_reader.rb", "lib/lingo/attendee/synonymer.rb", "lib/lingo/attendee/word_searcher.rb", "lib/lingo/attendee/dehyphenizer.rb", "lib/lingo/attendee/sequencer.rb", "lib/lingo/attendee/debugger.rb", "lib/lingo/attendee/text_writer.rb", "lib/lingo/attendee/stemmer.rb", "lib/lingo/attendee/tokenizer.rb", "lib/lingo/attendee/abbreviator.rb", "lib/lingo/attendee/decomposer.rb", "lib/lingo/attendee/formatter.rb", "lib/lingo/database/hash_store.rb", "lib/lingo/database/show_progress.rb", "lib/lingo/database/sdbm_store.rb", "lib/lingo/database/source.rb", "lib/lingo/database/crypter.rb", "lib/lingo/database/source/multi_value.rb", "lib/lingo/database/source/word_class.rb", "lib/lingo/database/source/key_value.rb", "lib/lingo/database/source/multi_key.rb", "lib/lingo/database/source/single_word.rb", "lib/lingo/database/gdbm_store.rb", "lib/lingo/database/libcdb_store.rb", "lib/lingo/call.rb", "lib/lingo/attendee.rb", "lib/lingo/version.rb", "lib/lingo/ctl.rb", "lib/lingo/cli.rb", "lib/lingo/core_ext.rb", "lib/lingo/buffered_attendee.rb", "lib/lingo/agenda_item.rb", "lib/lingo/cachable.rb", "lib/lingo/language.rb", "lib/lingo/error.rb", "lib/lingo/reportable.rb", "bin/lingoctl", "bin/lingo", "lingo.rb", "lingo.cfg", "lingo-call.cfg", "lir.cfg", "de.lang", "en.lang", "de/lingo-dic.txt", "de/lingo-abk.txt", "de/lingo-syn.txt", "de/lingo-mul.txt", "de/user-dic.txt", "de/test_syn.txt", 
"de/test_dic.txt", "de/test_syn2.txt", "de/test_singleword.txt", "de/test_mul.txt", "de/test_mul2.txt", "en/lingo-dic.txt", "en/lingo-syn.txt", "en/lingo-mul.txt", "en/user-dic.txt", "txt/artikel.txt", "txt/artikel-en.txt", "txt/lir.txt", "ChangeLog", "COPYING", "README", "Rakefile", "TODO", "spec/spec_helper.rb", ".rspec", "test/ref/artikel.ven", "test/ref/lir.mul", "test/ref/lir.vec", "test/ref/artikel.vec", "test/ref/lir.syn", "test/ref/artikel.mul", "test/ref/artikel.syn", "test/ref/artikel.seq", "test/ref/artikel.non", "test/ref/lir.non", "test/ref/lir.seq", "test/ref/artikel.ver", "test/ts_language.rb", "test/lir2.txt", "test/attendee/ts_noneword_filter.rb", "test/attendee/ts_text_writer.rb", "test/attendee/ts_sequencer.rb", "test/attendee/ts_object_filter.rb", "test/attendee/ts_text_reader.rb", "test/attendee/ts_multi_worder.rb", "test/attendee/ts_variator.rb", "test/attendee/ts_decomposer.rb", "test/attendee/ts_abbreviator.rb", "test/attendee/ts_stemmer.rb", "test/attendee/ts_tokenizer.rb", "test/attendee/ts_vector_filter.rb", "test/attendee/ts_word_searcher.rb", "test/attendee/ts_synonymer.rb", "test/lir.vec", "test/test_helper.rb", "test/lir.txt", "test/mul.txt", "test/ts_database.rb"]
s.homepage = "http://lex-lingo.de"
- s.rdoc_options = ["--charset", "UTF-8", "--line-numbers", "--all", "--title", "lingo Application documentation (v1.8.1)", "--main", "README"]
+ s.rdoc_options = ["--charset", "UTF-8", "--line-numbers", "--all", "--title", "lingo Application documentation (v1.8.2)", "--main", "README"]
s.require_paths = ["lib"]
s.required_ruby_version = Gem::Requirement.new(">= 1.9")
s.rubygems_version = "1.8.17"

0 comments on commit 3a0199d

Please sign in to comment.