Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Works with Ruby 1.9 and adds result sorting by basic word similarity

  • Loading branch information...
commit c1c3c75c8d9097d2cfa3df3e31125877ca743488 1 parent d09b0d4
@bastien authored
View
8 Gemfile
@@ -0,0 +1,8 @@
+source "http://rubygems.org"
+
+group :development do
+ gem "bundler", "~> 1.0.0"
+ gem "jeweler", "~> 1.6.2"
+ gem "ruby-debug", :platforms => [:ruby_18, :mri_18]
+ gem 'ruby-debug19', :platforms => [:ruby_19, :mri_19]
+end
View
40 Gemfile.lock
@@ -0,0 +1,40 @@
+GEM
+ remote: http://rubygems.org/
+ specs:
+ archive-tar-minitar (0.5.2)
+ columnize (0.3.4)
+ git (1.2.5)
+ jeweler (1.6.2)
+ bundler (~> 1.0)
+ git (>= 1.2.5)
+ rake
+ linecache (0.46)
+ rbx-require-relative (> 0.0.4)
+ linecache19 (0.5.12)
+ ruby_core_source (>= 0.1.4)
+ rake (0.9.2)
+ rbx-require-relative (0.0.5)
+ ruby-debug (0.10.4)
+ columnize (>= 0.1)
+ ruby-debug-base (~> 0.10.4.0)
+ ruby-debug-base (0.10.4)
+ linecache (>= 0.3)
+ ruby-debug-base19 (0.11.25)
+ columnize (>= 0.3.1)
+ linecache19 (>= 0.5.11)
+ ruby_core_source (>= 0.1.4)
+ ruby-debug19 (0.11.6)
+ columnize (>= 0.3.1)
+ linecache19 (>= 0.5.11)
+ ruby-debug-base19 (>= 0.11.19)
+ ruby_core_source (0.1.5)
+ archive-tar-minitar (>= 0.5.2)
+
+PLATFORMS
+ ruby
+
+DEPENDENCIES
+ bundler (~> 1.0.0)
+ jeweler (~> 1.6.2)
+ ruby-debug
+ ruby-debug19
View
40 Rakefile
@@ -1,30 +1,44 @@
-require 'rubygems'
+# encoding: utf-8
+
+require 'rubygems'
+require 'bundler'
+begin
+ Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+ $stderr.puts e.message
+ $stderr.puts "Run `bundle install` to install missing gems"
+ exit e.status_code
+end
require 'rake'
require 'rake/rdoctask'
-begin
- require 'jeweler'
+require 'jeweler'
- Jeweler::Tasks.new do |gemspec|
+Jeweler::Tasks.new do |gemspec|
gemspec.name = "zidian"
gemspec.summary = "Chinese dictionary"
gemspec.description = "Chinese dictionary using the CEDICT word list"
gemspec.email = "bastien.vaucher@gmail.com"
gemspec.homepage = "http://github.com/bastien/zidian"
gemspec.authors = ["Bastien Vaucher"]
- end
- Jeweler::GemcutterTasks.new
-rescue LoadError
- puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
-end
+end
+Jeweler::GemcutterTasks.new
+
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+ test.libs << 'lib' << 'test'
+ test.pattern = 'test/**/test_*.rb'
+ test.verbose = true
+end
+
+task :default => :test
desc 'Generate documentation for the Zidian gem.'
-Rake::RDocTask.new(:rdoc) do |rdoc|
+Rake::RDocTask.new do |rdoc|
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
rdoc.rdoc_dir = 'rdoc'
rdoc.title = 'Zidian'
rdoc.options << '--line-numbers' << '--inline-source'
rdoc.rdoc_files.include('README.mkd')
rdoc.rdoc_files.include('lib/**/*.rb')
-end
-
-Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
+end
View
25 lib/zidian.rb
@@ -1,14 +1,14 @@
module Zidian
def self.find(expression)
- $KCODE = 'UTF8'
+ $KCODE = 'UTF8' if RUBY_VERSION < "1.9.0"
case expression.class.name
when "Array"
expression.collect{|e| find(e) }.flatten.uniq
when "Integer", "Fixnum" then
Word.new(get_line(expression), expression)
when "String" then
- find_word(expression).lines.to_a.collect{|line| Word.new(line) }
+ find_words(expression).collect{|raw_word| Word.new(raw_word) }
else
raise InvalFindInputException
end
@@ -16,10 +16,11 @@ def self.find(expression)
protected
- def self.find_word(word) #:nodoc:
+ def self.find_words(word, case_sensitive = false) #:nodoc:
words = word.split.map{|w| "#{w}[1-4]?"}.join(" ")
# adding the -i option allows to search independently from the case, but it makes it very slow
- `less #{File.dirname(__FILE__)}/cedict_ts.u8 | grep -n -E '(^|[^a-zA-Z])#{words}($|[^a-zA-Z])'`
+ results = `less #{File.dirname(__FILE__)}/cedict_ts.u8 | grep -n -E#{ case_sensitive ? ' -i' : ''} '(^|[^a-zA-Z])#{words}($|[^a-zA-Z])'`
+ sort_lines(results.lines.to_a, words)
end
def self.get_line(line_number) #:nodoc:
@@ -27,6 +28,22 @@ def self.get_line(line_number) #:nodoc:
`sed -n '#{line_number}p' #{File.dirname(__FILE__)}/cedict_ts.u8`
end
+ # Sorts the lines by similarity to the words
+ #
+ def self.sort_lines(lines, words)
+ lines.sort do |a, b|
+ line_similarity_to_words(a, words) <=> line_similarity_to_words(b, words)
+ end
+ end
+
+ def self.line_similarity_to_words(line, words)
+ # words delimited by : ],:/[
+ # Very basic similarity determination
+ # we count how many characters before and after the word, the less characters the more similar the match is
+ match_data = line.match("(^|[,:\\\[\/,])([^\\\[\/,:]*)#{words}([^\\\]\\\[\/,]*)($|[,\\\]\\\[\/,])").to_a
+ match_data[2].strip.size + match_data[3].strip.size
+ end
+
class Word
attr_reader :id, :traditional, :simplified, :pinyin, :english
View
18 test/helper.rb
@@ -0,0 +1,18 @@
+require 'rubygems'
+require 'bundler'
+begin
+ Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+ $stderr.puts e.message
+ $stderr.puts "Run `bundle install` to install missing gems"
+ exit e.status_code
+end
+require 'test/unit'
+require "ruby-debug"
+
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+require 'zidian'
+
+class Test::Unit::TestCase
+end
View
9 test/test_zidian.rb
@@ -1,5 +1,5 @@
-require "#{File.dirname(__FILE__)}/../lib/zidian"
-require "test/unit"
+# encoding: utf-8
+require "helper"
class TestZidian < Test::Unit::TestCase
@@ -34,6 +34,11 @@ def test_find_word_from_pinyin
assert_equal("围城", words.first.simplified)
end
+ def test_find_word_from_chinese
+ words = Zidian.find("围城")
+ assert_equal("wei2 cheng2", words.first.pinyin)
+ end
+
def test_find_word_from_pinyin_marked
words = Zidian.find("wei2 cheng2")
assert_equal("siege", words.first.english.first)
Please sign in to comment.
Something went wrong with that request. Please try again.