Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Working with ruby1.9 and added results sorting by basic word similarity

  • Loading branch information...
commit c1c3c75c8d9097d2cfa3df3e31125877ca743488 1 parent d09b0d4
Bastien Vaucher authored
8 Gemfile
View
@@ -0,0 +1,8 @@
+source "http://rubygems.org"
+
+group :development do
+ gem "bundler", "~> 1.0.0"
+ gem "jeweler", "~> 1.6.2"
+ gem "ruby-debug", :platforms => [:ruby_18, :mri_18]
+ gem 'ruby-debug19', :platforms => [:ruby_19, :mri_19]
+end
40 Gemfile.lock
View
@@ -0,0 +1,40 @@
+GEM
+ remote: http://rubygems.org/
+ specs:
+ archive-tar-minitar (0.5.2)
+ columnize (0.3.4)
+ git (1.2.5)
+ jeweler (1.6.2)
+ bundler (~> 1.0)
+ git (>= 1.2.5)
+ rake
+ linecache (0.46)
+ rbx-require-relative (> 0.0.4)
+ linecache19 (0.5.12)
+ ruby_core_source (>= 0.1.4)
+ rake (0.9.2)
+ rbx-require-relative (0.0.5)
+ ruby-debug (0.10.4)
+ columnize (>= 0.1)
+ ruby-debug-base (~> 0.10.4.0)
+ ruby-debug-base (0.10.4)
+ linecache (>= 0.3)
+ ruby-debug-base19 (0.11.25)
+ columnize (>= 0.3.1)
+ linecache19 (>= 0.5.11)
+ ruby_core_source (>= 0.1.4)
+ ruby-debug19 (0.11.6)
+ columnize (>= 0.3.1)
+ linecache19 (>= 0.5.11)
+ ruby-debug-base19 (>= 0.11.19)
+ ruby_core_source (0.1.5)
+ archive-tar-minitar (>= 0.5.2)
+
+PLATFORMS
+ ruby
+
+DEPENDENCIES
+ bundler (~> 1.0.0)
+ jeweler (~> 1.6.2)
+ ruby-debug
+ ruby-debug19
40 Rakefile
View
@@ -1,30 +1,44 @@
-require 'rubygems'
+# encoding: utf-8
+
+require 'rubygems'
+require 'bundler'
+begin
+ Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+ $stderr.puts e.message
+ $stderr.puts "Run `bundle install` to install missing gems"
+ exit e.status_code
+end
require 'rake'
require 'rake/rdoctask'
-begin
- require 'jeweler'
+require 'jeweler'
- Jeweler::Tasks.new do |gemspec|
+Jeweler::Tasks.new do |gemspec|
gemspec.name = "zidian"
gemspec.summary = "Chinese dictionary"
gemspec.description = "Chinese dictionary using the CEDICT word list"
gemspec.email = "bastien.vaucher@gmail.com"
gemspec.homepage = "http://github.com/bastien/zidian"
gemspec.authors = ["Bastien Vaucher"]
- end
- Jeweler::GemcutterTasks.new
-rescue LoadError
- puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
-end
+end
+Jeweler::GemcutterTasks.new
+
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+ test.libs << 'lib' << 'test'
+ test.pattern = 'test/**/test_*.rb'
+ test.verbose = true
+end
+
+task :default => :test
desc 'Generate documentation for the Zidian gem.'
-Rake::RDocTask.new(:rdoc) do |rdoc|
+Rake::RDocTask.new do |rdoc|
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
rdoc.rdoc_dir = 'rdoc'
rdoc.title = 'Zidian'
rdoc.options << '--line-numbers' << '--inline-source'
rdoc.rdoc_files.include('README.mkd')
rdoc.rdoc_files.include('lib/**/*.rb')
-end
-
-Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
+end
25 lib/zidian.rb
View
@@ -1,14 +1,14 @@
module Zidian
def self.find(expression)
- $KCODE = 'UTF8'
+ $KCODE = 'UTF8' if RUBY_VERSION < "1.9.0"
case expression.class.name
when "Array"
expression.collect{|e| find(e) }.flatten.uniq
when "Integer", "Fixnum" then
Word.new(get_line(expression), expression)
when "String" then
- find_word(expression).lines.to_a.collect{|line| Word.new(line) }
+ find_words(expression).collect{|raw_word| Word.new(raw_word) }
else
raise InvalFindInputException
end
@@ -16,10 +16,11 @@ def self.find(expression)
protected
- def self.find_word(word) #:nodoc:
+ def self.find_words(word, case_sensitive = false) #:nodoc:
words = word.split.map{|w| "#{w}[1-4]?"}.join(" ")
# Passing -i to grep makes the search case-insensitive, but the author notes it makes the search very slow. NOTE(review): the added line below passes -i when case_sensitive is true, which looks inverted -- confirm the intended meaning of the flag. NOTE(review): word is interpolated unescaped into the shell command -- confirm callers never pass untrusted input.
- `less #{File.dirname(__FILE__)}/cedict_ts.u8 | grep -n -E '(^|[^a-zA-Z])#{words}($|[^a-zA-Z])'`
+ results = `less #{File.dirname(__FILE__)}/cedict_ts.u8 | grep -n -E#{ case_sensitive ? ' -i' : ''} '(^|[^a-zA-Z])#{words}($|[^a-zA-Z])'`
+ sort_lines(results.lines.to_a, words)
end
def self.get_line(line_number) #:nodoc:
@@ -27,6 +28,22 @@ def self.get_line(line_number) #:nodoc:
`sed -n '#{line_number}p' #{File.dirname(__FILE__)}/cedict_ts.u8`
end
+ # Sorts the lines by similarity to the words: lines are ordered by ascending line_similarity_to_words score, so the line with the lowest score comes first
+ #
+ def self.sort_lines(lines, words)
+ lines.sort do |a, b|
+ line_similarity_to_words(a, words) <=> line_similarity_to_words(b, words)
+ end
+ end
+
+ def self.line_similarity_to_words(line, words)
+ # The matched segment is delimited by one of , : / [ (or the line start) on the left and one of , ] / [ (or the line end) on the right
+ # Very basic similarity score: a lower value means a closer match
+ # We count the characters around the word inside its delimited segment (captures 2 and 3, stripped); the fewer characters, the more similar the match. NOTE(review): if the pattern does not match, match_data is [] and match_data[2] is nil, so .strip raises NoMethodError -- confirm every line passed in matches (e.g. when grep was run with -i).
+ match_data = line.match("(^|[,:\\\[\/,])([^\\\[\/,:]*)#{words}([^\\\]\\\[\/,]*)($|[,\\\]\\\[\/,])").to_a
+ match_data[2].strip.size + match_data[3].strip.size
+ end
+
class Word
attr_reader :id, :traditional, :simplified, :pinyin, :english
18 test/helper.rb
View
@@ -0,0 +1,18 @@
+require 'rubygems'
+require 'bundler'
+begin
+ Bundler.setup(:default, :development)
+rescue Bundler::BundlerError => e
+ $stderr.puts e.message
+ $stderr.puts "Run `bundle install` to install missing gems"
+ exit e.status_code
+end
+require 'test/unit'
+require "ruby-debug"
+
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+require 'zidian'
+
+class Test::Unit::TestCase
+end
9 test/test_zidian.rb
View
@@ -1,5 +1,5 @@
-require "#{File.dirname(__FILE__)}/../lib/zidian"
-require "test/unit"
+# encoding: utf-8
+require "helper"
class TestZidian < Test::Unit::TestCase
@@ -34,6 +34,11 @@ def test_find_word_from_pinyin
assert_equal("围城", words.first.simplified)
end
+ def test_find_word_from_chinese
+ words = Zidian.find("围城")
+ assert_equal("wei2 cheng2", words.first.pinyin)
+ end
+
def test_find_word_from_pinyin_marked
words = Zidian.find("wei2 cheng2")
assert_equal("siege", words.first.english.first)
Please sign in to comment.
Something went wrong with that request. Please try again.