Permalink
Browse files

initial commit

  • Loading branch information...
flori committed Aug 25, 2009
0 parents commit f6742da9800ae636dcab02a186723b9cf2a6b694
@@ -0,0 +1,3 @@
+.*.sw[pon]
+coverage
+pkg
28 CHANGES
@@ -0,0 +1,28 @@
+2009-08-25 (0.2.4)
+ * Included Jaro and Jaro-Winkler metrics implementation of Kevin Ballard
+ <kevin@rapleaf.com>. Thanks a lot.
+ * Made the extension compile under Ruby 1.9.
+2006-06-25 (0.2.3)
+ * Fixed agrep.rb to use the new API.
+2005-10-11 (0.2.2)
+ * Fixed a typo in extconf.rb that prohibited compiling on
+ non-gcc compilers.
+2005-09-12 (0.2.1)
+ * Bugfix: Wrong type for pattern length corrected. Thanks to David
+ Heinemeier Hansson for reporting it.
+2005-06-01 (0.2.0)
+ * Major changes in API and implementation:
+ Now the Levenshtein edit distance, Sellers edit distance, the Hamming
+ distance, the longest common subsequence length, the longest common
+ substring length, and the pair distance metric can be computed.
+2005-01-20 (0.1.4)
+ * Better argument handling in initialization method
+ * Minor changes in Rakefile and README.en
+2004-09-27 (0.1.3)
+ * Rakefile and gem support added.
+2004-09-24 (0.1.2)
+ * Uses Test::Unit for regression tests now.
+2002-04-21 (0.1.1)
+ * Minor changes: documentation, more test cases and exceptions.
+2002-03-14 (0.1.0)
+ * Initial Version
340 COPYING

Large diffs are not rendered by default.

Oops, something went wrong.
25 README
@@ -0,0 +1,25 @@
+Installation
+============
+
+Just type into the command line as root:
+
+# ruby install.rb
+
+If you have installed rake (rake.rubyforge.org), you can also type:
+
+# rake install
+
+To install this extension as a gem, type:
+
+# gem install amatch
+
+Author
+======
+
+Florian Frank <flori@ping.de>
+
+License
+=======
+
+GNU General Public License, Version 2 (GPLv2)
+
113 Rakefile
@@ -0,0 +1,113 @@
+# vim: set filetype=ruby et sw=2 ts=2:
+
+begin
+ require 'rake/gempackagetask'
+rescue LoadError
+end
+require 'rbconfig'
+include Config
+require 'rake/clean'
+CLEAN.include 'coverage', 'doc'
+require 'rake/testtask'
+
+MAKE = ENV['MAKE'] || %w[gmake make].find { |c| system(c, '-v') }
+PKG_NAME = 'amatch'
+PKG_VERSION = File.read('VERSION').chomp
+PKG_FILES = FileList["**/*"].exclude(/^(pkg|coverage|doc)/)
+PKG_DOC_FILES = [ "ext/amatch.c" ].concat(Dir['lib/**/*.rb']) << 'doc-main.txt'
+
+task :default => :test
+
+desc "Run unit tests"
+task :test => :compile_ext do
+ sh %{testrb -Iext:lib tests/test_*.rb}
+end
+
+desc "Compiling library"
+task :compile_ext do
+ cd 'ext' do
+ ruby %{extconf.rb}
+ sh MAKE
+ end
+end
+
+desc "Installing library"
+task :install => :test do
+ src, = Dir['ext/amatch.*'].reject { |x| /\.[co]$/.match x }
+ filename = File.basename(src)
+ dst = File.join(CONFIG["sitelibdir"], filename)
+ install(src, dst, :verbose => true)
+end
+
+desc "Removing generated files"
+task :clean do
+ cd 'ext' do
+ ruby 'extconf.rb'
+ sh "#{MAKE} distclean" if File.exist?('Makefile')
+ end
+end
+
+desc "Build the documentation"
+task :doc do
+ sh "rdoc -m doc-main.txt -t '#{PKG_NAME} - Approximate Matching' #{PKG_DOC_FILES * ' '}"
+end
+
+if defined? Gem # the gem specification and package task are only set up when the Gem constant exists
+ spec = Gem::Specification.new do |s|
+ s.name = 'amatch'
+ s.version = PKG_VERSION
+ s.summary = "Approximate String Matching library"
+ s.description = <<EOF
+Amatch is a library for approximate string matching and searching in strings.
+Several algorithms can be used to do this, and it's also possible to compute a
+similarity metric number between 0.0 and 1.0 for two given strings.
+EOF
+
+ s.files = PKG_FILES
+
+ s.extensions << "ext/extconf.rb" # extconf.rb is registered as the extension build script
+
+ s.require_path = 'ext' # the ext directory is the gem's only require path
+
+ s.bindir = "bin"
+ s.executables = ["agrep.rb"]
+ s.default_executable = "agrep.rb"
+
+ s.has_rdoc = true
+ s.extra_rdoc_files.concat PKG_DOC_FILES
+ s.rdoc_options << '--main' << 'doc-main.txt' <<
+ '--title' << "#{PKG_NAME} - Approximate Matching"
+ s.test_files.concat Dir['tests/test_*.rb']
+
+ s.author = "Florian Frank"
+ s.email = "flori@ping.de"
+ s.homepage = "http://amatch.rubyforge.org"
+ s.rubyforge_project = "amatch"
+ end
+
+ Rake::GemPackageTask.new(spec) do |pkg| # packaging tasks are derived from the spec built above
+ pkg.need_tar = true # request a tar archive in addition to the gem
+ pkg.package_files += PKG_FILES
+ end
+end
+
+desc m = "Writing version information for #{PKG_VERSION}" # m doubles as the progress message printed by the task
+task :version do
+ puts m
+ File.open(File.join('lib', 'amatch', 'version.rb'), 'w') do |v| # opened with 'w', so an existing version.rb is overwritten
+ v.puts <<EOT
+module Amatch
+ # Amatch version
+ VERSION = '#{PKG_VERSION}'
+ VERSION_ARRAY = VERSION.split(/\\./).map { |x| x.to_i } # :nodoc:
+ VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
+ VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
+ VERSION_BUILD = VERSION_ARRAY[2] # :nodoc:
+end
+EOT
+ end
+end
+
+
+desc "Prepare a new release"
+task :release => [ :clean, :version, :package ] # no body of its own: it only depends on the three listed tasks
@@ -0,0 +1 @@
+0.2.4
@@ -0,0 +1,79 @@
+#! /usr/bin/env ruby
+
+require 'amatch'
+require 'getoptlong'
+
+def usage(msg, options)
+ puts msg, "Usage: #{File.basename($0)} [OPTIONS] PATTERN [FILE ...]", ""
+ options.each do |o|
+ puts " " + o[1] + ", " + o[0] + " " +
+ (o[2] == GetoptLong::REQUIRED_ARGUMENT ? 'ARGUMENT' : '')
+ end
+ puts "\nReport bugs to <flori@ping.de>."
+ exit 0
+end
+
+class Amatch::Levenshtein
+ def search_relative(strings)
+ search(strings).to_f / pattern.size
+ end
+end
+
+$distance = 1 # largest result a line may produce and still be printed; replaced by --distance
+$mode = :search # matcher method sent for every line; --relative switches it to :search_relative
+begin
+ parser = GetoptLong.new
+ options = [
+ [ '--distance', '-d', GetoptLong::REQUIRED_ARGUMENT ],
+ [ '--relative', '-r', GetoptLong::NO_ARGUMENT ],
+ [ '--verbose', '-v', GetoptLong::NO_ARGUMENT ],
+ [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
+ ]
+ parser.set_options(*options)
+ parser.each_option do |name, arg|
+ name = name.sub(/^--/, '') # drop the leading dashes so the case below compares bare names
+ case name
+ when 'distance'
+ $distance = arg.to_f
+ when 'relative'
+ $mode = :search_relative
+ when 'verbose'
+ $verbose = 1
+ when 'help'
+ usage('You\'ve asked for it!', options)
+ end
+ end
+rescue # an error raised while parsing the options ends the program with status 1
+ exit 1
+end
+pattern = ARGV.shift or usage('Pattern needed!', options) # first remaining argument is the pattern; without one, usage is printed
+
+matcher = Amatch::Levenshtein.new(pattern)
+size = 0
+start = Time.new
+if ARGV.size > 0 then
+ ARGV.each do |filename|
+ File.stat(filename).file? or next
+ size += File.size(filename)
+ begin
+ File.open(filename, 'r').each_line do |line|
+ if matcher.__send__($mode, line) <= $distance
+ puts "#{filename}:#{line}"
+ end
+ end
+ rescue
+ STDERR.puts "Failure at #{filename}: #{$!} => Skipping!"
+ end
+ end
+else
+ STDIN.each_line do |line|
+ size += line.size
+ if matcher.__send__($mode, line) <= $distance
+ puts line
+ end
+ end
+end
+time = Time.new - start
+$verbose and STDERR.printf "%.3f secs running, scanned %.3f KB/s.\n",
+ time, size / time / 1024
+exit 0
@@ -0,0 +1,115 @@
+== amatch - Approximate Matching Extension for Ruby
+
+=== Description
+
+This is a collection of classes that can be used for approximate
+matching, searching, and comparing of Strings. They implement algorithms
+that compute the Levenshtein edit distance, Sellers edit distance, the
+Hamming distance, the longest common subsequence length, the longest common
+substring length, the pair distance metric, the Jaro metric, and the
+Jaro-Winkler metric.
+
+=== Author
+
+Florian Frank mailto:flori@ping.de
+
+=== License
+
+This is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License Version 2 as published by
+the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html
+
+=== Download
+
+The latest version of <b>amatch</b> can be found at
+
+* http://rubyforge.org/frs/?group_id=390
+
+Online Documentation should be located at
+
+* http://amatch.rubyforge.org
+
+=== Examples
+ require 'amatch'
+ # => true
+ include Amatch
+ # => Object
+
+ m = Sellers.new("pattern")
+ # => #<Amatch::Sellers:0x40366324>
+ m.match("pattren")
+ # => 2.0
+ m.substitution = m.insertion = 3
+ # => 3
+ m.match("pattren")
+ # => 4.0
+ m.reset_weights
+ # => #<Amatch::Sellers:0x40366324>
+ m.match(["pattren","parent"])
+ # => [2.0, 4.0]
+ m.search("abcpattrendef")
+ # => 2.0
+
+ m = Levenshtein.new("pattern")
+ # => #<Amatch::Levenshtein:0x4035919c>
+ m.match("pattren")
+ # => 2
+ m.search("abcpattrendef")
+ # => 2
+ "pattern language".levenshtein_similar("language of patterns")
+ # => 0.2
+
+ m = Hamming.new("pattern")
+ # => #<Amatch::Hamming:0x40350858>
+ m.match("pattren")
+ # => 2
+ "pattern language".hamming_similar("language of patterns")
+ # => 0.1
+
+ m = PairDistance.new("pattern")
+ # => #<Amatch::PairDistance:0x40349be8>
+ m.match("pattr en")
+ # => 0.545454545454545
+ m.match("pattr en", nil)
+ # => 0.461538461538462
+ m.match("pattr en", /t+/)
+ # => 0.285714285714286
+ "pattern language".pair_distance_similar("language of patterns")
+ # => 0.928571428571429
+
+ m = LongestSubsequence.new("pattern")
+ # => #<Amatch::LongestSubsequence:0x4033e900>
+ m.match("pattren")
+ # => 6
+ "pattern language".longest_subsequence_similar("language of patterns")
+ # => 0.4
+
+ m = LongestSubstring.new("pattern")
+ # => #<Amatch::LongestSubstring:0x403378d0>
+ m.match("pattren")
+ # => 4
+ "pattern language".longest_substring_similar("language of patterns")
+ # => 0.4
+
+ m = Jaro.new("pattern")
+ # => #<Amatch::Jaro:0x363b70>
+ m.match("paTTren")
+ # => 0.952380952380952
+ m.ignore_case = false
+ m.match("paTTren")
+ # => 0.742857142857143
+ "pattern language".jaro_similar("language of patterns")
+ # => 0.672222222222222
+
+ m = JaroWinkler.new("pattern")
+ # => #<Amatch::JaroWinkler:0x3530b8>
+ m.match("paTTren")
+ # => 0.971428571712403
+ m.ignore_case = false
+ m.match("paTTren")
+ # => 0.79428571505206
+ m.scaling_factor = 0.05
+ m.match("pattren")
+ # => 0.961904762046678
+ "pattern language".jarowinkler_similar("language of patterns")
+ # => 0.672222222222222
+
Oops, something went wrong.

0 comments on commit f6742da

Please sign in to comment.