Permalink
Browse files

change io to superio

  • Loading branch information...
chenkovsky
chenkovsky committed Jul 26, 2018
1 parent 8a2f160 commit a6c55097c21fc6458993e1e241622554c992f43e
Showing with 53 additions and 249 deletions.
  1. +2 −0 shard.yml
  2. +2 −12 src/aha/ac.cr
  3. +1 −13 src/aha/array_hash.cr
  4. +4 −2 src/aha/arrayx.cr
  5. +10 −18 src/aha/cedar.cr
  6. +17 −28 src/aha/sam.cr
  7. +4 −14 src/aha/sym_spell.cr
  8. +0 −131 src/aha/util.cr
  9. +13 −31 src/aha/wu_manber.cr
@@ -9,5 +9,7 @@ crystal: 0.23.1
dependencies:
sub_hash:
github: chenkovsky/sub_hash
super_io:
github: chenkovsky/super_io
license: MIT
@@ -1,5 +1,6 @@
require "./matcher"
require "bit_array"
require "super_io"
module Aha
# If a child node cannot be found, go to the fail node each time and check whether it has the corresponding child node.
@@ -9,6 +10,7 @@ module Aha
class ACX(T)
include Aha::MatchString
SuperIO.save_load
struct OutNode(T)
@next : T
@@ -185,18 +187,6 @@ module Aha
end
end
# Writes this object to the file at `path`.
# The file is opened in "wb" mode and the object is serialized through
# `to_io` using Aha::ByteFormat.
def save(path)
  File.open(path, "wb") { |file| to_io(file, Aha::ByteFormat) }
end
# Reads an instance back from the file at `path`.
# The file is opened in "rb" mode and decoded through `from_io`
# using Aha::ByteFormat; the decoded object is the return value.
def self.load(path)
  File.open(path, "rb") { |file| self.from_io(file, Aha::ByteFormat) }
end
def match(seq : Bytes | Array(UInt8), sep : BitArray, &block)
raise "sep BitArray size > 256 is not supported" if sep.size > 256
match_ seq do |idx, nid|
@@ -1,7 +1,7 @@
module Aha
# an open address hash table
class ArrayHash(N) # N is the byte num of value
SuperIO.save_load
include Enumerable({Bytes, Bytes})
alias Slot = UInt8*
@@ -298,17 +298,5 @@ module Aha
end
end
end
# Persists this object to `path`: opens the file in "wb" mode and
# hands it to `to_io` together with Aha::ByteFormat.
def save(path)
  File.open(path, "wb") { |out| to_io(out, Aha::ByteFormat) }
end
# Restores an instance from `path`: opens the file in "rb" mode and
# returns whatever `from_io` decodes from it with Aha::ByteFormat.
def self.load(path)
  File.open(path, "rb") { |input| self.from_io(input, Aha::ByteFormat) }
end
end
end
@@ -1,3 +1,5 @@
require "super_io"
module Aha
class ArrayX(T)
@size : Int64
@@ -29,14 +31,14 @@ module Aha
end
# Builds an instance from `io`: obtains a (pointer, size, capacity) triple from a
# ptr_from_io helper and passes it to the constructor as (capacity, ptr, size).
# NOTE(review): the two consecutive assignments below appear to be the before/after
# lines of the Aha -> SuperIO change shown together; as written the second one
# overwrites the first. Confirm that only one of them exists in the real file.
def self.from_io(io : IO, format : IO::ByteFormat) : self
ptr, size, capacity = Aha.ptr_from_io T, io, format, Math.pw2ceil
ptr, size, capacity = SuperIO.ptr_from_io Pointer(T), io, format
self.new(capacity, ptr, size)
end
# Writes this array to `io`: first @size, then one entry per index in 0...@size.
# NOTE(review): the loop body holds two write calls for the same slot, which look like
# the before/after lines of the Aha -> SuperIO change shown together. The first passes
# the element value, the second passes the element pointer (@ptr + i) — confirm which
# form the real file uses and that it matches what from_io reads.
def to_io(io : IO, format : IO::ByteFormat)
@size.to_io io, format
(0...@size).each do |i|
Aha.to_io (@ptr + i).value, T, io, format
SuperIO.to_io (@ptr + i), io, format
end
end
end
@@ -1,8 +1,12 @@
require "super_io"
module Aha
alias Cedar = CedarX(Int32)
alias CedarBig = CedarX(Int64)
class CedarX(T)
SuperIO.save_load
# Returns T::MAX, the maximum value of the type parameter T.
def self.value_limit
T::MAX
end
@@ -160,10 +164,10 @@ module Aha
end
def to_io(io : IO, format : IO::ByteFormat)
Aha.ptr_to_io @array, @array_size, Node(T), io, format
Aha.ptr_to_io @blocks, (@array_size >> 8), Block(T), io, format
SuperIO.ptr_to_io @array, @array_size, io, format
SuperIO.ptr_to_io @blocks, (@array_size >> 8), io, format
@reject.each { |r| r.to_io io, format }
Aha.ptr_to_io @leafs, (@key_num), T, io, format
SuperIO.ptr_to_io @leafs, (@key_num), io, format
@bheadF.to_io io, format
@bheadC.to_io io, format
@bheadO.to_io io, format
@@ -174,12 +178,12 @@ module Aha
def self.from_io(io : IO, format : IO::ByteFormat) : self
c = Cedar.new
c.array, array_size, capacity = Aha.ptr_from_io Node(T), io, format, Math.pw2ceil
c.array, array_size, capacity = SuperIO.ptr_from_io Pointer(Node(T)), io, format
c.array_size = T.new(array_size)
c.capacity = T.new(capacity)
c.blocks, _, _ = Aha.ptr_from_io Block(T), io, format, Math.pw2ceil
c.blocks, _, _ = SuperIO.ptr_from_io Pointer(Block(T)), io, format
c.reject = StaticArray(Int32, 257).new { |i| Int32.from_io io, format }
c.leafs, key_num, key_capacity = Aha.ptr_from_io T, io, format, Math.pw2ceil
c.leafs, key_num, key_capacity = SuperIO.ptr_from_io Pointer(T), io, format
c.key_num = T.new(key_num)
c.key_capacity = T.new(key_capacity)
c.bheadF = T.from_io io, format
@@ -946,18 +950,6 @@ module Aha
end
end
# Dumps this object into the file at `path` (opened in "wb" mode),
# delegating the encoding to `to_io` with Aha::ByteFormat.
def save(path)
  File.open(path, "wb") { |dest| to_io(dest, Aha::ByteFormat) }
end
# Loads an instance from the file at `path` (opened in "rb" mode),
# delegating the decoding to `from_io` with Aha::ByteFormat.
def self.load(path)
  File.open(path, "rb") { |src| self.from_io(src, Aha::ByteFormat) }
end
private def jump(chr : Char, from : T = T.new(0)) : T # 小于 0 说明没有路径
chr.each_byte do |byte|
from = jump byte, from
@@ -1,5 +1,6 @@
module Aha
class SAM
SuperIO.save_load
@lens : Array(Int32)
@slinks : Array(Int32) # 不在同一个right class的最长的suffix的class
@nmas : Array(Int32) # nearest marked ancestor (NMA) data structure on the inverse suffix link tree
@@ -40,31 +41,31 @@ module Aha
end
# Writes this object to `io` in the given byte format: the array fields
# (@lens, @slinks, @nmas, @flags, @outputs, @key_lens, @keys), then @del_num,
# then the number of @nexts entries followed by each entry.
# NOTE(review): every field is written twice below — once through an Aha.* helper and
# once through SuperIO.to_io. These look like the before/after lines of the
# Aha -> SuperIO change shown together; confirm only one set exists in the real file.
def to_io(io : IO, format : IO::ByteFormat)
Aha.array_to_io @lens, Int32, io, format
Aha.array_to_io @slinks, Int32, io, format
Aha.array_to_io @nmas, Int32, io, format
Aha.array_to_io @flags, UInt8, io, format
Aha.array_to_io @outputs, UInt32, io, format
Aha.array_to_io @key_lens, UInt32, io, format
Aha.string_array_to_io @keys, io, format
SuperIO.to_io @lens, io, format
SuperIO.to_io @slinks, io, format
SuperIO.to_io @nmas, io, format
SuperIO.to_io @flags, io, format
SuperIO.to_io @outputs, io, format
SuperIO.to_io @key_lens, io, format
SuperIO.to_io @keys, io, format
@del_num.to_io io, format
# The entry count is written first so the reader knows how many hashes follow.
@nexts.size.to_io io, format
@nexts.each do |hs|
Aha.hash_to_io hs, Char, Int32, io, format
SuperIO.to_io hs, io, format
end
end
# Reads the fields back from `io` in the order lens, slinks, nmas, flags, outputs,
# key_lens, keys, del_num, then a count followed by that many Hash(Char, Int32)
# entries, and passes them to SAM.new.
# NOTE(review): each local is assigned twice below — once through an Aha.* helper and
# once through SuperIO.from_io. These look like the before/after lines of the
# Aha -> SuperIO change shown together; confirm only one set exists in the real file.
# NOTE(review): `outputs` is read as Int32 here, while the Aha-based writer serialized
# @outputs as UInt32 — confirm the declared element type of @outputs.
def self.from_io(io : IO, format : IO::ByteFormat) : self
lens = Aha.array_from_io Int32, io, format
slinks = Aha.array_from_io Int32, io, format
nmas = Aha.array_from_io Int32, io, format
flags = Aha.array_from_io UInt8, io, format
outputs = Aha.array_from_io Int32, io, format
key_lens = Aha.array_from_io UInt32, io, format
keys = Aha.string_array_from_io io, format
lens = SuperIO.from_io Array(Int32), io, format
slinks = SuperIO.from_io Array(Int32), io, format
nmas = SuperIO.from_io Array(Int32), io, format
flags = SuperIO.from_io Array(UInt8), io, format
outputs = SuperIO.from_io Array(Int32), io, format
key_lens = SuperIO.from_io Array(UInt32), io, format
keys = SuperIO.from_io Array(String), io, format
del_num = Int32.from_io io, format
# Number of per-node transition hashes that follow in the stream.
next_size = Int32.from_io io, format
nexts = (0...next_size).map { |_| Aha.hash_from_io Char, Int32, io, format }
nexts = (0...next_size).map { |_| SuperIO.from_io Hash(Char, Int32), io, format }
return SAM.new(lens, slinks, nmas, nexts, flags, outputs, key_lens, keys, del_num)
end
@@ -175,18 +176,6 @@ module Aha
return newchildnode
end
# Stores this object in the file at `path`; the file is opened in "wb" mode
# and filled by `to_io` using Aha::ByteFormat.
def save(path)
  File.open(path, "wb") { |sink| to_io(sink, Aha::ByteFormat) }
end
# Rebuilds an instance from the file at `path`; the file is opened in "rb" mode
# and decoded by `from_io` using Aha::ByteFormat.
def self.load(path)
  File.open(path, "rb") { |source| self.from_io(source, Aha::ByteFormat) }
end
def substr?(str) : Bool
# Whether str is a substring of some string in the dictionary.
str.each_char do |chr|
@@ -2,6 +2,8 @@ require "./edit_distance"
module Aha
class SymSpell
SuperIO.save_load
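# For each string, take its first prefix_len characters and perform max_edit_distance delete operations on them;
# the hashes of all resulting substrings are used as keys.
# Any string within an edit distance of at most max_edit_distance from the query string is guaranteed to be found in the hash table.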
@@ -128,7 +130,7 @@ module Aha
strings = Array(String).new(string_to_id.size, "")
string_to_id.each { |k, id| strings[id] = k }
Aha.string_array_to_io strings, io, format
SuperIO.to_io strings, io, format
@words.size.to_io io, format
@words.each { |k, _| string_to_id[k].to_io io, format }
@words.each { |_, v| v.to_io io, format }
@@ -144,7 +146,7 @@ module Aha
prefix_length = Int32.from_io io, format
compact_mask = UInt32.from_io io, format
max_length = Int32.from_io io, format
strings : Array(String) = Aha.string_array_from_io io, format
strings : Array(String) = SuperIO.from_io Array(String), io, format
word_num = Int32.from_io io, format
words = Hash.zip((0...word_num).map { |_| strings[Int32.from_io(io, format)] }, (0...word_num).map { |_| Int32.from_io(io, format) })
delete_num = Int32.from_io io, format
@@ -193,18 +195,6 @@ module Aha
return spell
end
# Saves this object to the file at `path`, opened in "wb" mode, by
# streaming it through `to_io` with Aha::ByteFormat.
def save(path)
  File.open(path, "wb") { |handle| to_io(handle, Aha::ByteFormat) }
end
# Loads a SymSpell from the file at `path`, opened in "rb" mode, by
# decoding it through SymSpell.from_io with Aha::ByteFormat.
def self.load(path)
  File.open(path, "rb") { |handle| SymSpell.from_io(handle, Aha::ByteFormat) }
end
# Protected constructor: each argument is assigned directly to the instance
# variable of the same name; the body performs no further work.
protected def initialize(@max_edit_distance, @prefix_length, @compact_mask, @max_length, @deletes, @words)
end
Oops, something went wrong.

0 comments on commit a6c5509

Please sign in to comment.