Skip to content

Commit

Permalink
Create analyzer class
Browse files Browse the repository at this point in the history
  • Loading branch information
dtan4 committed May 31, 2014
1 parent 7dc96d0 commit 4f30b19
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 0 deletions.
1 change: 1 addition & 0 deletions hiraoyogi.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Gem::Specification.new do |spec|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
spec.require_paths = ["lib"]

spec.add_dependency "natto"
spec.add_dependency "nokogiri"
spec.add_dependency "redis"

Expand Down
1 change: 1 addition & 0 deletions lib/hiraoyogi.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
require "hiraoyogi/analyzer"
require "hiraoyogi/crawler"
require "hiraoyogi/database"
require "hiraoyogi/version"
Expand Down
31 changes: 31 additions & 0 deletions lib/hiraoyogi/analyzer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-

require "natto"

module Hiraoyogi
  # Builds a word-frequency table from text by running it through MeCab
  # (via the natto gem) and counting the surface form of each token.
  class Analyzer
    # Part-of-speech labels (the first comma-separated field of a MeCab
    # feature string) that are excluded from the table:
    # particles (助詞) and auxiliary verbs (助動詞).
    DISALLOW_TYPE = %w(助詞 助動詞).freeze

    # Creates an analyzer backed by a MeCab tagger with default options.
    def initialize
      @natto = Natto::MeCab.new
    end

    # Counts how often each allowed token occurs in +text+.
    #
    # @param text [String] the text to tokenize
    # @return [Hash{String => Integer}] token surface => number of occurrences;
    #   a plain Hash, so looking up an unseen token returns nil
    def analyze_text(text)
      result = {}

      @natto.parse(text) do |line|
        surface = line.surface

        # natto also yields MeCab's terminating BOS/EOS node, whose surface is
        # empty. It is not a word, so it must not be counted as "" => 1.
        next if surface.nil? || surface.empty?
        next if DISALLOW_TYPE.include?(type(line.feature))

        result[surface] = result.fetch(surface, 0) + 1
      end

      result
    end

    private

    # Extracts the part-of-speech label from a MeCab feature string.
    #
    # @param feature [String] comma-separated feature string of a node
    # @return [String, nil] the first field, or nil when the string is empty
    def type(feature)
      # Only the first field is needed, so stop splitting after it.
      feature.split(",", 2).first
    end
  end
end
21 changes: 21 additions & 0 deletions spec/hiraoyogi/analyzer_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-

require "spec_helper"

module Hiraoyogi
  # Specification for Analyzer: tokenizing a sentence yields per-word counts.
  describe Analyzer do
    let(:analyzer) { described_class.new }

    # Classic tongue twister; the expectation below ignores its particles.
    let(:text) { "すもももももももものうち" }

    describe "#analyze_text" do
      it "should create index table" do
        index_table = analyzer.analyze_text(text)

        expect(index_table).to include(
          "すもも" => 1,
          "もも" => 2,
          "うち" => 1
        )
      end
    end
  end
end

0 comments on commit 4f30b19

Please sign in to comment.