Navigation Menu

Skip to content

Commit

Permalink
Add schema and indexes for Groonga and use it
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Apr 4, 2014
1 parent 1668c42 commit be6801e
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 0 deletions.
7 changes: 7 additions & 0 deletions config/groonga/indexes.grn
@@ -0,0 +1,7 @@
# Index column on Categories over the Pages.categories column.
column_create Categories pages_categories COLUMN_INDEX Pages categories

# Terms table: TABLE_PAT_KEY with ShortText keys, tokenized with TokenBigram
# and normalized with NormalizerAuto.
table_create Terms TABLE_PAT_KEY ShortText \
--default_tokenizer TokenBigram \
--normalizer NormalizerAuto
# Index column on Terms over Pages.title and Pages.text, created with the
# WITH_SECTION and WITH_POSITION flags.
column_create Terms pages COLUMN_INDEX|WITH_SECTION|WITH_POSITION \
Pages title,text
6 changes: 6 additions & 0 deletions config/groonga/schema.grn
@@ -0,0 +1,6 @@
# Categories: hash table keyed by ShortText.
table_create Categories TABLE_HASH_KEY ShortText

# Pages: hash table keyed by UInt64, with scalar title/text columns and a
# vector column of references into Categories.
table_create Pages TABLE_HASH_KEY UInt64
column_create Pages title COLUMN_SCALAR ShortText
column_create Pages text COLUMN_SCALAR Text
column_create Pages categories COLUMN_VECTOR Categories
48 changes: 48 additions & 0 deletions lib/wikipedia-search/task.rb
Expand Up @@ -14,6 +14,7 @@ def define

# Defines all task groups: the data tasks first, then the Groonga tasks.
def define
  define_data_tasks
  define_groonga_tasks
end

private
Expand Down Expand Up @@ -93,6 +94,33 @@ def define_data_convert_droonga_tasks
end
end

# Defines the tasks that live in the +groonga+ namespace.
#
# +groonga:load+ rebuilds the database directory from scratch: it removes
# the directory, recreates it, and then passes three files to +groonga_run+
# in order -- the schema, the file at +ja_groonga_pages_path+, and the index
# definitions.
def define_groonga_tasks
  namespace :groonga do
    desc "Load data."
    task :load do
      database_dir = groonga_database_dir_path.to_s
      rm_rf(database_dir)
      mkdir_p(database_dir)
      groonga_run(groonga_schema_path.to_s)
      groonga_run(ja_groonga_pages_path.to_s)
      # Index definitions are loaded last, after the data.
      groonga_run(groonga_indexes_path.to_s)
    end
  end
end

# Runs the +groonga+ command on the database, feeding it +input+ through
# the --file option.
#
# Log files (groonga.log and query.log) are placed in the database
# directory.
#
# input:: value passed to --file (callers pass a path String).
def groonga_run(input)
  database_dir = groonga_database_dir_path
  database = groonga_database_path
  arguments = [
    "--log-path", (database_dir + "groonga.log").to_s,
    "--query-log-path", (database_dir + "query.log").to_s,
    "--file", input,
  ]
  # -n is only passed while the database file does not exist yet.
  arguments << "-n" unless database.exist?
  sh("groonga", *arguments, database.to_s)
end

# Returns the base URL of the latest dump directory for the wiki whose
# name is +language+ followed by "wiki".
#
# language:: value interpolated in front of "wiki" (e.g. "ja").
def download_base_url(language)
  wiki_name = "#{language}wiki"
  "http://dumps.wikimedia.org/#{wiki_name}/latest"
end
Expand Down Expand Up @@ -128,5 +156,25 @@ def ja_titles_path
# Returns the file name (without any directory part) of the gzip-compressed
# jawiki all-titles file.
def ja_titles_base_name
  "jawiki-latest-all-titles.gz"
end

# Returns the relative path "config" as a Pathname.
def config_dir
  Pathname("config")
end

# Returns the path of groonga/schema.grn under +config_dir+.
def groonga_schema_path
  config_dir.join("groonga", "schema.grn")
end

# Returns the path of groonga/indexes.grn under +config_dir+.
def groonga_indexes_path
  config_dir.join("groonga", "indexes.grn")
end

# Returns the "groonga" directory under +data_dir_path+; the database and
# its log files are placed here.
def groonga_database_dir_path
  data_dir_path.join("groonga")
end

# Returns the path named "db" inside +groonga_database_dir_path+, which is
# the database path handed to the groonga command.
def groonga_database_path
  groonga_database_dir_path.join("db")
end
end
end

0 comments on commit be6801e

Please sign in to comment.