Navigation Menu

Skip to content

Commit

Permalink
Add data:convert:ja:droonga task
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Apr 4, 2014
1 parent 6e1c73d commit a3c84f4
Showing 1 changed file with 21 additions and 5 deletions.
26 changes: 21 additions & 5 deletions lib/wikipedia-search/task.rb
Expand Up @@ -36,8 +36,7 @@ def define_download_tasks
def define_convert_tasks
namespace :convert do
namespace :ja do
desc "Convert Japanese Wikipedia data to Groonga data."
task :groonga => ja_data_path.to_s do
file ja_groonga_data_path.to_s => ja_data_path.to_s do
command_line = []
command_line << "bzcat"
command_line << Shellwords.escape(ja_data_path.to_s)
Expand All @@ -47,9 +46,22 @@ def define_convert_tasks
command_line << "--max-n-records"
command_line << "5000"
command_line << "--output"
command_line << ja_groonga_output_path.to_s
command_line << ja_groonga_data_path.to_s
sh(command_line.join(" "))
end

desc "Convert Japanese Wikipedia data to Groonga data."
task :groonga => ja_groonga_data_path.to_s

file ja_droonga_data_path.to_s => ja_groonga_data_path.to_s do
sh("grn2drn",
"--dataset", "Wikipedia",
"--output", ja_droonga_data_path.to_s,
ja_groonga_data_path.to_s)
end

desc "Convert Japanese Wikipedia data to Droonga data."
task :droonga => ja_droonga_data_path.to_s
end
end
end
Expand All @@ -66,8 +78,12 @@ def ja_data_base_name
"jawiki-latest-pages-articles.xml.bz2"
end

def ja_groonga_output_path
@ja_groonga_output_path ||= data_dir_path + "ja-data.grn"
# Location of the converted Groonga data file ("ja-data.grn") under
# data_dir_path. The value is computed on first use and cached in
# @ja_groonga_data_path for later calls.
# NOTE(review): data_dir_path is presumably a Pathname -- confirm.
def ja_groonga_data_path
  @ja_groonga_data_path ||= begin
    base_name = "ja-data.grn"
    data_dir_path + base_name
  end
end

# Location of the converted Droonga data file ("ja-data.jsons") under
# data_dir_path. The value is computed on first use and cached in
# @ja_droonga_data_path for later calls.
# NOTE(review): data_dir_path is presumably a Pathname -- confirm.
def ja_droonga_data_path
  @ja_droonga_data_path ||= begin
    base_name = "ja-data.jsons"
    data_dir_path + base_name
  end
end
end
end

0 comments on commit a3c84f4

Please sign in to comment.