Navigation Menu

Skip to content

Commit

Permalink
Extract downloader
Browse files Browse the repository at this point in the history
  • Loading branch information
kou committed Apr 4, 2014
1 parent 090ee78 commit 0dbc3f3
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 63 deletions.
2 changes: 1 addition & 1 deletion Rakefile
Expand Up @@ -7,4 +7,4 @@ lib_dir_path = base_dir_path + "lib"

$LOAD_PATH.unshift(lib_dir_path.to_s)

require "task/download"
require "wikipedia-search/task/download"
62 changes: 0 additions & 62 deletions lib/task/download.rb

This file was deleted.

61 changes: 61 additions & 0 deletions lib/wikipedia-search/downloader.rb
@@ -0,0 +1,61 @@
require "open-uri"

module WikipediaSearch
  # Downloads a remote file to a local path and reports progress on
  # standard output while the transfer is running.
  class Downloader
    class << self
      # Convenience wrapper that builds a downloader and runs it.
      #
      # @param url [String] URL of the file to fetch
      # @param output_path [Pathname, String] where the file is stored
      # @return [nil]
      def download(url, output_path)
        new(url, output_path).download
      end
    end

    # @param url [String] URL of the file to fetch
    # @param output_path [Pathname, String] where the file is stored
    def initialize(url, output_path)
      @url = url
      @output_path = output_path
    end

    # Fetches @url and stores its content at @output_path.
    #
    # The data is first written to "<output_path>.part" and renamed only
    # after the copy has finished, so a failed or interrupted run never
    # leaves a truncated file at the final location.
    #
    # @return [nil]
    def download
      partial_path = "#{@output_path}.part"
      begin
        # Kernel#open stopped handling URLs in Ruby 3.0. The #open method
        # that open-uri adds to URI objects works on old and new Rubies.
        URI.parse(@url).open(progress_options) do |input|
          File.open(partial_path, "wb") do |output|
            # Binary-safe copy; unlike IO#print it never appends $\ or $,.
            IO.copy_stream(input, output)
          end
        end
        File.rename(partial_path, @output_path.to_s)
        nil
      ensure
        # Only present here when something above failed.
        File.delete(partial_path) if File.exist?(partial_path)
      end
    end

    private
    # Builds the open-uri options that print a one-line progress display.
    def progress_options
      base_name = File.basename(@url)
      # Stays nil when the total size is not reported; no progress is
      # printed in that case.
      max = nil
      content_length_proc = lambda do |content_length|
        max = content_length
      end
      progress_proc = lambda do |current|
        next unless max
        # Avoid NaN for an empty body (0 / 0.0).
        percent = max.zero? ? 100.0 : (current / max.to_f) * 100
        formatted_size = "[%s/%s]" % [format_size(current), format_size(max)]
        # "\r" rewrites the same terminal line on every update.
        print("\r%s - %06.2f%% %s" % [base_name, percent, formatted_size])
        puts if current == max
      end
      {
        :content_length_proc => content_length_proc,
        :progress_proc => progress_proc,
      }
    end

    # Formats a byte count using binary units (KiB, MiB, GiB, TiB).
    # Values below 1024 are printed as a bare integer.
    def format_size(size)
      if size < 1024
        "%d" % size
      elsif size < (1024 ** 2)
        "%7.2fKiB" % (size.to_f / 1024)
      elsif size < (1024 ** 3)
        "%7.2fMiB" % (size.to_f / (1024 ** 2))
      elsif size < (1024 ** 4)
        "%7.2fGiB" % (size.to_f / (1024 ** 3))
      else
        "%.2fTiB" % (size.to_f / (1024 ** 4))
      end
    end
  end
end
18 changes: 18 additions & 0 deletions lib/wikipedia-search/task/download.rb
@@ -0,0 +1,18 @@
require "wikipedia-search/downloader"

# Rake tasks that fetch Wikipedia dump files into the "data" directory.
namespace :data do
  data_dir = Pathname.new("data")
  # Directory task used as a prerequisite by the download targets below.
  directory(data_dir.to_s)

  namespace :download do
    ja_dump_name = "jawiki-latest-pages-articles.xml.bz2"
    ja_dump_path = data_dir.join(ja_dump_name)

    # File task for the Japanese dump; depends on the data directory task.
    file(ja_dump_path.to_s => data_dir.to_s) do
      WikipediaSearch::Downloader.download(
        "http://dumps.wikimedia.org/jawiki/latest/" + ja_dump_name,
        ja_dump_path
      )
    end

    desc("Download the latest Japanese Wikipedia data.")
    task(:ja => ja_dump_path.to_s)
  end
end

0 comments on commit 0dbc3f3

Please sign in to comment.