In [3]:
"""
ImageNetから画像ダウンロード
@purpose: ハリネズミとヤマアラシを識別するためのデータセットを収集
"""

using HTTP

# Create the download destination directory unless it is already there
isdir("dataset") || mkdir("dataset")

"""
    download_image(url, savedir="dataset") -> Int

Download the resource at `url` into `savedir` and return the number of bytes written.

The file is stored as `basename(url)` inside `savedir`. `0` is returned when
- a file with that name already exists in `savedir` (treated as already downloaded),
- `url` has no trailing file name (for example it ends with `/`), or
- the request or the write throws; the exception is only logged at debug level.

An `InterruptException` is rethrown so that a running batch can still be cancelled.

# Arguments
- `url::AbstractString`: address fetched with one GET request (30 s read timeout,
  no retry).
- `savedir::AbstractString`: destination directory; created on demand before writing.
"""
function download_image(url::AbstractString, savedir::AbstractString="dataset")::Int
    filename = basename(url)
    # A URL ending in "/" yields no file name; skip it before spending a request
    isempty(filename) && return 0

    path = joinpath(savedir, filename)
    # Skip files that were already downloaded
    isfile(path) && return 0

    try
        # Timeout = 30 seconds, no retries
        r = HTTP.request("GET", url; readtimeout=30, retry=false)
        # Create the directory lazily so a custom `savedir` does not fail every write
        mkpath(savedir)
        return write(path, r.body)
    catch err
        # Let the user abort the whole batch instead of just skipping one URL
        err isa InterruptException && rethrow()
        # Best-effort download: report a failure as size 0 rather than aborting
        @debug "download_image failed" url exception = err
        return 0
    end
end

# Download the ImageNet porcupine / hedgehog images
## Fetch the URL list, then retrieve the image behind each listed URL
r = HTTP.request(
    "GET",
    "http://imagenet.stanford.edu/api/text/imagenet.synset.geturls?wnid=n02346627",
)
lines = split(String(r.body), r"[\n\r]") # break the response text apart at line breaks
for url in filter(!isempty, lines) # handle every non-empty line in order
    nbytes = download_image(url)
    println("$(url): downloaded size = $(nbytes)")
end

http://farm1.static.flickr.com/99/287755717_39892adee3.jpg: downloaded size = 0
http://cdn.ugoto.com/pictures/porcupine-babies-a9d.JPG: downloaded size = 0
http://farm1.static.flickr.com/201/496754650_999d27390c.jpg: downloaded size = 55018
http://farm1.static.flickr.com/96/251632312_8cff39a0be.jpg: downloaded size = 86903
http://www.alaska-in-pictures.com/data/media/1/porcupine-prickle_1554.jpg: downloaded size = 70535
http://www.tapirback.com/tapirgal/gifts/friends/rodents/porcupine-stuffed-hand-puppet-f876a.jpg: downloaded size = 498814
http://farm3.static.flickr.com/2153/1814962418_543c4cb586.jpg: downloaded size = 65938
http://farm2.static.flickr.com/1317/1118045404_5da481d949.jpg: downloaded size = 73270
http://farm2.static.flickr.com/1093/1400266648_f81f42394a.jpg: downloaded size = 99006
http://farm2.static.flickr.com/1094/823876224_e7a3d62e2d.jpg: downloaded size = 58310
http://bp3.blogger.com/_QZ3H58jqkfQ/RqCeA65SRgI/AAAAAAAAANs/FP1Tw7vKlDs/s320/porcupine.gif: downloaded size