# data_download

> Utilities for downloading the datasets used in this project

In [1]:
#| default_exp data_download

In [2]:
#| hide
from nbdev.showdoc import *

In [3]:
#| export
import math
import os
from pathlib import Path
from urllib.parse import unquote, urlsplit

import requests
from tqdm.auto import tqdm

def download_file(url, filename=None, dirname=None, chunk_size=16384):
    """Download the file at `url` to disk, showing a tqdm progress bar.

    url - URL of the file to download
    filename[optional] - name to save the file as; defaults to the last
        non-empty segment of the URL path (query string and fragment ignored)
    dirname[optional] - directory in which to save `filename` (str or Path);
        created, including parents, if it does not exist
    chunk_size[optional] - number of bytes requested per streamed chunk

    Returns None. If the destination already exists, nothing is downloaded.
    Raises ValueError if `filename` is omitted and none can be derived from
    `url`, and requests.HTTPError (via `raise_for_status`) on an HTTP error.
    """
    if filename is None:
        # URLs are always '/'-separated, whatever the local os.sep is, and the
        # query string / fragment are not part of the file name.
        filename = unquote(urlsplit(url).path).rstrip('/').split('/')[-1]
        if filename in ('', '.', '..'):
            raise ValueError(
                f"Cannot derive a file name from {url!r}; pass `filename` explicitly"
            )

    target = Path(filename)
    if dirname is not None:
        Path(dirname).mkdir(parents=True, exist_ok=True)
        # Path join also accepts a Path `dirname`, unlike string concatenation.
        target = Path(dirname) / filename

    if target.exists():
        print(f"{target} already exists. Skipping")
        return

    # Stream into a sibling ".part" file and rename only on success, so an
    # interrupted download is never mistaken for a complete one on the next call.
    partial = target.with_name(target.name + '.part')
    try:
        with requests.get(url, stream=True) as r:
            r.raise_for_status()
            # Progress total in chunks, taken from the response actually being
            # streamed (after any redirects); None when the size is unknown.
            total = None
            length = r.headers.get('content-length')
            if length is not None:
                try:
                    total = math.ceil(int(length) / chunk_size)
                except ValueError:
                    total = None  # malformed header: fall back to an open-ended bar
            with open(partial, 'wb') as f:
                for chunk in tqdm(r.iter_content(chunk_size=chunk_size), total=total):
                    f.write(chunk)
        os.replace(partial, target)
    finally:
        # Only present here if the download failed or was interrupted.
        if partial.exists():
            partial.unlink()

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Stanford Dogs dataset archives: images, annotations, and split lists.
# Run the cells below to download each one.
# NOTE(review): these are plain-http URLs — confirm whether the host also serves https.
STANFORD_DOGS_IMAGE_URL = 'http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar'
STANFORD_DOGS_ANNOTATIONS_URL = 'http://vision.stanford.edu/aditya86/ImageNetDogs/annotation.tar'
STANFORD_DOGS_SPLITS_URL = 'http://vision.stanford.edu/aditya86/ImageNetDogs/lists.tar'

In [7]:
# Fetch the Stanford Dogs image archive; the file name is taken from the URL.
download_file(url=STANFORD_DOGS_IMAGE_URL, dirname='../stanford_dogs')

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48437/48437 [01:55<00:00, 420.85it/s]


In [8]:
# Fetch the Stanford Dogs annotation archive into the same directory.
download_file(url=STANFORD_DOGS_ANNOTATIONS_URL, dirname='../stanford_dogs')

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1334/1334 [00:06<00:00, 213.70it/s]


In [9]:
# Fetch the Stanford Dogs split-list archive into the same directory.
download_file(url=STANFORD_DOGS_SPLITS_URL, dirname='../stanford_dogs')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:01<00:00, 27.19it/s]


In [None]:
# Tsinghua Dogs dataset: low-resolution images and their annotations.
# Run the cells below to download each one.
# The images URL ends in '/?dl=1', so it carries no usable file name of its own.
TSINGHUA_DOGS_LOW_RES_IMAGES_URL = 'https://cloud.tsinghua.edu.cn/f/80013ef29c5f42728fc8/?dl=1'
TSINGHUA_DOGS_LOW_RES_ANNOTATIONS_URL = 'https://cg.cs.tsinghua.edu.cn/ThuDogs/low-annotations.zip'

In [None]:
# The share link has no file name in it, so the target name is given explicitly.
download_file(
    url=TSINGHUA_DOGS_LOW_RES_IMAGES_URL,
    filename='low-resolution.zip',
    dirname='../tsinghua_dogs',
)

In [None]:
# Save the annotations next to the Tsinghua images: every other download in this
# notebook targets a '../<dataset>' directory, so the relative 'tsinghua_dogs'
# path would have put this archive in a different place from the images.
download_file(TSINGHUA_DOGS_LOW_RES_ANNOTATIONS_URL, dirname='../tsinghua_dogs')

In [4]:
#| hide
# Export the cells marked `#| export` in this notebook to the package module.
import nbdev

nbdev.nbdev_export()