Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 6 additions & 26 deletions observations/celeba.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import os
import zipfile

from observations.util import maybe_download_and_extract


def celeba(path):
"""Load the Large-scale CelebFaces Attributes (CelebA) data set
Expand All @@ -21,34 +23,12 @@ def celeba(path):
Returns:
str. It is a message advising to load data manually.
"""
import requests
from tqdm import tqdm
def _maybe_download_and_extract(path, url, drive_id):
if not os.path.exists(path):
session = requests.Session()
response = session.get(url, params={'id': drive_id}, stream=True)
token = None
for key, value in response.cookies.items():
if key.startswith('download_warning'):
token = value
if token:
response = session.get(
url, params={'id': drive_id, 'confirm': token}, stream=True)
total_size = int(response.headers.get('content-length', 0))
chunk_size = 32 * 1024
with open(destination, "wb") as f:
for chunk in tqdm(response.iter_content(chunk_size), total=total_size,
unit='B', unit_scale=True, desc=destination):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
with zipfile.ZipFile(path) as f:
f.extractall(path)
path = os.path.expanduser(path)
if not os.path.exists(os.path.join(path, 'img_align_celeba')):
save_path = os.path.join(path, 'img_align_celeba.zip')
url = 'https://docs.google.com/uc?export=download'
drive_id = '0B7EVK8r0v71pZjFTYXZWM3FlRnM'
_maybe_download_and_extract(save_path, url, drive_id)
url = 'https://docs.google.com/uc?export=download&' \
'id=0B7EVK8r0v71pZjFTYXZWM3FlRnM'
maybe_download_and_extract(path, url,
save_file_name='img_align_celeba.zip')
string = "Data set is larger than 1 GB. We recommend loading your " \
"data in batches."
return string
1 change: 1 addition & 0 deletions observations/cifar10.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np
import os
import six
import sys

from observations.util import maybe_download_and_extract
Expand Down
1 change: 1 addition & 0 deletions observations/cifar100.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np
import os
import six
import sys

from observations.util import maybe_download_and_extract
Expand Down
2 changes: 1 addition & 1 deletion observations/crabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def crabs(path):
if not os.path.exists(os.path.join(path, filename)):
url = 'https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/' \
'MASS/crabs.csv'
maybe_download_and_extract(path, url)
maybe_download_and_extract(path, url, resume=False)

species_encoder = {'B': 0, 'O': 1}
sex_encoder = {'M': 0, 'F': 1}
Expand Down
2 changes: 1 addition & 1 deletion observations/insteval.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def insteval(path):
if not os.path.exists(os.path.join(path, filename)):
url = 'https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/' \
'lme4/InstEval.csv'
maybe_download_and_extract(path, url)
maybe_download_and_extract(path, url, resume=False)

with open(os.path.join(path, filename)) as f:
iterator = csv.reader(f)
Expand Down
14 changes: 5 additions & 9 deletions observations/lsun.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,13 @@ def lsun(path, category='bedroom'):
str. It is a message advising to load data manually.
"""
def _maybe_download_and_extract(set_name):
maybe_download_and_extract(path, url + set_name)
source = os.path.join(
path, 'download.cgi?tag=latest&category={}&set={}'.format(
category, set_name))
if set_name == 'test':
target = os.path.join(path, '{}_lmdb.zip'.format(set_name))
target = '{}_lmdb.zip'.format(set_name)
else:
target = os.path.join(path, '{}_{}_lmdb.zip'.format(category, set_name))
os.rename(source, target)
with zipfile.ZipFile(target) as f:
f.extractall(path)
target = '{}_{}_lmdb.zip'.format(category, set_name)
maybe_download_and_extract(path, url + set_name,
save_file_name=target)

path = os.path.expanduser(path)
url = 'http://lsun.cs.princeton.edu/htbin/download.cgi?tag=latest' \
'&category={}&set='.format(category)
Expand Down
16 changes: 10 additions & 6 deletions observations/ptb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import print_function

import collections
import io
import os

from observations.util import maybe_download_and_extract
Expand All @@ -29,10 +30,13 @@ def ptb(path):
maybe_download_and_extract(path, url)

path = os.path.join(path, 'simple-examples/data')
with open(os.path.join(path, 'ptb.train.txt')) as f:
x_train = f.read().decode("utf-8").replace("\n", "<eos>")
with open(os.path.join(path, 'ptb.test.txt')) as f:
x_test = f.read().decode("utf-8").replace("\n", "<eos>")
with open(os.path.join(path, 'ptb.valid.txt')) as f:
x_valid = f.read().decode("utf-8").replace("\n", "<eos>")
with io.open(os.path.join(path, 'ptb.train.txt'),
encoding='utf-8') as f:
x_train = f.read().replace("\n", "<eos>")
with io.open(os.path.join(path, 'ptb.test.txt'),
encoding='utf-8') as f:
x_test = f.read().replace("\n", "<eos>")
with io.open(os.path.join(path, 'ptb.valid.txt'),
encoding='utf-8') as f:
x_valid = f.read().replace("\n", "<eos>")
return x_train, x_test, x_valid
Loading