add d2lbook saving mark for consecutive vars/functions
Aston Zhang committed Jan 6, 2020
1 parent 2c53e54, commit bca6cfb
Showing 7 changed files with 66 additions and 0 deletions.
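The mark in question is the `# Saved in the d2l package for later use` comment: the build tooling copies the code block that follows each mark from the chapter notebooks into `d2l/d2l.py`. The diffs below add a mark to the second of each pair of back-to-back definitions, presumably because the exporter treats each mark as introducing a single block, so consecutive vars/functions each need their own mark. As a rough illustration only (a hypothetical sketch, not d2lbook's actual implementation), such a mark-driven exporter could work like this:

```python
# Hypothetical sketch of a mark-driven exporter (NOT d2lbook's actual code):
# every block that directly follows the saving mark is copied into the library.
MARK = '# Saved in the d2l package for later use'

def extract_marked_blocks(source_lines):
    """Collect the code blocks that follow each saving mark."""
    blocks, i = [], 0
    while i < len(source_lines):
        if source_lines[i].strip() == MARK:
            j = i + 1
            # A block is assumed to end at a blank line or at the next mark,
            # so two consecutive definitions need one mark each.
            while j < len(source_lines) and source_lines[j].strip() not in ('', MARK):
                j += 1
            blocks.append('\n'.join(source_lines[i + 1:j]))
            i = j
        else:
            i += 1
    return blocks
```

Under a rule like that, the back-to-back pairs touched below (`DATA_HUB`/`DATA_URL`, the two `kaggle_house_*` entries, and each `DATA_HUB[...]` entry followed by its reader function) only end up in `d2l/d2l.py` if both members carry the mark, which is exactly what the diffs add.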
chapter_computer-vision/semantic-segmentation-and-dataset.md (2 additions, 0 deletions)
@@ -79,6 +79,8 @@ VOC_COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
[64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128],
[0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0],
[0, 64, 128]]
# Saved in the d2l package for later use
VOC_CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
'diningtable', 'dog', 'horse', 'motorbike', 'person',
chapter_multilayer-perceptrons/kaggle-house-price.md (4 additions, 0 deletions)
@@ -35,6 +35,8 @@ import tarfile
# Saved in the d2l package for later use
DATA_HUB = dict()
# Saved in the d2l package for later use
DATA_URL = 'http://d2l-data.s3-accelerate.amazonaws.com/'
```

@@ -157,6 +159,8 @@ For convenience, we downloaded and saved the Kaggle dataset in the `DATA_URL` we
DATA_HUB['kaggle_house_train'] = (
DATA_URL+'kaggle_house_pred_train.csv',
'585e9cc93e70b39160e7921475f9bcd7d31219ce')
# Saved in the d2l package for later use
DATA_HUB['kaggle_house_test'] = (
DATA_URL+'kaggle_house_pred_test.csv',
'fa19780a7b011d9b009e8bff8e99922a8ee2eb90')
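With both Kaggle entries now marked, the `DATA_HUB`/`DATA_URL` pattern from this chapter becomes reachable from the package: each entry pairs a download URL with a SHA-1 hash, and `download` (its signature appears in the `d2l/d2l.py` diff further down) resolves a name to a local filename. A minimal usage sketch, assuming the package is imported as `d2l` as in the other notebook snippets in this commit; the `pandas` read is only illustrative and not part of this change:

```python
import d2l            # import name assumed from the notebook snippets above
import pandas as pd   # illustrative only; pandas is not touched by this commit

# download() looks the name up in DATA_HUB and returns the cached local file;
# the SHA-1 stored in each entry is presumably used to validate that copy.
train_file = d2l.download('kaggle_house_train')
test_file = d2l.download('kaggle_house_test')

train_data = pd.read_csv(train_file)
test_data = pd.read_csv(test_file)
print(train_data.shape, test_data.shape)
```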
chapter_natural-language-processing/word2vec-dataset.md (2 additions, 0 deletions)
@@ -22,6 +22,8 @@ This dataset has already been preprocessed. Each line of the dataset acts as a s
# Saved in the d2l package for later use
d2l.DATA_HUB['ptb'] = (d2l.DATA_URL+'ptb.zip',
'319d85e578af0cdc590547f26231e4e31cdf1e42')
# Saved in the d2l package for later use
def read_ptb():
data_dir = d2l.download_extract('ptb')
with open(data_dir+'ptb.train.txt') as f:
chapter_optimization/minibatch-sgd.md (2 additions, 0 deletions)
@@ -108,6 +108,8 @@ Let's have a look at how minibatches are efficiently generated from data. In the
# Saved in the d2l package for later use
d2l.DATA_HUB['airfoil'] = (d2l.DATA_URL+'airfoil_self_noise.dat',
'76e5be1548fd8222e5074cf0faae75edff8cf93f')
# Saved in the d2l package for later use
def get_data_ch11(batch_size=10, n=1500):
data = np.genfromtxt(d2l.download('airfoil'),
dtype=np.float32, delimiter='\t')
chapter_recommender-systems/movielens.md (2 additions, 0 deletions)
@@ -23,6 +23,8 @@ Then, we download the MovieLens 100k dataset and load the interactions as `DataF
d2l.DATA_HUB['ml-100k'] = (
'http://files.grouplens.org/datasets/movielens/ml-100k.zip',
'cd4dcac4241c8a4ad7badc7ca635da8a69dddb83')
# Saved in the d2l package for later use
def read_data_ml100k():
data_dir = d2l.download_extract('ml-100k')
names = ['user_id', 'item_id', 'rating', 'timestamp']
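For completeness, a short usage sketch of the newly marked MovieLens reader; `read_data_ml100k` is shown in full in the `d2l/d2l.py` diff below, and the user/item counts quoted in the comment are the documented sizes of the ML-100k dataset rather than output produced here:

```python
import d2l  # import name assumed from the notebook snippets above

# read_data_ml100k returns the interaction DataFrame plus the user/item counts.
data, num_users, num_items = d2l.read_data_ml100k()
print(num_users, num_items)   # ML-100k is documented as 943 users and 1682 items
print(data.head())
```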
chapter_recurrent-modern/machine-translation.md (2 additions, 0 deletions)
@@ -19,6 +19,8 @@ We first download a dataset that contains a set of English sentences with the co
# Saved in the d2l package for later use
d2l.DATA_HUB['fra-eng'] = (d2l.DATA_URL+'fra-eng.zip',
'94646ad1522d915e7b0f9296181140edcf86a4f5')
# Saved in the d2l package for later use
def read_data_nmt():
data_dir = d2l.download_extract('fra-eng')
with open(data_dir+'fra.txt', 'r') as f:
d2l/d2l.py (52 additions, 0 deletions)
@@ -320,6 +320,10 @@ def evaluate_loss(net, data_iter, loss):
DATA_HUB = dict()


# Defined in file: ./chapter_multilayer-perceptrons/kaggle-house-price.md
DATA_URL = 'http://d2l-data.s3-accelerate.amazonaws.com/'


# Defined in file: ./chapter_multilayer-perceptrons/kaggle-house-price.md
def download(name, cache_dir='../data'):
"""Download a file inserted into DATA_HUB, return the local filename"""
@@ -362,6 +366,12 @@ def download_all():
'585e9cc93e70b39160e7921475f9bcd7d31219ce')


# Defined in file: ./chapter_multilayer-perceptrons/kaggle-house-price.md
DATA_HUB['kaggle_house_test'] = (
DATA_URL+'kaggle_house_pred_test.csv',
'fa19780a7b011d9b009e8bff8e99922a8ee2eb90')


# Defined in file: ./chapter_deep-learning-computation/use-gpu.md
def try_gpu(i=0):
"""Return gpu(i) if exists, otherwise return cpu()."""
@@ -712,6 +722,13 @@ def begin_state(self, *args, **kwargs):
'94646ad1522d915e7b0f9296181140edcf86a4f5')


# Defined in file: ./chapter_recurrent-modern/machine-translation.md
def read_data_nmt():
data_dir = d2l.download_extract('fra-eng')
with open(data_dir+'fra.txt', 'r') as f:
return f.read()


# Defined in file: ./chapter_recurrent-modern/machine-translation.md
def preprocess_nmt(text):
text = text.replace('\u202f', ' ').replace('\xa0', ' ')
@@ -1006,6 +1023,16 @@ def show_trace_2d(f, results):
'76e5be1548fd8222e5074cf0faae75edff8cf93f')


# Defined in file: ./chapter_optimization/minibatch-sgd.md
def get_data_ch11(batch_size=10, n=1500):
data = np.genfromtxt(d2l.download('airfoil'),
dtype=np.float32, delimiter='\t')
data = (data - data.mean(axis=0)) / data.std(axis=0)
data_iter = d2l.load_array(
(data[:n, :-1], data[:n, -1]), batch_size, is_train=True)
return data_iter, data.shape[1]-1


# Defined in file: ./chapter_optimization/minibatch-sgd.md
def train_ch11(trainer_fn, states, hyperparams, data_iter,
feature_dim, num_epochs=2):
@@ -1252,6 +1279,13 @@ def read_voc_images(voc_dir, is_train=True):
[0, 64, 128]]


# Defined in file: ./chapter_computer-vision/semantic-segmentation-and-dataset.md
VOC_CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
'diningtable', 'dog', 'horse', 'motorbike', 'person',
'potted plant', 'sheep', 'sofa', 'train', 'tv/monitor']


# Defined in file: ./chapter_computer-vision/semantic-segmentation-and-dataset.md
def build_colormap2label():
"""Build an RGB color to label mapping for segmentation."""
@@ -1391,6 +1425,14 @@ def reorg_test(data_dir):
'319d85e578af0cdc590547f26231e4e31cdf1e42')


# Defined in file: ./chapter_natural-language-processing/word2vec-dataset.md
def read_ptb():
data_dir = d2l.download_extract('ptb')
with open(data_dir+'ptb.train.txt') as f:
raw_text = f.read()
return [line.split() for line in raw_text.split('\n')]


# Defined in file: ./chapter_natural-language-processing/word2vec-dataset.md
def subsampling(sentences, vocab):
# Map low frequency words into <unk>
@@ -1542,6 +1584,16 @@ def predict_sentiment(net, vocab, sentence):
'cd4dcac4241c8a4ad7badc7ca635da8a69dddb83')


# Defined in file: ./chapter_recommender-systems/movielens.md
def read_data_ml100k():
data_dir = d2l.download_extract('ml-100k')
names = ['user_id', 'item_id', 'rating', 'timestamp']
data = pd.read_csv(data_dir+'u.data', '\t', names=names, engine='python')
num_users = data.user_id.unique().shape[0]
num_items = data.item_id.unique().shape[0]
return data, num_users, num_items


# Defined in file: ./chapter_recommender-systems/movielens.md
def split_data_ml100k(data, num_users, num_items,
split_mode="random", test_ratio=0.1):
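Taken together, the 52 additions above mean the marked definitions now live in `d2l/d2l.py`, so later chapters can reach them through the package instead of redefining them. A minimal sketch, again assuming the `import d2l` convention the notebook snippets in this commit use:

```python
import d2l

# Each helper below is among the definitions this commit copies into d2l/d2l.py.
sentences = d2l.read_ptb()                    # list of token lists, one per PTB line
data_iter, feature_dim = d2l.get_data_ch11()  # airfoil minibatch iterator + feature count
raw_text = d2l.read_data_nmt()                # raw English-French corpus as one string
print(len(sentences), feature_dim, len(raw_text))
```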
