From f72e3860d64e6f95ec8ade9a53dc04a0b98482c6 Mon Sep 17 00:00:00 2001 From: Hiromi Suenaga Date: Tue, 7 Nov 2017 16:13:17 +0900 Subject: [PATCH 1/3] Added docstring to a method I learned today --- fastai/dataset.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/fastai/dataset.py b/fastai/dataset.py index 841edacff..dbcc4752c 100644 --- a/fastai/dataset.py +++ b/fastai/dataset.py @@ -293,6 +293,20 @@ def from_arrays(self, path, trn, val, bs=64, tfms=(None,None), classes=None, num @classmethod def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name='valid', test_name=None, num_workers=8): + """ Read in images and their labels given as subfolder names + + Arguments: + path: a root path of the data + bs: batch size + tfms: transformations (for data augmentations). e.g. output of `tfms_from_model` + trn_name: a name of the folder that contains training images. + val_name: a name of the folder that contains validation images. + test_name: a name of the folder that contains test images. + num_workers: TODO + + Returns: + ImageClassifierData + """ trn,val = [folder_source(path, o) for o in (trn_name, val_name)] test_fnames = read_dir(path, test_name) if test_name else None datasets = self.get_ds(FilesIndexArrayDataset, trn, val, tfms, path=path, test=test_fnames) @@ -301,6 +315,28 @@ def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name=' @classmethod def from_csv(self, path, folder, csv_fname, bs=64, tfms=(None,None), val_idxs=None, suffix='', test_name=None, continuous=False, skip_header=True, num_workers=8): + """ Read in images and their labels given as a CSV file. + + This method should be used when training image labels are given in a CSV file as opposed to + sub-directories with label names. + + Arguments: + path: a root path of the data. + folder: a name of the folder in which training images are contained. + csv_fname: a name of the CSV file which contains labels. 
+ bs: batch size + tfms: transformations (for data augmentations). e.g. output of `tfms_from_model` + val_idxs: index of images to be used for validation. e.g. output of `get_cv_idxs` + suffix: suffix to add to image names in CSV file (sometimes CSV only contains the file name without file + extension e.g. '.jpg' - in which case, you can set suffix as '.jpg') + test_name: a name of the folder which contains test images. + continuous: TODO + skip_header: skip the first row of the CSV file. + num_workers: TODO + + Returns: + ImageClassifierData + """ fnames,y,classes = csv_source(folder, csv_fname, skip_header, suffix, continuous=continuous) ((val_fnames,trn_fnames),(val_y,trn_y)) = split_by_idx(val_idxs, np.array(fnames), y) From d2a1825474ccedecfc27ca43162cac3c03b5692f Mon Sep 17 00:00:00 2001 From: Hiromi Suenaga Date: Thu, 9 Nov 2017 10:57:31 +0900 Subject: [PATCH 2/3] Added some docstrings to ImageClassifierData --- fastai/dataset.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/fastai/dataset.py b/fastai/dataset.py index dbcc4752c..34d235f34 100644 --- a/fastai/dataset.py +++ b/fastai/dataset.py @@ -288,12 +288,28 @@ def get_ds(self, fn, trn, val, tfms, test=None, **kwargs): @classmethod def from_arrays(self, path, trn, val, bs=64, tfms=(None,None), classes=None, num_workers=4, test=None): + """ Read in images that are given as numpy arrays + + Arguments: + path: a root path of the data (used for storing trained models, precomputed values, etc) + trn: a tuple of training data matrix and target label/classification array (`trn=(x,y)` where `x` has the + shape of `(5000, 784)` and `y` has the shape of `(5000,)`) + val: a tuple of validation data matrix and target label/classification array. + bs: batch size + tfms: transformations (for data augmentations). e.g. 
output of `tfms_from_model` + classes: TODO + num_workers: TODO + test: a matrix of test data (the shape should match `trn[0]`) + + Returns: + ImageClassifierData + """ datasets = self.get_ds(ArraysIndexDataset, trn, val, tfms, test=test) return self(path, datasets, bs, num_workers, classes=classes) @classmethod def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name='valid', test_name=None, num_workers=8): - """ Read in images and their labels given as subfolder names + """ Read in images and their labels given as sub-folder names Arguments: path: a root path of the data @@ -302,7 +318,7 @@ def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name=' trn_name: a name of the folder that contains training images. val_name: a name of the folder that contains validation images. test_name: a name of the folder that contains test images. - num_workers: TODO + num_workers: number of workers Returns: ImageClassifierData @@ -323,7 +339,7 @@ def from_csv(self, path, folder, csv_fname, bs=64, tfms=(None,None), Arguments: path: a root path of the data. folder: a name of the folder in which training images are contained. - csv_fname: a name of the CSV file which contains labels. + csv_fname: a name of the CSV file which contains target labels. bs: batch size tfms: transformations (for data augmentations). e.g. output of `tfms_from_model` val_idxs: index of images to be used for validation. e.g. output of `get_cv_idxs` @@ -332,7 +348,7 @@ def from_csv(self, path, folder, csv_fname, bs=64, tfms=(None,None), test_name: a name of the folder which contains test images. continuous: TODO skip_header: skip the first row of the CSV file. 
- num_workers: TODO + num_workers: number of workers Returns: ImageClassifierData From c972b7574223b4b0a5ef8c699a1ac43f038ddd94 Mon Sep 17 00:00:00 2001 From: Hiromi Suenaga Date: Thu, 9 Nov 2017 11:08:00 +0900 Subject: [PATCH 3/3] Fixed some typos --- fastai/dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fastai/dataset.py b/fastai/dataset.py index 8d2c930be..ed13c6ebe 100644 --- a/fastai/dataset.py +++ b/fastai/dataset.py @@ -288,17 +288,17 @@ def get_ds(self, fn, trn, val, tfms, test=None, **kwargs): @classmethod def from_arrays(self, path, trn, val, bs=64, tfms=(None,None), classes=None, num_workers=4, test=None): - """ Read in images that are given as numpy arrays + """ Read in images and their labels given as numpy arrays Arguments: path: a root path of the data (used for storing trained models, precomputed values, etc) - trn: a tuple of training data matrix and target label/classification array (`trn=(x,y)` where `x` has the + trn: a tuple of training data matrix and target label/classification array (e.g. `trn=(x,y)` where `x` has the shape of `(5000, 784)` and `y` has the shape of `(5000,)`) val: a tuple of validation data matrix and target label/classification array. bs: batch size tfms: transformations (for data augmentations). e.g. output of `tfms_from_model` classes: TODO - num_workers: TODO + num_workers: a number of workers test: a matrix of test data (the shape should match `trn[0]`) Returns: @@ -312,7 +312,7 @@ def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name=' """ Read in images and their labels given as sub-folder names Arguments: - path: a root path of the data + path: a root path of the data (used for storing trained models, precomputed values, etc) bs: batch size tfms: transformations (for data augmentations). e.g. output of `tfms_from_model` trn_name: a name of the folder that contains training images. 
@@ -337,7 +337,7 @@ def from_csv(self, path, folder, csv_fname, bs=64, tfms=(None,None), sub-directories with label names. Arguments: - path: a root path of the data. + path: a root path of the data (used for storing trained models, precomputed values, etc) folder: a name of the folder in which training images are contained. csv_fname: a name of the CSV file which contains target labels. bs: batch size