Merge pull request fastai#17 from hiromis/master

Adding docstrings to ImageClassifierData
dineshkumarsarangapani · Nov 9, 2017 · 61a481b · 61a481b
2 parents 47802ac + c972b75
commit 61a481b
Showing 1 changed file with 52 additions and 0 deletions.
diff --git a/fastai/dataset.py b/fastai/dataset.py
@@ -288,11 +288,41 @@ def get_ds(self, fn, trn, val, tfms, test=None, **kwargs):
 
     @classmethod
     def from_arrays(self, path, trn, val, bs=64, tfms=(None,None), classes=None, num_workers=4, test=None):
+        """ Read in images and their labels given as numpy arrays
+
+        Arguments:
+            path: a root path of the data (used for storing trained models, precomputed values, etc)
+            trn: a tuple of training data matrix and target label/classification array (e.g. `trn=(x,y)` where `x` has the
+                shape of `(5000, 784)` and `y` has the shape of `(5000,)`)
+            val: a tuple of validation data matrix and target label/classification array.
+            bs: batch size
+            tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
+            classes: passed through as-is to the `ImageClassifierData` constructor (TODO: document accepted values)
+            num_workers: number of workers
+            test: a matrix of test data (the shape should match `trn[0]`)
+
+        Returns:
+            ImageClassifierData
+        """
         datasets = self.get_ds(ArraysIndexDataset, trn, val, tfms, test=test)
         return self(path, datasets, bs, num_workers, classes=classes)
 
     @classmethod
     def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name='valid', test_name=None, num_workers=8):
+        """ Read in images and their labels given as sub-folder names
+
+        Arguments:
+            path: a root path of the data (used for storing trained models, precomputed values, etc)
+            bs: batch size
+            tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
+            trn_name: a name of the folder that contains training images.
+            val_name: a name of the folder that contains validation images.
+            test_name: a name of the folder that contains test images (optional; if not given, no test images are read).
+            num_workers: number of workers
+
+        Returns:
+            ImageClassifierData
+        """
         trn,val = [folder_source(path, o) for o in (trn_name, val_name)]
         test_fnames = read_dir(path, test_name) if test_name else None
         datasets = self.get_ds(FilesIndexArrayDataset, trn, val, tfms, path=path, test=test_fnames)
@@ -301,6 +331,28 @@ def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name='
     @classmethod
     def from_csv(self, path, folder, csv_fname, bs=64, tfms=(None,None),
                val_idxs=None, suffix='', test_name=None, continuous=False, skip_header=True, num_workers=8):
+        """ Read in images and their labels given as a CSV file.
+
+        This method should be used when training image labels are given in a CSV file as opposed to
+        sub-directories with label names.
+
+        Arguments:
+            path: a root path of the data (used for storing trained models, precomputed values, etc)
+            folder: a name of the folder in which training images are contained.
+            csv_fname: a name of the CSV file which contains target labels.
+            bs: batch size
+            tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
+            val_idxs: indexes of images to be used for validation. e.g. output of `get_cv_idxs`
+            suffix: suffix to add to image names in CSV file (sometimes CSV only contains the file name without file
+                    extension e.g. '.jpg' - in which case, you can set suffix as '.jpg')
+            test_name: a name of the folder which contains test images.
+            continuous: forwarded to `csv_source` as its `continuous` argument (TODO: document its effect)
+            skip_header: skip the first row of the CSV file.
+            num_workers: number of workers
+
+        Returns:
+            ImageClassifierData
+        """
         fnames,y,classes = csv_source(folder, csv_fname, skip_header, suffix, continuous=continuous)
         ((val_fnames,trn_fnames),(val_y,trn_y)) = split_by_idx(val_idxs, np.array(fnames), y)