From f72e3860d64e6f95ec8ade9a53dc04a0b98482c6 Mon Sep 17 00:00:00 2001
From: Hiromi Suenaga <hiromis@gmail.com>
Date: Tue, 7 Nov 2017 16:13:17 +0900
Subject: [PATCH 1/3] Added docstring to a method I learned today

---
 fastai/dataset.py | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/fastai/dataset.py b/fastai/dataset.py
index 841edacff..dbcc4752c 100644
--- a/fastai/dataset.py
+++ b/fastai/dataset.py
@@ -293,6 +293,20 @@ def from_arrays(self, path, trn, val, bs=64, tfms=(None,None), classes=None, num
 
     @classmethod
     def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name='valid', test_name=None, num_workers=8):
+        """ Read in images and their labels given as subfolder names
+
+        Arguments:
+            path: a root path of the data
+            bs: batch size
+            tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
+            trn_name: a name of the folder that contains training images.
+            val_name:  a name of the folder that contains validation images.
+            test_name:  a name of the folder that contains test images.
+            num_workers: TODO
+
+        Returns:
+            ImageClassifierData
+        """
         trn,val = [folder_source(path, o) for o in (trn_name, val_name)]
         test_fnames = read_dir(path, test_name) if test_name else None
         datasets = self.get_ds(FilesIndexArrayDataset, trn, val, tfms, path=path, test=test_fnames)
@@ -301,6 +315,28 @@ def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name='
     @classmethod
     def from_csv(self, path, folder, csv_fname, bs=64, tfms=(None,None),
                val_idxs=None, suffix='', test_name=None, continuous=False, skip_header=True, num_workers=8):
+        """ Read in images and their labels given as a CSV file.
+
+        This method should be used when training image labels are given in a CSV file as opposed to
+        sub-directories with label names.
+
+        Arguments:
+            path: a root path of the data.
+            folder: a name of the folder in which training images are contained.
+            csv_fname: a name of the CSV file which contains labels.
+            bs: batch size
+            tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
+            val_idxs: index of images to be used for validation. e.g. output of `get_cv_idxs`
+            suffix: suffix to add to image names in the CSV file (sometimes the CSV contains only file names without the
+                    extension, e.g. '.jpg', in which case you can set suffix to '.jpg')
+            test_name: a name of the folder which contains test images.
+            continuous: TODO
+            skip_header: skip the first row of the CSV file.
+            num_workers: TODO
+
+        Returns:
+            ImageClassifierData
+        """
         fnames,y,classes = csv_source(folder, csv_fname, skip_header, suffix, continuous=continuous)
         ((val_fnames,trn_fnames),(val_y,trn_y)) = split_by_idx(val_idxs, np.array(fnames), y)
 

From d2a1825474ccedecfc27ca43162cac3c03b5692f Mon Sep 17 00:00:00 2001
From: Hiromi Suenaga <hiromis@gmail.com>
Date: Thu, 9 Nov 2017 10:57:31 +0900
Subject: [PATCH 2/3] Added some docstrings to ImageClassifierData

---
 fastai/dataset.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/fastai/dataset.py b/fastai/dataset.py
index dbcc4752c..34d235f34 100644
--- a/fastai/dataset.py
+++ b/fastai/dataset.py
@@ -288,12 +288,28 @@ def get_ds(self, fn, trn, val, tfms, test=None, **kwargs):
 
     @classmethod
     def from_arrays(self, path, trn, val, bs=64, tfms=(None,None), classes=None, num_workers=4, test=None):
+        """ Read in images that are given as numpy arrays
+
+        Arguments:
+            path: a root path of the data (used for storing trained models, precomputed values, etc)
+            trn: a tuple of training data matrix and target label/classification array (`trn=(x,y)` where `x` has the
+                shape of `(5000, 784)` and `y` has the shape of `(5000,)`)
+            val: a tuple of validation data matrix and target label/classification array.
+            bs: batch size
+            tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
+            classes: TODO
+            num_workers: TODO
+            test: a matrix of test data (the shape should match `trn[0]`)
+
+        Returns:
+            ImageClassifierData
+        """
         datasets = self.get_ds(ArraysIndexDataset, trn, val, tfms, test=test)
         return self(path, datasets, bs, num_workers, classes=classes)
 
     @classmethod
     def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name='valid', test_name=None, num_workers=8):
-        """ Read in images and their labels given as subfolder names
+        """ Read in images and their labels given as sub-folder names
 
         Arguments:
             path: a root path of the data
@@ -302,7 +318,7 @@ def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name='
             trn_name: a name of the folder that contains training images.
             val_name:  a name of the folder that contains validation images.
             test_name:  a name of the folder that contains test images.
-            num_workers: TODO
+            num_workers: number of workers
 
         Returns:
             ImageClassifierData
@@ -323,7 +339,7 @@ def from_csv(self, path, folder, csv_fname, bs=64, tfms=(None,None),
         Arguments:
             path: a root path of the data.
             folder: a name of the folder in which training images are contained.
-            csv_fname: a name of the CSV file which contains labels.
+            csv_fname: a name of the CSV file which contains target labels.
             bs: batch size
             tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
             val_idxs: index of images to be used for validation. e.g. output of `get_cv_idxs`
@@ -332,7 +348,7 @@ def from_csv(self, path, folder, csv_fname, bs=64, tfms=(None,None),
             test_name: a name of the folder which contains test images.
             continuous: TODO
             skip_header: skip the first row of the CSV file.
-            num_workers: TODO
+            num_workers: number of workers
 
         Returns:
             ImageClassifierData

From c972b7574223b4b0a5ef8c699a1ac43f038ddd94 Mon Sep 17 00:00:00 2001
From: Hiromi Suenaga <hiromis@gmail.com>
Date: Thu, 9 Nov 2017 11:08:00 +0900
Subject: [PATCH 3/3] Fixed some typos

---
 fastai/dataset.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fastai/dataset.py b/fastai/dataset.py
index 8d2c930be..ed13c6ebe 100644
--- a/fastai/dataset.py
+++ b/fastai/dataset.py
@@ -288,17 +288,17 @@ def get_ds(self, fn, trn, val, tfms, test=None, **kwargs):
 
     @classmethod
     def from_arrays(self, path, trn, val, bs=64, tfms=(None,None), classes=None, num_workers=4, test=None):
-        """ Read in images that are given as numpy arrays
+        """ Read in images and their labels given as numpy arrays
 
         Arguments:
             path: a root path of the data (used for storing trained models, precomputed values, etc)
-            trn: a tuple of training data matrix and target label/classification array (`trn=(x,y)` where `x` has the
+            trn: a tuple of training data matrix and target label/classification array (e.g. `trn=(x,y)` where `x` has the
                 shape of `(5000, 784)` and `y` has the shape of `(5000,)`)
             val: a tuple of validation data matrix and target label/classification array.
             bs: batch size
             tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
             classes: TODO
-            num_workers: TODO
+            num_workers: number of workers
             test: a matrix of test data (the shape should match `trn[0]`)
 
         Returns:
@@ -312,7 +312,7 @@ def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name='
         """ Read in images and their labels given as sub-folder names
 
         Arguments:
-            path: a root path of the data
+            path: a root path of the data (used for storing trained models, precomputed values, etc)
             bs: batch size
             tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
             trn_name: a name of the folder that contains training images.
@@ -337,7 +337,7 @@ def from_csv(self, path, folder, csv_fname, bs=64, tfms=(None,None),
         sub-directories with label names.
 
         Arguments:
-            path: a root path of the data.
+            path: a root path of the data (used for storing trained models, precomputed values, etc)
             folder: a name of the folder in which training images are contained.
             csv_fname: a name of the CSV file which contains target labels.
             bs: batch size