Skip to content

Commit

Permalink
Merge pull request fastai#17 from hiromis/master
Browse files Browse the repository at this point in the history
Adding docstrings to ImageClassifierData
  • Loading branch information
yanneta committed Nov 9, 2017
2 parents 47802ac + c972b75 commit 61a481b
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions fastai/dataset.py
Expand Up @@ -288,11 +288,41 @@ def get_ds(self, fn, trn, val, tfms, test=None, **kwargs):

@classmethod
def from_arrays(self, path, trn, val, bs=64, tfms=(None,None), classes=None, num_workers=4, test=None):
    """ Build an ImageClassifierData from images and labels held in numpy arrays.

    The arrays are wrapped via `get_ds` using `ArraysIndexDataset`, and the
    resulting datasets are handed to the class constructor.

    Arguments:
        path: a root path of the data (used for storing trained models, precomputed values, etc)
        trn: a tuple of training data matrix and target label/classification array (e.g. `trn=(x,y)`
            where `x` has the shape of `(5000, 784)` and `y` has the shape of `(5000,)`)
        val: a tuple of validation data matrix and target label/classification array.
        bs: batch size
        tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
        classes: forwarded unchanged to the constructor as the `classes` keyword
            (TODO: semantics not documented here)
        num_workers: a number of workers
        test: a matrix of test data (the shape should match `trn[0]`)
    Returns:
        ImageClassifierData
    """
    return self(
        path,
        self.get_ds(ArraysIndexDataset, trn, val, tfms, test=test),
        bs,
        num_workers,
        classes=classes,
    )

@classmethod
def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name='valid', test_name=None, num_workers=8):
""" Read in images and their labels given as sub-folder names
Arguments:
path: a root path of the data (used for storing trained models, precomputed values, etc)
bs: batch size
tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
trn_name: a name of the folder that contains training images.
val_name: a name of the folder that contains validation images.
test_name: a name of the folder that contains test images.
num_workers: number of workers
Returns:
ImageClassifierData
"""
trn,val = [folder_source(path, o) for o in (trn_name, val_name)]
test_fnames = read_dir(path, test_name) if test_name else None
datasets = self.get_ds(FilesIndexArrayDataset, trn, val, tfms, path=path, test=test_fnames)
Expand All @@ -301,6 +331,28 @@ def from_paths(self, path, bs=64, tfms=(None,None), trn_name='train', val_name='
@classmethod
def from_csv(self, path, folder, csv_fname, bs=64, tfms=(None,None),
val_idxs=None, suffix='', test_name=None, continuous=False, skip_header=True, num_workers=8):
""" Read in images and their labels given as a CSV file.
This method should be used when training image labels are given in a CSV file as opposed to
sub-directories with label names.
Arguments:
path: a root path of the data (used for storing trained models, precomputed values, etc)
folder: a name of the folder in which training images are contained.
csv_fname: a name of the CSV file which contains target labels.
bs: batch size
tfms: transformations (for data augmentations). e.g. output of `tfms_from_model`
val_idxs: index of images to be used for validation. e.g. output of `get_cv_idxs`
suffix: suffix to add to image names in CSV file (sometimes CSV only contains the file name without file
extension e.g. '.jpg' - in which case, you can set suffix as '.jpg')
test_name: a name of the folder which contains test images.
continuous: TODO
skip_header: skip the first row of the CSV file.
num_workers: number of workers
Returns:
ImageClassifierData
"""
fnames,y,classes = csv_source(folder, csv_fname, skip_header, suffix, continuous=continuous)
((val_fnames,trn_fnames),(val_y,trn_y)) = split_by_idx(val_idxs, np.array(fnames), y)

Expand Down

0 comments on commit 61a481b

Please sign in to comment.