
Commit

docs
haifeng-jin committed Jul 1, 2018
1 parent bbabbe9 commit aaf24ae
Showing 12 changed files with 123 additions and 376 deletions.
137 changes: 84 additions & 53 deletions autokeras/classifier.py
@@ -44,10 +44,13 @@ def run_searcher_once(x_train, y_train, x_test, y_test, path):


def read_csv_file(csv_file_path):
"""
Read the csv file and return two separate lists containing the image names and their labels
:param csv_file_path: Path to the CSV file.
:return: img_file_names list containing the image names and img_labels list containing their respective labels
"""Read the csv file and return two separate lists containing the image names and their labels.
Args:
csv_file_path: Path to the CSV file.
Returns:
img_file_names list containing the image names and img_labels list containing their respective labels.
"""
img_file_names = []
img_labels = []
@@ -61,11 +64,14 @@ def read_csv_file(csv_file_path):


def read_images(img_file_names, images_dir_path):
"""
Read the images from the path and return them as a numpy.ndarray instance
:param img_file_names: List containing the image names
:param images_dir_path: Path to the directory containing the images
:return: A numpy.ndarray instance containing the training data.
"""Read the images from the path and return them as a numpy.ndarray instance.
Args:
img_file_names: List containing the image names.
images_dir_path: Path to the directory containing the images.
Returns:
A numpy.ndarray instance containing the training data.
"""
x_train = []
if os.path.isdir(images_dir_path):
@@ -83,27 +89,57 @@ def read_images(img_file_names, images_dir_path):
return np.asanyarray(x_train)


class ClassifierBase:
"""Base class of Classifier.
def load_image_dataset(csv_file_path, images_path):
"""Load images from the files and labels from a csv file.
Second, the dataset is a set of images and the labels are in a CSV file.
The CSV file should contain two columns whose names are 'File Name' and 'Label'.
The file names in the first column should match the file names of the images with extensions,
e.g., .jpg, .png.
The path to the CSV file should be passed through the csv_file_path.
The path to the directory containing all the images should be passed through image_path.
Args:
csv_file_path: CVS file path.
images_path: Path where images exist.
Returns:
x: Four dimensional numpy.ndarray. The channel dimension is the last dimension.
y: The labels.
"""
img_file_name, y = read_csv_file(csv_file_path)
x = read_images(img_file_name, images_path)
return x, y
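As a usage sketch (the CSV path, image directory, and the autokeras.classifier module path are assumptions for illustration, not taken from this commit), the new helper could be called like this:

# Hypothetical files: 'train/label.csv' with columns 'File Name' and 'Label',
# and 'train/images/' holding the image files named in the first column.
from autokeras.classifier import load_image_dataset

x, y = load_image_dataset(csv_file_path='train/label.csv',
                          images_path='train/images')
print(x.shape)  # four-dimensional numpy.ndarray, channel dimension last
print(len(y))   # one label per image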

ClassifierBase is the base class of all classifier classes, classifier is used
to train and predict data.

class ImageClassifier:
"""The image classifier class.
It is used for image classification. It searches convolutional neural network architectures
for the best configuration for the dataset.
Attributes:
path: A path to the directory to save the classifier.
y_encoder: An instance of OneHotEncoder for y_train (array of categorical labels).
verbose: A boolean value indicating the verbosity mode.
searcher: An instance of one of the subclasses of Searcher. It searches different
searcher: An instance of BayesianSearcher. It searches different
neural architectures to find the best model.
searcher_type: The type of searcher to use. It must be 'climb' or 'random'.
path: A path to the directory to save the classifier.
searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
"""

def __init__(self, verbose=False, searcher_type=None, path=constant.DEFAULT_SAVE_PATH, resume=False,
def __init__(self, verbose=False, path=constant.DEFAULT_SAVE_PATH, resume=False,
searcher_args=None):
"""Initialize the instance.
The classifier will be loaded from file if the directory in 'path' has a saved classifier.
The classifier will be loaded from the files in 'path' if parameter 'resume' is True.
Otherwise, it will create a new one.
Args:
verbose: A boolean of whether the search process will be printed to stdout.
path: A string. The path to a directory, where the intermediate results are saved.
resume: A boolean. If True, the classifier will continue the previous work saved in 'path'.
Otherwise, the classifier will start a new search.
"""
if searcher_args is None:
searcher_args = {}
@@ -116,35 +152,27 @@ def __init__(self, verbose=False, searcher_type=None, path=constant.DEFAULT_SAVE
self.y_encoder = None
self.verbose = verbose
self.searcher = False
self.searcher_type = searcher_type
self.path = path
self.searcher_args = searcher_args
ensure_dir(path)
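A construction sketch (the save path below is arbitrary): intermediate results are written under 'path', and resume=True would continue a search previously saved there.

from autokeras.classifier import ImageClassifier

# Start a fresh search; intermediate results are saved under the given directory.
clf = ImageClassifier(verbose=True, path='/tmp/autokeras/', resume=False)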

def fit(self, x_train=None, y_train=None, csv_file_path=None, images_path=None, time_limit=None):
"""Find the best model.
def fit(self, x_train=None, y_train=None, time_limit=None):
"""Find the best neural architecture and train it.
Format the input, and split the dataset into training and testing sets,
save the classifier and find the best model.
Based on the given dataset, the function will find the best neural architecture for it.
The dataset is in numpy.ndarray format.
So the training data should be passed through x_train and y_train.
Args:
time_limit:
x_train: A numpy.ndarray instance containing the training data.
y_train: A numpy.ndarray instance containing the labels of the training data.
csv_file_path: CSV file path
images_path: Path where images exist
time_limit: The time limit for the search in seconds.
"""

if y_train is None:
y_train = []
if x_train is None:
x_train = []
if csv_file_path is not None:
img_file_name, y_train = read_csv_file(csv_file_path)
if images_path is not None:
x_train = read_images(img_file_name, images_path)
else:
raise ValueError('Directory containing images is not provided')

x_train = np.array(x_train)
y_train = np.array(y_train).flatten()
@@ -166,7 +194,7 @@ def fit(self, x_train=None, y_train=None, csv_file_path=None, images_path=None,
self.searcher_args['input_shape'] = input_shape
self.searcher_args['path'] = self.path
self.searcher_args['verbose'] = self.verbose
searcher = self._get_searcher_class()(**self.searcher_args)
searcher = BayesianSearcher(**self.searcher_args)
self.save_searcher(searcher)
self.searcher = True
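Continuing the sketch above with the data returned by load_image_dataset, fit now takes only the numpy arrays plus an optional time limit (the one-hour value is an arbitrary choice):

# Search for the best architecture, then predict on a few samples.
clf.fit(x, y, time_limit=60 * 60)  # stop the search after roughly one hour
predictions = clf.predict(x[:10])
print(predictions)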

@@ -190,6 +218,9 @@ def predict(self, x_test):
Args:
x_test: A numpy.ndarray instance containing the testing data.
Returns:
A numpy.ndarray containing the results.
"""
if constant.LIMIT_MEMORY:
config = tf.ConfigProto()
@@ -207,10 +238,6 @@ def summary(self):
model = self.load_searcher().load_best_model()
model.summary()

def _get_searcher_class(self):
"""Return searcher class based on the 'searcher_type'."""
return BayesianSearcher

def evaluate(self, x_test, y_test):
"""Return the accuracy score between predict value and test_y."""
y_predict = self.predict(x_test)
@@ -223,6 +250,16 @@ def load_searcher(self):
return pickle_from_file(os.path.join(self.path, 'searcher'))

def final_fit(self, x_train, y_train, x_test, y_test, trainer_args=None, retrain=False):
"""Final training after found the best architecture.
Args:
x_train: An numpy.ndarray of training data.
y_train: An numpy.ndarray of training targets.
x_test: An numpy.ndarray of testing data.
y_test: An numpy.ndarray of testing targets.
trainer_args: A dictionary containing the parameters of the ModelTrainer constructure.
retrain: A boolean of whether reinitialize the weights of the model.
"""
if trainer_args is None:
trainer_args = {}
y_train = self.y_encoder.transform(y_train)
@@ -234,26 +271,20 @@ def final_fit(self, x_train, y_train, x_test, y_test, trainer_args=None, retrain
_, _1, graph = train((graph, x_train, y_train, x_test, y_test, trainer_args, None))
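Continuing the sketch, final_fit retrains the best architecture found by the search; the split below is a placeholder, not a recommended protocol:

# Placeholder split purely for illustration; use a proper held-out set in practice.
x_train_part, y_train_part = x[:-100], y[:-100]
x_test_part, y_test_part = x[-100:], y[-100:]
clf.final_fit(x_train_part, y_train_part, x_test_part, y_test_part, retrain=True)
print(clf.evaluate(x_test_part, y_test_part))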

def export_keras_model(self, path, model_id=None):
"""Export the searched model as a Keras saved model.
Args:
path: A string. The path to the file to save.
model_id: An integer. If not provided, the function will export the best model.
"""
if model_id is None:
model_id = self.get_best_model_id()
graph = self.load_searcher().load_model_by_id(model_id)
graph.produce_model().save(path)

def get_best_model_id(self):
"""
Returns:
An integer. The best model id.
"""
return self.load_searcher().get_best_model_id()
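For example (the output file name is arbitrary):

# Export the best searched model as a Keras saved model file.
best_id = clf.get_best_model_id()
clf.export_keras_model('best_model.h5', model_id=best_id)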


class ImageClassifier(ClassifierBase):
"""Image classifier class inherited from ClassifierBase class.
It is used for image classification. It searches convolutional neural network architectures
for the best configuration for the dataset.
"""

def __init__(self, verbose=True, searcher_type='bayesian', path=constant.DEFAULT_SAVE_PATH, resume=False,
searcher_args=None):
super().__init__(verbose,
searcher_type,
path,
resume=resume,
searcher_args=searcher_args)
19 changes: 0 additions & 19 deletions autokeras/generator.py
@@ -14,18 +14,6 @@


class ClassifierGenerator:
"""The base class of classifier generators.
ClassifierGenerator is the base class of all classifier generator classes.
It is used for generating classifier models.
Attributes:
n_classes: Number of classes in the input data.
input_shape: A tuple of integers containing the size of each dimension of the input data,
excluding the dimension of number of training examples. The length of the tuple should
be between two and four, inclusive.
"""

def __init__(self, n_classes, input_shape):
self.n_classes = n_classes
self.input_shape = input_shape
@@ -35,24 +23,19 @@ def __init__(self, n_classes, input_shape):
raise ValueError('The input dimension is too low.')

def _get_pool_layer_func(self):
"""Return MaxPooling function based on the dimension of input shape."""
pool_funcs = [MaxPooling1D, MaxPooling2D, MaxPooling3D]
return pool_funcs[len(self.input_shape) - 2]

def _get_shape(self, dim_size):
"""Return filter shape tuple based on the dimension of input shape."""
temp_list = [(dim_size,), (dim_size, dim_size), (dim_size, dim_size, dim_size)]
return temp_list[len(self.input_shape) - 2]
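A small sketch of how these helpers select layers from the input rank (the instantiation and the autokeras.generator module path are illustrative assumptions):

from autokeras.generator import ClassifierGenerator

# For a 2-D image input such as (28, 28, 1), len(input_shape) - 2 == 1, so the
# helpers pick MaxPooling2D and a (3, 3) kernel shape respectively.
gen = ClassifierGenerator(n_classes=10, input_shape=(28, 28, 1))
pool_layer = gen._get_pool_layer_func()  # MaxPooling2D
kernel_shape = gen._get_shape(3)         # (3, 3)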


class DefaultClassifierGenerator(ClassifierGenerator):
"""A classifier generator always generates models with the same default architecture and configuration."""

def __init__(self, n_classes, input_shape):
super().__init__(n_classes, input_shape)

def generate(self, model_len=constant.MODEL_LEN, model_width=constant.MODEL_WIDTH):
"""Return the default classifier model that has been compiled."""
pool = self._get_pool_layer_func()
conv = get_conv_layer_func(len(self._get_shape(3)))
ave = get_ave_layer_func(len(self._get_shape(3)))
@@ -80,8 +63,6 @@ def generate(self, model_len=constant.MODEL_LEN, model_width=constant.MODEL_WIDT


class RandomConvClassifierGenerator(ClassifierGenerator):
"""A classifier generator that generates random convolutional neural networks."""

def __init__(self, n_classes, input_shape):
super().__init__(n_classes, input_shape)

