
Commit

docs
haifeng-jin committed Jul 1, 2018
1 parent bbabbe9 commit aaf24ae
Showing 12 changed files with 123 additions and 376 deletions.
137 changes: 84 additions & 53 deletions autokeras/classifier.py
@@ -44,10 +44,13 @@ def run_searcher_once(x_train, y_train, x_test, y_test, path):


def read_csv_file(csv_file_path):
"""
Read the csv file and return two separate lists containing the image names and their labels
:param csv_file_path: Path to the CSV file.
:return: img_file_names list containing the image names and img_labels list containing their respective labels
"""Read the csv file and return two separate lists containing the image names and their labels.
Args:
csv_file_path: Path to the CSV file.
Returns:
img_file_names list containing the image names and img_labels list containing their respective labels.
"""
img_file_names = []
img_labels = []
@@ -61,11 +64,14 @@ def read_csv_file(csv_file_path):


def read_images(img_file_names, images_dir_path):
"""
Read the images from the path and return them as a numpy.ndarray instance
:param img_file_names: List containing the image names
:param images_dir_path: Path to the directory containing the images
:return: A numpy.ndarray instance containing the training data.
"""Read the images from the path and return them as a numpy.ndarray instance.
Args:
img_file_names: List containing the image names.
images_dir_path: Path to the directory containing the images.
Returns:
A numpy.ndarray instance containing the training data.
"""
x_train = []
if os.path.isdir(images_dir_path):
@@ -83,27 +89,57 @@ def read_images(img_file_names, images_dir_path):
return np.asanyarray(x_train)


class ClassifierBase:
"""Base class of Classifier.
def load_image_dataset(csv_file_path, images_path):
"""Load images from the files and labels from a csv file.
Second, the dataset is a set of images and the labels are in a CSV file.
The CSV file should contain two columns whose names are 'File Name' and 'Label'.
The file names in the first column should match the file names of the images with extensions,
e.g., .jpg, .png.
The path to the CSV file should be passed through the csv_file_path.
The path to the directory containing all the images should be passed through image_path.
Args:
csv_file_path: CVS file path.
images_path: Path where images exist.
Returns:
x: Four dimensional numpy.ndarray. The channel dimension is the last dimension.
y: The labels.
"""
img_file_name, y = read_csv_file(csv_file_path)
x = read_images(img_file_name, images_path)
return x, y
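As a usage sketch (the CSV path, image directory, and the autokeras.classifier module path are assumptions for illustration, not taken from this commit), the new helper could be called like this:

# Hypothetical files: 'train/label.csv' with columns 'File Name' and 'Label',
# and 'train/images/' holding the image files named in the first column.
from autokeras.classifier import load_image_dataset

x, y = load_image_dataset(csv_file_path='train/label.csv',
                          images_path='train/images')
print(x.shape)  # four-dimensional numpy.ndarray, channel dimension last
print(len(y))   # one label per image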

ClassifierBase is the base class of all classifier classes, classifier is used
to train and predict data.

class ImageClassifier:
"""The image classifier class.
It is used for image classification. It searches convolutional neural network architectures
for the best configuration for the dataset.
Attributes:
path: A path to the directory to save the classifier.
y_encoder: An instance of OneHotEncoder for y_train (array of categorical labels).
verbose: A boolean value indicating the verbosity mode.
searcher: An instance of one of the subclasses of Searcher. It searches different
searcher: An instance of BayesianSearcher. It searches different
neural architectures to find the best model.
searcher_type: The type of searcher to use. It must be 'climb' or 'random'.
path: A path to the directory to save the classifier.
searcher_args: A dictionary containing the parameters for the searcher's __init__ function.
"""

def __init__(self, verbose=False, searcher_type=None, path=constant.DEFAULT_SAVE_PATH, resume=False,
def __init__(self, verbose=False, path=constant.DEFAULT_SAVE_PATH, resume=False,
searcher_args=None):
"""Initialize the instance.
The classifier will be loaded from file if the directory in 'path' has a saved classifier.
The classifier will be loaded from the files in 'path' if parameter 'resume' is True.
Otherwise, it will create a new one.
Args:
verbose: A boolean of whether the search process will be printed to stdout.
path: A string. The path to a directory, where the intermediate results are saved.
resume: A boolean. If True, the classifier will continue the previous work saved in 'path'.
Otherwise, the classifier will start a new search.
"""
if searcher_args is None:
searcher_args = {}
@@ -116,35 +152,27 @@ def __init__(self, verbose=False, searcher_type=None, path=constant.DEFAULT_SAVE
self.y_encoder = None
self.verbose = verbose
self.searcher = False
self.searcher_type = searcher_type
self.path = path
self.searcher_args = searcher_args
ensure_dir(path)
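A construction sketch (the save path below is arbitrary): intermediate results are written under 'path', and resume=True would continue a search previously saved there.

from autokeras.classifier import ImageClassifier

# Start a fresh search; intermediate results are saved under the given directory.
clf = ImageClassifier(verbose=True, path='/tmp/autokeras/', resume=False)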

def fit(self, x_train=None, y_train=None, csv_file_path=None, images_path=None, time_limit=None):
"""Find the best model.
def fit(self, x_train=None, y_train=None, time_limit=None):
"""Find the best neural architecture and train it.
Format the input, and split the dataset into training and testing sets,
save the classifier and find the best model.
Based on the given dataset, the function will find the best neural architecture for it.
The dataset is in numpy.ndarray format.
So the training data should be passed through x_train and y_train.
Args:
time_limit:
x_train: A numpy.ndarray instance containing the training data.
y_train: A numpy.ndarray instance containing the labels of the training data.
csv_file_path: CSV file path
images_path: Path where images exist
time_limit: The time limit for the search in seconds.
"""

if y_train is None:
y_train = []
if x_train is None:
x_train = []
if csv_file_path is not None:
img_file_name, y_train = read_csv_file(csv_file_path)
if images_path is not None:
x_train = read_images(img_file_name, images_path)
else:
raise ValueError('Directory containing images is not provided')

x_train = np.array(x_train)
y_train = np.array(y_train).flatten()
@@ -166,7 +194,7 @@ def fit(self, x_train=None, y_train=None, csv_file_path=None, images_path=None,
self.searcher_args['input_shape'] = input_shape
self.searcher_args['path'] = self.path
self.searcher_args['verbose'] = self.verbose
searcher = self._get_searcher_class()(**self.searcher_args)
searcher = BayesianSearcher(**self.searcher_args)
self.save_searcher(searcher)
self.searcher = True
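Continuing the sketch above with the data returned by load_image_dataset, fit now takes only the numpy arrays plus an optional time limit (the one-hour value is an arbitrary choice):

# Search for the best architecture, then predict on a few samples.
clf.fit(x, y, time_limit=60 * 60)  # stop the search after roughly one hour
predictions = clf.predict(x[:10])
print(predictions)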

@@ -190,6 +218,9 @@ def predict(self, x_test):
Args:
x_test: A numpy.ndarray instance containing the testing data.
Returns:
A numpy.ndarray containing the results.
"""
if constant.LIMIT_MEMORY:
config = tf.ConfigProto()
@@ -207,10 +238,6 @@ def summary(self):
model = self.load_searcher().load_best_model()
model.summary()

def _get_searcher_class(self):
"""Return searcher class based on the 'searcher_type'."""
return BayesianSearcher

def evaluate(self, x_test, y_test):
"""Return the accuracy score between predict value and test_y."""
y_predict = self.predict(x_test)
@@ -223,6 +250,16 @@ def load_searcher(self):
return pickle_from_file(os.path.join(self.path, 'searcher'))

def final_fit(self, x_train, y_train, x_test, y_test, trainer_args=None, retrain=False):
"""Final training after found the best architecture.
Args:
x_train: An numpy.ndarray of training data.
y_train: An numpy.ndarray of training targets.
x_test: An numpy.ndarray of testing data.
y_test: An numpy.ndarray of testing targets.
trainer_args: A dictionary containing the parameters of the ModelTrainer constructure.
retrain: A boolean of whether reinitialize the weights of the model.
"""
if trainer_args is None:
trainer_args = {}
y_train = self.y_encoder.transform(y_train)
@@ -234,26 +271,20 @@ def final_fit(self, x_train, y_train, x_test, y_test, trainer_args=None, retrain
_, _1, graph = train((graph, x_train, y_train, x_test, y_test, trainer_args, None))
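Continuing the sketch, final_fit retrains the best architecture found by the search; the split below is a placeholder, not a recommended protocol:

# Placeholder split purely for illustration; use a proper held-out set in practice.
x_train_part, y_train_part = x[:-100], y[:-100]
x_test_part, y_test_part = x[-100:], y[-100:]
clf.final_fit(x_train_part, y_train_part, x_test_part, y_test_part, retrain=True)
print(clf.evaluate(x_test_part, y_test_part))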

def export_keras_model(self, path, model_id=None):
"""Export the searched model as a Keras saved model.
Args:
path: A string. The path to the file to save.
model_id: An integer. If not provided, the function will export the best model.
"""
if model_id is None:
model_id = self.get_best_model_id()
graph = self.load_searcher().load_model_by_id(model_id)
graph.produce_model().save(path)

def get_best_model_id(self):
"""
Returns:
An integer. The best model id.
"""
return self.load_searcher().get_best_model_id()
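For example (the output file name is arbitrary):

# Export the best searched model as a Keras saved model file.
best_id = clf.get_best_model_id()
clf.export_keras_model('best_model.h5', model_id=best_id)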


class ImageClassifier(ClassifierBase):
"""Image classifier class inherited from ClassifierBase class.
It is used for image classification. It searches convolutional neural network architectures
for the best configuration for the dataset.
"""

def __init__(self, verbose=True, searcher_type='bayesian', path=constant.DEFAULT_SAVE_PATH, resume=False,
searcher_args=None):
super().__init__(verbose,
searcher_type,
path,
resume=resume,
searcher_args=searcher_args)
19 changes: 0 additions & 19 deletions autokeras/generator.py
@@ -14,18 +14,6 @@


class ClassifierGenerator:
"""The base class of classifier generators.
ClassifierGenerator is the base class of all classifier generator classes.
It is used for generating classifier models.
Attributes:
n_classes: Number of classes in the input data.
input_shape: A tuple of integers containing the size of each dimension of the input data,
excluding the dimension of number of training examples. The length of the tuple should
be between two and four, inclusive.
"""

def __init__(self, n_classes, input_shape):
self.n_classes = n_classes
self.input_shape = input_shape
@@ -35,24 +23,19 @@ def __init__(self, n_classes, input_shape):
raise ValueError('The input dimension is too low.')

def _get_pool_layer_func(self):
"""Return MaxPooling function based on the dimension of input shape."""
pool_funcs = [MaxPooling1D, MaxPooling2D, MaxPooling3D]
return pool_funcs[len(self.input_shape) - 2]

def _get_shape(self, dim_size):
"""Return filter shape tuple based on the dimension of input shape."""
temp_list = [(dim_size,), (dim_size, dim_size), (dim_size, dim_size, dim_size)]
return temp_list[len(self.input_shape) - 2]
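A small sketch of how these helpers select layers from the input rank (the instantiation and the autokeras.generator module path are illustrative assumptions):

from autokeras.generator import ClassifierGenerator

# For a 2-D image input such as (28, 28, 1), len(input_shape) - 2 == 1, so the
# helpers pick MaxPooling2D and a (3, 3) kernel shape respectively.
gen = ClassifierGenerator(n_classes=10, input_shape=(28, 28, 1))
pool_layer = gen._get_pool_layer_func()  # MaxPooling2D
kernel_shape = gen._get_shape(3)         # (3, 3)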


class DefaultClassifierGenerator(ClassifierGenerator):
"""A classifier generator always generates models with the same default architecture and configuration."""

def __init__(self, n_classes, input_shape):
super().__init__(n_classes, input_shape)

def generate(self, model_len=constant.MODEL_LEN, model_width=constant.MODEL_WIDTH):
"""Return the default classifier model that has been compiled."""
pool = self._get_pool_layer_func()
conv = get_conv_layer_func(len(self._get_shape(3)))
ave = get_ave_layer_func(len(self._get_shape(3)))
@@ -80,8 +63,6 @@ def generate(self, model_len=constant.MODEL_LEN, model_width=constant.MODEL_WIDT


class RandomConvClassifierGenerator(ClassifierGenerator):
"""A classifier generator that generates random convolutional neural networks."""

def __init__(self, n_classes, input_shape):
super().__init__(n_classes, input_shape)

