diff --git a/.gitignore b/.gitignore index 52e40c4..472101a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ *.pyc *.so .ipynb_checkpoints +.cache +.coverage # Packages # ############ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..700f9b4 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,42 @@ +sudo: false + +# addons: +# apt: +# packages: +# - + +cache: + directories: + - $HOME/env + +language: python + +notifications: + email: false + +python: + - "2.7" + - "3.4" + - "3.5" + +before_install: + - bash .travis_dependencies.sh + - export PATH="$HOME/env/miniconda$TRAVIS_PYTHON_VERSION/bin:$PATH"; + - hash -r + - source activate test-environment + +install: + - pip install pytest pytest-cov + - pip install coveralls + - pip install -e . + +script: + - python --version + - py.test -vs --cov=optimus ./tests + +after_success: + - coveralls + - pip uninstall -y optimus + +after_failure: + - pip uninstall -y optimus diff --git a/.travis_dependencies.sh b/.travis_dependencies.sh new file mode 100755 index 0000000..77dcc52 --- /dev/null +++ b/.travis_dependencies.sh @@ -0,0 +1,46 @@ +#!/bin/sh + +ENV_NAME="test-environment" +set -e + +conda_create () +{ + + hash -r + conda config --set always_yes yes --set changeps1 no + conda update -q conda + conda config --add channels pypi + conda info -a + deps='pip numpy scipy pandas theano coverage matplotlib' + + conda create -q -n $ENV_NAME "python=$TRAVIS_PYTHON_VERSION" $deps + conda update --all +} + +src="$HOME/env/miniconda$TRAVIS_PYTHON_VERSION" +if [ ! -d "$src" ]; then + mkdir -p $HOME/env + pushd $HOME/env + + # Download miniconda packages + wget http://repo.continuum.io/miniconda/Miniconda-3.16.0-Linux-x86_64.sh -O miniconda.sh; + + # Install both environments + bash miniconda.sh -b -p $src + + export PATH="$src/bin:$PATH" + conda_create + + source activate $ENV_NAME + + pip install python-coveralls + + if [ "$ENABLE_NUMBA" = true ]; then + conda install numba + fi + + source deactivate + popd +else + echo "Using cached dependencies" +fi \ No newline at end of file diff --git a/examples/datatools.py b/examples/datatools.py new file mode 100644 index 0000000..28d8859 --- /dev/null +++ b/examples/datatools.py @@ -0,0 +1,97 @@ +import gzip +import numpy as np +import pickle + + +def load_mnist(mnist_file): + """Load the MNIST dataset into memory. + + Parameters + ---------- + mnist_file : str + Path to gzipped MNIST file. + + Returns + ------- + train, valid, test: tuples of np.ndarrays + Each consists of (data, labels), where data.shape=(N, 1, 28, 28) and + labels.shape=(N,). + """ + dsets = [] + with gzip.open(mnist_file, 'rb') as fp: + for split in pickle.load(fp): + n_samples = len(split[1]) + data = np.zeros([n_samples, 1, 28, 28]) + labels = np.zeros([n_samples], dtype=int) + for n, (x, y) in enumerate(zip(*split)): + data[n, ...] = x.reshape(1, 28, 28) + labels[n] = y + dsets.append((data, labels)) + + return dsets + + +def load_mnist_npz(mnist_file): + """Load the MNIST dataset into memory from an NPZ. + + Parameters + ---------- + mnist_file : str + Path to an NPZ file of MNIST data. + + Returns + ------- + train, valid, test: tuples of np.ndarrays + Each consists of (data, labels), where data.shape=(N, 1, 28, 28) and + labels.shape=(N,). 
+ """ + data = np.load(mnist_file) + dsets = [] + for name in 'train', 'valid', 'test': + x = data['x_{}'.format(name)].reshape(-1, 1, 28, 28) + y = data['y_{}'.format(name)] + dsets.append([x, y]) + + return dsets + + +def minibatch(data, labels, batch_size, max_iter=np.inf): + """Random mini-batches generator. + + Parameters + ---------- + data : array_like, len=N + Observation data. + labels : array_like, len=N + Labels corresponding the the given data. + batch_size : int + Number of datapoints to return at each iteration. + max_iter : int, default=inf + Number of iterations before raising a StopIteration. + + Yields + ------ + batch : dict + Random batch of datapoints, under the keys `data` and `labels`. + """ + if len(data) != len(labels): + raise ValueError("data and labels must have the same number of items.") + + num_points = len(labels) + if num_points <= batch_size: + raise ValueError("batch_size cannot exceed number of data points") + + count = 0 + order = np.random.permutation(num_points) + idx = 0 + while count < max_iter: + x, y = [], [] + while len(y) < batch_size: + x.append(data[order[idx]]) + y.append(labels[order[idx]]) + idx += 1 + if idx >= num_points: + idx = 0 + np.random.shuffle(order) + yield dict(data=np.asarray(x), labels=np.asarray(y)) + count += 1 diff --git a/examples/mnist.py b/examples/mnist_classifier.py similarity index 100% rename from examples/mnist.py rename to examples/mnist_classifier.py diff --git a/optimus/__init__.py b/optimus/__init__.py index 85650df..4c8df0b 100644 --- a/optimus/__init__.py +++ b/optimus/__init__.py @@ -1,7 +1,7 @@ """Magic and hackery.""" import json - +import numpy as np import theano import theano.tensor as T @@ -31,6 +31,7 @@ from .nodes import Dimshuffle from .nodes import Flatten from .nodes import Slice +from .nodes import SliceGT from .nodes import L1Magnitude from .nodes import L2Magnitude from .nodes import Max @@ -62,15 +63,20 @@ from .nodes import SelectIndex from .nodes import MaxNotIndex from .nodes import MinNotIndex +from .nodes import Euclidean from .nodes import SquaredEuclidean from .nodes import Divide from .nodes import Product -# Old losses ... deprecated, don't use. -from .losses import NegativeLogLikelihoodLoss -from .losses import MeanSquaredErrorLoss +# Scalar losses +from .losses import NegativeLogLikelihood +from .losses import MeanSquaredError +from .losses import CrossEntropy from .losses import CrossEntropyLoss from .losses import WeightDecayPenalty +from .losses import SimilarityMargin +from .losses import ContrastiveMargin +from .losses import PairwiseRank # Framework classes from .framework import ConnectionManager @@ -101,11 +107,20 @@ def __str_convert__(obj): return obj +def serialize(self, obj): + if isinstance(obj, np.ndarray): + obj = obj.tolist() + elif hasattr(obj, '__json__'): + obj = getattr(obj, '__json__', obj) + + return obj + + class JSONSupport(): """Context manager for temporary JSON support.""" def __enter__(self): # Encoder returns the object's `__json__` property. - json.JSONEncoder.default = lambda self, jobj: jobj.__json__ + json.JSONEncoder.default = serialize # Decoder looks for the class name, and calls it's class constructor. 
def decode(obj): diff --git a/optimus/framework.py b/optimus/framework.py index 4721737..fc675b2 100644 --- a/optimus/framework.py +++ b/optimus/framework.py @@ -1,14 +1,18 @@ from __future__ import print_function + +from collections import OrderedDict +import datetime +import matplotlib.pyplot as plt import numpy as np +import os +import pandas as pd +import six from theano.tensor import grad -from collections import OrderedDict from theano import function - import time -import os -import json -from .core import JObject + +from optimus.core import JObject def named_list(items): @@ -18,6 +22,7 @@ def named_list(items): class ConnectionManager(object): """TODO(ejhumphrey): write me.""" + def __init__(self, edges): """ edges: list of Port tuples @@ -62,9 +67,10 @@ def edges(self): class Graph(JObject): """Graph Implementation - Property attributes are named dictionaries, while the corresponding private - variables are lists. + Property attributes are named dictionaries, while the corresponding + private variables are lists. """ + def __init__(self, name, inputs, nodes, connections, outputs, loss=None, updates=None, constraints=None, chunk_size=250, verbose=False, momentum=0): @@ -127,7 +133,7 @@ def param_values(self): def param_values(self, values): for k in values.keys(): if k in self.params: - self.params[k].value = values[k] + self.params[k].value = values.get(k) else: print("Received erroneous parameter: {}".format(k)) @@ -196,7 +202,7 @@ def __wire__(self): break if nothing_happened: print("Your logic is poor, but we can help.") - for k, v in local_map.iteritems(): + for k, v in six.iteritems(local_map): print("\t{{{}: {}}}".format(k, v)) return self.ports = input_ports @@ -219,7 +225,7 @@ def __wire__(self): raise ValueError( "Requested loss `{}` is not among the computed ports: " "\n{}".format(self._loss, self.ports.keys())) - for input_name, param_names in self._updates.iteritems(): + for input_name, param_names in six.iteritems(self._updates): eta = self.inputs.get(input_name) for param_name in param_names: param = self.params[param_name] @@ -235,9 +241,9 @@ def __wire__(self): def __call__(self, *args, **kwargs): """writeme""" - # Needs internal buffering strategy -- check if the value of each kwarg - # is an np.ndarray? if not, map it over all chunks. This should be a - # separate function though... + # Needs internal buffering strategy -- check if the value of each + # kwarg is an np.ndarray? if not, map it over all chunks. This should + # be a separate function though... # if self.chunk_size is None and len(kwargs.values()[0]) > ...? return OrderedDict([(k, v) for k, v in zip(self.outputs.keys(), self._fx(*args, **kwargs))]) @@ -264,7 +270,10 @@ def load_param_values(self, filename): def data_stepper(chunk_size=250, **kwargs): - """Generator to chunk unweildy inputs into smaller bites.""" + """Generator to chunk unweildy inputs into smaller bites. + + Isn't this superseded by optimus.util.array_stepper? 
+ """ constants = dict() arrays = dict() for key, value in kwargs: @@ -279,9 +288,9 @@ def data_stepper(chunk_size=250, **kwargs): num_chunks = int(np.ceil(len(arrays.values()[0]) / float(chunk_size))) for n in range(num_chunks): - i0, i1 = n*chunk_size, (n+1)*chunk_size - array_chunk = dict([(key, value[i0:i1]) - for key, value in arrays.iteritems()]) + i0, i1 = n * chunk_size, (n + 1) * chunk_size + array_chunk = dict((key, value[i0:i1]) + for key, value in six.iteritems(arrays)) array_chunk.update(constants) yield array_chunk @@ -289,29 +298,56 @@ def data_stepper(chunk_size=250, **kwargs): class Driver(object): """ """ - TIME_FMT = "%04d-%02d-%02d_%02dh%02dm%02ds" + # Replace with a datetime strfmt + TIME_FMT = "%04d%02d%02d_%02dh%02dm%02ds" + + def __init__(self, graph, name, output_directory=None, + parameter_cache=None, log_file=None, display=False): + """Create an optimus training driver. - def __init__(self, graph, name='trainer', - output_directory=None, - log_file='training_stats.json', - init_param_file=None): + Parameters + ---------- + graph : optimus.Graph + Instantiated graph to train. + + name : str + Unique name for this driver. + + output_directory : str, default=None + Path to cache outputs. Will be created if it doesn't exist. + + parameter_cache : dict or biggie.Stash, default=None + Object to hold parameter checkpoints + + log_file : str, default=None + Path to a file for writing progress. + + display : bool, default=False + Draw the loss over iteration. + + TODO + ---- + display_func : callable + A function that receives an axes handle and the outputs of an + update step, and updates a canvas accordingly. Could also do this + for parameters. + """ self.graph = graph - self.log_file = log_file - self.output_directory = output_directory self.name = name + self.log_file = log_file + self.parameter_cache = parameter_cache + self.display = display - if init_param_file: - self.graph.load_param_values(init_param_file) - - if output_directory is not None: - self.output_directory = output_directory - if not os.path.exists(self.output_directory): - os.makedirs(self.output_directory) + self.output_directory = output_directory + if output_directory and not os.path.exists(output_directory): + os.makedirs(self.output_directory) + # TODO: y/n? # def_file = os.path.join(self.output_directory, # "%s.json" % self.name) # save(self.graph, def_file) - self._stats = dict() + self.stats = pd.DataFrame(columns=['key', 'timestamp', + 'iteration', 'loss']) self._last_params = None def fit(self, source, hyperparams, max_iter=10000, save_freq=250, @@ -322,79 +358,127 @@ def fit(self, source, hyperparams, max_iter=10000, save_freq=250, ---------- source : generator Yields dictionaries of data matching the inputs of the graph. + hyperparams : dict Static hyperparameters for the graph; merged with the dynamic source. + max_iter : int Maximum number of interations to run. + save_freq : int Number of iterations between checkpoints. + print_freq : int Number of iterations between displaying progress. + nan_exceptions : int, default=0 Number of NaNs to catch before stopping. + + Returns + ------- + progress : pd.DataFrame + Training progress as a pandas DataFrame """ - assert self.graph.loss, "Loss not set!" 
- self._stats.update( - checkpoints=[], - start_time=time.asctime(), - hyperparams=hyperparams, - max_iter=max_iter, - save_freq=save_freq) + if not self.graph.loss: + raise ValueError("Loss not set!") if save_freq is None or save_freq <= 0: save_freq = np.inf - param_file_fmt = "%%s-%%0%dd-%s.npz" % (np.ceil(np.log10(max_iter)+1), - self.TIME_FMT) - self._last_params = self.graph.param_values + if self.display: + plt.ion() + fig, axes = plt.subplots(1, 1) + canvas = axes.plot([0], [0])[0] + + param_file_fmt = "%%s-%%0%dd-%s" % (np.ceil(np.log10(max_iter) + 1), + self.TIME_FMT) + self._last_saved_params = None try: for n_iter, data in enumerate(source): data.update(**hyperparams) + # TODO: Technically, nothing is forcing the loss to be + # returned as an output... + param_values = self.graph.param_values outputs = self.graph(**data) - self.update_stats(n_iter, outputs[self.graph.loss.name]) + loss = outputs.get(self.graph.loss.name, np.nan) + key = self.iter_to_key(n_iter, param_file_fmt) + + # Update stats + row = dict(key=key, timestamp=datetime.datetime.now(), + iteration=n_iter, loss=float(loss)) + self.stats.loc[len(self.stats)] = row + + # Checkpoint params if appropriate iteration. if n_iter > 0 and (n_iter % save_freq) == 0: - if self.output_directory is not None: - self._save_params(n_iter, param_file_fmt) + self._last_saved_params = key + self._save_params(key) + + # Log progress if (n_iter % print_freq) == 0: - self.print_last_stats() + self.print_last_stats(row, max_iter) + if self.display: + canvas.set_data(self.stats.iteration, self.stats.loss) + axes.set_ylim(self.stats.loss.min() * 1.1, + self.stats.loss.max() * 1.1) + axes.set_xlim(0, self.stats.iteration.max()) + plt.draw() + plt.pause(0.01) + + # Break if done if n_iter >= max_iter: break - if not np.isfinite(outputs[self.graph.loss.name]): + + # NaN Handling + if not np.isfinite(loss): print("Caught a non-finite loss at iteration: {} " "".format(n_iter)) - if nan_exceptions <= 0: + if nan_exceptions <= 0 or self._last_saved_params is None: print("Stopping.") - break - print("Reseting parameter values and moving on...") - self.graph.param_values = self._last_params + return data, outputs, param_values + key = self._last_saved_params + print("Reseting parameter values to {}".format(key)) + self.graph.param_values = self.parameter_cache.get(key) nan_exceptions = nan_exceptions - 1 - self._last_params = self.graph.param_values except KeyboardInterrupt: print("Stopping early after {} iterations".format(n_iter)) - def update_stats(self, n_iter, loss): - cpoint = dict(timestamp=time.asctime(), - n_iter=n_iter, - loss=float(loss)) - self._stats['checkpoints'].append(cpoint) + return self.stats - def print_last_stats(self): - cpoint = self._stats['checkpoints'][-1] - print("[%s] %d / %d: %0.4f".format(cpoint['timestamp'], - cpoint['n_iter'], - self._stats['max_iter'], - cpoint['loss'])) + def print_last_stats(self, row, max_iter): + """ + """ + print("[{timestamp}] {iteration} / {max_iter}: {loss}" + "".format(max_iter=max_iter, **row)) - def _save_params(self, n_iter, param_file_fmt): + def iter_to_key(self, n_iter, param_file_fmt): args = tuple([self.name, n_iter] + list(time.localtime()[:6])) - param_file = os.path.join(self.output_directory, param_file_fmt % args) - self.graph.save_param_values(param_file) + return param_file_fmt % args + + def _save_params(self, key): + """Checkpoint the graph's parameters. + + Parameters + ---------- + n_iter : int + Current iteration. 
+ + param_file_fmt : str + String formatter for producing keys. + """ + # This is pretty gross right here. + if self.output_directory: + param_file = os.path.join(self.output_directory, + "{}.npz".format(key)) + self.graph.save_param_values(param_file) + + if self.parameter_cache is not None: + self.parameter_cache[key] = self.graph.param_values + + return def __del__(self): - if not self.log_file or not self.output_directory: + if not self.log_file: return - log_file = os.path.join(self.output_directory, self.log_file) - with open(log_file, 'w') as fp: - json.dump(self._stats, fp, indent=2) + self.stats.to_csv(self.log_file) diff --git a/optimus/losses.py b/optimus/losses.py index 3842d94..d047995 100644 --- a/optimus/losses.py +++ b/optimus/losses.py @@ -5,11 +5,13 @@ """ import theano.tensor as T -from . import core -from .nodes import Node +import optimus.core as core +from optimus.nodes import Node +import optimus.functions as functions -class NegativeLogLikelihoodLoss(Node): + +class NegativeLogLikelihood(Node): """Indexed negative log-likelihood loss, i.e. for 1-of-k classifiers. In numpy syntax, computes the following: @@ -21,6 +23,7 @@ class NegativeLogLikelihoodLoss(Node): See also: optimus.nodes.SelectIndex """ + def __init__(self, name): # Input Validation Node.__init__(self, name=name) @@ -32,7 +35,7 @@ def __init__(self, name): def transform(self): """writeme""" - assert self.is_ready() + self.validate_ports() assert self.likelihoods.variable.ndim == 2 col_index = self.index.variable @@ -41,12 +44,13 @@ def transform(self): self.likelihoods.variable[row_index, col_index])) -class CrossEntropyLoss(Node): +class CrossEntropy(Node): """Pointwise cross-entropy between a `prediction` and `target`. NOTE: Both inputs *must* be non-negative, and only `target` may contain zeros. Expect all hell to break loose if this is violated. """ + def __init__(self, name, epsilon=10.0**-6): # Input Validation Node.__init__(self, name=name, epsilon=epsilon) @@ -59,7 +63,7 @@ def __init__(self, name, epsilon=10.0**-6): def transform(self): """writeme""" - assert self.is_ready() + self.validate_ports() prediction = self.prediction.variable target = self.target.variable @@ -67,14 +71,28 @@ def transform(self): output = target * T.log((prediction + self.epsilon) / eps_p1) output += (1.0 - target) * T.log( (1.0 - prediction + self.epsilon) / eps_p1) - self.output.variable = -T.mean(output) + self.output.variable = -output.mean(axis=1) + + +class CrossEntropyLoss(CrossEntropy): + """Pointwise cross-entropy between a `prediction` and `target`. + NOTE: Both inputs *must* be non-negative, and only `target` may contain + zeros. Expect all hell to break loose if this is violated. + """ -class MeanSquaredErrorLoss(Node): + def transform(self): + """writeme""" + super(CrossEntropyLoss, self).transform() + self.output.variable = T.mean(self.output.variable) + + +class MeanSquaredError(Node): """Compute the mean squared error between a `prediction` and a `target`. See also: optimus.nodes.SquaredEuclidean """ + def __init__(self, name): # Input Validation Node.__init__(self, name=name) @@ -86,7 +104,7 @@ def __init__(self, name): def transform(self): """writeme""" - assert self.is_ready() + self.validate_ports() if self.prediction.variable.ndim >= 2: xA = T.flatten(self.prediction.variable, outdim=2) xB = T.flatten(self.target.variable, outdim=2) @@ -103,6 +121,7 @@ class WeightDecayPenalty(Node): See also: optimus.nodes.L2Magnitude. 
""" + def __init__(self, name): Node.__init__(self, name=name) self.input = core.Port(name=self.__own__('input')) @@ -112,7 +131,206 @@ def __init__(self, name): self._outputs.append(self.output) def transform(self): - assert self.is_ready(), "Not all ports are set." + self.validate_ports() x_in = self.input.variable.flatten(2) w_mag = T.sqrt(T.sum(T.pow(x_in, 2.0), axis=-1)) self.output.variable = self.weight.variable * T.mean(w_mag) + + +class SimilarityMargin(Node): + """Create a contrastive margin loss, controlled by an `equivalence` score. + + Inputs + ------ + distance : vector + Observed distance between datapoints. + + equivalence : vector + Similarity scores, normalized to [0, 1], corresponding to least and + most similar, respectively. + + sim_margin : scalar + Margin between similar points within which no penalty is incurred. If + value is <= 0, a penalty is always incurred. + + diff_margin : scalar + Margin between dissimilar points within which no penalty is incurred. + If value is <= 0, a penalty is always incurred. + + Outputs + ------- + output : scalar + Cost incurred given the input parameters. + + Equation + -------- + Given D: distance, y: equivalence ... + sim_cost = y*hwr(D - sim_margin)^2 + diff_cost = (1 - y) * hwr(diff_margin - D)^2 + total = ave(sim_cost + diff_cost) + + """ + + def __init__(self, name): + super(SimilarityMargin, self).__init__(name=name) + self.distance = core.Port(name=self.__own__('distance')) + self.equivalence = core.Port(name=self.__own__('equivalence')) + self.sim_margin = core.Port(name=self.__own__('sim_margin')) + self.diff_margin = core.Port(name=self.__own__('diff_margin')) + self._inputs.extend([self.distance, self.equivalence, + self.sim_margin, self.diff_margin]) + self.output = core.Port(name=self.__own__('output')) + self._outputs.append(self.output) + + def transform(self): + """Transform inputs to outputs.""" + self.validate_ports() + + # TODO: Find a more reusable way of enforcing this behavior. + if self.distance.variable.ndim != 1: + raise ValueError("`distance` must be a vector.") + + if self.equivalence.variable.ndim != 1: + raise ValueError("`equivalence` must be a vector.") + + dist = self.distance.variable + equiv = self.equivalence.variable + smarg = self.sim_margin.variable + dmarg = self.diff_margin.variable + + sim_cost = T.pow(functions.relu(dist - smarg), 2.0) + diff_cost = T.pow(functions.relu(dmarg - dist), 2.0) + total_cost = equiv * sim_cost + (1 - equiv) * diff_cost + self.output.variable = T.mean(total_cost) + + +class ContrastiveMargin(Node): + """Create a contrastive margin loss, comparing the relationship between + three datapoints as two costs. + + Inputs + ------ + cost_sim : vector + Observed cost between two inputs defined as "similar". + + cost_diff : vector + Observed cost between two inputs defined as "different". + + margin_sim : scalar + Margin between similar points within which no penalty is incurred. If + value is <= 0, a penalty is always incurred. + + margin_diff : scalar + Margin between dissimilar points within which no penalty is incurred. + If value is <= 0, a penalty is always incurred. + + Outputs + ------- + output : scalar + Cost incurred given the input parameters. + + Equation + -------- + Given above... 
+        loss_sim = hwr(cost_sim - margin_sim)^2
+        loss_diff = hwr(margin_diff - cost_diff)^2
+        total = ave(loss_sim + loss_diff)
+
+    """
+
+    def __init__(self, name, filter_zeros=True):
+        super(ContrastiveMargin, self).__init__(name=name)
+        self.cost_sim = core.Port(name=self.__own__('cost_sim'))
+        self.cost_diff = core.Port(name=self.__own__('cost_diff'))
+        self.margin_sim = core.Port(name=self.__own__('margin_sim'))
+        self.margin_diff = core.Port(name=self.__own__('margin_diff'))
+        self._inputs.extend([self.cost_sim, self.cost_diff,
+                             self.margin_sim, self.margin_diff])
+        self.output = core.Port(name=self.__own__('output'))
+        self._outputs.append(self.output)
+        self.filter_zeros = filter_zeros
+
+    def transform(self):
+        """Transform inputs to outputs."""
+        self.validate_ports()
+
+        # TODO: Find a more reusable way of enforcing this behavior.
+        if self.cost_sim.variable.ndim != 1:
+            raise ValueError("`cost_sim` must be a vector.")
+
+        if self.cost_diff.variable.ndim != 1:
+            raise ValueError("`cost_diff` must be a vector.")
+
+        cost_sim = self.cost_sim.variable
+        cost_diff = self.cost_diff.variable
+        smarg = self.margin_sim.variable
+        dmarg = self.margin_diff.variable
+
+        loss_sim = T.pow(functions.relu(cost_sim - smarg), 2.0)
+        loss_diff = T.pow(functions.relu(dmarg - cost_diff), 2.0)
+
+        if self.filter_zeros:
+            loss_sim = loss_sim[(loss_sim > 0).nonzero()]
+            loss_diff = loss_diff[(loss_diff > 0).nonzero()]
+
+        self.output.variable = T.mean(loss_sim) + T.mean(loss_diff)
+
+
+class PairwiseRank(Node):
+    """Create a pairwise rank loss, where the cost of a similar pair should
+    be alpha-times smaller than the cost of a dissimilar pair.
+
+    Inputs
+    ------
+    cost_sim : vector
+        Observed cost between two inputs defined as "similar".
+
+    cost_diff : vector
+        Observed cost between two inputs defined as "different".
+
+    alpha : scalar
+        Parameter controlling the separation between dissimilar manifolds;
+        when in doubt, set to 1.
+
+    margin : scalar
+        Additive margin applied inside the hinge; see the equation below.
+
+    Outputs
+    -------
+    output : scalar
+        Cost incurred given the input parameters.
+
+    Equation
+    --------
+    Given above...
+        cost = hwr(cost_diff - alpha * cost_sim + margin)^2
+        total = ave(cost)
+
+    """
+
+    def __init__(self, name):
+        super(PairwiseRank, self).__init__(name=name)
+        self.cost_sim = core.Port(name=self.__own__('cost_sim'))
+        self.cost_diff = core.Port(name=self.__own__('cost_diff'))
+        self.alpha = core.Port(name=self.__own__('alpha'))
+        self.margin = core.Port(name=self.__own__('margin'))
+        self._inputs.extend([self.cost_sim, self.cost_diff,
+                             self.alpha, self.margin])
+        self.output = core.Port(name=self.__own__('output'))
+        self._outputs.append(self.output)
+
+    def transform(self):
+        """Transform inputs to outputs."""
+        self.validate_ports()
+
+        # TODO: Find a more reusable way of enforcing this behavior.
+ if self.cost_sim.variable.ndim != 1: + raise ValueError("`cost_sim` must be a vector.") + + if self.cost_diff.variable.ndim != 1: + raise ValueError("`cost_diff` must be a vector.") + + cost_sim = self.cost_sim.variable + cost_diff = self.cost_diff.variable + alpha = self.alpha.variable + margin = self.margin.variable + + cost = cost_diff - alpha * cost_sim + margin + self.output.variable = T.mean(T.pow(functions.relu(cost), 2.0)) diff --git a/optimus/nodes.py b/optimus/nodes.py index e292262..087f01a 100644 --- a/optimus/nodes.py +++ b/optimus/nodes.py @@ -1,20 +1,17 @@ """TODO(ejhumphrey): write me.""" from __future__ import print_function import numpy as np -import theano import theano.tensor as T from theano.tensor.shared_randomstreams import RandomStreams -from theano.tensor.signal import downsample +from theano.tensor.signal import pool from . import core from . import FLOATX from . import functions -def compile(inputs, outputs): - return theano.function(inputs=[x.variable for x in inputs], - outputs=[z.variable for z in outputs], - allow_input_downcast=True) +class UnconnectedNodeError(BaseException): + pass # --- Node Implementations --- @@ -22,6 +19,7 @@ class Node(core.JObject): """ Nodes in the graph perform parameter management and micro-math operations. """ + def __init__(self, name, **kwargs): """writeme.""" self.name = name @@ -44,6 +42,12 @@ def is_ready(self): set_outputs = all([p.variable for p in self.outputs.values()]) return set_inputs and not set_outputs + def validate_ports(self): + if not self.is_ready(): + status = self.port_status + status['name'] = self.name + raise UnconnectedNodeError(status) + def reset(self): """TODO(ejhumphrey): write me.""" for p in self.inputs.values(): @@ -51,6 +55,12 @@ def reset(self): for p in self.outputs.values(): p.reset() + @property + def port_status(self): + return dict( + inputs={k: bool(p.variable) for k, p in self.inputs.items()}, + outputs={k: bool(p.variable) for k, p in self.outputs.items()}) + @property def __json__(self): self.__args__.update(type=self.type, name=self.name) @@ -129,35 +139,38 @@ def __init__(self, name, num_inputs, **kwargs): class Add(MultiInput): """Summation node.""" + def transform(self): """writeme""" - assert self.is_ready(), "Not all ports are set." + self.validate_ports() self.output.variable = sum([x.variable for x in self._inputs]) self.output.shape = self._inputs[0].shape class Concatenate(MultiInput): """Concatenate a set of inputs.""" + def __init__(self, name, num_inputs, axis=-1): MultiInput.__init__(self, name=name, num_inputs=num_inputs, axis=axis) self.axis = axis def transform(self): """In-place transformation""" - assert self.is_ready(), "Not all ports are set." + self.validate_ports() self.output.variable = T.concatenate( [x.variable for x in self._inputs], axis=self.axis) class Stack(MultiInput): """Form a rank+1 tensor of a set of inputs; optionally reorder the axes.""" + def __init__(self, name, num_inputs, axes=None): MultiInput.__init__(self, name=name, num_inputs=num_inputs, axes=axes) self.axes = axes def transform(self): """In-place transformation""" - assert self.is_ready(), "Not all ports are set." 
+ self.validate_ports() output = T.stack(*list([x.variable for x in self._inputs])) if self.axes: output = T.transpose(output, axes=self.axes) @@ -166,6 +179,7 @@ def transform(self): class Constant(Node): """Single input / output nodes.""" + def __init__(self, name, shape): Node.__init__(self, name=name, shape=shape) self.data = core.Parameter(shape=shape, name=self.__own__('data')) @@ -174,12 +188,13 @@ def __init__(self, name, shape): self._outputs.append(self.output) def transform(self): - assert self.is_ready(), "Not all ports are set." + self.validate_ports() self.output.variable = self.data.variable class Unary(Node): """Single input / output nodes.""" + def __init__(self, name, **kwargs): Node.__init__(self, name=name, **kwargs) self.input = core.Port(name=self.__own__('input')) @@ -188,7 +203,7 @@ def __init__(self, name, **kwargs): self._outputs.append(self.output) def transform(self): - assert self.is_ready(), "Not all ports are set." + self.validate_ports() class Dimshuffle(Unary): @@ -215,6 +230,7 @@ def transform(self): class Slice(Unary): """writeme""" + def __init__(self, name, slices): # Input Validation Unary.__init__(self, name=name, slices=slices) @@ -234,14 +250,16 @@ def transform(self): class Log(Unary): - def __init__(self, name, epsilon=0.0): - Unary.__init__(self, name=name, epsilon=epsilon) + def __init__(self, name, epsilon=0.0, gain=1.0): + Unary.__init__(self, name=name, epsilon=epsilon, gain=gain) self.epsilon = epsilon + self.gain = gain def transform(self): """In-place transformation""" Unary.transform(self) - self.output.variable = T.log(self.input.variable + self.epsilon) + self.output.variable = T.log( + self.gain * self.input.variable + self.epsilon) class Sqrt(Unary): @@ -277,6 +295,7 @@ def transform(self): class Softmax(Unary): """Apply the softmax to an input.""" + def __init__(self, name): Unary.__init__(self, name=name) @@ -288,6 +307,7 @@ def transform(self): class RectifiedLinear(Unary): """Apply the (hard) rectified linear function to an input.""" + def __init__(self, name): Unary.__init__(self, name=name) @@ -299,6 +319,7 @@ def transform(self): class SoftRectifiedLinear(Unary): """Apply the (hard) rectified linear function to an input.""" + def __init__(self, name, knee): Unary.__init__(self, name=name, knee=knee) self.knee = knee @@ -312,6 +333,7 @@ def transform(self): class Tanh(Unary): """Apply the hyperbolic tangent to an input.""" + def __init__(self, name): Unary.__init__(self, name=name) @@ -321,8 +343,26 @@ def transform(self): self.output.variable = T.tanh(self.input.variable) +class SliceGT(Unary): + """Return a """ + + def __init__(self, name, value): + Unary.__init__(self, name=name, value=value) + self.value = value + + def transform(self): + """In-place transformation""" + Unary.transform(self) + if self.input.variable.ndim != 1: + raise ValueError("`input` must be a vector.") + + idx = self.input.variable > self.value + self.output.variable = self.input.variable[idx.nonzero()] + + class Sum(Unary): """Returns the sum of an input, or over a given axis.""" + def __init__(self, name, axis=None): Unary.__init__(self, name=name, axis=axis) self.axis = axis @@ -400,7 +440,7 @@ def transform(self): """In-place transformation""" Unary.transform(self) weight = self.weight.variable - if not self.broadcast is None: + if self.broadcast is not None: weight = T.addbroadcast(weight, *self.broadcast) self.output.variable = self.input.variable * weight @@ -454,6 +494,7 @@ def transform(self): x_in = T.flatten(self.input.variable, outdim=2) 
z_out = self.activation(T.dot(x_in, weights) + bias) if self.dropout: + # TODO: Logging print("Performing dropout in {}".format(self.name)) dropout = self.dropout.variable selector = self._theano_rng.binomial( @@ -649,8 +690,8 @@ def transform(self): size=self.bias.shape, p=1.0 - dropout).astype(FLOATX) output *= selector.dimshuffle('x', 0, 'x', 'x') / (1.0 - dropout) - output = downsample.max_pool_2d( - output, self.pool_shape, ignore_border=False) + output = pool.pool_2d( + output, self.pool_shape, ignore_border=False, mode='max') self.output.variable = output @@ -734,8 +775,8 @@ def transform(self, x_in): z_out = self.activation(z_out + b.dimshuffle('x', 0, 'x', 'x')) z_out *= selector.dimshuffle('x', 0, 'x', 'x') * (self.dropout + 0.5) - return downsample.max_pool_2d( - z_out, self.get("pool_shape"), ignore_border=False) + return pool.pool_2d(z_out, self.get("pool_shape"), + ignore_border=False, mode='max') class CrossProduct(Node): @@ -772,7 +813,7 @@ def outputs(self): def transform(self): """In-place transformation""" - assert self.is_ready(), "Not all ports are set." + self.validate_ports() in_a = self.input_a.variable.dimshuffle(0, 1, 'x') in_b = self.input_b.variable.dimshuffle(0, 'x', 1) @@ -846,7 +887,7 @@ def __init__(self, name): def transform(self): """writeme""" - assert self.is_ready() + self.validate_ports() assert self.input.variable.ndim == 2 col_index = self.index.variable row_index = T.arange(col_index.shape[0], dtype='int32') @@ -866,7 +907,7 @@ def __init__(self, name): def transform(self): """writeme""" - assert self.is_ready() + self.validate_ports() index = self.index.variable input_var = self.input.variable assert input_var.ndim == 2 @@ -886,7 +927,7 @@ def __init__(self, name): def transform(self): """writeme""" - assert self.is_ready() + self.validate_ports() index = self.index.variable input_var = self.input.variable assert input_var.ndim == 2 @@ -907,19 +948,37 @@ def __init__(self, name): self._outputs.append(self.output) +class Euclidean(Binary): + """Euclidean Node + + Computes: z_n = \sqrt{\sum_i (xA_n[i] - xB_n[i])^2} + + See also: RadialBasis, which maintains internal parameters. + """ + def transform(self): + """Transform inputs to outputs.""" + self.validate_ports() + if self.input_a.variable.ndim >= 2: + xA = T.flatten(self.input_a.variable, outdim=2) + xB = T.flatten(self.input_b.variable, outdim=2) + axis = 1 + else: + xA = self.input_a.variable + xB = self.input_b.variable + axis = None + self.output.variable = T.sqrt(T.pow(xA - xB, 2.0).sum(axis=axis)) + + class SquaredEuclidean(Binary): """Squared Euclidean Node - Computes: z_n = \sum_i(xA_n[i] - xB_n[i])^2 + Computes: z_n = \sum_i (xA_n[i] - xB_n[i])^2 See also: RadialBasis, which maintains internal parameters. """ - def __init__(self, name): - Binary.__init__(self, name=name) - def transform(self): """Transform inputs to outputs.""" - assert self.is_ready() + self.validate_ports() if self.input_a.variable.ndim >= 2: xA = T.flatten(self.input_a.variable, outdim=2) xB = T.flatten(self.input_b.variable, outdim=2) @@ -936,12 +995,9 @@ class Product(Binary): See also: Multiply, which maintains internal parameters. 
""" - def __init__(self, name): - Binary.__init__(self, name=name) - def transform(self): """Transform inputs to outputs.""" - assert self.is_ready() + self.validate_ports() self.output.variable = self.input_a.variable * self.input_b.variable @@ -957,30 +1013,30 @@ def __init__(self, name): def transform(self): """Transform inputs to outputs.""" - assert self.is_ready() + self.validate_ports() denom = (self.denominator.variable == 0) + self.denominator.variable self.output.variable = self.numerator.variable / denom class L1Magnitude(Unary): def __init__(self, name, axis=None): - Unary.__init__(self, name=name, axis=None) + super(L1Magnitude, self).__init__(name=name, axis=None) self.axis = axis def transform(self): """writeme""" - Unary.transform(self) + super(L1Magnitude, self).transform() self.output.variable = T.sum(T.abs_(self.input.variable), axis=self.axis) class L2Magnitude(Unary): def __init__(self, name, axis=None): - Unary.__init__(self, name=name, axis=None) + super(L2Magnitude, self).__init__(name=name, axis=None) self.axis = axis def transform(self): """writeme""" - Unary.transform(self) + super(L2Magnitude, self).transform() self.output.variable = T.sqrt(T.sum(T.pow(self.input.variable, 2.0), axis=self.axis)) diff --git a/optimus/util.py b/optimus/util.py index 41a9afa..9a61a0b 100644 --- a/optimus/util.py +++ b/optimus/util.py @@ -1,7 +1,29 @@ import numpy as np +import theano -def random_init(param, mean=0.0, std=0.025): +def compile(inputs, outputs): + """Thin wrapper around theano's function compilation. + + Parameters + ---------- + inputs : list of optimus.Inputs + Optimus inputs. + + outputs : list of optimus.Outputs + Connected and transformed outputs. + + Returns + ------- + func : callable + Function defined by the graph of inputs / outputs. + """ + return theano.function(inputs=[x.variable for x in inputs], + outputs=[z.variable for z in outputs], + allow_input_downcast=True) + + +def random_init(param, mean=0.0, std=0.025, seed=None): """Initialize a parameter from a normal distribution. Parameters @@ -13,7 +35,8 @@ def random_init(param, mean=0.0, std=0.025): std : scalar Standard deviation of the distribution. """ - param.value = np.random.normal(mean, std, size=param.shape) + rng = np.random.RandomState(seed) + param.value = rng.normal(mean, std, size=param.shape) def array_stepper(value, length, stride=1, axis=0, @@ -26,13 +49,13 @@ def array_stepper(value, length, stride=1, axis=0, Array to step through. length: int Length of the sub-slice. - stride: int + stride: int, default=1 Number of data points to advance at each step. - axis: int + axis: int, default=0 Axis to step through. - mode: str + mode: str, default='full' Like convolution, one of ['valid', 'full', 'same'] - fill_value: scalar + fill_value: scalar, default=0.0 Like convolution, value to use for out-of-bounds values. Yields @@ -40,7 +63,7 @@ def array_stepper(value, length, stride=1, axis=0, value_slice: np.ndarray Slices of value, with the 'axis' dimension of size 'length'. 
""" - axes_order = range(value.ndim) + axes_order = list(range(value.ndim)) axes_order.insert(0, axes_order.pop(axis)) axes_reorder = np.array(axes_order).argsort() value = value.transpose(axes_order) @@ -48,9 +71,9 @@ def array_stepper(value, length, stride=1, axis=0, if mode == 'full': pad = np.zeros([length] + list(value.shape[1:])) elif mode == 'same': - pad = np.zeros([length / 2] + list(value.shape[1:])) + pad = np.zeros([int(length / 2)] + list(value.shape[1:])) - if not pad is None: + if pad is not None: pad[:] = fill_value value = np.concatenate([pad, value, pad[:-1]], axis=0) @@ -78,9 +101,55 @@ def concatenate_data(batches): result = dict() for batch in batches: for key in batch: - if not key in result: + if key not in result: result[key] = list() result[key].append(batch[key]) for key in result: result[key] = np.concatenate(result[key], axis=0) return result + + +def convolve(x_in, graph, axis=0, chunk_size=250): + """Apply a graph convolutionally to a field in an an entity. + + Parameters + ---------- + x_in : np.ndarray + Observation to predict. + graph : optimus.Graph + Network for processing an entity. + data_key : str + Name of the field to use for the input. + chunk_size : int, default=None + Number of slices to transform in a given step. When None, parses one + slice at a time. + + Returns + ------- + z_out : dict + Output values mapped under their respective keys. + """ + # TODO(ejhumphrey): Make this more stable, somewhat fragile as-is + win_length = graph.inputs.values()[0].shape[2] + input_stepper = array_stepper( + x_in, win_length, axis=axis, mode='same') + results = dict([(k, list()) for k in graph.outputs]) + if chunk_size: + chunk = [] + for x in input_stepper: + chunk.append(x) + if len(chunk) == chunk_size: + for k, v in graph(np.array(chunk)).items(): + results[k].append(v) + chunk = [] + if len(chunk): + for k, v in graph(np.array(chunk)).items(): + results[k].append(v) + else: + for x in input_stepper: + for k, v in graph(x[np.newaxis, ...]).items(): + results[k].append(v) + for k in results: + results[k] = np.concatenate(results[k], axis=0) + + return results diff --git a/setup.py b/setup.py index a0fdda7..bfeda7a 100644 --- a/setup.py +++ b/setup.py @@ -34,9 +34,7 @@ license='ISC', install_requires=[ 'numpy >= 1.8.0', - 'scipy >= 0.13.0', - 'scikit-learn >= 0.14.0', - 'matplotlib', - 'theano >= 0.6.0' + 'theano >= 0.8.0', + 'pandas >= 0.18' ] ) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..cddad25 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,17 @@ +import os +import pytest +import shutil +import tempfile + + +@pytest.fixture(scope='module') +def workspace(request): + test_workspace = tempfile.mkdtemp() + + def fin(): + if os.path.exists(test_workspace): + shutil.rmtree(test_workspace) + + request.addfinalizer(fin) + + return test_workspace diff --git a/tests/sources.py b/tests/sources.py new file mode 100644 index 0000000..ea69ec6 --- /dev/null +++ b/tests/sources.py @@ -0,0 +1,128 @@ +import numpy as np + + +def parabola(xlim=(-5, 5), scale=1, offset=(0, 0), seed=None): + """Sample from a parabolic distribution: + + y = scale * (x - offset[0]) ** 2 + offset[1] + + Parameters + ---------- + xlim : tuple, len=2 + X-limits on which to sample the parabola. + + scale : scalar, default=1 + Scaling coefficient. + + offset : tuple, len=2 + X and Y coordinate offsets. + + seed : int, default=None + Random state for random number generation. + + Yields + ------ + xy : ndarray, shape=(2,) + An (x, y) coordinate pair. 
+ """ + rng = np.random.RandomState(seed) + assert len(xlim) == 2 + while True: + x = rng.rand() * np.abs(np.diff(xlim)) + xlim[0] + y = scale * np.power(x - offset[0], 2.0) - offset[1] + yield np.array([x, y]).squeeze() + + +def gaussian2d(means, stds, seed=None): + """Sample from a Gaussian (normal) distribution. + + Parameters + ---------- + mean : tuple, len=2 + Sample means for (x, y). + + std : tuple, len=2 + Sample standard deviations for (x, y). + + seed : int, default=None + Random state for random number generation. + + Yields + ------ + xy : ndarray, shape=(2,) + An (x, y) coordinate pair. + """ + rng = np.random.RandomState(seed) + assert len(means) == len(stds) == 2 + while True: + x = rng.normal(loc=means[0], scale=stds[0]) + y = rng.normal(loc=means[1], scale=stds[1]) + yield np.array([x, y]) + + +def merge(streams, probs=None, seed=None): + """Stochastically merge a collection of streams. + + Parameters + ---------- + streams : array_like, len=n + Collection of streams from which to draw samples. + + probs : array_like, len=n, or None (default) + Probability of drawing a sample from each stream; if None, + a uniform distribution is used. + + seed : int, default=None + Random state for random number generation. + + Yields + ------ + Same as streams[i] + """ + rng = np.random.RandomState(seed) + if probs is None: + probs = np.ones(len(streams)) + probs = np.asarray(probs, dtype=float) / np.sum(probs) + while True: + idx = rng.choice(len(streams), p=probs) + yield next(streams[idx]) + + +def batch(streams, batch_size, probs=None, seed=None): + """Batch sample a collection of streams. + + Parameters + ---------- + streams : array_like, len=n + Collection of streams from which to draw samples. + + batch_size : int + Number of samples to return on each batch. + + probs : array_like, len=n, or None (default) + Probability of drawing a sample from each stream; if None, + a uniform distribution is used. + + seed : int, default=None + Random state for random number generation. + + Yields + ------ + data : dict + A batch of data with keys [``x_input``, ``y_target``]. + Note that y_target corresponds to the index of the stream from + which the observation came. 
+ """ + rng = np.random.RandomState(seed) + + if probs is None: + probs = np.ones(len(streams)) + + probs = np.asarray(probs, dtype=float) / np.sum(probs) + while True: + x_input, y_target = [], [] + while len(y_target) < batch_size: + idx = rng.choice(len(streams), p=probs) + x_input.append(next(streams[idx])) + y_target.append(idx) + yield dict(x_input=np.array(x_input), y_target=np.array(y_target)) diff --git a/tests/test_framework.py b/tests/test_framework.py new file mode 100644 index 0000000..9174717 --- /dev/null +++ b/tests/test_framework.py @@ -0,0 +1,100 @@ +import numpy as np +import pytest + +import optimus +import sources + + +def build_model(): + x_in = optimus.Input(name="x_input", shape=(None, 2)) + class_idx = optimus.Input(name="y_target", shape=(None,), dtype='int32') + learning_rate = optimus.Input(name='learning_rate', shape=None) + + layer0 = optimus.Affine( + name='layer0', input_shape=x_in.shape, + output_shape=(None, 100), act_type='tanh') + + classifier = optimus.Affine( + name='classifier', input_shape=layer0.output.shape, + output_shape=(None, 2), act_type='softmax') + + nll = optimus.NegativeLogLikelihood(name='nll') + likelihoods = optimus.Output(name='likelihoods') + loss = optimus.Output(name='loss') + + trainer_edges = optimus.ConnectionManager([ + (x_in, layer0.input), + (layer0.output, classifier.input), + (classifier.output, nll.likelihoods), + (class_idx, nll.index), + (nll.output, loss)]) + + update_manager = optimus.ConnectionManager([ + (learning_rate, layer0.weights), + (learning_rate, layer0.bias), + (learning_rate, classifier.weights), + (learning_rate, classifier.bias)]) + + trainer = optimus.Graph( + name='trainer', + inputs=[x_in, class_idx, learning_rate], + nodes=[layer0, classifier, nll], + connections=trainer_edges.connections, + outputs=[loss], + loss=loss, + updates=update_manager.connections, + verbose=True) + + optimus.random_init(classifier.weights, seed=123) + + predictor_edges = optimus.ConnectionManager([ + (x_in, layer0.input), + (layer0.output, classifier.input), + (classifier.output, likelihoods)]) + + predictor = optimus.Graph( + name='predictor', + inputs=[x_in], + nodes=[layer0, classifier], + connections=predictor_edges.connections, + outputs=[likelihoods]) + + return trainer, predictor + + +def test_convergence(workspace): + stream1 = sources.parabola((-2, 2), 2.5, seed=159) + stream2 = sources.gaussian2d((0, 5), (0.25, 0.5), seed=5) + + stream = sources.batch(streams=[stream1, stream2], batch_size=32, + probs=[0.5, 0.5], seed=13) + + trainer, predictor = build_model() + # params = biggie.Stash(os.path.join(workspace, 'params.hdf5')) + params = dict() + driver = optimus.Driver(graph=trainer, name='take000', + parameter_cache=params, + log_file='training_stats.csv') + + hyperparams = dict(learning_rate=0.1) + stats = driver.fit(stream, hyperparams=hyperparams, save_freq=250, + print_freq=100, max_iter=1000) + + # Verify that the output stats and checkpointed params make sense. + num_checkpoints = 4 + assert stats.loss.iloc[0] > stats.loss.iloc[-1] + assert len(params) == num_checkpoints + assert stats.key.isin(params.keys()).sum() == num_checkpoints + + # Verify that saved params can be set in the predictor and used. 
+ for key in sorted(params.keys()): + predictor.param_values = params.get(key) + data = next(stream) + outputs = predictor(x_input=data['x_input']) + assert outputs + y_pred = outputs['likelihoods'].argmax(axis=1) + acc = np.mean(y_pred == data['y_target']) + print("Key: {}\t Accuracy: {}".format(key, acc)) + + # And confirm that the model's accuracy should be hovering around perfect + assert acc > 0.96 diff --git a/tests/test_losses.py b/tests/test_losses.py new file mode 100644 index 0000000..a9abe2a --- /dev/null +++ b/tests/test_losses.py @@ -0,0 +1,193 @@ +import numpy as np +import pytest + +import optimus.core as core +import optimus.nodes as nodes +import optimus.util as util +import optimus.losses as losses + + +def test_NegativeLogLikelihood(): + lhoods = core.Input(name='likelihoods', shape=(None, 2)) + y_true = core.Input(name='y_true', shape=(None,), dtype='int32') + + nll = losses.NegativeLogLikelihood(name='nll') + + nll.likelihoods.connect(lhoods) + with pytest.raises(nodes.UnconnectedNodeError): + nll.transform() + + nll.index.connect(y_true) + assert nll.output.shape is None + nll.transform() + + # TODO: Fixshape + # assert nll.output.shape == () + fx = util.compile(inputs=[lhoods, y_true], outputs=[nll.output]) + + x_obs = np.array([[0.001, 1 - 0.001], [0, 1]]) + y_obs = np.array([0, 1]) + # Pretty wrong answer + assert fx(likelihoods=x_obs[:1], y_true=y_obs[:1])[0] > 6.0 + # Right answer + assert fx(likelihoods=x_obs[1:], y_true=y_obs[1:])[0] == 0 + + +def test_CrossEntropy(): + pred = core.Input(name='prediction', shape=(None, 2)) + target = core.Input(name='target', shape=(None, 2)) + + xentropy = losses.CrossEntropy(name='cross_entropy') + + xentropy.prediction.connect(pred) + with pytest.raises(nodes.UnconnectedNodeError): + xentropy.transform() + + xentropy.target.connect(target) + assert xentropy.output.shape is None + xentropy.transform() + + # TODO: Fixshape + # assert xentropy.output.shape == () + fx = util.compile(inputs=[pred, target], outputs=[xentropy.output]) + + x_obs = np.array([[0.001, 1 - 0.001], [0.001, 1 - 0.001]]) + y_obs = np.array([[1, 0], [0, 1]]) + output = fx(prediction=x_obs, target=y_obs)[0] + + assert output.ndim == 1 + # Pretty wrong answer + assert output[0] > 6 + # Right answer + assert output[1] < 0.01 + + +def test_CrossEntropyLoss(): + pred = core.Input(name='prediction', shape=(None, 2)) + target = core.Input(name='target', shape=(None, 2)) + + xentropy = losses.CrossEntropyLoss(name='cross_entropy') + + xentropy.prediction.connect(pred) + with pytest.raises(nodes.UnconnectedNodeError): + xentropy.transform() + + xentropy.target.connect(target) + assert xentropy.output.shape is None + xentropy.transform() + + # TODO: Fixshape + # assert xentropy.output.shape == () + fx = util.compile(inputs=[pred, target], outputs=[xentropy.output]) + + x_obs = np.array([[0.001, 1 - 0.001], [0.001, 1 - 0.001]]) + y_obs = np.array([[1, 0], [0, 1]]) + # Pretty wrong answer + assert fx(prediction=x_obs[:1], target=y_obs[:1])[0] > 6 + # Right answer + assert fx(prediction=x_obs[1:], target=y_obs[1:])[0] < 0.01 + + +def test_SimilarityMargin(): + dist = core.Input(name='distance', shape=(None,)) + equiv = core.Input(name='equivalence', shape=(None,)) + sim_margin = core.Input(name='sim_margin', shape=None) + diff_margin = core.Input(name='diff_margin', shape=None) + + contrast = losses.SimilarityMargin(name='sim_margin') + + contrast.distance.connect(dist) + with pytest.raises(nodes.UnconnectedNodeError): + contrast.transform() + + 
contrast.equivalence.connect(equiv) + contrast.sim_margin.connect(sim_margin) + contrast.diff_margin.connect(diff_margin) + assert contrast.output.shape is None + contrast.transform() + + # TODO: Fixshape + # assert contrast.output.shape == () + fx = util.compile(inputs=[dist, equiv, sim_margin, diff_margin], + outputs=[contrast.output]) + + dvals = np.array([0.5, 0.5, 2.5, 2.5]) + evals = np.array([1, 0, 1, 0]) + margins = dict(sim_margin=1.0, diff_margin=2.0) + exps = [0, 1.5**2, 1.5**2, 0] + for n in range(4): + idx = slice(n, n + 1) + cost = fx(distance=dvals[idx], equivalence=evals[idx], **margins) + assert cost[0] == exps[n], \ + (n, dvals[idx], evals[idx], cost[0], exps[n]) + + +def test_ContrastiveMargin_no_filter(): + cost_sim = core.Input(name='cost_sim', shape=(None,)) + cost_diff = core.Input(name='cost_diff', shape=(None,)) + margin_sim = core.Input(name='margin_sim', shape=None) + margin_diff = core.Input(name='margin_diff', shape=None) + + contrast = losses.ContrastiveMargin(name='contrastive_margin', + filter_zeros=False) + + contrast.cost_sim.connect(cost_sim) + with pytest.raises(nodes.UnconnectedNodeError): + contrast.transform() + + contrast.cost_diff.connect(cost_diff) + contrast.margin_sim.connect(margin_sim) + contrast.margin_diff.connect(margin_diff) + assert contrast.output.shape is None + contrast.transform() + + # TODO: Fixshape + # assert contrast.output.shape == () + fx = util.compile(inputs=[cost_sim, cost_diff, margin_sim, margin_diff], + outputs=[contrast.output]) + + c_sim = np.array([0.5, 0.1, 0.5]) + c_diff = np.array([0.5, 0, 2]) + margins = dict(margin_sim=0.25, margin_diff=2.0) + exps = [1.5**2 + 0.25**2, 4, 0.25**2] + for n in range(len(exps)): + idx = slice(n, n + 1) + cost = fx(cost_sim=c_sim[idx], cost_diff=c_diff[idx], **margins) + assert cost[0] == exps[n] + + +def _relu(x): + return x * (x > 0) + + +def test_ContrastiveMargin_with_filter(): + cost_sim = core.Input(name='cost_sim', shape=(None,)) + cost_diff = core.Input(name='cost_diff', shape=(None,)) + margin_sim = core.Input(name='margin_sim', shape=None) + margin_diff = core.Input(name='margin_diff', shape=None) + + contrast = losses.ContrastiveMargin(name='contrastive_margin', + filter_zeros=True) + + contrast.cost_sim.connect(cost_sim) + with pytest.raises(nodes.UnconnectedNodeError): + contrast.transform() + + contrast.cost_diff.connect(cost_diff) + contrast.margin_sim.connect(margin_sim) + contrast.margin_diff.connect(margin_diff) + assert contrast.output.shape is None + contrast.transform() + + # TODO: Fixshape + # assert contrast.output.shape == () + fx = util.compile(inputs=[cost_sim, cost_diff, margin_sim, margin_diff], + outputs=[contrast.output]) + + c_sim = np.array([0.5, 0.1, 0.5]) + c_diff = np.array([0.5, 0, 2]) + margins = dict(margin_sim=0.25, margin_diff=2.0) + + exp = 3.1875 + cost = fx(cost_sim=c_sim, cost_diff=c_diff, **margins) + assert cost[0] == exp diff --git a/tests/test_nodes.py b/tests/test_nodes.py index 1aa1b00..1ac1d8d 100644 --- a/tests/test_nodes.py +++ b/tests/test_nodes.py @@ -1,15 +1,16 @@ """Tests for Node objects.""" +import numpy as np import unittest -import numpy as np -import optimus.nodes as nodes import optimus.core as core +import optimus.nodes as nodes +import optimus.util as util def __relu__(x): "Numpy Rectified Linear Unit." 
- return 0.5*(np.abs(x) + x) + return 0.5 * (np.abs(x) + x) class NodeTests(unittest.TestCase): @@ -28,7 +29,7 @@ def test_Constant(self): n.data.value = 1.0 n.transform() - fx = nodes.compile(inputs=[], outputs=[n.output]) + fx = util.compile(inputs=[], outputs=[n.output]) np.testing.assert_equal(np.array(fx()[0]), 1.0) @@ -39,7 +40,7 @@ def test_Add(self): n = nodes.Add(name='accumulate', num_inputs=2) n.input_0.connect(x1) - with self.assertRaises(AssertionError): + with self.assertRaises(nodes.UnconnectedNodeError): n.transform() n.input_1.connect(x2) @@ -47,8 +48,8 @@ def test_Add(self): n.transform() self.assertEqual(n.output.shape, (2, 2)) - fx = nodes.compile(inputs=[x1, x2], - outputs=[n.output]) + fx = util.compile(inputs=[x1, x2], + outputs=[n.output]) a = np.array([[3, -1], [3, 7]]) b = np.array([[1, 2], [3, 4]]) @@ -63,7 +64,7 @@ def test_Bincount(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=[x1], outputs=[n.counts]) + fx = util.compile(inputs=[x1], outputs=[n.counts]) a = np.array([3, 0, 3, 1]) np.testing.assert_equal(n.counts.value, np.array([0, 0, 0, 0])) @@ -79,13 +80,13 @@ def test_Concatenate(self): for axis in range(2): n = nodes.Concatenate(name='concatenate', num_inputs=2, axis=axis) n.input_0.connect(x1) - with self.assertRaises(AssertionError): + with self.assertRaises(nodes.UnconnectedNodeError): n.transform() n.input_1.connect(x2) n.transform() - fx = nodes.compile(inputs=[x1, x2], - outputs=[n.output]) + fx = util.compile(inputs=[x1, x2], + outputs=[n.output]) z = fx(a, b)[0] np.testing.assert_equal(z, np.concatenate([a, b], axis=axis)) @@ -102,8 +103,8 @@ def test_Stack(self): n.input_0.connect(x1) n.transform() - fx = nodes.compile(inputs=[x1, x2], - outputs=[n.output]) + fx = util.compile(inputs=[x1, x2], + outputs=[n.output]) z = fx(a, b)[0] expected = np.array([a, b]) @@ -121,8 +122,8 @@ def test_Dimshuffle(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=n.inputs.values(), - outputs=n.outputs.values()) + fx = util.compile(inputs=n.inputs.values(), + outputs=n.outputs.values()) np.testing.assert_equal(fx(a)[0].shape, shp) @@ -136,8 +137,8 @@ def test_Slice(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=n.inputs.values(), - outputs=n.outputs.values()) + fx = util.compile(inputs=n.inputs.values(), + outputs=n.outputs.values()) np.testing.assert_equal(fx(a)[0], ans) @@ -147,8 +148,8 @@ def test_Log(self): log.input.connect(x1) log.transform() - fx = nodes.compile(inputs=log.inputs.values(), - outputs=log.outputs.values()) + fx = util.compile(inputs=log.inputs.values(), + outputs=log.outputs.values()) a = np.array([[3, 1], [4, 7]], dtype=np.float32) z = fx(a)[0] @@ -163,8 +164,8 @@ def test_Multiply(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=n.inputs.values(), - outputs=n.outputs.values()) + fx = util.compile(inputs=n.inputs.values(), + outputs=n.outputs.values()) np.testing.assert_equal(fx(a)[0], np.zeros_like(a)) @@ -175,8 +176,8 @@ def test_Multiply(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=n.inputs.values(), - outputs=n.outputs.values()) + fx = util.compile(inputs=n.inputs.values(), + outputs=n.outputs.values()) np.testing.assert_equal(fx(a)[0], np.zeros_like(a)) @@ -192,8 +193,8 @@ def test_Max(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=n.inputs.values(), - outputs=n.outputs.values()) + fx = util.compile(inputs=n.inputs.values(), + outputs=n.outputs.values()) np.testing.assert_equal(fx(a)[0], res[idx]) @@ -206,8 
+207,8 @@ def test_Min(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=n.inputs.values(), - outputs=n.outputs.values()) + fx = util.compile(inputs=n.inputs.values(), + outputs=n.outputs.values()) np.testing.assert_equal(fx(a)[0], res[idx]) @@ -220,8 +221,8 @@ def test_Sum(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=n.inputs.values(), - outputs=n.outputs.values()) + fx = util.compile(inputs=n.inputs.values(), + outputs=n.outputs.values()) np.testing.assert_equal(fx(a)[0], res[idx]) @@ -234,8 +235,8 @@ def test_Mean(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=n.inputs.values(), - outputs=n.outputs.values()) + fx = util.compile(inputs=n.inputs.values(), + outputs=n.outputs.values()) np.testing.assert_equal(fx(a)[0], res[idx]) @@ -250,8 +251,8 @@ def test_NormalizeDim(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=n.inputs.values(), - outputs=n.outputs.values()) + fx = util.compile(inputs=n.inputs.values(), + outputs=n.outputs.values()) np.testing.assert_almost_equal(fx(a)[0], ans) def test_SelectIndex(self): @@ -265,8 +266,8 @@ def test_SelectIndex(self): n.index.connect(idx) n.transform() - fx = nodes.compile(inputs=[x1, idx], - outputs=n.outputs.values()) + fx = util.compile(inputs=[x1, idx], + outputs=n.outputs.values()) np.testing.assert_equal(fx(a, i)[0], np.array([-1, 4])) @@ -286,8 +287,8 @@ def test_SquaredEuclidean(self): n.input_b.connect(x2) n.transform() - fx = nodes.compile(inputs=[x1, x2], - outputs=n.outputs.values()) + fx = util.compile(inputs=[x1, x2], + outputs=n.outputs.values()) np.testing.assert_equal(fx(a, b)[0], z) def test_Product(self): @@ -301,14 +302,14 @@ def test_Product(self): x2 = core.Input(name='x2', shape=b.shape) n = nodes.Product('product') n.input_a.connect(x1) - with self.assertRaises(AssertionError): + with self.assertRaises(nodes.UnconnectedNodeError): n.transform() n.input_b.connect(x2) self.assertTrue(n.is_ready()) n.transform() - fx = nodes.compile(inputs=[x1, x2], - outputs=n.outputs.values()) + fx = util.compile(inputs=[x1, x2], + outputs=n.outputs.values()) np.testing.assert_equal(fx(a, b)[0], a*b) def test_Affine_linear(self): @@ -328,7 +329,7 @@ def test_Affine_linear(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=[x1], outputs=n.outputs.values()) + fx = util.compile(inputs=[x1], outputs=n.outputs.values()) np.testing.assert_equal(fx(a)[0], np.dot(a, w) + b) def test_Affine_relu(self): @@ -348,7 +349,7 @@ def test_Affine_relu(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=[x1], outputs=n.outputs.values()) + fx = util.compile(inputs=[x1], outputs=n.outputs.values()) np.testing.assert_equal(fx(a)[0], __relu__(np.dot(a, w) + b)) def test_Affine_dropout(self): @@ -368,12 +369,12 @@ def test_Affine_dropout(self): n.enable_dropout() n.input.connect(x1) - with self.assertRaises(AssertionError): + with self.assertRaises(nodes.UnconnectedNodeError): n.transform() n.dropout.connect(dropout) n.transform() - fx = nodes.compile(inputs=[x1, dropout], outputs=n.outputs.values()) + fx = util.compile(inputs=[x1, dropout], outputs=n.outputs.values()) np.testing.assert_equal(fx(a, 0.0)[0], np.dot(a, w) + b) self.assertGreaterEqual(np.equal(fx(a, 0.9)[0], 0.0).sum(), 1) @@ -408,7 +409,7 @@ def test_Affine_share_params(self): n2.input.connect(x) n2.transform() - fx = nodes.compile(inputs=[x], outputs=n2.outputs.values()) + fx = util.compile(inputs=[x], outputs=n2.outputs.values()) np.testing.assert_equal(fx(a)[0], np.dot(a, w) + b) 
n1.weights.value *= 2 @@ -438,7 +439,7 @@ def test_Conv3D_linear(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=[x1], outputs=n.outputs.values()) + fx = util.compile(inputs=[x1], outputs=n.outputs.values()) np.testing.assert_equal(fx(a.reshape(1, 1, 2, 3))[0], z.reshape(1, 3, 1, 3)) @@ -470,7 +471,7 @@ def test_Conv3D_relu(self): n.input.connect(x1) n.transform() - fx = nodes.compile(inputs=[x1], outputs=n.outputs.values()) + fx = util.compile(inputs=[x1], outputs=n.outputs.values()) np.testing.assert_equal(fx(a)[0], __relu__(z)) def test_Conv3D_dropout(self): @@ -501,12 +502,12 @@ def test_Conv3D_dropout(self): n.bias.value = b n.input.connect(x1) - with self.assertRaises(AssertionError): + with self.assertRaises(nodes.UnconnectedNodeError): n.transform() n.dropout.connect(dropout) n.transform() - fx = nodes.compile(inputs=[x1, dropout], outputs=n.outputs.values()) + fx = util.compile(inputs=[x1, dropout], outputs=n.outputs.values()) np.testing.assert_equal(fx(a, 0.0)[0], z) self.assertGreaterEqual(np.equal(fx(a, 0.9)[0], 0.0).sum(), 1) @@ -524,10 +525,22 @@ def test_RadialBasis(self): n.input.connect(x) n.transform() - fx = nodes.compile(inputs=[x], outputs=n.outputs.values()) + fx = util.compile(inputs=[x], outputs=n.outputs.values()) z = np.power(a.reshape(2, 2, 1) - w.reshape(1, 2, 3), 2.0).sum(axis=1) np.testing.assert_equal(fx(a)[0], z) + def test_SliceGT(self): + x = core.Input(name='x', shape=(None,)) + + n = nodes.SliceGT(name='slice-greater', value=0) + n.input.connect(x) + n.transform() + + fx = util.compile(inputs=[x], outputs=n.outputs.values()) + a = np.array([1, -2, 0]) + np.testing.assert_equal(fx(a)[0], np.array([1])) + + if __name__ == "__main__": unittest.main()
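A minimal sketch of the new examples/datatools.minibatch generator added in this changeset, assuming examples/ is on the Python path and substituting random toy arrays for real MNIST data:

import numpy as np

from datatools import minibatch  # examples/datatools.py from this changeset

# Toy stand-ins for MNIST-shaped data: 100 images, 10 classes.
data = np.random.normal(size=(100, 1, 28, 28))
labels = np.random.randint(0, 10, size=100)

# Yields dicts with 'data' and 'labels' keys; stops after max_iter batches.
for batch in minibatch(data, labels, batch_size=16, max_iter=10):
    print(batch['data'].shape, batch['labels'].shape)  # (16, 1, 28, 28) (16,)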
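A minimal sketch of wiring the new SimilarityMargin loss and compiling it with optimus.util.compile, mirroring tests/test_losses.py; the input values are illustrative only:

import numpy as np

import optimus.core as core
import optimus.losses as losses
import optimus.util as util

dist = core.Input(name='distance', shape=(None,))
equiv = core.Input(name='equivalence', shape=(None,))
sim_margin = core.Input(name='sim_margin', shape=None)
diff_margin = core.Input(name='diff_margin', shape=None)

# Connect all four ports before transform(); otherwise the new
# validate_ports() check raises an UnconnectedNodeError.
loss = losses.SimilarityMargin(name='sim_margin_loss')
loss.distance.connect(dist)
loss.equivalence.connect(equiv)
loss.sim_margin.connect(sim_margin)
loss.diff_margin.connect(diff_margin)
loss.transform()

fx = util.compile(inputs=[dist, equiv, sim_margin, diff_margin],
                  outputs=[loss.output])

# A similar pair (equivalence=1) within sim_margin incurs zero cost.
print(fx(distance=np.array([0.5]), equivalence=np.array([1.0]),
         sim_margin=1.0, diff_margin=2.0)[0])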
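A sketch of the reworked Driver workflow, reusing build_model and the data streams defined in tests/test_framework.py and tests/sources.py; it assumes those modules are importable (e.g. run from the tests/ directory):

import optimus
import sources
from test_framework import build_model

trainer, predictor = build_model()
stream = sources.batch(
    streams=[sources.parabola((-2, 2), 2.5, seed=159),
             sources.gaussian2d((0, 5), (0.25, 0.5), seed=5)],
    batch_size=32, probs=[0.5, 0.5], seed=13)

# Parameter checkpoints now go to a dict-like cache (e.g. a biggie.Stash),
# and fit() returns a DataFrame with columns [key, timestamp, iteration, loss].
params = dict()
driver = optimus.Driver(graph=trainer, name='demo', parameter_cache=params,
                        log_file='training_stats.csv')
stats = driver.fit(stream, hyperparams=dict(learning_rate=0.1),
                   save_freq=250, print_freq=100, max_iter=1000)

# Restore the most recent checkpoint into the predictor graph.
predictor.param_values = params[sorted(params.keys())[-1]]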