In [4]:
"""
Most of this code is taken from
the colorama library
"""
import logging
import sys
import traceback as tb

# Maps a substring of a source-file path to the color name used for that
# module's output. get_color_code() returns the value of a key contained in
# the path it is given and falls back to the 'default' entry otherwise.
# The values are keys of _COLOR_MAP.
COLOR_CODES = {
    'utility': 'yellow',
    'algorithm': 'lblue',
    'traj_opt': 'lgreen',
    'dynamics': 'lblue',
    'agent': 'lyellow',
    'default': 'white',
}


# Prefixes/terminator from which the ANSI escape sequences below are built.
CSI = '\033['  # ESC [  -- prefix of the color, cursor and clear sequences
OSC = '\033]'  # ESC ]  -- prefix used by set_title()
BEL = '\007'   # bell character; set_title() appends it to end its sequence


def code_to_chars(code):
    """Wrap a numeric code as the escape sequence ``CSI <code> m``."""
    return ''.join((CSI, str(code), 'm'))


def set_title(title):
    """Return the escape sequence ``OSC 2; <title> BEL`` for the given title string."""
    return ''.join((OSC, '2;', title, BEL))


def clear_screen(mode=2):
    """Return the escape sequence ``CSI <mode> J`` (mode defaults to 2)."""
    return ''.join((CSI, str(mode), 'J'))


def clear_line(mode=2):
    """Return the escape sequence ``CSI <mode> K`` (mode defaults to 2)."""
    return ''.join((CSI, str(mode), 'K'))


class AnsiCodes(object):
    """Base class for tables of ANSI codes.

    Subclasses declare their codes as numeric class attributes. Creating an
    instance shadows every public attribute with an instance attribute
    holding ``code_to_chars(<number>)``, i.e. the ready-to-print escape
    sequence. The class attributes themselves are left as numbers.
    """

    def __init__(self):
        # dir() is evaluated once, before any instance attribute is set.
        public_names = [attr for attr in dir(self) if not attr.startswith('_')]
        for attr in public_names:
            setattr(self, attr, code_to_chars(getattr(self, attr)))


class AnsiCursor(object):
    """Builds cursor-control escape sequences; every method returns a string."""

    def UP(self, n=1):
        """Return ``CSI <n> A``."""
        return ''.join((CSI, str(n), 'A'))

    def DOWN(self, n=1):
        """Return ``CSI <n> B``."""
        return ''.join((CSI, str(n), 'B'))

    def FORWARD(self, n=1):
        """Return ``CSI <n> C``."""
        return ''.join((CSI, str(n), 'C'))

    def BACK(self, n=1):
        """Return ``CSI <n> D``."""
        return ''.join((CSI, str(n), 'D'))

    def POS(self, x=1, y=1):
        """Return ``CSI <y> ; <x> H`` -- note that y comes first in the sequence."""
        return ''.join((CSI, str(y), ';', str(x), 'H'))


class AnsiFore(AnsiCodes):
    """Foreground color codes (numeric on the class, escape strings on instances)."""

    # Codes 30..37, in this order.
    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(30, 38)
    RESET = 39

    # These are fairly well supported, but not part of the standard.
    # Codes 90..97, same color order as above.
    (LIGHTBLACK_EX, LIGHTRED_EX, LIGHTGREEN_EX, LIGHTYELLOW_EX,
     LIGHTBLUE_EX, LIGHTMAGENTA_EX, LIGHTCYAN_EX, LIGHTWHITE_EX) = range(90, 98)


class AnsiBack(AnsiCodes):
    """Background color codes (numeric on the class, escape strings on instances)."""

    # Codes 40..47, in this order.
    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(40, 48)
    RESET = 49

    # These are fairly well supported, but not part of the standard.
    # Codes 100..107, same color order as above.
    (LIGHTBLACK_EX, LIGHTRED_EX, LIGHTGREEN_EX, LIGHTYELLOW_EX,
     LIGHTBLUE_EX, LIGHTMAGENTA_EX, LIGHTCYAN_EX, LIGHTWHITE_EX) = range(100, 108)


class AnsiStyle(AnsiCodes):
    """Text style codes (numeric on the class, escape strings on instances)."""

    RESET_ALL, BRIGHT, DIM = 0, 1, 2
    NORMAL = 22


# Shared instances: their public attributes are escape-sequence strings.
Fore, Back, Style, Cursor = AnsiFore(), AnsiBack(), AnsiStyle(), AnsiCursor()

# Color name -> (foreground sequence, background sequence).
# 'gray' and 'lblack' are two names for the same LIGHTBLACK_EX pair, and the
# None key lets callers omit a color and get the reset sequences.
_COLOR_MAP = dict(
    (label, (getattr(Fore, attr), getattr(Back, attr)))
    for label, attr in (
        ('red', 'RED'),
        ('blue', 'BLUE'),
        ('green', 'GREEN'),
        ('white', 'WHITE'),
        ('black', 'BLACK'),
        ('yellow', 'YELLOW'),
        ('magenta', 'MAGENTA'),
        ('cyan', 'CYAN'),
        ('gray', 'LIGHTBLACK_EX'),
        ('reset', 'RESET'),
        ('lred', 'LIGHTRED_EX'),
        ('lblue', 'LIGHTBLUE_EX'),
        ('lgreen', 'LIGHTGREEN_EX'),
        ('lwhite', 'LIGHTWHITE_EX'),
        ('lblack', 'LIGHTBLACK_EX'),
        ('lyellow', 'LIGHTYELLOW_EX'),
        ('lmagenta', 'LIGHTMAGENTA_EX'),
        ('lcyan', 'LIGHTCYAN_EX'),
        (None, 'RESET'),
    )
)


def print_color(text, fore=None, back=None, reset=True, outstream=None):
    """
    Writes text to a stream in the specified colors (no newline is added).

    Color codes (keys of _COLOR_MAP):
        red, blue, green, white, black, yellow, magenta, cyan, gray, reset,
        lred, lblue, lgreen, lwhite, lblack, lyellow, lmagenta, lcyan.
        None selects the reset sequence.

    :param text: A string to print
    :param fore: A string color code for the foreground.
    :param back: A string color code for the background.
    :param reset: (Default True) Whether to restore colors back to defaults
        after printing.
    :param outstream: Object with a ``write`` method. Defaults to the value
        of ``sys.stdout`` at call time.
    :return: None
    :raises KeyError: if ``fore`` or ``back`` is not a key of _COLOR_MAP.
    """
    # Resolve the default here rather than in the signature: a default of
    # ``sys.stdout`` would be frozen when the function is defined and would
    # ignore any later redirection or capture of stdout.
    if outstream is None:
        outstream = sys.stdout
    reset_ = Fore.RESET + Back.RESET if reset else ''
    outstream.write(_COLOR_MAP[fore][0] + _COLOR_MAP[back][1] + text + reset_)


def cursorup():
    """Print the string returned by ``Cursor.UP()`` followed by a newline."""
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(Cursor.UP())


def cursorl():
    """Print the string returned by ``Cursor.BACK()`` followed by a newline."""
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(Cursor.BACK())


def cursorr():
    """Print the string returned by ``Cursor.FORWARD()`` followed by a newline."""
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(Cursor.FORWARD())


def get_color_code(fname):
    """Pick the color name for a file path.

    Returns the COLOR_CODES value of the first key (in the dict's iteration
    order) that occurs as a substring of ``fname``; when no key matches, the
    'default' entry is returned.
    """
    matching = [key for key in COLOR_CODES if key in fname]
    chosen = matching[0] if matching else 'default'
    return COLOR_CODES[chosen]


def color_string(msg, color=None):
    """Return ``msg`` wrapped in a foreground color sequence and a reset.

    :param msg: String to colorize.
    :param color: A key of _COLOR_MAP. When None, the color is derived from
        the file name of the calling frame via get_color_code().
    :return: ``<foreground sequence> + msg + Fore.RESET``
    :raises KeyError: if ``color`` is not a key of _COLOR_MAP.
    """
    if color is None:
        # extract_stack()[-1] is this function; [-2] is whoever called it.
        # Only the file name (first field of the frame record) is needed.
        fname = tb.extract_stack()[-2][0]
        color = get_color_code(fname)
    return _COLOR_MAP[color][0] + msg + Fore.RESET


class ColorLogger(object):
    """Thin wrapper around ``logging.getLogger(name)`` that colors messages.

    info/debug use the color get_color_code() returns for the logger name;
    warning always uses 'red'. The message is %-formatted with the extra
    positional arguments before it is colored, so the underlying logger
    receives an already-rendered string.
    """

    def __init__(self, name):
        self.logger = logging.getLogger(name)
        self.name = name

    def info(self, msg, *frmat):
        rendered = msg % frmat
        self.logger.info(color_string(rendered, color=get_color_code(self.name)))

    def debug(self, msg, *frmat):
        rendered = msg % frmat
        self.logger.debug(color_string(rendered, color=get_color_code(self.name)))

    def warning(self, msg, *frmat):
        rendered = msg % frmat
        self.logger.warning(color_string(rendered, color='red'))

In [5]:
""" General utility functions """
import logging
try:
   import cPickle as pickle
except:
   import pickle
import gzip
import contextlib
import numpy as np
import scipy.ndimage as sp_ndimage
import os
import errno
import time
import traceback as tb


# Module-level logger; DataLogger.unpickle reports unreadable files through it.
LOGGER = logging.getLogger(__name__)

@contextlib.contextmanager
def open_zip(filename, mode='r'):
    """
    Open a file; if filename ends with .gz, opens as a gzip file.

    Used as ``with open_zip(path, mode) as f:``. The file is closed when the
    ``with`` block exits, whether normally or through an exception.

    Args:
        filename: Path of the file to open.
        mode: Mode string passed to ``gzip.open`` or ``open``.
    Yields:
        The open file object.
    """
    if filename.endswith('.gz'):
        openfn = gzip.open
    else:
        openfn = open
    # Delegate to the file object's own context manager so the handle is
    # always released; yielding the bare handle would leave it open.
    with openfn(filename, mode) as f:
        yield f

class DataLogger(object):
    """
    Writes Python objects to files with pickle and reads them back.
    Files are opened through open_zip, so a '.gz' suffix selects gzip.

    TODO: Handle logging text to terminal, GUI text, and/or log file at
        DEBUG, INFO, WARN, ERROR, FATAL levels.
    TODO: Handle logging data to terminal, GUI text/plots, and/or data
          files.
    """
    def __init__(self):
        pass

    def pickle(self, filename, data):
        """ Serialize ``data`` with pickle into the file ``filename``. """
        with open_zip(filename, 'wb') as sink:
            pickle.dump(data, sink)

    def unpickle(self, filename):
        """
        Load and return the pickled object stored in ``filename``.

        Returns None (after a DEBUG log line) when an IOError is raised.
        NOTE: pickle.load can execute arbitrary code; only use on trusted files.
        """
        try:
            with open_zip(filename, 'rb') as source:
                return pickle.load(source)
        except IOError:
            LOGGER.debug('Unpickle error. Cannot find file: %s', filename)
            return None
            
def mkdir_p(path):
    """Create ``path`` and any missing parents; do nothing if it is already a directory.

    Any OSError other than "already exists as a directory" is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as err:  # Python >2.5
        already_a_dir = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_dir:
            raise

def extract_demo_dict(demo_file):
    """Load demonstration data from one pickle file or from a list of them.

    Args:
        demo_file: Either a single file name, or a ``list`` of file names.
    Returns:
        For a single file name, whatever ``DataLogger().unpickle`` returns
        for it. For a list, a dict mapping each list index to the object
        unpickled from the file at that index; each load is wrapped in a
        Timer.
    """
    if type(demo_file) is not list:
        return DataLogger().unpickle(demo_file)
    demos = {}
    # enumerate() replaces the Python-2-only xrange index loop.
    for i, path in enumerate(demo_file):
        with Timer('Extracting demo file %d' % i):
            demos[i] = DataLogger().unpickle(path)
    return demos

class Timer(object):
    """Context manager that prints how long its ``with`` block took.

    On exit it prints one line of the form
    ``TIMER:<file>:<function>: <message> (Elapsed: <seconds>s)``, where the
    file and function are those of the code containing the ``with``
    statement. The part after ``TIMER:`` is colored gray via color_string.
    The line is printed whether or not the block raised; exceptions are not
    suppressed.
    """

    def __init__(self, message):
        self.message = message

    def __enter__(self):
        self.time_start = time.time()
        # Return the timer so that ``with Timer(...) as t`` binds it
        # instead of None.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        new_time = time.time() - self.time_start
        # extract_stack()[-1] is this method; [-2] is the frame running the
        # ``with`` statement.
        fname, _, method, _ = tb.extract_stack()[-2]  # Get caller
        _, fname = os.path.split(fname)
        id_str = '%s:%s' % (fname, method)
        # Single-argument print(...) behaves the same under Python 2 and 3.
        print('TIMER:'+color_string('%s: %s (Elapsed: %fs)' % (id_str, self.message, new_time), color='gray'))

def load_scale_and_bias(data_path):
    """Read a pickled mapping from ``data_path`` and return its ``(scale, bias)`` entries.

    NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    """
    with open(data_path, 'rb') as handle:
        params = pickle.load(handle)
    return params['scale'], params['bias']
    
def generate_noise(T, dU):
    """
    Generate a T x dU array of smoothed, renormalized Gaussian noise.

    White noise is drawn from ``np.random.randn``, each column is smoothed
    along the time axis with a Gaussian filter, and every column is then
    rescaled so that its variance is 1.

    Args:
        T: Number of time steps.
        dU: Dimensionality of actions.
    Returns:
        A (T, dU) float array whose columns each have unit variance.

    The smoothing width is fixed at 2.0 and is passed to ``gaussian_filter``
    as its ``sigma`` argument.
    """
    sigma = 2.0
    noise = np.random.randn(T, dU)
    # Smooth noise. This violates the controller assumption, but
    # might produce smoother motions.
    for i in range(dU):
        # Call the public scipy.ndimage function directly; the
        # scipy.ndimage.filters namespace is deprecated.
        noise[:, i] = sp_ndimage.gaussian_filter(noise[:, i], sigma)
    # Smoothing shrinks the variance, so renormalize each column.
    variance = np.var(noise, axis=0)
    return noise / np.sqrt(variance)

In [6]:
""" Utility functions for tensorflow. """

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import array_ops
import numpy as np

def safe_get(name, *args, **kwargs):
    """ tf.get_variable with automatic reuse.

    If the first tf.get_variable call raises ValueError, reuse is switched
    on for the current variable scope and the call is retried once.
    """
    try:
        variable = tf.get_variable(name, *args, **kwargs)
    except ValueError:
        tf.get_variable_scope().reuse_variables()
        variable = tf.get_variable(name, *args, **kwargs)
    return variable

def init_weights(shape, name=None):
    """ float32 variable of the given shape, initialized from N(0, 0.01^2) samples. """
    dims = tuple(shape)
    initial = np.random.normal(scale=0.01, size=dims).astype('f')
    initializer = tf.constant_initializer(initial)
    return safe_get(name, list(dims), initializer=initializer, dtype=tf.float32)
    
def init_bias(shape, name=None):
    """ Variable initialized from a float32 zeros tensor of the given shape. """
    zeros = tf.zeros(shape, dtype=tf.float32)
    return safe_get(name, initializer=zeros)

def init_fc_weights_xavier(shape, name=None):
    """ float32 variable of the given shape using tf.contrib's xavier_initializer. """
    return safe_get(
        name,
        list(shape),
        initializer=tf.contrib.layers.xavier_initializer(dtype=tf.float32),
        dtype=tf.float32,
    )

def init_conv_weights_xavier(shape, name=None):
    """ float32 variable of the given shape using tf.contrib's xavier_initializer_conv2d. """
    return safe_get(
        name,
        list(shape),
        initializer=tf.contrib.layers.xavier_initializer_conv2d(dtype=tf.float32),
        dtype=tf.float32,
    )
    
def init_fc_weights_snn(shape, name=None):
    """ float32 variable initialized from N(0, 1/shape[0]) samples. """
    stddev = np.sqrt(1.0/shape[0])
    initial = np.random.normal(scale=stddev, size=shape).astype('f')
    initializer = tf.constant_initializer(initial)
    return safe_get(name, list(shape), initializer=initializer, dtype=tf.float32)

def init_conv_weights_snn(shape, name=None):
    """ float32 variable initialized from N(0, 1/(shape[0]*shape[1]*shape[2])) samples. """
    fan_in = shape[0]*shape[1]*shape[2]
    stddev = np.sqrt(1.0/fan_in)
    initial = np.random.normal(scale=stddev, size=shape).astype('f')
    initializer = tf.constant_initializer(initial)
    return safe_get(name, list(shape), initializer=initializer, dtype=tf.float32)

def batched_matrix_vector_multiply(vector, matrix):
    """ computes x^T A in mini-batches.

    A new axis is inserted at position 1 of ``vector``, the result is
    matmul'ed with ``matrix``, and axis 1 is squeezed out again.
    """
    row_matrices = tf.expand_dims(vector, [1])
    return tf.squeeze(tf.matmul(row_matrices, matrix), [1])

def euclidean_loss_layer(a, b, multiplier=100.0, use_l1=False, eps=0.01):
    """ Mean loss on the scaled difference d = multiplier*a - multiplier*b.

    use_l1=False: mean(d^2)
    use_l1=True:  mean(eps*d^2 + |d|)
    """
    scale = tf.constant(multiplier, dtype='float') #for bc #10000
    residual = a*scale-b*scale
    squared = tf.square(residual)
    penalty = eps*squared + tf.abs(residual) if use_l1 else squared
    return tf.reduce_mean(penalty)

def conv2d(img, w, b, strides=[1, 1, 1, 1], is_dilated=False):
    """ 'SAME'-padded convolution of ``img`` with ``w`` plus bias ``b``.

    With is_dilated=True, tf.nn.atrous_conv2d with rate 2 is used and
    ``strides`` is ignored; otherwise tf.nn.conv2d with ``strides``.
    """
    if not is_dilated:
        return tf.nn.conv2d(img, w, strides=strides, padding='SAME') + b
    return tf.nn.atrous_conv2d(img, w, rate=2, padding='SAME') + b
            
def dropout(layer, keep_prob=0.9, is_training=True, name=None, selu=False):
    """ Apply dropout to ``layer``.

    selu=True delegates to dropout_selu with a drop rate of 1 - keep_prob.
    Otherwise tf.nn.dropout is used while training, and outside training
    the layer is passed through ``tf.add(layer, 0)`` so the op still gets
    ``name``.
    """
    if selu:
        return dropout_selu(layer, 1.0 - keep_prob, name=name, training=is_training)
    if not is_training:
        return tf.add(layer, 0, name=name)
    return tf.nn.dropout(layer, keep_prob=keep_prob, name=name)

def norm(layer, norm_type='batch_norm', decay=0.9, id=0, is_training=True, activation_fn=tf.nn.relu, prefix='conv_'):
    """ Normalize ``layer`` and apply an activation.

    norm_type 'batch_norm' uses tf.contrib.layers.batch_norm (activation
    applied inside the call); 'layer_norm' uses tf.contrib.layers.layer_norm
    and applies ``activation_fn`` to its output. Any other norm_type returns
    ``tf.nn.relu(layer)`` without normalization (``activation_fn`` is not
    used in that case).

    Variables live in the scope 'norm_layer_<prefix><id>'. Creation is tried
    first without ``reuse``; on ValueError the call is repeated with
    ``reuse=True``. The batch_norm path with is_training=False always passes
    ``reuse=True``.
    """
    # Anything other than the two supported types falls through to a plain ReLU.
    if norm_type != 'batch_norm' and norm_type != 'layer_norm':
        return tf.nn.relu(layer)
    with tf.variable_scope('norm_layer_%s%d' % (prefix, id)) as vs:
        if norm_type == 'batch_norm':
            if is_training:
                try:
                    layer = tf.contrib.layers.batch_norm(layer, is_training=True, center=True,
                        scale=False, decay=decay, activation_fn=activation_fn, updates_collections=None, scope=vs) # updates_collections=None
                except ValueError:
                    # Retry with reuse=True after the first attempt raised ValueError.
                    layer = tf.contrib.layers.batch_norm(layer, is_training=True, center=True,
                        scale=False, decay=decay, activation_fn=activation_fn, updates_collections=None, scope=vs, reuse=True) # updates_collections=None
            else:
                layer = tf.contrib.layers.batch_norm(layer, is_training=False, center=True,
                    scale=False, decay=decay, activation_fn=activation_fn, updates_collections=None, scope=vs, reuse=True) # updates_collections=None
        elif norm_type == 'layer_norm': # layer_norm
            # Take activation_fn out to apply lrelu
            try:
                layer = activation_fn(tf.contrib.layers.layer_norm(layer, center=True,
                    scale=False, scope=vs)) # updates_collections=None

            except ValueError:
                # Retry with reuse=True after the first attempt raised ValueError.
                layer = activation_fn(tf.contrib.layers.layer_norm(layer, center=True,
                    scale=False, scope=vs, reuse=True))
        # NOTE(review): the two branches below cannot be reached -- the guard
        # at the top of the function already returned for every norm_type
        # other than 'batch_norm' and 'layer_norm'. `selu` is also not
        # defined in the visible part of this file.
        elif norm_type == 'selu':
            layer = selu(layer)
        else:
            raise NotImplementedError('Other types of norm not implemented.')
        return layer
        
class VBN(object):
    """
    Virtual Batch Normalization

    The constructor computes mean and mean-of-squares statistics from a
    reference batch. Calling the instance normalizes a new input using
    those statistics blended with the input's own per-example statistics,
    or, with update=True, recomputes the stored statistics from the input.
    Every output is passed through ReLU.
    """

    def __init__(self, x, name, epsilon=1e-5):
        """
        x is the reference batch

        Args:
            x: Reference batch tensor; must be rank 4 (asserted in
                _normalize) with a static first dimension.
            name: Variable scope name; also used by __call__.
            epsilon: Float added under the square root when computing std.
        """
        assert isinstance(epsilon, float)

        shape = x.get_shape().as_list()
        with tf.variable_scope(name) as scope:
            self.epsilon = epsilon
            self.name = name
            # Statistics over axes 0, 1 and 2, i.e. one value per entry of the last axis.
            self.mean = tf.reduce_mean(x, [0, 1, 2], keep_dims=True)
            self.mean_sq = tf.reduce_mean(tf.square(x), [0, 1, 2], keep_dims=True)
            self.batch_size = int(x.get_shape()[0])
            assert x is not None
            assert self.mean is not None
            assert self.mean_sq is not None
            out = tf.nn.relu(self._normalize(x, self.mean, self.mean_sq, "reference"))
            # Normalized, ReLU'd reference batch.
            self.reference_output = out

    def __call__(self, x, update=False):
        """
        Normalize x and apply ReLU.

        update=False: per-example statistics of x (reduced over axes 1 and
        2) are mixed with the stored reference statistics using weights
        1/(batch_size+1) and batch_size/(batch_size+1).
        update=True: the stored statistics are replaced by those of x
        (reduced over axes 0, 1 and 2) and used directly.
        """
        with tf.variable_scope(self.name) as scope:
            if not update:
                new_coeff = 1. / (self.batch_size + 1.)
                old_coeff = 1. - new_coeff
                new_mean = tf.reduce_mean(x, [1, 2], keep_dims=True)
                new_mean_sq = tf.reduce_mean(tf.square(x), [1, 2], keep_dims=True)
                mean = new_coeff * new_mean + old_coeff * self.mean
                mean_sq = new_coeff * new_mean_sq + old_coeff * self.mean_sq
                out = tf.nn.relu(self._normalize(x, mean, mean_sq, "live"))
            # Update the mean and mean_sq when passing the reference data
            else:
                self.mean = tf.reduce_mean(x, [0, 1, 2], keep_dims=True)
                self.mean_sq = tf.reduce_mean(tf.square(x), [0, 1, 2], keep_dims=True)
                out = tf.nn.relu(self._normalize(x, self.mean, self.mean_sq, "reference"))
            return out

    def _normalize(self, x, mean, mean_sq, message):
        """
        Return gamma * (x - mean) / sqrt(epsilon + mean_sq - mean^2) + beta.

        gamma and beta are variables with one entry per element of x's last
        axis, fetched with safe_get. ``message`` is not used in the body.
        """
        # make sure this is called with a variable scope
        shape = x.get_shape().as_list()
        assert len(shape) == 4
        self.gamma = safe_get("gamma", [shape[-1]],
                                initializer=tf.random_normal_initializer(1., 0.02))
        # Reshape so gamma/beta broadcast along the last axis of the rank-4 input.
        gamma = tf.reshape(self.gamma, [1, 1, 1, -1])
        self.beta = safe_get("beta", [shape[-1]],
                                initializer=tf.constant_initializer(0.))
        beta = tf.reshape(self.beta, [1, 1, 1, -1])
        assert self.epsilon is not None
        assert mean_sq is not None
        assert mean is not None
        # Var[x] = E[x^2] - E[x]^2; epsilon is added before the square root.
        std = tf.sqrt(self.epsilon + mean_sq - tf.square(mean))
        out = x - mean
        out = out / std
        out = out * gamma
        out = out + beta
        return out

def max_pool(img, k):
    """ 'SAME'-padded max pooling with a k x k window and a stride of k. """
    window = [1, k, k, 1]
    return tf.nn.max_pool(img, ksize=window, strides=list(window), padding='SAME')


# Consider stride size when using xavier for fp network
def get_xavier_weights(filter_shape, poolsize=(2, 2), name=None):
    """ Variable initialized uniformly in [-b, b], b = 4*sqrt(6/(fan_in + fan_out)).

    fan_in  = prod(filter_shape[1:])
    fan_out = filter_shape[0] * prod(filter_shape[2:]) // prod(poolsize)
    """
    fan_in = np.prod(filter_shape[1:])
    fan_out = filter_shape[0] * np.prod(filter_shape[2:]) // np.prod(poolsize)

    # use 4 for sigmoid, 1 for tanh activation
    bound = 4*np.sqrt(6.0/(fan_in + fan_out))
    initial = np.random.uniform(low=-bound, high=bound, size=filter_shape)
    return safe_get(name, filter_shape, initializer=tf.constant_initializer(initial))

def get_he_weights(filter_shape, name=None):
    """ Variable initialized from N(0, 2.6/fan_in), fan_in = prod(filter_shape[1:]).

    Only the first four entries of filter_shape are used for the sample shape.
    NOTE(review): the numerator is 2.6 -- confirm this constant is intended.
    """
    fan_in = np.prod(filter_shape[1:])
    stddev = np.sqrt(2.6/fan_in)

    d0, d1, d2, d3 = filter_shape[0], filter_shape[1], filter_shape[2], filter_shape[3]
    initial = stddev * np.random.randn(d0, d1, d2, d3)
    return safe_get(name, filter_shape, initializer=tf.constant_initializer(initial))

  from ._conv import register_converters as _register_converters
  (fname, cnt))
  (fname, cnt))


In [19]:
class a():
  """Bare attribute holder used in place of a parsed flags object."""
  pass

# NOTE(review): the cell below this one also reads FLAGS.im_width, im_height,
# num_channels, num_fc_layers, layer_size, init, use_noisy_demos, restore_iter
# and val_set_size, none of which are set here.
FLAGS = a()
vars(FLAGS).update(
    num_updates=1,
    update_batch_size=1,
    meta_batch_size=12,
    meta_lr=0.001,
    T=50,
    num_filters=64,
    num_strides=3,
    num_conv_layers=5,
    filter_size=3,
)

In [21]:
# Build the MIL model on its own graph, then run the training loop with
# periodic validation.
#
# NOTE(review): DataGenerator, MIL, train, saver, log_dir, TOTAL_ITERS and
# TEST_PRINT_INTERVAL are not defined in the cells visible above, and the
# FLAGS cell does not set im_width, im_height, num_channels, num_fc_layers,
# layer_size, init, use_noisy_demos, restore_iter or val_set_size; they must
# be provided before this cell can run on a fresh kernel.

# `random` is seeded below but no import of it is visible in the cells above.
import random

tf.set_random_seed(0)
np.random.seed(0)
random.seed(0)

graph = tf.Graph()
# tf.set_random_seed applies to whichever graph is the default when it is
# called. The call above therefore does not seed `graph`, in which the
# network is built, so seed it explicitly as well.
with graph.as_default():
    tf.set_random_seed(0)
sess = tf.Session(graph=graph)
network_config = {
    'num_filters': [FLAGS.num_filters]*FLAGS.num_conv_layers,
    # The first num_strides conv layers use stride 2, the remaining ones stride 1.
    'strides': [[1, 2, 2, 1]]*FLAGS.num_strides + [[1, 1, 1, 1]]*(FLAGS.num_conv_layers-FLAGS.num_strides),
    'filter_size': FLAGS.filter_size,
    'image_width': FLAGS.im_width,
    'image_height': FLAGS.im_height,
    'image_channels': FLAGS.num_channels,
    'n_layers': FLAGS.num_fc_layers,
    'layer_size': FLAGS.layer_size,
    'initialization': FLAGS.init,
}
data_generator = DataGenerator()
state_idx = data_generator.state_idx
# Image indices follow directly after the state indices.
img_idx = range(len(state_idx), len(state_idx)+FLAGS.im_height*FLAGS.im_width*FLAGS.num_channels)
# need to compute x_idx and img_idx from data_generator
model = MIL(data_generator._dU, state_idx=state_idx, img_idx=img_idx, network_config=network_config)

data_generator.generate_batches(noisy=FLAGS.use_noisy_demos)
with graph.as_default():
    # Along axis 1, the first update_batch_size*T entries form input "a" and
    # the rest form input "b".
    train_image_tensors = data_generator.make_batch_tensor(network_config, restore_iter=FLAGS.restore_iter)
    inputa = train_image_tensors[:, :FLAGS.update_batch_size*FLAGS.T, :]
    inputb = train_image_tensors[:, FLAGS.update_batch_size*FLAGS.T:, :]
    train_input_tensors = {'inputa': inputa, 'inputb': inputb}
    val_image_tensors = data_generator.make_batch_tensor(network_config, restore_iter=FLAGS.restore_iter, train=False)
    inputa = val_image_tensors[:, :FLAGS.update_batch_size*FLAGS.T, :]
    inputb = val_image_tensors[:, FLAGS.update_batch_size*FLAGS.T:, :]
    val_input_tensors = {'inputa': inputa, 'inputb': inputb}
model.init_network(graph, input_tensors=train_input_tensors, restore_iter=FLAGS.restore_iter)
model.init_network(graph, input_tensors=val_input_tensors, restore_iter=FLAGS.restore_iter, prefix='Validation_')

with graph.as_default():
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    train(graph, model, saver, sess, data_generator, log_dir, restore_itr=FLAGS.restore_iter)
    
    for itr in range(TOTAL_ITERS):
        state, tgt_mu = data_generator.generate_data_batch(itr)
        # Same a/b split along axis 1 as for the image tensors.
        statea = state[:, :FLAGS.update_batch_size*FLAGS.T, :]
        stateb = state[:, FLAGS.update_batch_size*FLAGS.T:, :]
        actiona = tgt_mu[:, :FLAGS.update_batch_size*FLAGS.T, :]
        actionb = tgt_mu[:, FLAGS.update_batch_size*FLAGS.T:, :]
        feed_dict = {model.statea: statea,
                    model.stateb: stateb,
                    model.actiona: actiona,
                    model.actionb: actionb}

        with graph.as_default():
            results = sess.run([model.train_op], feed_dict=feed_dict)

        if itr != 0 and itr % TEST_PRINT_INTERVAL == 0:
            if FLAGS.val_set_size > 0:
                val_state, val_act = data_generator.generate_data_batch(itr, train=False)
                statea = val_state[:, :FLAGS.update_batch_size*FLAGS.T, :]
                stateb = val_state[:, FLAGS.update_batch_size*FLAGS.T:, :]
                actiona = val_act[:, :FLAGS.update_batch_size*FLAGS.T, :]
                actionb = val_act[:, FLAGS.update_batch_size*FLAGS.T:, :]
                feed_dict = {model.statea: statea,
                            model.stateb: stateb,
                            model.actiona: actiona,
                            model.actionb: actionb}
                with graph.as_default():
                    results = sess.run([model.val_summ_op, model.val_total_loss1,
                                        model.val_total_losses2[model.num_updates-1]],
                                       feed_dict=feed_dict)
                # Single-argument print(...) behaves the same under Python 2 and 3.
                print('Test results: average preloss is %.2f, average postloss is %.2f' % (np.mean(results[1]), np.mean(results[2])))

AttributeError: a instance has no attribute 'im_width'

In [None]:
flags.DEFINE_integer('num_filters', 64, 'number of filters for conv nets -- 64 for placing, 16 for pushing, 40 for reaching.')
flags.DEFINE_integer('filter_size', 3, 'filter size for conv nets -- 3 for placing, 5 for pushing, 3 for reaching.')
flags.DEFINE_integer('num_conv_layers', 5, 'number of conv layers -- 5 for placing, 4 for pushing, 3 for reaching.')
flags.DEFINE_integer('num_strides', 3, 'number of conv layers with strided filters -- 3 for placing, 4 for pushing, 3 for