Learning-to-learn is a way to train a neural network to act as an optimizer. Instead of using a hand-designed optimizer such as "AdaDelta", "ADAM", or "RMSProp", you give a neural network the responsibility of finding a suitable optimizer for you.

In [4]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [1]:
import os
import shutil

import tensorflow as tf

from tensorflow.contrib.learn.python.learn import monitored_session as ms

import meta
import util

In [3]:
# --- Problem and meta-optimizer settings ---
problem = "simple"
num_steps = 100  # Number of optimization steps per epoch
unroll_length = 20  # Meta-optimizer unroll length
learning_rate = 0.01
second_derivatives = False

# --- Training / evaluation schedule ---
# NOTE(review): periods are presumably counted in epochs -- confirm against
# the training loop that consumes them.
num_epoch = 2000
log_period = 100
evaluation_period = 1000
evaluation_epochs = 20

# --- Output ---
# Directory path; the notebook recreates this directory before training.
save_path = "meta_optimizer"

### Main Learning for a simple problem of optimizing a square function

In [9]:
# Number of meta-optimizer unrolls per epoch: the per-epoch step budget
# divided (integer division) by the unroll length.
num_unrolls = num_steps // unroll_length

# Start from a fresh, empty output directory.
# shutil.rmtree is used rather than os.rmdir because os.rmdir raises OSError
# when the directory is not empty, which made this cell fail on any re-run
# after files had been written under save_path.
if os.path.isdir(save_path):
    shutil.rmtree(save_path)
os.makedirs(save_path)

In [10]:
def simple():
  """Scalar quadratic problem: f(x) = x^2.

  Returns:
    A zero-argument `build` function. Calling it creates the variable "x"
    and returns the op computing its square.
  """

  def build():
    """Creates the variable "x" and returns the squared-value loss op."""
    ones = tf.ones_initializer()
    scalar = tf.get_variable("x", shape=[], dtype=tf.float32, initializer=ones)
    loss = tf.square(scalar, name="x_squared")
    return loss

  return build

In [11]:
def simple_multi_optimizer(num_dims=2):
  """Multidimensional simple problem: the sum of squared coordinates.

  Every coordinate is its own scalar variable named "x_<i>", so that
  individual coordinates can be referred to by name.

  Args:
    num_dims: Number of scalar coordinate variables to create.

  Returns:
    A zero-argument `build` function. Calling it creates the coordinate
    variables and returns the op computing the sum of their squares.
  """

  def get_coordinate(i):
    """Creates the scalar float32 variable "x_<i>" with a ones initializer."""
    return tf.get_variable("x_{}".format(i),
                           shape=[],
                           dtype=tf.float32,
                           initializer=tf.ones_initializer())

  def build():
    """Builds loss graph."""
    # `range` instead of `xrange`: `xrange` does not exist on Python 3, and
    # `range` behaves the same here on Python 2 for a small `num_dims`.
    coordinates = [get_coordinate(i) for i in range(num_dims)]
    # NOTE(review): this uses the tf.concat(axis, values) argument order.
    # TensorFlow 1.0+ expects tf.concat(values, axis) -- confirm against the
    # TensorFlow version this notebook is run with before changing it.
    x = tf.concat(0, [tf.expand_dims(c, 0) for c in coordinates])
    return tf.reduce_sum(tf.square(x, name="x_squared"))

  return build


To train an optimizer, we essentially need three things:
1. We should be able to provide a loss function.
2. We should have an LSTM network as the meta-optimizer.
3. We should be able to tell our optimizer what kind of network we want for each variable.

In [12]:
# simple() returns a function "build" that gives the square loss when called.
problem_simple = simple()

# One network entry, registered under the key "cw".
net_config_simple = dict(
    cw=dict(
        net="CoordinateWiseDeepLSTM",
        net_options=dict(layers=(), initializer="zeros"),
        net_path=None,
    )
)

# No explicit variable-to-network assignment is given for this problem.
net_assignments_simple = None

In [15]:
# NOTE(review): this rebinds `problem`, which the configuration cell set to
# the string "simple"; from here on it holds a loss-builder function instead.
problem = simple_multi_optimizer()

# Two network entries: "cw" and "adam".
net_config_simple_multi_optimizer = dict(
    cw=dict(
        net="CoordinateWiseDeepLSTM",
        net_options=dict(layers=(), initializer="zeros"),
        net_path=None,
    ),
    adam=dict(
        net="Adam",
        net_options=dict(learning_rate=0.1),
    ),
)

# (network key, variable names) pairs: "x_0" goes to "cw", "x_1" to "adam".
net_assignments_simple_multi_optimizer = [
    ("cw", ["x_0"]),
    ("adam", ["x_1"]),
]