In [0]:
# Copyright 2019 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License")

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers
import tensorflow.keras.backend as keras_backend
tf.keras.backend.set_floatx('float32')
import tensorflow_probability as tfp
from tensorflow_probability.python.layers import util as tfp_layers_util

import random
import sys
import time
import os

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

print(tf.__version__) # use tensorflow version >= 2.0.0
#pip install tensorflow=2.0.0
#pip install --upgrade tensorflow-probability

exp_type = 'MAML'  # choose from 'MAML', 'MR-MAML-W', 'MR-MAML-A'

2.0.0


In [0]:
class SinusoidGenerator():
    def __init__(self, K=10, width=5, K_amp=20, phase=0, amps = None, amp_ind=None,  amplitude =None, seed = None):
        '''
        Args:
            K: batch size. Number of values sampled at every batch.
            amplitude: Sine wave amplitude.
            pahse: Sine wave phase.
        '''
        self.K = K
        self.width = width
        self.K_amp = K_amp
        self.phase = phase
        self.seed = seed
        self.x = self._sample_x()
        self.amp_ind = amp_ind if amp_ind is not None else random.randint(0,self.K_amp-5)
        self.amps = amps if amps is not None else np.linspace(0.1,4,self.K_amp)
        self.amplitude = amplitude if amplitude is not None else self.amps[self.amp_ind]

    def _sample_x(self):
        if self.seed is not None:
          np.random.seed(self.seed)
        return np.random.uniform(-self.width, self.width, self.K)


    def batch(self, noise_scale, x = None):
        '''return xa is [K, d_x+d_a], y is [K, d_y]'''
        if x is None:
          x = self._sample_x()
        x = x[:, None]
        amp = np.zeros([1, self.K_amp])
        amp[0,self.amp_ind] = 1
        amp = np.tile(amp, x.shape)
        xa = np.concatenate([x, amp], axis = 1)
        y = self.amplitude * np.sin(x - self.phase) + np.random.normal(scale = noise_scale, size = x.shape)
        return xa, y

    def equally_spaced_samples(self, K=None, width=None):
        '''Returns K equally spaced samples.'''
        if K is None:
            K = self.K
        if width is None:
            width = self.width
        return self.batch(noise_scale = 0, x=np.linspace(-width+0.5, width-0.5, K))

In [0]:
noise_scale = 0.1 #@param {type:"number"}
n_obs = 20 #@param {type:"number"}
n_context = 10 #@param {type:"number"}
K_amp = 20 #@param {type:"number"}
x_width = 5 #@param {type:"number"}
n_iter = 20000 #@param {type:"number"}
amps = np.linspace(0.1,4,K_amp)
lr_inner = 0.01 #@param {type:"number"}
dim_w = 5 #@param {type:"number"}
train_ds = [SinusoidGenerator(K=n_context, width = x_width, \
                              K_amp = K_amp, amps = amps) \
                              for _ in range(n_iter)]

In [0]:

class SineModel(keras.Model):
    def __init__(self):
        super(SineModel, self).__init__() # python 2 syntax
        # super().__init__() # python 3 syntax
        self.hidden1 = keras.layers.Dense(40)
        self.hidden2 = keras.layers.Dense(40)
        self.out = keras.layers.Dense(1)

    def call(self, x):
        x = keras.activations.relu(self.hidden1(x))
        x = keras.activations.relu(self.hidden2(x))
        x = self.out(x)
        return x


def kl_qp_gaussian(mu_q, sigma_q, mu_p, sigma_p):
  """Kullback-Leibler KL(N(mu_q), Diag(sigma_q^2) || N(mu_p), Diag(sigma_p^2))"""
  sigma2_q = tf.square(sigma_q) + 1e-16
  sigma2_p = tf.square(sigma_p) + 1e-16
  temp = tf.math.log(sigma2_p) - tf.math.log(sigma2_q) - 1.0 + \
          sigma2_q / sigma2_p + tf.square(mu_q - mu_p) / sigma2_p  #n_target * d_w
  kl = 0.5 * tf.reduce_mean(temp, axis = 1)
  return tf.reduce_mean(kl)

def copy_model(model, x=None, input_shape=None):
    '''
      Copy model weights to a new model.
      Args:
          model: model to be copied.
          x: An input example.
    '''
    copied_model = SineModel()
    if x is not None:
      copied_model.call(tf.convert_to_tensor(x))
    if input_shape is not None:
      copied_model.build(tf.TensorShape([None,input_shape]))
    copied_model.set_weights(model.get_weights())
    return copied_model

def np_to_tensor(list_of_numpy_objs):
    return (tf.convert_to_tensor(obj, dtype=tf.float32) for obj in list_of_numpy_objs)


def compute_loss(model, xa, y):
    y_hat = model.call(xa)
    loss = keras_backend.mean(keras.losses.mean_squared_error(y, y_hat))
    return loss, y_hat




In [0]:
def train_batch(xa, y, model, optimizer, encoder=None):
    tensor_xa, tensor_y = np_to_tensor((xa, y))
    if exp_type == 'MAML':
      with tf.GradientTape() as tape:
          loss, _ = compute_loss(model, tensor_xa, tensor_y)
    if exp_type == 'MR-MAML-W':
      w = encoder(tensor_xa)
      with tf.GradientTape() as tape:
          y_hat = model.call(w)
          loss = keras_backend.mean(keras.losses.mean_squared_error(tensor_y, y_hat))
    if exp_type == 'MR-MAML-A':
      _, w, _ = encoder(tensor_xa)
      with tf.GradientTape() as tape:
          y_hat = model.call(w)
          loss = keras_backend.mean(keras.losses.mean_squared_error(y, y_hat))
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss


def test_inner_loop(model, optimizer, xa_context, y_context, xa_target, y_target, num_steps, encoder=None):
    inner_record = []
    tensor_xa_target, tensor_y_target = np_to_tensor((xa_target, y_target))
    if exp_type == 'MAML':
      w_target = tensor_xa_target
    if exp_type == 'MR-MAML-W':
      w_target = encoder(tensor_xa_target)
    if exp_type == 'MR-MAML-A':
      _, w_target, _ = encoder(tensor_xa_target)

    for step in range(0, np.max(num_steps) + 1):
        if step in num_steps:
          if exp_type == 'MAML':
            loss, y_hat = compute_loss(model, w_target, tensor_y_target)
          else:
            y_hat = model.call(w_target)
            loss = keras_backend.mean(keras.losses.mean_squared_error(tensor_y_target, y_hat))
          inner_record.append((step, y_hat, loss))
        loss = train_batch(xa_context, y_context, model, optimizer, encoder)
    return inner_record


def eval_sinewave_for_test(model, sinusoid_generator, num_steps=(0, 1, 10), encoder=None, learning_rate = lr_inner, ax = None, legend= False):
    # data for training
    xa_context, y_context = sinusoid_generator.batch(noise_scale = noise_scale)
    y_context = y_context + np.random.normal(scale = noise_scale, size = y_context.shape)
    # data for validation
    xa_target, y_target = sinusoid_generator.equally_spaced_samples(K = 200, width = 5)
    y_target = y_target + np.random.normal(scale = noise_scale, size = y_target.shape)

    # copy model so we can use the same model multiple times
    if exp_type == 'MAML':
      copied_model = copy_model(model, x = xa_context)
    else:
      copied_model = copy_model(model, input_shape=dim_w)
    optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
    inner_record = test_inner_loop(copied_model, optimizer, xa_context, y_context, xa_target, y_target, num_steps, encoder)

    # plot
    if ax is not None:
      plt.sca(ax)
      x_context = xa_context[:,0,None]
      x_target = xa_target[:,0,None]
      train, = plt.plot(x_context, y_context, '^')
      ground_truth, = plt.plot(x_target, y_target0, linewidth=2.0)
      plots = [train, ground_truth]
      legends = ['Context Points', 'True Function']
      for n, y_hat, loss in inner_record:
          cur, = plt.plot(x_target, y_hat[:, 0], '--')
          plots.append(cur)
          legends.append('After {} Steps'.format(n))
      if legend:
        plt.legend(plots, legends, loc='center left', bbox_to_anchor=(1, 0.5))
      plt.ylim(-6, 6)
      plt.axvline(x=-sinusoid_generator.width, linestyle='--')
      plt.axvline(x=sinusoid_generator.width,linestyle='--')
    return inner_record

In [0]:
exp_type = 'MAML'
if exp_type == 'MAML':
  model = SineModel()
  model.build((None, K_amp+1))

  dataset = train_ds
  optimizer = keras.optimizers.Adam()
  total_loss = 0
  n_iter = 15000
  losses = []

  for i, t in enumerate(random.sample(dataset, n_iter)):
      xa_train, y_train = np_to_tensor(t.batch(noise_scale = noise_scale))

      with tf.GradientTape(watch_accessed_variables=False) as test_tape:
          test_tape.watch(model.trainable_variables)
          with tf.GradientTape() as train_tape:
              train_loss, _ = compute_loss(model, xa_train, y_train)
          model_copy = copy_model(model, xa_train)
          gradients_inner = train_tape.gradient(train_loss, model.trainable_variables) # \nabla_{\theta}

          k = 0
          for j in range(len(model_copy.layers)):
              model_copy.layers[j].kernel = tf.subtract(model.layers[j].kernel,  # \phi_t = T(\theta, \nabla_{\theta})
                          tf.multiply(lr_inner, gradients_inner[k]))
              model_copy.layers[j].bias = tf.subtract(model.layers[j].bias,
                          tf.multiply(lr_inner, gradients_inner[k+1]))
              k += 2
          xa_validation, y_validation = np_to_tensor(t.batch(noise_scale = noise_scale))
          test_loss, y_hat = compute_loss(model_copy, xa_validation, y_validation) # test_loss
      gradients_outer = test_tape.gradient(test_loss, model.trainable_variables)
      optimizer.apply_gradients(zip(gradients_outer, model.trainable_variables))


      total_loss += test_loss
      loss = total_loss / (i+1.0)
      if i % 1000 == 0:
          print('Step {}: loss = {}'.format(i, loss))

Step 0: loss = 0.5836441516876221
Step 1000: loss = 0.7144132256507874
Step 2000: loss = 0.4053805470466614
Step 3000: loss = 0.2834143042564392
Step 4000: loss = 0.2200154811143875
Step 5000: loss = 0.18113426864147186
Step 6000: loss = 0.154592826962471
Step 7000: loss = 0.1353120058774948
Step 8000: loss = 0.12070710957050323
Step 9000: loss = 0.1094995066523552
Step 10000: loss = 0.1004406288266182
Step 11000: loss = 0.09283847361803055
Step 12000: loss = 0.08659099042415619
Step 13000: loss = 0.08126141875982285
Step 14000: loss = 0.07664626091718674


In [0]:
if exp_type == 'MAML':
  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
  n_context = 5
  n_test_task = 100
  errs = []
  for ii in range(n_test_task):
    np.random.seed(ii)
    A = np.random.uniform(low = amps[0], high = amps[-1])
    test_ds = SinusoidGenerator(K=n_context, seed = ii, amplitude = A, amp_ind= random.randint(0,K_amp-5))
    inner_record = eval_sinewave_for_test(model,  test_ds, num_steps=(0, 1, 5, 100));
    errs.append(inner_record[-1][2].numpy())

  print('Model is', exp_type, 'meta-test MSE is', np.mean(errs) )

Model is MAML meta-test MSE is 0.51450217


# Training & Testing for MR-MAML(W)

In [0]:
if exp_type == 'MR-MAML-W':

  model = SineModel()
  dataset = train_ds
  optimizer = keras.optimizers.Adam()

  Beta = 5e-5
  learning_rate = 1e-3
  n_iter = 15000
  model.build((None, dim_w))

  kernel_posterior_fn=tfp_layers_util.default_mean_field_normal_fn(untransformed_scale_initializer=tf.compat.v1.initializers.random_normal(
      mean=-50., stddev=0.1))

  encoder_w = tf.keras.Sequential([
          tfp.layers.DenseReparameterization(100, activation=tf.nn.relu, kernel_posterior_fn=kernel_posterior_fn,input_shape=(1 + K_amp,)),
          tfp.layers.DenseReparameterization(dim_w,kernel_posterior_fn=kernel_posterior_fn),
        ])

  total_loss = 0
  losses = []
  start = time.time()

  for i, t in enumerate(random.sample(dataset, n_iter)):
      xa_train, y_train = np_to_tensor(t.batch(noise_scale = noise_scale))   #[K, 1]

      x_validation = np.random.uniform(-x_width, x_width, n_obs - n_context)
      xa_validation, y_validation = np_to_tensor(t.batch(noise_scale = noise_scale, x = x_validation))

      all_var = encoder_w.trainable_variables + model.trainable_variables
      with tf.GradientTape(watch_accessed_variables=False) as test_tape:
          test_tape.watch(all_var)
          with tf.GradientTape() as train_tape:
              w_train = encoder_w(xa_train)
              y_hat_train = model.call(w_train)
              train_loss =  keras_backend.mean(keras.losses.mean_squared_error(y_train, y_hat_train)) # K*1
          gradients_inner = train_tape.gradient(train_loss, model.trainable_variables) # \nabla_{\theta}
          model_copy = copy_model(model, x = w_train)
          k = 0
          for j in range(len(model_copy.layers)):
              model_copy.layers[j].kernel = tf.subtract(model.layers[j].kernel,  # \phi_t = T(\theta, \nabla_{\theta})
                          tf.multiply(lr_inner, gradients_inner[k]))
              model_copy.layers[j].bias = tf.subtract(model.layers[j].bias,
                          tf.multiply(lr_inner, gradients_inner[k+1]))
              k += 2

          w_validation = encoder_w(xa_validation)
          y_hat_validation = model_copy.call(w_validation)
          mse_loss = keras_backend.mean(keras.losses.mean_squared_error(y_validation, y_hat_validation))
          kl_loss = Beta * sum(encoder_w.losses)
          validation_loss = mse_loss + kl_loss

      gradients_outer = test_tape.gradient(validation_loss,all_var)
      keras.optimizers.Adam(learning_rate=learning_rate).apply_gradients(zip(gradients_outer, all_var))

      losses.append(validation_loss.numpy())

      if i % 1000 == 0 and i > 0:
          print('Step {}:'.format(i), 'loss=', np.mean(losses))
          losses = []

Step 1000: loss= 2.6914458
Step 2000: loss= 2.4870665
Step 3000: loss= 2.4284792
Step 4000: loss= 2.3726428
Step 5000: loss= 2.3125937
Step 6000: loss= 2.228668
Step 7000: loss= 2.1762276
Step 8000: loss= 2.1387603
Step 9000: loss= 2.112448
Step 10000: loss= 2.1087198
Step 11000: loss= 2.10187
Step 12000: loss= 2.102722
Step 13000: loss= 2.1002984
Step 14000: loss= 2.0911772


In [0]:
if exp_type == 'MR-MAML-W':
  n_context = 5
  n_test_task = 100
  errs = []
  for ii in range(n_test_task):
    np.random.seed(ii)
    A = np.random.uniform(low = amps[0], high = amps[-1])
    test_ds = SinusoidGenerator(K=n_context, seed = ii, amplitude = A, amp_ind= random.randint(0,K_amp-5))
    inner_record = eval_sinewave_for_test(model,  test_ds, num_steps=(0, 1, 5, 100), encoder=encoder_w);
    errs.append(inner_record[-1][2].numpy())

  print('Model is', exp_type, ', meta-test MSE is', np.mean(errs) )

Model is MR-MAML-W , meta-test MSE is 0.16159104


#Training & Testing for MR-MAML(A)

In [0]:

if exp_type == 'MR-MAML-A':
  class Encoder(keras.Model):
    def __init__(self, dim_w=5, name='encoder', **kwargs):
      # super().__init__(name = name)
      super(Encoder, self).__init__(name = name)
      self.dense_proj = layers.Dense(80, activation='relu')
      self.dense_mu = layers.Dense(dim_w)
      self.dense_sigma_w = layers.Dense(dim_w)

    def call(self, inputs):
      h = self.dense_proj(inputs)
      mu_w = self.dense_mu(h)
      sigma_w = self.dense_sigma_w(h)
      sigma_w = tf.nn.softplus(sigma_w)
      ws = mu_w + tf.random.normal(tf.shape(mu_w)) * sigma_w
      return ws, mu_w, sigma_w

  model = SineModel()
  model.build((None, dim_w))
  encoder_w = Encoder(dim_w = dim_w)
  encoder_w.build((None, K_amp+1))
  Beta = 5.0
  n_iter = 10000
  dataset = train_ds
  optimizer = keras.optimizers.Adam()
  losses = [];

  for i, t in enumerate(random.sample(dataset, n_iter)):
      xa_train, y_train = np_to_tensor(t.batch(noise_scale = noise_scale))   #[K, 1]

      with tf.GradientTape(watch_accessed_variables=False) as test_tape, tf.GradientTape(watch_accessed_variables=False) as encoder_test_tape:
          test_tape.watch(model.trainable_variables)
          encoder_test_tape.watch(encoder_w.trainable_variables)
          with tf.GradientTape() as train_tape:
              w_train, _, _ = encoder_w(xa_train)
              y_hat = model.call(w_train)
              train_loss = keras_backend.mean(keras.losses.mean_squared_error(y_train, y_hat))
          model_copy = copy_model(model, x=w_train)
          gradients_inner = train_tape.gradient(train_loss, model.trainable_variables) # \nabla_{\theta}
          k = 0
          for j in range(len(model_copy.layers)):
              model_copy.layers[j].kernel = tf.subtract(model.layers[j].kernel,  # \phi_t = T(\theta, \nabla_{\theta})
                          tf.multiply(lr_inner, gradients_inner[k]))
              model_copy.layers[j].bias = tf.subtract(model.layers[j].bias,
                          tf.multiply(lr_inner, gradients_inner[k+1]))
              k += 2
          x_validation = np.random.uniform(-x_width, x_width, n_obs - n_context)
          xa_validation, y_validation = np_to_tensor(t.batch(noise_scale = noise_scale, x = x_validation))

          w_validation, w_mu_validation, w_sigma_validation = encoder_w(xa_validation)
          test_mse, _ = compute_loss(model_copy, w_validation, y_validation)
          kl_ib = kl_qp_gaussian(w_mu_validation, w_sigma_validation,
                            tf.zeros(tf.shape(w_mu_validation)), tf.ones(tf.shape(w_sigma_validation)))
          test_loss = test_mse + Beta * kl_ib

      gradients_outer = test_tape.gradient(test_mse, model.trainable_variables)
      optimizer.apply_gradients(zip(gradients_outer, model.trainable_variables))

      gradients = encoder_test_tape.gradient(test_loss,encoder_w.trainable_variables)
      keras.optimizers.Adam(learning_rate=0.001).apply_gradients(zip(gradients, encoder_w.trainable_variables))

      losses.append(test_loss)

      if i % 1000 == 0 and i > 0:
          print('Step {}:'.format(i), 'loss = ', np.mean(losses))

Step 1000: loss =  1.9501201
Step 2000: loss =  1.9419937
Step 3000: loss =  1.8764127
Step 4000: loss =  1.8179001
Step 5000: loss =  1.7535788
Step 6000: loss =  1.6897625
Step 7000: loss =  1.632552
Step 8000: loss =  1.57314
Step 9000: loss =  1.5216928


In [0]:
if exp_type == 'MR-MAML-A':
  n_context = 5
  n_test_task = 100
  errs = []
  for ii in range(n_test_task):
    np.random.seed(ii)
    A = np.random.uniform(low = amps[0], high = amps[-1])
    test_ds = SinusoidGenerator(K=n_context, seed = ii, amplitude = A, amp_ind= random.randint(0,K_amp-5))
    inner_record = eval_sinewave_for_test(model,  test_ds, num_steps=(0, 1, 5, 100), encoder=encoder_w);
    errs.append(inner_record[-1][2].numpy())

  print('Model is', exp_type, ', meta-test MSE is', np.mean(errs) )

Model is MR-MAML-A , meta-test MSE is 0.14966752
