In [18]:
"""
#=============================#
||                           ||
||                           ||
||        PFNN Class         ||
||                           ||
||                           ||
#=============================#

Classes for Phase Functioned Neural Networks.
We will be implementing these from scratch.

We will be basing these off of code in Theano
from The Orange Duck found here:
https://github.com/sreyafrancis/PFNN


"""

import collections
from math import pi, floor

import numpy as np
import tensorflow as tf

from tensorflow.contrib.rnn import AttentionCellWrapper
from tensorflow.contrib.rnn import DropoutWrapper
from tensorflow.contrib.rnn.python.ops.core_rnn_cell import _Linear
from tensorflow.python.layers import base as base_layer
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import rnn_cell_impl
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops.rnn_cell_impl import RNNCell
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest

# Short module-level aliases for the activation ops used in the LSTM gate
# computations further down in this file.
sigmoid = math_ops.sigmoid
tanh = math_ops.tanh

class PhaseFunctionedFFNN(base_layer.Layer):
    """Single dense layer whose weights are a cyclic function of phase.

    The layer owns ``self.phases`` complete sets of weights and biases.  For
    every input row, the parameters actually applied are obtained by
    Catmull-Rom interpolation between the four sets surrounding that row's
    phase, wrapping around at the end of the cycle.
    """

    # only one layer for demonstration purposes
    def __init__(self, input_shape, output_shape, dropout=0.5):
        """Create the per-phase control points.

        Args:
          input_shape: int, number of feature columns multiplied by the
              weights.  The trailing phase column of the call input is not
              counted.
          output_shape: int, number of output units.
          dropout: currently unused; kept so existing callers keep working.
        """
        super(PhaseFunctionedFFNN, self).__init__()
        self.phases = 4
        self.x = tf.placeholder(tf.float32, [None, input_shape])
        # One independent variable per phase (a list comprehension, not
        # ``[var] * n``, so the control points do not alias each other).
        self.W0 = [tf.Variable(tf.zeros([input_shape, output_shape]))
                   for _ in range(self.phases)]
        self.b0 = [tf.Variable(tf.zeros([output_shape]))
                   for _ in range(self.phases)]
        self.layers = [self.W0, self.b0]

    def __call__(self, input):
        """Apply the phase-interpolated dense layer.

        Args:
          input: float tensor of shape [batch, input_shape + 1].  The last
              column is the phase in radians; values are expected to lie in
              [0, 2*pi) but out-of-range phases wrap around.

        Returns:
          Tensor of shape [batch, output_shape].
        """
        # The phase is the last *column*; ``input[-1]`` would take the last
        # batch row instead.
        phase = input[:, -1]
        features = input[:, :-1]

        W0_phase = self._interpolate(self.W0, phase)   # [batch, in, out]
        b0_phase = self._interpolate(self.b0, phase)   # [batch, out]

        # Every row has its own weight matrix, so use a batched matmul:
        # [batch, 1, in] x [batch, in, out] -> [batch, 1, out].
        projected = tf.matmul(tf.expand_dims(features, 1), W0_phase)
        return tf.squeeze(projected, axis=1) + b0_phase

    def _interpolate(self, control_points, phase):
        """Blend a list of per-phase parameters at each row's phase.

        Args:
          control_points: list of ``self.phases`` identically shaped
              variables.
          phase: rank-1 float tensor of phase angles in radians, one per
              batch row.

        Returns:
          Tensor with a leading batch axis followed by the shape of a single
          control point.
        """
        stacked = tf.stack(control_points)
        phase_num = (self.phases * phase) / (2 * pi)
        segment = tf.floor(phase_num)

        # How far into the current phase segment we are, reshaped so it
        # broadcasts against the gathered parameters.
        depth_shape = [-1] + [1] * (stacked.get_shape().ndims - 1)
        phase_depth = tf.reshape(phase_num - segment, depth_shape)

        segment = tf.cast(segment, tf.int32)
        # Control point selector; floormod keeps the "-1" neighbour (and any
        # out-of-range phase) inside [0, phases).
        y0, y1, y2, y3 = [
            tf.gather(stacked, tf.floormod(segment + n - 1, self.phases))
            for n in range(4)
        ]
        return self.cubic_spline(y0, y1, y2, y3, phase_depth)

    def cubic_spline(self, y0, y1, y2, y3, mu):
        """Catmull-Rom interpolation between ``y1`` (mu=0) and ``y2`` (mu=1).

        ``y0`` and ``y3`` are the neighbouring control points that shape the
        tangents at the two ends of the segment.
        """
        return (
            (-0.5*y0+1.5*y1-1.5*y2+0.5*y3)*mu*mu*mu +
            (y0-2.5*y1+2.0*y2-0.5*y3)*mu*mu +
            (-0.5*y0+0.5*y2)*mu +
            (y1))



PFLSTMStateTuple = collections.namedtuple("PFLSTMStateTuple", ("c", "h"))


class PhaseFunctionedLSTM(RNNCell):
    """LSTM cell whose gate parameters are a cyclic function of phase.

    Each gate owns ``self.phases`` sets of weights and biases.  At every step
    the parameters applied to a batch row are obtained by Catmull-Rom
    interpolation between the four sets surrounding that row's phase.  The
    phase is read from the last column of the step input.
    """

    def __init__(self, input_shape, output_shape, dropout=0.5):
        """Create the per-phase control points for all four gates.

        Args:
          input_shape: int, number of feature columns per step, not counting
              the trailing phase column.
          output_shape: int, number of hidden units.
          dropout: currently unused; kept so existing callers keep working.
        """
        super(PhaseFunctionedLSTM, self).__init__()
        self.phases = 4
        self._num_units = output_shape

        # Gates read the concatenation [h, features], so the weight matrices
        # need one row per hidden unit plus one per feature.
        concat_width = output_shape + input_shape

        # Build one independent variable per phase.  ``[var] * n`` would put
        # the same variable in every slot and make the phases identical.
        # NOTE(review): control points start at zero, as in the original;
        # consider a random initializer before training -- confirm.
        def weight_set():
            return [tf.Variable(tf.zeros([concat_width, output_shape]))
                    for _ in range(self.phases)]

        def bias_set():
            return [tf.Variable(tf.zeros([output_shape]))
                    for _ in range(self.phases)]

        self.forget_gate = weight_set()
        self.forget_bias = bias_set()

        self.input_gate = weight_set()
        self.input_bias = bias_set()

        self.new_input = weight_set()
        self.new_bias = bias_set()

        self.output_gate = weight_set()
        self.output_bias = bias_set()

        # indices 0-1 = forget, 2-3 = input, 4-5 = new, 6-7 = output
        self.layers = [self.forget_gate, self.forget_bias,
                       self.input_gate, self.input_bias,
                       self.new_input, self.new_bias,
                       self.output_gate, self.output_bias]

    @property
    def state_size(self):
        """Sizes of the (c, h) state pair."""
        return PFLSTMStateTuple(self._num_units, self._num_units)

    @property
    def output_size(self):
        """Width of the per-step output (the hidden state h)."""
        return self._num_units

    def __call__(self, input, state, scope=None):
        """Run one LSTM step with phase-interpolated parameters.

        Args:
          input: float tensor of shape [batch, input_shape + 1].  The last
              column is the phase in radians (expected range 0 - 2pi).
          state: ``(c, h)`` pair, each of shape [batch, output_shape].
          scope: accepted for compatibility with cell wrappers that forward
              a scope argument; not used.

        Returns:
          ``(new_h, PFLSTMStateTuple(new_c, new_h))``.
        """
        c, h = state
        # The phase is the last *column*; indexing with ``[-1]`` alone would
        # take the last batch row.
        phase = input[:, -1]
        features = input[:, :-1]

        # Interpolate all eight parameter groups first, then compute gates.
        (W_f, b_f, W_i, b_i,
         W_c, b_c, W_o, b_o) = [self._interpolate(layer, phase)
                                for layer in self.layers]

        # [batch, 1, out + in]; the middle axis lets tf.matmul multiply each
        # row by its own [out + in, out] weight matrix.
        concat = tf.expand_dims(tf.concat([h, features], 1), 1)

        def affine(weights, bias):
            return tf.squeeze(tf.matmul(concat, weights), axis=1) + bias

        f = sigmoid(affine(W_f, b_f))
        i = sigmoid(affine(W_i, b_i))
        C_tilde = tanh(affine(W_c, b_c))
        o = sigmoid(affine(W_o, b_o))

        new_c = f * c + i * C_tilde
        new_h = o * tanh(new_c)
        new_state = PFLSTMStateTuple(new_c, new_h)

        return (new_h, new_state)

    def _interpolate(self, control_points, phase):
        """Blend a list of per-phase parameters at each row's phase.

        Args:
          control_points: list of ``self.phases`` identically shaped
              variables.
          phase: rank-1 float tensor of phase angles in radians, one per
              batch row.

        Returns:
          Tensor with a leading batch axis followed by the shape of a single
          control point.
        """
        stacked = tf.stack(control_points)
        phase_num = (self.phases * phase) / (2 * pi)
        segment = tf.floor(phase_num)

        # How far into the current phase segment we are, reshaped so it
        # broadcasts against the gathered parameters.
        depth_shape = [-1] + [1] * (stacked.get_shape().ndims - 1)
        phase_depth = tf.reshape(phase_num - segment, depth_shape)

        segment = tf.cast(segment, tf.int32)
        # Control point selector; floormod keeps the "-1" neighbour (and any
        # out-of-range phase) inside [0, phases).
        y0, y1, y2, y3 = [
            tf.gather(stacked, tf.floormod(segment + n - 1, self.phases))
            for n in range(4)
        ]
        return self.cubic_spline(y0, y1, y2, y3, phase_depth)

    def cubic_spline(self, y0, y1, y2, y3, mu):
        """Catmull-Rom interpolation between ``y1`` (mu=0) and ``y2`` (mu=1).

        ``y0`` and ``y3`` are the neighbouring control points that shape the
        tangents at the two ends of the segment.
        """
        return (
            (-0.5*y0+1.5*y1-1.5*y2+0.5*y3)*mu*mu*mu +
            (y0-2.5*y1+2.0*y2-0.5*y3)*mu*mu +
            (-0.5*y0+0.5*y2)*mu +
            (y1))


class PhaseAttentionCellWrapper(AttentionCellWrapper):
    """Changing basic attention cell wrapper to incorporate phase.
    Implementation based on https://arxiv.org/abs/1409.0473.

    The last column of the step input is treated as the phase.  It is removed
    before the attention/input projection and appended again, unchanged,
    before the wrapped cell is called.
    """

    def __init__(self, cell, attn_length, attn_size=None, attn_vec_size=None,
               input_size=None, state_is_tuple=True, reuse=None):
        """Create a cell with attention.
        Args:
          cell: an RNNCell, an attention is added to it.
          attn_length: integer, the size of an attention window.
          attn_size: integer, the size of an attention vector. Equal to
              cell.output_size by default.
          attn_vec_size: integer, the number of convolutional features calculated
              on attention state and a size of the hidden layer built from
              base cell state. Equal attn_size to by default.
          input_size: integer, the width of the step input *including* the
              phase column; one is subtracted to get the size of the hidden
              linear layer built from inputs and attention. Derived from the
              input tensor by default.
          state_is_tuple: If True (the default), accepted and returned states
            are n-tuples, where `n = len(cells)`.  If False, the states are
            all concatenated along the column axis.
          reuse: (optional) Python boolean describing whether to reuse variables
            in an existing scope.  If not `True`, and the existing scope already has
            the given variables, an error is raised.
        Raises:
          TypeError: if cell is not an RNNCell.
          ValueError: if cell returns a state tuple but the flag
              `state_is_tuple` is `False` or if attn_length is zero or less.
        """
        # Skip AttentionCellWrapper.__init__ on purpose: this constructor sets
        # up the same attributes itself, with the phase-aware input size.
        super(AttentionCellWrapper, self).__init__(_reuse=reuse)
        if not rnn_cell_impl._like_rnncell(cell):  # pylint: disable=protected-access
            raise TypeError("The parameter cell is not RNNCell.")
        if nest.is_sequence(cell.state_size) and not state_is_tuple:
            raise ValueError("Cell returns tuple of states, but the flag "
                           "state_is_tuple is not set. State size is: %s"
                           % str(cell.state_size))
        if attn_length <= 0:
            raise ValueError("attn_length should be greater than zero, got %s"
                           % str(attn_length))
        if not state_is_tuple:
            logging.warn(
              "%s: Using a concatenated state is slower and will soon be "
              "deprecated.  Use state_is_tuple=True.", self)
        if attn_size is None:
            attn_size = cell.output_size
        if attn_vec_size is None:
            attn_vec_size = attn_size
        self._state_is_tuple = state_is_tuple
        self._cell = cell
        self._attn_vec_size = attn_vec_size
        # Discount the phase column, but keep None so call() can still derive
        # the size from the input tensor (None - 1 would raise TypeError).
        self._input_size = None if input_size is None else input_size - 1
        self._attn_size = attn_size
        self._attn_length = attn_length
        self._reuse = reuse
        self._linear1 = None
        self._linear2 = None
        self._linear3 = None
        self.phase = None

    def call(self, inputs, state):
        """Long short-term memory cell with attention (LSTMA).

        Args:
          inputs: tensor of shape [batch, features + 1]; the last column is
              the phase.
          state: the wrapper state, a `(cell_state, attns, attn_states)`
              tuple when `state_is_tuple` is True, otherwise one concatenated
              tensor.

        Returns:
          `(output, new_state)` with `new_state` laid out like `state`.
        """

        # store phase, shorten inputs.  Slice the column axis (not the batch
        # axis) and keep the phase rank-2 so it can be concatenated back.
        self.phase = inputs[:, -1:]
        inputs = inputs[:, :-1]

        if self._state_is_tuple:
            state, attns, attn_states = state
        else:
            states = state
            state = array_ops.slice(states, [0, 0], [-1, self._cell.state_size])
            attns = array_ops.slice(
              states, [0, self._cell.state_size], [-1, self._attn_size])
            attn_states = array_ops.slice(
              states, [0, self._cell.state_size + self._attn_size],
              [-1, self._attn_size * self._attn_length])
        attn_states = array_ops.reshape(attn_states,
                                        [-1, self._attn_length, self._attn_size])
        input_size = self._input_size
        if input_size is None:
            # Measured after the phase column was removed.
            input_size = inputs.get_shape().as_list()[1]
        if self._linear1 is None:
            self._linear1 = _Linear([inputs, attns], input_size, True)

        inputs = self._linear1([inputs, attns])

        # append phase back into input so that PFNN can use it (tensors have
        # no .append; concatenate along the column axis instead)
        inputs = array_ops.concat([inputs, self.phase], 1)

        cell_output, new_state = self._cell(inputs, state)
        if self._state_is_tuple:
            new_state_cat = array_ops.concat(nest.flatten(new_state), 1)
        else:
            new_state_cat = new_state
        new_attns, new_attn_states = self._attention(new_state_cat, attn_states)
        with vs.variable_scope("attn_output_projection"):
            if self._linear2 is None:
                self._linear2 = _Linear([cell_output, new_attns], self._attn_size, True)
            output = self._linear2([cell_output, new_attns])
        new_attn_states = array_ops.concat(
            [new_attn_states, array_ops.expand_dims(output, 1)], 1)
        new_attn_states = array_ops.reshape(
            new_attn_states, [-1, self._attn_length * self._attn_size])
        new_state = (new_state, new_attns, new_attn_states)
        if not self._state_is_tuple:
            new_state = array_ops.concat(list(new_state), 1)

        return output, new_state

class PhaseDropoutWrapper(DropoutWrapper):
    """Operator adding dropout to inputs and outputs of the given cell.
     Incorporates phase.

     The last column of the step input is treated as the phase: it is never
     subject to input dropout and is handed to the wrapped cell unchanged.
     """

    def __init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0,
        state_keep_prob=1.0, variational_recurrent=False,
        input_size=None, dtype=None, seed=None,
        dropout_state_filter_visitor=None):
        """Create a phase-aware dropout wrapper.

        Arguments have the same meaning as for `DropoutWrapper`, except that
        an integer `input_size` is expected to count the phase column; the
        parent class receives `input_size - 1`.
        """
        # Input dropout is applied after the phase column has been removed,
        # so any noise the parent pre-builds from input_size must use the
        # phase-less width.
        feature_size = input_size - 1 if input_size else input_size

        # Do not pass `self` explicitly: the bound super() call supplies it,
        # and an extra positional argument would shift every parameter.
        super(PhaseDropoutWrapper, self).__init__(
            cell,
            input_keep_prob=input_keep_prob,
            output_keep_prob=output_keep_prob,
            state_keep_prob=state_keep_prob,
            variational_recurrent=variational_recurrent,
            input_size=feature_size,
            dtype=dtype,
            seed=seed,
            dropout_state_filter_visitor=dropout_state_filter_visitor)

        #don't know if I need this
        if input_size:
            self.input_size = feature_size

        self.phase = None

    def __call__(self, inputs, state, scope=None):
        """Run the cell with the declared dropouts.

        Args:
          inputs: tensor of shape [batch, features + 1]; the last column is
              the phase.
          state: state of the wrapped cell.
          scope: forwarded to the wrapped cell.

        Returns:
          `(output, new_state)` from the wrapped cell, with the configured
          state and output dropout applied.
        """

        # store phase value.  Slice the column axis (not the batch axis) and
        # keep the phase rank-2 so it can be concatenated back.
        self.phase = inputs[:, -1:]
        inputs = inputs[:, :-1]

        def _should_dropout(p):
            # A tensor keep-probability cannot be compared statically, so it
            # always enables dropout.
            return (not isinstance(p, float)) or p < 1

        if _should_dropout(self._input_keep_prob):
            inputs = self._dropout(inputs, "input",
                                 self._recurrent_input_noise,
                                 self._input_keep_prob)

        # re-append phase so PFNN can use it (tensors have no .append;
        # concatenate along the column axis instead)
        inputs = array_ops.concat([inputs, self.phase], 1)

        output, new_state = self._cell(inputs, state, scope)

        if _should_dropout(self._state_keep_prob):
            # Identify which subsets of the state to perform dropout on and
            # which ones to keep.
            shallow_filtered_substructure = nest.get_traverse_shallow_structure(
                                    self._dropout_state_filter, new_state)
            new_state = self._dropout(new_state, "state",
                                    self._recurrent_state_noise,
                                    self._state_keep_prob,
                                    shallow_filtered_substructure)
        if _should_dropout(self._output_keep_prob):
            output = self._dropout(output, "output",
                                 self._recurrent_output_noise,
                                 self._output_keep_prob)
        return output, new_state


In [17]:
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Provides function to build an event sequence RNN model's graph."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from pfnn import PhaseFunctionedLSTM

# internal imports
import numpy as np
import six
import tensorflow as tf
import magenta

from tensorflow.python.util import nest as tf_nest

# def make_pfnn_cell(rnn_layer_sizes,
#                   dropout_keep_prob=1.0,
#                   attn_length=0,
#                   base_cell=magenta.models.shared.pfnn):
#   return make_rnn_cell(rnn_layer_sizes,
#                   dropout_keep_prob,
#                   attn_length,
#                   base_cell)

def make_rnn_cell(rnn_layer_sizes,
                  dropout_keep_prob=1.0,
                  attn_length=0,
                  base_cell=PhaseFunctionedLSTM):
  """Builds a stacked RNN cell with one wrapped sub-cell per layer size.

  Every sub-cell is wrapped in output dropout; the first sub-cell is
  additionally wrapped in attention when `attn_length` is non-zero.

  NOTE(review): `base_cell` is called with a single positional argument
  (the layer size), while the default `PhaseFunctionedLSTM` constructor
  takes `input_shape` and `output_shape` -- confirm the default is usable.

  Args:
    rnn_layer_sizes: A list of integer sizes (in units) for each layer of the
        RNN.
    dropout_keep_prob: The float probability to keep the output of any given
        sub-cell.
    attn_length: The size of the attention vector.
    base_cell: The base tf.contrib.rnn.RNNCell to use for sub-cells.

  Returns:
      A tf.contrib.rnn.MultiRNNCell based on the given hyperparameters.
  """
  rnn = tf.contrib.rnn

  def build_layer(layer_index, num_units):
    layer = base_cell(num_units)
    # Attention is only added to the first layer.
    if attn_length and layer_index == 0:
      layer = rnn.AttentionCellWrapper(
          layer, attn_length, state_is_tuple=True)
    return rnn.DropoutWrapper(layer, output_keep_prob=dropout_keep_prob)

  stacked_layers = [
      build_layer(layer_index, num_units)
      for layer_index, num_units in enumerate(rnn_layer_sizes)
  ]
  return rnn.MultiRNNCell(stacked_layers)

# make_rnn_cell = make_pfnn_cell


def build_graph(mode, config, sequence_example_file_paths=None):
    """Builds the TensorFlow graph.

    Args:
      mode: 'train', 'eval', or 'generate'. Only mode related ops are added
          to the graph.
      config: An EventSequenceRnnConfig containing the encoder/decoder and
          HParams to use.
      sequence_example_file_paths: A list of paths to TFRecord files
          containing tf.train.SequenceExample protos. Only needed for
          training and evaluation.

    Returns:
      A tf.Graph instance which contains the TF ops.

    Raises:
      ValueError: If mode is not 'train', 'eval', or 'generate'.
    """
    if mode not in ('train', 'eval', 'generate'):
        raise ValueError("The mode parameter must be 'train', 'eval', "
                         "or 'generate'. The mode parameter was: %s" % mode)

    hparams = config.hparams
    encoder_decoder = config.encoder_decoder

    tf.logging.info('hparams = %s', hparams.values())

    input_size = encoder_decoder.input_size
    num_classes = encoder_decoder.num_classes
    no_event_label = encoder_decoder.default_event_label

    with tf.Graph().as_default() as graph:
        inputs, labels, lengths = None, None, None

        if mode == 'train' or mode == 'eval':
            inputs, labels, lengths = magenta.common.get_padded_batch(
                sequence_example_file_paths, hparams.batch_size, input_size,
                shuffle=mode == 'train')

        elif mode == 'generate':
            inputs = tf.placeholder(tf.float32, [hparams.batch_size, None,
                                                 input_size])

        cell = make_rnn_cell(
            hparams.rnn_layer_sizes,
            dropout_keep_prob=(
                1.0 if mode == 'generate' else hparams.dropout_keep_prob),
            attn_length=(
                hparams.attn_length if hasattr(hparams, 'attn_length') else 0))

        initial_state = cell.zero_state(hparams.batch_size, tf.float32)

        outputs, final_state = tf.nn.dynamic_rnn(
            cell, inputs, sequence_length=lengths,
            initial_state=initial_state, swap_memory=True)

        outputs_flat = magenta.common.flatten_maybe_padded_sequences(
            outputs, lengths)
        logits_flat = tf.contrib.layers.linear(outputs_flat, num_classes)

        if mode == 'train' or mode == 'eval':
            labels_flat = magenta.common.flatten_maybe_padded_sequences(
                labels, lengths)

            softmax_cross_entropy = (
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=labels_flat, logits=logits_flat))

            predictions_flat = tf.argmax(logits_flat, axis=1)
            correct_predictions = tf.to_float(
                tf.equal(labels_flat, predictions_flat))
            event_positions = tf.to_float(
                tf.not_equal(labels_flat, no_event_label))
            no_event_positions = tf.to_float(
                tf.equal(labels_flat, no_event_label))

            # Compute the total number of time steps across all sequences in
            # the batch. For some models this will be different from the
            # number of RNN steps.
            def batch_labels_to_num_steps(batch_labels, lengths):
                num_steps = 0
                for labels, length in zip(batch_labels, lengths):
                    num_steps += encoder_decoder.labels_to_num_steps(
                        labels[:length])
                return np.float32(num_steps)
            num_steps = tf.py_func(
                batch_labels_to_num_steps, [labels, lengths], tf.float32)

            if mode == 'train':
                loss = tf.reduce_mean(softmax_cross_entropy)
                perplexity = tf.exp(loss)
                accuracy = tf.reduce_mean(correct_predictions)
                event_accuracy = (
                    tf.reduce_sum(correct_predictions * event_positions) /
                    tf.reduce_sum(event_positions))
                no_event_accuracy = (
                    tf.reduce_sum(correct_predictions * no_event_positions) /
                    tf.reduce_sum(no_event_positions))

                loss_per_step = (
                    tf.reduce_sum(softmax_cross_entropy) / num_steps)
                perplexity_per_step = tf.exp(loss_per_step)

                optimizer = tf.train.AdamOptimizer(
                    learning_rate=hparams.learning_rate)

                train_op = tf.contrib.slim.learning.create_train_op(
                    loss, optimizer, clip_gradient_norm=hparams.clip_norm)
                tf.add_to_collection('train_op', train_op)

                vars_to_summarize = {
                    'loss': loss,
                    'metrics/perplexity': perplexity,
                    'metrics/accuracy': accuracy,
                    'metrics/event_accuracy': event_accuracy,
                    'metrics/no_event_accuracy': no_event_accuracy,
                    'metrics/loss_per_step': loss_per_step,
                    'metrics/perplexity_per_step': perplexity_per_step,
                }
            elif mode == 'eval':
                vars_to_summarize, update_ops = (
                    tf.contrib.metrics.aggregate_metric_map(
                        {
                            'loss': tf.metrics.mean(softmax_cross_entropy),
                            'metrics/accuracy': tf.metrics.accuracy(
                                labels_flat, predictions_flat),
                            'metrics/per_class_accuracy':
                                tf.metrics.mean_per_class_accuracy(
                                    labels_flat, predictions_flat,
                                    num_classes),
                            'metrics/event_accuracy': tf.metrics.recall(
                                event_positions, correct_predictions),
                            'metrics/no_event_accuracy': tf.metrics.recall(
                                no_event_positions, correct_predictions),
                            'metrics/loss_per_step': tf.metrics.mean(
                                tf.reduce_sum(softmax_cross_entropy) /
                                num_steps,
                                weights=num_steps),
                        }))
                for updates_op in update_ops.values():
                    tf.add_to_collection('eval_ops', updates_op)

                # Perplexity is just exp(loss) and doesn't need its own
                # update op.
                vars_to_summarize['metrics/perplexity'] = tf.exp(
                    vars_to_summarize['loss'])
                vars_to_summarize['metrics/perplexity_per_step'] = tf.exp(
                    vars_to_summarize['metrics/loss_per_step'])

            # Shared by 'train' and 'eval': export every metric as both a
            # summary and a named collection.
            for var_name, var_value in six.iteritems(vars_to_summarize):
                tf.summary.scalar(var_name, var_value)
                tf.add_to_collection(var_name, var_value)

        elif mode == 'generate':
            temperature = tf.placeholder(tf.float32, [])
            softmax_flat = tf.nn.softmax(
                tf.div(logits_flat, tf.fill([num_classes], temperature)))
            softmax = tf.reshape(
                softmax_flat, [hparams.batch_size, -1, num_classes])

            tf.add_to_collection('inputs', inputs)
            tf.add_to_collection('temperature', temperature)
            tf.add_to_collection('softmax', softmax)
            # Flatten state tuples for metagraph compatibility.
            for state in tf_nest.flatten(initial_state):
                tf.add_to_collection('initial_state', state)
            for state in tf_nest.flatten(final_state):
                tf.add_to_collection('final_state', state)

    return graph


IndentationError: expected an indented block (pfnn.py, line 169)