In [221]:
# The rest of this notebook is written against the TF1-style graph/session API
# (placeholders, get_variable, Session), so `tf` is bound to the v1
# compatibility module rather than to the top-level TensorFlow package.
import tensorflow as tf2
tf = tf2.compat.v1
# Placeholders and sessions are only usable with eager execution switched off.
tf.disable_eager_execution()

In [222]:
# This notebook borrows most of its code from https://github.com/differential-machine-learning/notebooks
# The above repository was written by Antoine Savine

In [223]:
def normalize(x_raw, y_raw, xbar_raw):
    """Standardize inputs, labels and differential labels.

    Inputs and labels are centred and scaled column-wise (statistics taken
    along axis 0). The differential labels are rescaled by ``x_std / y_std``
    so that they are the derivatives of the *scaled* label with respect to
    the *scaled* inputs.

    Columns with zero standard deviation (constant features or constant
    labels) use a scale of 1 instead of 0. Without this, the division would
    produce NaN/inf that silently poisons training. The returned ``x_std`` /
    ``y_std`` carry the substituted value so that the inverse transform stays
    consistent with the forward one.

    Parameters
    ----------
    x_raw : array-like exposing ``.mean(axis=0)`` / ``.std(axis=0)``
        Raw inputs, one example per row.
    y_raw : array-like
        Raw labels, one example per row.
    xbar_raw : array-like
        Raw differential labels; must broadcast against ``x_std``.

    Returns
    -------
    tuple
        ``(x_mean, x_std, x, y_mean, y_std, y, xbar)``.
    """
    x_mean = x_raw.mean(axis=0)
    x_std = x_raw.std(axis=0)
    # Adding the boolean mask turns every exact 0 into 1 and leaves all other
    # entries (and the dtype) untouched.
    x_std = x_std + (x_std == 0)
    x = (x_raw - x_mean) / x_std

    y_mean = y_raw.mean(axis=0)
    y_std = y_raw.std(axis=0)
    y_std = y_std + (y_std == 0)
    y = (y_raw - y_mean) / y_std

    # Chain rule: d(y / y_std) / d(x / x_std) = (dy/dx) * x_std / y_std.
    xbar = xbar_raw / y_std * x_std

    return x_mean, x_std, x, y_mean, y_std, y, xbar

In [224]:
def twin_network(
    input_dim,
    n_units,
    n_layers):
    """Build a softplus feed-forward net together with its input gradient.

    The forward pass is: an affine first layer applied directly to the input
    placeholder, then ``n_layers - 1`` hidden layers of width ``n_units`` and
    one scalar output layer, each of which applies softplus to the previous
    pre-activation before its affine map. The backward pass is written out by
    hand: it pushes a tensor of ones back through the transposed weights,
    multiplying by sigmoid (the derivative of softplus) at every layer.

    Variables are created through ``tf.get_variable`` under the names
    ``w1..w{n_layers+1}`` and ``b1..b{n_layers+1}``.

    Returns
    -------
    tuple
        ``(x, y, xbar)``: the input placeholder, the network output, and the
        derivative of the output with respect to the input.
    """

    def make_params(index, fan_in, fan_out):
        # Weight first, bias second, so variables appear in the graph in the
        # same order for every layer.
        kernel = tf.get_variable("w%d" % index, [fan_in, fan_out], initializer=tf.variance_scaling_initializer(), dtype=tf.float32)
        offset = tf.get_variable("b%d" % index, [fan_out], initializer=tf.zeros_initializer(), dtype=tf.float32)
        return kernel, offset

    x = tf.placeholder(shape=[None, input_dim], dtype=tf.float32)

    # Slot 0 is padding so that layer numbers can be used as 1-based indices;
    # pre_activations[0] is the raw input.
    weights = [None]
    pre_activations = [x]

    # layer 1: plain affine map of the inputs
    kernel, offset = make_params(1, input_dim, n_units)
    weights.append(kernel)
    pre_activations.append(x @ kernel + offset)

    # layers 2 .. n_layers: hidden layers
    for index in range(2, n_layers + 1):
        kernel, offset = make_params(index, n_units, n_units)
        weights.append(kernel)
        pre_activations.append(tf.nn.softplus(pre_activations[index - 1]) @ kernel + offset)

    # layer n_layers + 1: scalar output
    out_index = n_layers + 1
    kernel, offset = make_params(out_index, n_units, 1)
    weights.append(kernel)
    pre_activations.append(tf.nn.softplus(pre_activations[n_layers]) @ kernel + offset)

    y = pre_activations[out_index]

    # manual reverse pass, from the output back to layer 1
    depth = len(pre_activations) - 1
    adjoint = tf.ones_like(pre_activations[depth])
    for index in reversed(range(1, depth)):
        pulled_back = adjoint @ tf.transpose(weights[index + 1])
        adjoint = pulled_back * tf.nn.sigmoid(pre_activations[index])
    xbar = tf.matmul(adjoint, tf.transpose(weights[1]))

    return x, y, xbar

In [225]:
def train_graph(
    input_dim, 
    n_units, 
    n_layers):
    """Assemble the training graph: twin network, label placeholders, loss, Adam step.

    The loss is the sum of the mean squared error on the values and the mean
    squared error on the derivatives, so the optimizer has to fit both.
    (Multiplying the two terms instead would let the objective reach zero by
    fitting only one of them, and would scale each term's gradient by the
    size of the other.)

    Parameters
    ----------
    input_dim : int
        Number of input features.
    n_units : int
        Width of the layers, forwarded to ``twin_network``.
    n_layers : int
        Layer count, forwarded to ``twin_network``.

    Returns
    -------
    tuple
        ``(inputs, labels, derivative_labels, predictions,
        derivative_predictions, learning_rate, loss, train_op)`` where
        ``learning_rate`` is a placeholder fed at each step and ``train_op``
        is the Adam minimization op for ``loss``.
    """

    inputs, predictions, derivative_predictions = twin_network(input_dim, n_units, n_layers)
    labels = tf.placeholder(shape=[None, 1], dtype=tf.float32)
    # One derivative label per input feature, matching the network's gradient output.
    derivative_labels = tf.placeholder(shape=[None, derivative_predictions.shape[1]], dtype=tf.float32)

    value_loss = tf.losses.mean_squared_error(labels, predictions)
    derivative_loss = tf.losses.mean_squared_error(derivative_labels, derivative_predictions)
    loss = value_loss + derivative_loss

    learning_rate = tf.placeholder(tf.float32)
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)

    return inputs, labels, derivative_labels, predictions, derivative_predictions, learning_rate, loss, optimizer.minimize(loss)

def one_epoch(
    inputs, labels, derivative_labels,
    lr_placeholder, minimizer,
    x_train, y_train, xbar_train,
    learning_rate, batch_size, session):
    """Run one pass over the training set in consecutive mini-batches.

    The data is visited in its stored order (no shuffling); the last batch
    may be smaller than ``batch_size``. For every batch, ``minimizer`` is run
    in ``session`` with the batch slices and the learning rate fed through
    the given placeholders.

    Parameters
    ----------
    inputs, labels, derivative_labels, lr_placeholder
        Feed keys for the inputs, labels, differential labels and learning rate.
    minimizer
        The fetch passed to ``session.run`` for each batch.
    x_train, y_train, xbar_train
        Training arrays, sliced along their first axis.
    learning_rate : float
        Value fed to ``lr_placeholder``.
    batch_size : int
        Number of rows per batch; must be at least 1.
    session
        Object exposing ``run(fetches, feed_dict=...)``.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1 (the batch window could never
        advance, so the epoch would not terminate).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    m = x_train.shape[0]

    for first in range(0, m, batch_size):
        last = min(first + batch_size, m)

        session.run(minimizer, feed_dict = {
            inputs: x_train[first:last],
            labels: y_train[first:last],
            derivative_labels: xbar_train[first:last],
            lr_placeholder: learning_rate
        })

In [226]:
class NeuralNetwork():
    """Twin-network regressor fitted to both values and differentials.

    Usage as implemented by the methods below: construct with the raw data,
    call ``prepare()`` to normalize it and build the graph, ``train()`` to
    fit, then ``predict()`` to get values and derivatives in raw units.
    """

    def __init__(self, x_raw, y_raw, xhat_raw):
        """Store the raw training data; no graph or session is created yet.

        Parameters
        ----------
        x_raw : raw inputs.
        y_raw : raw labels.
        xhat_raw : raw differential labels (passed to ``normalize`` as its
            third argument).
        """
        self.session = None
        self.graph = None
        self.x_raw = x_raw
        self.y_raw = y_raw
        self.xhat_raw = xhat_raw
        
    def __del__(self):
        """Close the session, if one was ever opened."""
        # getattr: the attribute is absent when __init__ did not run to completion.
        session = getattr(self, "session", None)
        if session is not None:
            session.close()

    def build(self, n_units, n_layers):
        """Create a fresh graph and session for a network of the given size.

        Any previously opened session is closed first. Reads ``self.n`` (the
        input dimension), which ``prepare()`` sets.
        """
        if self.session is not None:
            self.session.close()
            # Do not keep a closed session around if graph construction fails below.
            self.session = None

        self.graph = tf.Graph()
        
        with self.graph.as_default():

            self.inputs, \
            self.labels, \
            self.derivative_labels, \
            self.predictions, \
            self.derivative_predictions, \
            self.learning_rate, \
            self.loss, \
            self.minimizer = train_graph(self.n, n_units, n_layers)

            # Must be created while self.graph is the default graph: otherwise
            # the op belongs to a different graph, covers none of the
            # variables defined above, and cannot be run by the session below.
            self.initializer = tf.global_variables_initializer()

        self.graph.finalize()
        self.session = tf.Session(graph=self.graph)

    def prepare(self, n_units = 20, n_layers = 4):
        """Normalize the stored raw data and build the network.

        Sets the normalization statistics (``x_mean``, ``x_std``, ``y_mean``,
        ``y_std``), the scaled training arrays (``x``, ``y``, ``xbar``) and
        the data dimensions ``m`` (rows) and ``n`` (columns of ``x``).
        """

        self.x_mean, self.x_std, self.x, self.y_mean, \
             self.y_std, self.y, self.xbar = normalize(self.x_raw, self.y_raw, self.xhat_raw)
        
        self.m, self.n = self.x.shape        
        self.build(n_units, n_layers)

    def train(self,
        learning_rate = 0.1,
        n_epochs = 100,
        batch_size = 256,
        reinit=True):
        """Fit the network on the normalized data.

        Parameters
        ----------
        learning_rate : float
            Learning rate fed to the optimizer, constant over all epochs.
        n_epochs : int
            Number of full passes over the training set.
        batch_size : int
            Mini-batch size.
        reinit : bool
            When True, all variables are re-initialized before training;
            when False, training continues from the current weights.
        """

        if reinit:
            self.session.run(self.initializer)

        for epoch in range(n_epochs):
        
            one_epoch(
                self.inputs, 
                self.labels, 
                self.derivative_labels,
                self.learning_rate, 
                self.minimizer, 
                self.x, 
                self.y, 
                self.xbar, 
                learning_rate,
                batch_size, 
                self.session)
                
    def predict(self, x):
        """Evaluate the network on raw inputs.

        The inputs are scaled with the training statistics, run through the
        network, and both outputs are mapped back to raw units.

        Returns
        -------
        tuple
            ``(y, xhat)``: predicted values and predicted derivatives.
        """
        x_scaled = (x-self.x_mean) / self.x_std
        y_scaled, xhat_scaled = self.session.run(
            [self.predictions, self.derivative_predictions], 
            feed_dict = {self.inputs: x_scaled})
        y = self.y_mean + self.y_std * y_scaled
        # Inverse of the scaling applied to the differential labels.
        xhat = self.y_std / self.x_std * xhat_scaled
        return y, xhat                                