here we put together things from the nnet and neural architecture search notebooks, and see what happens

In [3]:
import pandas as pd
from sklearn.model_selection import KFold
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import datetime
import time
from scipy.stats import probplot
import datetime
import seaborn as sns
sns.set()
from sklearn.utils import shuffle
from keras.optimizers import RMSprop, Adam
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, TensorBoard, EarlyStopping
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, BatchNormalization, GaussianNoise, Input, PReLU, Activation, Concatenate
from keras.initializers import VarianceScaling
from keras import regularizers 
from keras.models import load_model
from keras import backend as K
from sklearn import metrics
import joblib

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [21]:
class ControllerRNN:
    def __init__(self, max_len, batch_size, type_size, arg_size,
                 learning_rate=0.001, hidden_size=32, baseline_smoothing=0.95,
                 variable_length_nnet=True):
        self.hidden_size = hidden_size
        self.unroll_by = max_len
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.type_size = type_size + 1 * variable_length_nnet  # 0 is for end-of-network token
        self.arg_size = arg_size
        self.baseline_smoothing = baseline_smoothing
        self.variable_length_nnet = variable_length_nnet

    def build(self):
        # reward for the architectures
        self.architecture_reward = tf.placeholder(tf.float32, [self.batch_size])
        
        # exponential moving average of the reward
        self.last_average_reward = tf.reduce_mean(self.architecture_reward)
        self.reward_ema = tf.train.ExponentialMovingAverage(self.baseline_smoothing)
        self.update_reward_ema = self.reward_ema.apply([self.last_average_reward])

        rnn_input = tf.random_normal([self.batch_size, self.type_size + self.arg_size])
        rnn = tf.contrib.rnn.GRUCell(self.hidden_size)
        state = tf.random_normal([self.batch_size, rnn.state_size])

        # weight matrices to transform from rnn output to layer type and discrete arg
        rnn_to_layer_type_weight = tf.Variable(tf.random_normal([rnn.output_size + 1, self.type_size]))
        rnn_to_layer_type_gradient = []
        rnn_to_layer_arg_weight = tf.Variable(tf.random_normal([rnn.output_size + 1, self.arg_size]))
        rnn_to_layer_arg_gradient = []

        # layer_probs contains the output from the network, namely the probabilities
        # of type and argument for every layer of every network
        self.layer_probs = []

        # layer_indicators contains one-hot indicators of type and argument
        # for every layer of every network.
        # used to select which action is used to compute the gradient.
        # fixed, must be set before updating the weights
        self.layer_indicators = []

        losses = []
        for i in range(self.unroll_by):
            # run rnn cell
            output, state = rnn(rnn_input, state)

            if i == 0:  # rnn variables are only initialized now
                rnn_params = rnn.trainable_variables + rnn.trainable_weights
                rnn_gradients = [[] for _ in range(len(rnn_params))]

            # compute output probabilites
            output = tf.concat([output, tf.ones((output.shape[0], 1))], axis=1)
            layer_type = tf.nn.relu(tf.matmul(output, rnn_to_layer_type_weight))
            layer_arg = tf.nn.relu(tf.matmul(output, rnn_to_layer_arg_weight))
            rnn_input = tf.concat([layer_type, layer_arg], axis=1)

            layer_type_probs = tf.nn.softmax(layer_type)
            layer_arg_probs = tf.nn.softmax(layer_arg)

            chosen_layer_type = tf.placeholder(tf.int32, self.batch_size)
            chosen_layer_arg = tf.placeholder(tf.int32, self.batch_size)

            self.layer_probs.append((layer_type_probs, layer_arg_probs))
            self.layer_indicators.append((chosen_layer_type, chosen_layer_arg))

            # aggregate gradients
            baseline = self.reward_ema.average(self.last_average_reward)
            prob = (self.last_average_reward - baseline) * (
                tf.reduce_sum(
                    tf.one_hot(
                        chosen_layer_type, depth=self.type_size
                    ) * tf.log(layer_type_probs + 1e-12),
                    axis=1
                ) + tf.reduce_sum(
                    tf.one_hot(
                        chosen_layer_arg, depth=self.arg_size
                    ) * tf.log(layer_arg_probs + 1e-12),
                    axis=1
                )
            )
            losses.append(prob)

            rnn_to_layer_arg_gradient.append(tf.gradients(prob, rnn_to_layer_arg_weight)[0])
            rnn_to_layer_type_gradient.append(tf.gradients(prob, rnn_to_layer_type_weight)[0])
            for param, grad in zip(rnn_params, rnn_gradients):
                grad.append(tf.gradients(prob, param)[0])

        self.loss = tf.reduce_mean(losses)

        def sanitize_gradient(grads):
            avg = sum(grads) / len(grads)
            return tf.clip_by_norm(avg, 1.0)

        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.optimize = optimizer.apply_gradients([
            (sanitize_gradient(grad), param)
            for param, grad in zip(rnn_params, rnn_gradients)
        ] + [
            (sanitize_gradient(rnn_to_layer_type_gradient), rnn_to_layer_type_weight),
            (sanitize_gradient(rnn_to_layer_arg_gradient), rnn_to_layer_arg_weight),
        ])

    def generate_architecture(self, session):
        layers = session.run(self.layer_probs)
        networks = [[] for _ in range(self.batch_size)]
        for (ltype, larg) in layers:
            for i, (nnet, type_prob, arg_prob) in enumerate(zip(networks, ltype, larg)):
                if self.variable_length_nnet and nnet and nnet[-1][0] == 0:
                    continue

                assert all(np.isfinite(type_prob))
                assert all(np.isfinite(arg_prob))

                layer_type = np.random.choice(len(type_prob), p=type_prob)
                layer_arg = np.random.choice(len(arg_prob), p=arg_prob)

                nnet.append((layer_type, layer_arg))

        return networks

    def learn_from_rewards(self, sess, networks, rewards):
        assert len(rewards) == self.batch_size

        # set the indicator variables, telling which action was chosen
        feed_dict = {ind: []
                     for layer_ind in self.layer_indicators
                     for ind in layer_ind}

        for nnet in networks:
            # pad network if shorter than expected
            # we set the indicators to -1, so that all one hot will be 0
            # thus not contributing to the gradient
            if len(nnet) < self.unroll_by:
                nnet = nnet + [(-1, -1)] * (self.unroll_by - len(nnet))

            assert len(nnet) == self.unroll_by
            for (itype, iarg), (ntype, narg) in zip(self.layer_indicators, nnet):
                feed_dict[itype].append(ntype)
                feed_dict[iarg].append(narg)

        feed_dict[self.architecture_reward] = rewards
        loss, _, _ = sess.run([self.loss, self.update_reward_ema, self.optimize],
                              feed_dict=feed_dict)
        return loss

    
class MovingAverages:
    def __init__(self):
        self.metrics = {}
        self.smoothing = {}
        self.snapshots = []
    
    def update(self, metric, value, smoothing=None):
        if smoothing is None:
            smoothing = self.smoothing.get(metric, 0.6)
        self.smoothing[metric] = smoothing
        
        # can pass None to update smoothing
        if value is not None:
            self.metrics[metric] = (
                smoothing * self.metrics.get(metric, value)
                + (1 - smoothing) * value
            )
        return self.metrics[metric]
    
    def update_all(self, **metrics):
        for metric, value in metrics.items():
            self.update(metric, value)
        return [self.metrics[m] for m in metrics]
    
    def snapshot(self, **meta):
        snap = dict(self.metrics)
        snap.update(meta)
        self.snapshots.append(snap)
        return snap

    
def make_index(dtimes, interval):
    # returns a tuple index_above, index_below
    # index_above[i] is the largest i
    # such that dtimes[index_above[i]] - dtimes[i] < interval
    # index_below[i] is the smallest i
    # such that dtimes[i] - dtimes[index_below[i]] < interval
    # dtimes must be already sorted!
    index_below, index_above = np.zeros(
        (2, len(dtimes)), dtype=np.int
    ) - 1
    
    for i, x in enumerate(dtimes):
        j = index_below[i - 1] if i > 0 else 0
        while x - dtimes[j] > interval:
            j += 1

        index_below[i] = j
        index_above[j] = i

    last_above = index_above[0]
    for i in range(len(dtimes)):
        if index_above[i] < 0:
            index_above[i] = last_above
        else:
            last_above = index_above[i]
    
    return index_above, index_below


def compute_trend(df, columns, interval=3600):
    df = df.sort_values('datetime')
    for z in df.z.unique():  
        this_level = df[df.z == z]
        index_above, index_below = make_index(this_level.datetime.values, interval)

        for col in columns:
            val_above = this_level[col].values
            val_below = this_level.iloc[index_below][col].values

            time_above = this_level.datetime.values
            time_below = this_level.iloc[index_below].datetime.values

            trend = 3600 * (val_above - val_below) / (time_above - time_below)

            df.loc[df.z == z, col + '_trend'] = trend

    return df, [col + '_trend' for col in columns]


def get_features(df, use_trend, feature_level):
    wind_temp_levels = df.pivot_table(
        values=['wind', 'temp'], columns='z', index=['ds', 'tt']
    ).reset_index()
    wind_temp_levels.columns = [
        '%s_%d' % (a, b) if b else a
        for a, b in wind_temp_levels.columns.values
    ]

    df = df.merge(wind_temp_levels, on=['ds', 'tt'])

    feature_sets = [
        [
            'z', 'wind', 'temp', 'soil_temp',
            'wind_10', 'wind_20', 'wind_40',
            'temp_10', 'temp_20', 'temp_40',
        ],
        ['soilheat'],
        ['netrad'],
        ['rain', 'dewpoint'],
        ['H', 'LE'],
    ]

    features = [
        f for fset in feature_sets[:feature_level]
        for f in fset
    ]
    
    if use_trend:
        df, added_cols = compute_trend(df, [
            f for f in features if f != 'z'
        ])
        features.extend(added_cols)

    return df, features


def get_train_test_data(df, features, target, samples_count, n_months=12):
    df = df.dropna()

    # get random test months
    test_ds = np.random.choice(df.ds.unique(), n_months, replace=False)
    test_mask = df.ds.isin(test_ds)
    
    train_df, test_df = df.loc[~test_mask], df.loc[test_mask]
    if samples_count > 0:
        # maintain proportion of train/test samples
        test_size = int(samples_count * len(test_df) / len(train_df))
        train_df = train_df.sample(samples_count)
        test_df = test_df.sample(test_size)
    
    train_x, train_y = train_df[features], train_df[target]
    test_x, test_y = test_df[features], test_df[target]

    mean_x, mean_y = train_x.mean(), train_y.mean()
    std_x, std_y = train_x.std(), train_y.std()

    train_x = (train_x - mean_x) /  std_x
    test_x = (test_x - mean_x) / std_x
    
    assert np.all(np.isfinite(train_x))
    
    train_y = (train_y - mean_y) / std_y
    test_y = (test_y - mean_y) / std_y

    return train_x, train_y, test_x, test_y, mean_y, std_y
    

def compute_denormalized_mse(std_y):
    def denormalized_mse(y_true, y_pred):
        # model is trained with normalized data, but we want
        # mse on not normalized data to compare with MOST
        mse = K.mean(K.square(y_true - y_pred), axis=-1)
        return mse * std_y**2
    return denormalized_mse


def build_model(input_shape, architecture, std_y=1):
    # build model with fixed length architecture
    layers = [Input(shape=(input_shape,))]

    for i, (layer_type, layer_arg) in enumerate(architecture):
        if i % 2 == 0:
            num = 2**layer_arg
            layers.append(PReLU()(
                    Dense(num, kernel_initializer=VarianceScaling(2, 'fan_in'))(
                        layers[-1]
                    )
                )
            )
        else:
            pkeep = (layer_arg + 1) / 10  # from 0.1 to 1
            if pkeep < 1:
                layers.append(Dropout(pkeep)(layers[-1]))

    layers.append(Dense(1)(layers[-1]))

    opt = Adam(lr=0.001)
    model = Model(inputs=layers[0], outputs=layers[-1])
    model.compile(loss='mse', optimizer=opt, metrics=[compute_denormalized_mse(std_y)])
    return model


def build_model_vlen(input_shape, architecture, std_y=1):
    # build model with variable length architecture
    layers = [Input(shape=(input_shape,))]

    for layer_type, layer_arg in architecture:
        if layer_type == 0 or layer_type == 1:
            num = 2**layer_arg
            layers.append(PReLU()(
                    Dense(num, kernel_initializer=VarianceScaling(2, 'fan_in'))(
                        layers[-1]
                    )
                )
            )
        elif layer_type == 2:
            pkeep = (layer_arg + 1) / 11  # from 1/11 to 10/11
            layers.append(Dropout(pkeep)(layers[-1]))
        else:
            raise ValueError('layer type from 0 to 2')

    layers.append(Dense(1)(layers[-1]))

    opt = Adam(lr=0.001)
    model = Model(inputs=layers[0], outputs=layers[-1])
    model.compile(loss='mse', optimizer=opt, metrics=[compute_denormalized_mse(std_y)])
    return model



def evaluate_architecture(step, arch_idx, architecture, max_epochs, samples_count):
    train_x, train_y, test_x, test_y, _, std_y = get_train_test_data(
        ddf, features, 'phi_m', samples_count, n_months=12
    )
    
    #K.clear_session()  # https://stackoverflow.com/q/35114376/521776
    model = build_model(train_x.shape[1], architecture, std_y=std_y)

    logdir = 'dev/logs/nas-2/step-%d-arch-%d' % (step, arch_idx)
    callbacks = [
        ReduceLROnPlateau(factor=0.1, verbose=0, min_lr=1e-6, patience=10, monitor='loss'),
        TensorBoard(logdir, write_graph=True, write_grads=True, histogram_freq=0),
        EarlyStopping(min_delta=0.001, patience=25),
    ]

    hist = model.fit(
        train_x, train_y,
        batch_size=1024,
        epochs=max_epochs,
        verbose=0,
        shuffle=True,
        callbacks=callbacks,
        validation_data=(test_x, test_y)
    )

    best = min(hist.history['val_denormalized_mse'])

    return best

In [22]:
def load_data():
    dframe_path = 'data/cabauw/processed-full-log.csv.gz'
    df = pd.read_csv(dframe_path, na_values='--', compression='gzip')

    df = df[(df.ustar > 0.1) & (abs(df.H) > 10) & (df.wind > 1)]
    df = df[df.ds != 201603]

    return df

df = load_data()
ddf, features = get_features(df, use_trend=True, feature_level=4)



In [18]:
controller = ControllerRNN(
    hidden_size=64,
    max_len=16,
    batch_size=1,
    type_size=1,
    arg_size=10,
    learning_rate=0.001,
    baseline_smoothing=0.99,
    variable_length_nnet=False
)

hist = []
controller_graph = tf.Graph()
with controller_graph.as_default():
    controller.build()
    controller_session = tf.Session(graph=controller_graph)
    controller_session.run(tf.global_variables_initializer())

In [27]:
import gc
np.random.seed(4312)

In [None]:
averages = MovingAverages()
averages.smoothing['time'] = 0
start_time = time.time()
for step_idx in range(10000):
    avg_mse = averages.metrics.get('inner_mse', 10)
    if avg_mse > 1:
        max_epochs = 5
    elif avg_mse > 0.5:
        max_epochs = 20
    elif avg_mse > 0.3:
        max_epochs = 50
    else:
        max_epochs = 500

    architectures = controller.generate_architecture(controller_session)

    # test architectures on a temporary graph
    with tf.Graph().as_default():
        with tf.Session().as_default():
            rewards = [
                evaluate_architecture(
                    step_idx, arch_idx, arch,
                    max_epochs, samples_count=-1
                )
                for arch_idx, arch in enumerate(architectures)
            ]

    loss = controller.learn_from_rewards(
        controller_session, architectures, rewards
    )

    averages.update_all(
        contro_loss=loss**2,
        inner_mse=np.mean(rewards),
    )

    if step_idx % 1 == 0:
        snap = averages.snapshot(step=step_idx, time=time.time() - start_time)
        print('  '.join(
            '%s=%.3f' % metric for metric in snap.items()
        ))
        print(architectures)
        gc.collect()

contro_loss=8.213  inner_mse=1.536  step=0.000  time=29.399
[[(0, 0), (0, 3), (0, 3), (0, 3), (0, 3), (0, 3), (0, 3), (0, 3), (0, 3), (0, 3), (0, 3), (0, 3), (0, 3), (0, 3), (0, 3), (0, 3)]]
contro_loss=19.503  inner_mse=1.872  step=1.000  time=374.429
[[(0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8)]]
contro_loss=51.795  inner_mse=2.001  step=2.000  time=553.970
[[(0, 4), (0, 4), (0, 4), (0, 4), (0, 1), (0, 1), (0, 1), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8)]]
contro_loss=45.309  inner_mse=1.905  step=3.000  time=624.190
[[(0, 9), (0, 9), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1)]]
contro_loss=27.561  inner_mse=1.942  step=4.000  time=651.273
[[(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 3), (0, 1), (0, 1)]]
contro_loss=19.982  inner_mse=1.478  

contro_loss=14.530  inner_mse=1.604  step=43.000  time=7406.434
[[(0, 7), (0, 7), (0, 7), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=72.492  inner_mse=1.903  step=44.000  time=7541.840
[[(0, 1), (0, 1), (0, 1), (0, 9), (0, 2), (0, 2), (0, 2), (0, 9), (0, 2), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8)]]
contro_loss=58.618  inner_mse=1.515  step=45.000  time=7774.106
[[(0, 3), (0, 9), (0, 9), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 9), (0, 1), (0, 9), (0, 9), (0, 1), (0, 1)]]
contro_loss=35.747  inner_mse=1.811  step=46.000  time=7807.169
[[(0, 5), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1)]]
contro_loss=262.260  inner_mse=2.024  step=47.000  time=7855.542
[[(0, 6), (0, 6), (0, 6), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1)]]
contro_loss=157.872  inn

contro_loss=48.332  inner_mse=1.427  step=85.000  time=21099.995
[[(0, 4), (0, 4), (0, 1), (0, 1), (0, 1), (0, 1), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=107.551  inner_mse=0.992  step=86.000  time=21746.071
[[(0, 5), (0, 1), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=66.596  inner_mse=0.909  step=87.000  time=23911.001
[[(0, 8), (0, 8), (0, 9), (0, 9), (0, 2), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=60.020  inner_mse=1.497  step=88.000  time=23998.768
[[(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 1)]]
contro_loss=77.398  inner_mse=1.773  step=89.000  time=24302.109
[[(0, 3), (0, 3), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8), (0, 8)]]
contro_loss=46.833 

contro_loss=1.171  inner_mse=0.909  step=127.000  time=98903.551
[[(0, 1), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=0.726  inner_mse=0.664  step=128.000  time=101658.679
[[(0, 8), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=0.452  inner_mse=0.532  step=129.000  time=104398.452
[[(0, 8), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=1.721  inner_mse=1.180  step=130.000  time=106954.367
[[(0, 1), (0, 8), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=1.788  inner_mse=1.030  step=131.000  time=107593.737
[[(0, 8), (0, 8), (0, 8), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=1.29

contro_loss=3.586  inner_mse=0.631  step=169.000  time=207899.024
[[(0, 8), (0, 8), (0, 0), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=2.157  inner_mse=0.493  step=170.000  time=210840.923
[[(0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=2.689  inner_mse=0.512  step=171.000  time=214134.153
[[(0, 6), (0, 5), (0, 5), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=2.184  inner_mse=1.020  step=172.000  time=215908.314
[[(0, 1), (0, 1), (0, 1), (0, 1), (0, 1), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=1.946  inner_mse=0.927  step=173.000  time=216476.476
[[(0, 6), (0, 6), (0, 6), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=1.1

contro_loss=0.501  inner_mse=0.756  step=211.000  time=329865.802
[[(0, 2), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=0.305  inner_mse=0.632  step=212.000  time=332416.637
[[(0, 1), (0, 1), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=0.199  inner_mse=0.571  step=213.000  time=334942.168
[[(0, 0), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=0.221  inner_mse=0.523  step=214.000  time=337471.917
[[(0, 0), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=3.387  inner_mse=1.201  step=215.000  time=339607.569
[[(0, 0), (0, 0), (0, 0), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=2.1

contro_loss=0.403  inner_mse=0.470  step=253.000  time=478617.595
[[(0, 1), (0, 1), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=0.424  inner_mse=0.438  step=254.000  time=483892.580
[[(0, 4), (0, 1), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=0.405  inner_mse=0.400  step=255.000  time=488043.367
[[(0, 6), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=0.291  inner_mse=0.363  step=256.000  time=493439.675
[[(0, 3), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=0.219  inner_mse=0.336  step=257.000  time=497562.806
[[(0, 7), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9), (0, 9)]]
contro_loss=1.4