In [107]:
from keras.models import Sequential, clone_model, Model
from keras.layers import Dense, Dropout, Activation, Flatten, Input
from keras.layers import Convolution2D, MaxPooling2D
from keras.losses import mean_squared_error
from keras.optimizers import Adam
import numpy as np
import tensorflow as tf
from keras import backend as K

# Hyperparameters shared by the cells below.
batch_size = 32         # leading dimension of every placeholder and of the random batches
learning_rate = 0.001   # step size used by both the inner and the meta update
steps = 1000            # upper bound on environment steps per DAgger rollout
nb_epoch = 100          # epoch count handed to model.fit in the DAgger cell
action_dim = 1          # width of one teacher-action row in the DAgger cell
# NOTE(review): img_dim says 64x64x3, but the model and placeholders below are
# built for 125x125x3 and no visible cell reads img_dim -- confirm which is intended.
img_dim = [64, 64, 3]

# tf.reset_default_graph()  (left disabled)

# A single TensorFlow session, registered with Keras so both use the same one.
sess = tf.Session()
K.set_session(sess)

In [108]:
def build_model(input_shape=(125, 125, 3), output_dim=7):
  """Build the convolutional regression network used for both model copies.

  Args:
    input_shape: (height, width, channels) of a single input image. Defaults
      to the 125x125x3 shape used by the placeholders in this notebook.
    output_dim: number of units in the final tanh layer. Defaults to 7, the
      label width used by the placeholders in this notebook.

  Returns:
    An uncompiled keras `Model` mapping an image batch to `output_dim`
    tanh-activated outputs.
  """
  inputs = Input(shape=tuple(input_shape))

  # Convolutional feature extractor.
  x = Convolution2D(32, (3, 3), padding='same', activation="relu")(inputs)
  x = Convolution2D(32, (3, 3), padding='same', activation="relu")(x)
  x = MaxPooling2D(pool_size=(2, 2))(x)
  x = Convolution2D(32, (3, 3), padding='same', activation="relu")(x)
  x = Dropout(0.25)(x)

  # Fully connected head.
  x = Flatten()(x)
  x = Dense(512, activation="relu")(x)
  x = Dropout(0.5)(x)
  # NOTE(review): this single-unit tanh layer squeezes everything through one
  # scalar before the output layer. It is kept so the network is unchanged,
  # but it looks like a leftover from a 1-D action head -- confirm it is wanted.
  x = Dense(1, activation="tanh")(x)
  preds = Dense(output_dim, activation="tanh")(x)

  return Model(inputs=inputs, outputs=preds)

In [109]:
# Placeholders through which one task's training batch is fed at run time.
train_input = tf.placeholder(dtype=tf.float32, shape=(batch_size, 125, 125, 3))
train_label = tf.placeholder(dtype=tf.float32, shape=(batch_size, 7))
# Network holding the initial weights that the meta update later adjusts.
before_model = build_model()
# Predictions of the initial network on the training batch.
before_pred = before_model(train_input)

In [110]:
# Task loss of the initial network on the training batch, and its gradient
# with respect to every trainable weight of before_model.
before_loss = tf.reduce_mean(
    mean_squared_error(y_true=train_label, y_pred=before_pred))
before_gradients = tf.gradients(ys=before_loss,
                                xs=before_model.trainable_weights)
# "Ethereal" weights: for each weight, the tensor obtained by taking one
# gradient-descent step of size learning_rate. Keyed by the weight variable.
ethereal = {
    weight: weight - learning_rate * gradient
    for weight, gradient in zip(before_model.trainable_weights,
                                before_gradients)
}

In [111]:
# Placeholders for the second ("test") batch of the task.
test_input = tf.placeholder(dtype=tf.float32, shape=(batch_size, 125, 125, 3))
test_label = tf.placeholder(dtype=tf.float32, shape=(batch_size, 7))
# Second network with the same architecture; it receives the adapted weights.
after_model = build_model()
# One assign op per weight: overwrite the after_model weight with the
# ethereal (one-step-adapted) value of the matching before_model weight.
specialized_copy_ops = [
    tf.assign(after_weight, ethereal[before_weight])
    for before_weight, after_weight in zip(before_model.trainable_weights,
                                           after_model.trainable_weights)
]

In [112]:
# Loss of the adapted network on the test batch.
after_pred = after_model(test_input)
after_loss = tf.reduce_mean(
    mean_squared_error(y_true=test_label, y_pred=after_pred))
# Gradient of that loss with respect to after_model's own weights.
after_grads = tf.gradients(ys=after_loss, xs=after_model.trainable_weights)

In [118]:
# Cache one gradient tensor per before_model weight.
# The original cell had lost its loop header (an orphaned, indented line with a
# dangling `- *`), so it could not be parsed. The cache now stores the plain
# post-adaptation gradient, and the learning-rate step is applied exactly once
# in the assign op below.
# NOTE(review): reconstructed intent -- confirm. A later cell rebuilds
# `meta_ops` directly from `after_grads` and supersedes this list.
grads_cache = {}
for before_weight, grad in zip(before_model.trainable_weights, after_grads):
  grads_cache[before_weight] = tf.identity(grad)

# Build the meta update: step each initial weight along its cached gradient.
meta_ops = []
for weight in before_model.trainable_weights:
  meta_ops.append(tf.assign(weight, weight - learning_rate * grads_cache[weight]))

In [122]:
# Meta update: move each before_model weight one learning_rate step along the
# gradient of the post-adaptation loss taken at the matching after_model weight.
meta_ops = [
    tf.assign(before_weight, before_weight - learning_rate * grad)
    for before_weight, grad in zip(before_model.trainable_weights, after_grads)
]
  

In [123]:
# Build the initializer for every global variable in the graph and run it
# once in the shared session.
init_op = tf.global_variables_initializer()
sess.run(fetches=init_op)

In [159]:
def train(train_x, train_y, test_x, test_y):
  """Run one meta-training step for a single task.

  First runs `specialized_copy_ops` with the training batch fed (writing the
  adapted weights into `after_model`), then runs `meta_ops` with the test
  batch fed (updating `before_model`). Returns nothing.

  NOTE(review): the Keras learning phase is not fed here, so the Dropout
  layers presumably run with their default phase -- confirm that is intended.
  """
  inner_feed = {train_input: train_x, train_label: train_y}
  outer_feed = {test_input: test_x, test_label: test_y}

  sess.run(specialized_copy_ops, feed_dict=inner_feed)
  sess.run(meta_ops, feed_dict=outer_feed)


In [160]:
def test(train_x, train_y, test_x, test_y):
  sess.run(specialized_copy_ops, feed_dict={train_input: train_x,
                                            train_label: train_y})
  return sess.run(after_loss, feed_dict={test_input: test_x,
                                         test_label: test_y})

In [None]:
# Repeatedly meta-train on one fixed task and print the post-adaptation loss
# after every step.
# NOTE(review): train_x, train_y, test_x and test_y are created in a cell that
# sits further down in this notebook (In [23]); on a fresh kernel that cell
# has to run before this one.
epochs = 1000
for i in range(epochs):
  train(train_x, train_y, test_x, test_y)
  post_adaptation_loss = test(train_x, train_y, test_x, test_y)
  print(post_adaptation_loss)

0.2876296
0.2874587
0.2872875
0.28711784
0.2869485
0.2867778
0.28660884
0.28644037
0.28627276
0.28610575
0.28594095
0.28577638
0.2856117
0.2854471
0.28528225
0.28511885
0.28495675
0.28479475
0.28463358
0.2844723
0.28431147
0.28415066
0.28399032
0.2838296
0.28366977
0.28351003
0.2833508
0.28319
0.283029
0.28286883
0.28270978
0.28255084
0.2823922
0.28223407
0.28207564
0.2819186
0.2817614
0.2816046
0.28144723
0.2812903
0.28113353
0.28097582
0.28081843
0.28066057
0.28050268
0.28034434
0.28018534
0.28002688
0.27986872
0.27971175
0.2795549
0.2793985
0.27924246
0.27908695
0.2789312
0.27877647
0.27862185
0.27846768
0.27831352
0.2781594
0.27800542
0.27785128
0.27769762
0.2775442
0.27739123
0.2772377
0.27708495
0.2769319
0.27677938
0.27662718
0.27647454


In [None]:
#aggregate and retrain
# DAgger-style loop: roll out the current model in the environment, label the
# visited observations with the teacher's action, append them to the
# aggregated dataset and refit the model on everything collected so far.
# NOTE(review): TorcsEnv, model, img_reshape, get_teacher_action, output_file,
# images_all and actions_all are not defined in the visible cells -- they must
# already exist in the kernel for this cell to run.
dagger_itr = 5
for itr in range(dagger_itr):
    ob_list = []

    env = TorcsEnv(vision=True, throttle=False)
    ob = env.reset(relaunch=True)
    reward_sum = 0.0

    for i in range(steps):
        act = model.predict(img_reshape(ob.img))
        ob, reward, done, _ = env.step(act)
        # Truthiness instead of `is True`, so a truthy flag that is not the
        # `True` singleton still ends the rollout.
        if done:
            break
        ob_list.append(ob)
        reward_sum += reward
        print(i, reward, reward_sum, done, str(act[0]))
    print('Episode done ', itr, i, reward_sum)
    output_file.write('Number of Steps: %02d\t Reward: %0.04f\n'%(i, reward_sum))
    env.end()

    # Stop aggregating once a rollout reached the last step index.
    if i==(steps-1):
        break

    # Collect the new samples first and concatenate once, rather than
    # re-copying the whole dataset for every single observation.
    new_images = []
    new_actions = []
    for ob in ob_list:
        new_images.append(img_reshape(ob.img))
        new_actions.append(np.reshape(get_teacher_action(ob), [1, action_dim]))
    images_all = np.concatenate([images_all] + new_images, axis=0)
    actions_all = np.concatenate([actions_all] + new_actions, axis=0)

    # `epochs` is the Keras 2 name of this argument; `nb_epoch` is the
    # deprecated Keras 1 spelling.
    model.fit(images_all, actions_all,
              batch_size=batch_size,
              epochs=nb_epoch,
              shuffle=True)

In [23]:
# Synthetic stand-in batches for a single task: uniform noise in [0, 1) with
# the shapes the placeholders expect. They are drawn from a dedicated, seeded
# generator so that re-running the notebook feeds the same data.
data_rng = np.random.RandomState(0)
train_x = data_rng.uniform(size=(batch_size, 125, 125, 3))
train_y = data_rng.uniform(size=(batch_size, 7))
test_x = data_rng.uniform(size=(batch_size, 125, 125, 3))
test_y = data_rng.uniform(size=(batch_size, 7))

In [62]:
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Return a copy of graph_def with large constant payloads replaced.

    Every node of `graph_def` is copied into a fresh `tf.GraphDef`. For nodes
    whose op is 'Const', a `tensor_content` longer than `max_const_size`
    bytes is replaced by a short "<stripped N bytes>" marker. The input
    graph_def is not modified.

    Args:
        graph_def: object exposing a `node` sequence of graph nodes.
        max_const_size: largest `tensor_content` length, in bytes, that is
            kept as-is.

    Returns:
        The new, stripped `tf.GraphDef`.
    """
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add()
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                # tensor_content is a protobuf bytes field: on Python 3 it
                # rejects a text string, so the marker is encoded first.
                marker = "<stripped %d bytes>" % size
                tensor.tensor_content = marker.encode("utf-8")
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Display a TensorFlow graph inside the notebook.

    Accepts either a GraphDef or any object offering `as_graph_def()`. The
    graph is passed through `strip_consts`, its text form is embedded in a
    page built around the `tf-graph-basic` element, and that page is shown
    in an iframe via `display(HTML(...))`.

    Args:
        graph_def: a GraphDef, or an object with an `as_graph_def` method.
        max_const_size: forwarded to `strip_consts`.
    """
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    stripped = strip_consts(graph_def, max_const_size=max_const_size)

    # Python literal of the graph text, dropped into the page's script.
    pbtxt_literal = repr(str(stripped))
    # Random suffix so several rendered graphs get distinct element ids.
    element_id = 'graph'+str(np.random.rand())

    page_template = """
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """
    page = page_template.format(data=pbtxt_literal, id=element_id)

    frame_template = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """
    # Double quotes are escaped so the page can sit inside the srcdoc attribute.
    frame_markup = frame_template.format(page.replace('"', '&quot;'))
    display(HTML(frame_markup))
    
# Render the current default graph; show_graph converts a Graph object to its
# GraphDef itself.
show_graph(tf.get_default_graph())