<a href="https://colab.research.google.com/github/arunraja-hub/Preference_Extraction/blob/fine_tune/extract_preferences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Click "open in colab" above to run. No need to download.
Change the runtime type to GPU or TPU to make it faster.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

## Data and import stuff

In [1]:
# Colab-only line magic: select the TensorFlow 2.x runtime before TensorFlow is imported.
%tensorflow_version 2.x

# Fetch the project repository; later cells add it to sys.path and read its data
# and checkpoint directories via relative "Preference_Extraction/..." paths.
!git clone https://github.com/arunraja-hub/Preference_Extraction.git

# Pinned tf-agents release used to rebuild the agent's QNetwork below.
!pip install tf-agents==0.3.0

# Swap whatever tensorflow-probability is installed for the pinned 0.7.0 release.
!pip uninstall tensorflow-probability -y
!pip install tensorflow-probability==0.7.0

import tensorflow as tf

Cloning into 'Preference_Extraction'...
remote: Enumerating objects: 33, done.[K
remote: Counting objects: 100% (33/33), done.[K
remote: Compressing objects: 100% (27/27), done.[K
remote: Total 776 (delta 21), reused 6 (delta 6), pack-reused 743[K
Receiving objects: 100% (776/776), 32.02 MiB | 3.09 MiB/s, done.
Resolving deltas: 100% (150/150), done.
Collecting tf-agents==0.3.0
[?25l  Downloading https://files.pythonhosted.org/packages/0e/a5/07aa82a3cd586d193b2f086b50a2fd0f48bd888ae204389f666eb178cfb3/tf_agents-0.3.0-py2.py3-none-any.whl (839kB)
[K     |████████████████████████████████| 839kB 12.0MB/s 
Collecting gin-config==0.1.3
[?25l  Downloading https://files.pythonhosted.org/packages/8c/be/c984b1c8a7ba1c385b32bf39c7a225cd9f713d49705898309d01b60fd0e7/gin_config-0.1.3-py3-none-any.whl (43kB)
[K     |████████████████████████████████| 51kB 8.2MB/s 
Installing collected packages: gin-config, tf-agents
  Found existing installation: gin-config 0.3.0
    Uninstalling gin-config-0

In [0]:
import concurrent.futures
import itertools
import os
import pickle
import random
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import PIL
from PIL import Image
import scipy
from scipy import ndimage
from sklearn import metrics
from sklearn.utils import shuffle
import io
import collections

import urllib.request
from urllib.error import HTTPError

from tf_agents.trajectories.time_step import TimeStep
from tf_agents.specs.tensor_spec import TensorSpec
from tf_agents.specs.tensor_spec import TensorSpec
from tf_agents.specs.tensor_spec import BoundedTensorSpec
from tf_agents.networks import q_network

sys.path.append('Preference_Extraction')
from imports_data import all_load_data

## Load Data

In [0]:
# Load the recorded data through the project's all_load_data helper. Later cells
# iterate over the result and read `.observation` and `.policy_info` on each element.
all_raw_data = all_load_data("Preference_Extraction/data/simple_env_1/")

In [4]:
# @title Preprocess the data into x,y training pairs
# @markdown The use_agent flag controls which model is trained.
# @markdown All of the cells below use the data specified here.

# Rerun this cell after setting these to different values to train on a different dataset.
# NOTE(review): use_agent is not read in this cell (x is always the raw observation);
# it is consumed by the visualization and training cells further down.
use_agent = True # @param

# An example is labelled positive (1) when its recorded satisfaction is strictly
# greater than this value.
SATISFACTION_THRESHOLD = -6

xs, ys = [], []

for data in all_raw_data:
  # as_list() does not depend on the row index, so evaluate it once per element
  # of all_raw_data instead of once per row.
  satisfaction = data.policy_info['satisfaction'].as_list()
  for i in range(data.observation.shape[0]):
    # Copy so the training array never aliases the raw data.
    xs.append(np.copy(data.observation[i]))
    ys.append(satisfaction[i] > SATISFACTION_THRESHOLD)

xs = np.array(xs)
ys = np.array(ys).astype(int)

# Shuffle inputs and labels together so later slices are random train/val splits.
xs, ys = shuffle(xs, ys)

print("xs", xs.shape, "ys", ys.shape)
print("ys 1", np.sum(ys))

xs (23750, 14, 16, 5) ys (23750,)
ys 1 9569


In [5]:
# @title Visualize an example
# Pick any row of the dataset. A hard-coded upper bound (previously 1000) raises
# IndexError on datasets with fewer rows and never shows later examples.
rand_index = random.randrange(len(xs))

if not use_agent:
  # Image-model view: show each group of channels as a picture.
  print("Color channels:")
  plt.imshow(xs[rand_index,:,:,:3], interpolation="none")
  plt.show()
  print("Remaining time channel:")
  plt.imshow(xs[rand_index,:,:,3], interpolation="none")
  plt.show()
  print("A different value for each coordinate to help with convolution:")
  plt.imshow(xs[rand_index,:,:,4], interpolation="none")
  plt.show()
else:
  # Agent view: dump the raw observation tensor instead of plotting it.
  print("x", xs[rand_index])
print("y", ys[rand_index])

x [[[0.7577426  0.62405086 0.16560254 0.9547147  0.        ]
  [0.6411708  0.27503952 0.37745285 0.9547147  0.00446429]
  [0.7577426  0.62405086 0.16560254 0.9547147  0.00892857]
  ...
  [0.6411708  0.27503952 0.37745285 0.09173517 0.05803571]
  [0.7577426  0.62405086 0.16560254 0.09173517 0.0625    ]
  [0.6411708  0.27503952 0.37745285 0.09173517 0.06696428]]

 [[0.6411708  0.27503952 0.37745285 0.9547147  0.07142857]
  [0.7577426  0.62405086 0.16560254 0.9547147  0.07589286]
  [0.6411708  0.27503952 0.37745285 0.9547147  0.08035714]
  ...
  [0.7577426  0.62405086 0.16560254 0.09173517 0.12946428]
  [0.6411708  0.27503952 0.37745285 0.09173517 0.13392857]
  [0.7577426  0.62405086 0.16560254 0.09173517 0.13839285]]

 [[0.6411708  0.27503952 0.37745285 0.9547147  0.14285715]
  [0.7577426  0.62405086 0.16560254 0.9547147  0.14732143]
  [0.7577426  0.62405086 0.16560254 0.9547147  0.15178572]
  ...
  [0.7577426  0.62405086 0.16560254 0.09173517 0.20089285]
  [0.7577426  0.62405086 0.16560

## Training Functions

In [6]:
# @title ChannelReducer from lucid
# Copied from https://github.com/tensorflow/lucid/blob/master/lucid/misc/channel_reducer.py

# Copyright 2018 The Lucid Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Helper for using sklearn.decomposition on high-dimensional tensors.

Provides ChannelReducer, a wrapper around sklearn.decomposition to help them
apply to arbitrary rank tensors. It saves lots of annoying reshaping.
"""

import numpy as np
import sklearn.decomposition

try:
    from sklearn.decomposition.base import BaseEstimator
except (ImportError, AttributeError):
    # A missing `sklearn.decomposition.base` module surfaces as ImportError
    # (ModuleNotFoundError), not AttributeError, so catch both to make the
    # fallback reachable.
    from sklearn.base import BaseEstimator


class ChannelReducer(object):
  """Helper for dimensionality reduction to the innermost dimension of a tensor.

  This class wraps sklearn.decomposition classes to help them apply to arbitrary
  rank tensors. It saves lots of annoying reshaping.

  Attributes that are not found on the wrapper itself are looked up on the
  wrapped reducer with a trailing underscore appended (`obj.foo` reads the
  reducer's `foo_`). Unknown attributes raise AttributeError.

  See the original sklearn.decomposition documentation:
  http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition
  """

  def __init__(self, n_components=3, reduction_alg="NMF", **kwargs):
    """Constructor for ChannelReducer.

    Inputs:
      n_components: Number of dimensions to reduce inner most dimension to.
      reduction_alg: A string or sklearn.decomposition class. Defaults to
        "NMF" (non-negative matrix factorization). Other options include:
        "PCA", "FastICA", and "MiniBatchDictionaryLearning". The name of any of
        the sklearn.decomposition classes will work, though.
      kwargs: Additional kwargs to be passed on to the reducer.

    Raises:
      ValueError: if n_components is not an int, or reduction_alg is a string
        that does not name an estimator class in sklearn.decomposition.
    """

    if not isinstance(n_components, int):
      raise ValueError("n_components must be an int, not '%s'." % n_components)

    # Defensively look up reduction_alg if it is a string and give useful errors.
    if isinstance(reduction_alg, str):
      algorithm_map = {}
      for name in dir(sklearn.decomposition):
        obj = getattr(sklearn.decomposition, name)
        if isinstance(obj, type) and issubclass(obj, BaseEstimator):
          algorithm_map[name] = obj
      if reduction_alg not in algorithm_map:
        raise ValueError("Unknown dimensionality reduction method '%s'." % reduction_alg)
      reduction_alg = algorithm_map[reduction_alg]

    self.n_components = n_components
    self._reducer = reduction_alg(n_components=n_components, **kwargs)
    self._is_fit = False

  @classmethod
  def _apply_flat(cls, f, acts):
    """Utility for applying f to inner dimension of acts.

    Flattens acts into a 2D tensor, applies f, then unflattens so that all
    dimensions except innermost are unchanged. If f does not return a numpy
    array, its result is returned untouched.
    """
    orig_shape = acts.shape
    acts_flat = acts.reshape([-1, acts.shape[-1]])
    new_flat = f(acts_flat)
    if not isinstance(new_flat, np.ndarray):
      return new_flat
    shape = list(orig_shape[:-1]) + [-1]
    return new_flat.reshape(shape)

  def fit(self, acts):
    """Fits the wrapped reducer on acts and returns whatever its fit returns."""
    result = ChannelReducer._apply_flat(self._reducer.fit, acts)
    # Only mark as fit once fitting succeeded, so a failed fit does not make
    # __call__ switch to transform().
    self._is_fit = True
    return result

  def fit_transform(self, acts):
    """Fits the wrapped reducer on acts and returns the reduced tensor."""
    result = ChannelReducer._apply_flat(self._reducer.fit_transform, acts)
    self._is_fit = True
    return result

  def transform(self, acts):
    """Reduces acts with the wrapped reducer's transform."""
    return ChannelReducer._apply_flat(self._reducer.transform, acts)

  def __call__(self, acts):
    """Transforms acts, fitting first if the reducer has not been fit yet."""
    if self._is_fit:
      return self.transform(acts)
    else:
      return self.fit_transform(acts)

  def __getattr__(self, name):
    """Forwards `name` to the reducer's `name_` attribute.

    Only invoked when normal attribute lookup fails. Raises AttributeError for
    names that exist on neither object, so hasattr() gives correct answers.
    """
    # Read _reducer through __dict__: using self._reducer here would re-enter
    # __getattr__ and recurse forever on an instance where it is not set yet.
    reducer = self.__dict__.get("_reducer")
    if reducer is not None:
      fitted_name = name + "_"
      if fitted_name in reducer.__dict__:
        return reducer.__dict__[fitted_name]
    raise AttributeError(
        "'%s' object has no attribute '%s'" % (type(self).__name__, name))

  def __dir__(self):
    """Lists own attributes plus the reducer's public `name_` attributes as `name`."""
    dynamic_attrs = [name[:-1]
                     for name in dir(self._reducer)
                     if name[-1] == "_" and name[0] != "_"
                    ]

    return list(ChannelReducer.__dict__.keys()) + list(self.__dict__.keys()) + dynamic_attrs



In [0]:
def get_val_auc(logs):
  """Returns the value of the first entry in logs whose key starts with 'val_auc'.

  Prefix matching is used rather than an exact key (presumably because Keras
  may suffix the metric name, e.g. 'val_auc_1' -- confirm). Returns None when
  no key matches.
  """
  matches = (logs[key] for key in logs if key.startswith('val_auc'))
  return next(matches, None)

class BestStats(tf.keras.callbacks.Callback):
  """A callback to keep track of the best val accuracy and auc seen so far.

  After training:
    bestMetric: highest (val_accuracy + val_auc) / 2 seen over all epochs.
    bestLogs: snapshot of the logs dict from the epoch that produced
      bestMetric, or None if no epoch reported both validation metrics.
    bestTrain: highest training 'accuracy' seen.
    num_epochs: number of epochs that actually ran.
  """
  def on_train_begin(self, logs=None):
      self.bestMetric = -float('inf')
      self.bestLogs = None
      self.bestTrain = -float('inf')
      self.num_epochs = 0

  def on_epoch_end(self, epoch, logs=None):
    logs = logs or {}
    self.num_epochs += 1

    train_accuracy = logs.get('accuracy')
    if train_accuracy is not None:
      self.bestTrain = max(self.bestTrain, train_accuracy)

    # Epochs without validation results cannot be scored.
    val_accuracy = logs.get('val_accuracy')
    if val_accuracy is None:
      return

    val_auc = get_val_auc(logs)
    if val_auc is None:
      return

    metric = (val_accuracy + val_auc) / 2.0

    if metric > self.bestMetric:
      self.bestMetric = metric
      # Store a copy: the dict is handed to every callback and may be changed
      # after this hook returns.
      self.bestLogs = dict(logs)

### CNN from observations

In [0]:
# @title Image model.
# @markdown Run this cell iff use_agent=False
# @markdown If you're trying to improve the accuracy of the model trained on activations, you won't care about this cell.

def get_model(reg_amount, drop_rate, reduction_alg, n_components):
  """Builds and compiles the small CNN that classifies raw observations.

  Args:
    reg_amount: L2 regularization factor applied to every conv/dense kernel.
    drop_rate: dropout rate applied before the output layer.
    reduction_alg: unused; accepted so all get_model variants share hparam keys.
    n_components: unused; see reduction_alg.

  Returns:
    A compiled tf.keras Sequential model with a single sigmoid output, tracking
    accuracy and AUC. The input shape is taken from the global `xs`.
  """
  del reduction_alg, n_components

  def l2_penalty():
    # Fresh regularizer instance per layer, all with the same factor.
    return tf.keras.regularizers.l2(reg_amount)

  layer_stack = [
    # This layer gets one of the color channels. It works better than using all of them.
    # NOTE(review): the index is drawn from [0, 4), so channel 3 (labelled
    # "Remaining time" in the visualization cell) can be picked too -- confirm.
    tf.keras.layers.Lambda(lambda x: tf.expand_dims(x[:,:,:,tf.random.uniform((), 0,4,tf.int32)], 3), input_shape=xs.shape[1:]),
    tf.keras.layers.Conv2D(32, 2, activation='relu', kernel_regularizer=l2_penalty()),
    tf.keras.layers.Conv2D(16, 1, activation='relu', strides=1, kernel_regularizer=l2_penalty()),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(drop_rate),
    tf.keras.layers.Dense(1, activation='sigmoid', kernel_regularizer=l2_penalty()),
  ]
  model = tf.keras.models.Sequential(layer_stack)

  tracked_metrics = ['accuracy', tf.keras.metrics.AUC()]
  model.compile(optimizer=tf.keras.optimizers.Adam(.01),
                loss='binary_crossentropy',
                metrics=tracked_metrics)
  return model

In [0]:
# Hyperparameters for CNN from observations
# Each dict maps a hyperparameter name to the list of values to try; the training
# cell expands every dict into the cross product of its value lists. Here there is
# a single combination. reduction_alg=None makes the training cell skip the
# unsupervised feature-extraction step.
all_hparam_possibilities = [{"reg_amount": [0.0], "drop_rate": [0.0], 'reduction_alg': [None], 'n_components': [None]}]

### Set up agent model and restore

In [0]:
# Directory (inside the cloned repo) searched for the agent's latest checkpoint.
cpt_name = "Preference_Extraction/model_ckpt"

# Rebuild a QNetwork whose input shape matches the observations used in this notebook.
# NOTE(review): the conv/fc layer params presumably mirror the network the agent was
# trained with, since checkpoint tensors are later loaded into these layers -- confirm.
input_shape = [14, 16, 5]
q_net = q_network.QNetwork(input_tensor_spec=TensorSpec(shape=input_shape), action_spec=BoundedTensorSpec((), tf.int32, 0, 2), conv_layer_params = [[16, 3, 1], [32, 3, 2]], fc_layer_params = [64])
# Overrides the private Keras name of the second layer of the first sub-network.
# NOTE(review): presumably avoids a layer-name clash once the nested model is
# flattened into one Sequential -- confirm.
q_net.layers[0].layers[1]._name = "EncodingNetwork/conv2d_1"

# Reader over the most recent checkpoint; load_weigths pulls kernel/bias tensors from it.
latest_cpt =  tf.train.latest_checkpoint(cpt_name)
reader = tf.compat.v1.train.NewCheckpointReader(latest_cpt)
# Wrap q_net in a functional Keras model so it has a defined input and can be
# flattened by flatten_model.
model_input = tf.keras.Input(shape=input_shape)
q_model_nested = tf.keras.models.Model(inputs=model_input, outputs=[q_net(model_input)])
q_model_nested.build(input_shape=input_shape)

In [12]:
def flatten_model(model_nested):
    """Returns a Sequential model made of the leaf layers of model_nested.

    Any layer that exposes a `.layers` attribute is treated as a container and
    replaced, depth first and in order, by its own layers; every other layer is
    kept as is. The same layer objects are reused, not copied.
    """
    def get_layers(layers):
        leaves = []
        for candidate in layers:
            try:
                children = candidate.layers
            except AttributeError:
                # No sub-layers: this is a leaf.
                leaves.append(candidate)
            else:
                leaves.extend(get_layers(children))
        return leaves

    leaf_layers = get_layers(model_nested.layers)
    return tf.keras.models.Sequential(leaf_layers)

def load_weigths(model, last_layer, checkpoint_reader=None):
    """Copies the agent's checkpointed kernels and biases into model's layers.

    Layers 0 and 1 are always restored. Layer 3 is restored when last_layer > 3
    and layer 4 when last_layer > 4. Layer 2 has no entry and is never touched.

    Args:
        model: object whose `layers[i]` expose `set_weights([kernel, bias])`.
        last_layer: number of leading layers of `model` that come from the
            agent network.
        checkpoint_reader: object with `get_tensor(name)`. Defaults to the
            module-level `reader` created when the checkpoint was opened.

    Returns:
        The same `model`, modified in place.
    """
    if checkpoint_reader is None:
        checkpoint_reader = reader

    # (layer index, checkpoint name prefix) pairs. Indexing by position avoids
    # relying on layer objects being hashable dict keys.
    restore_plan = [
        (0, "agent/_q_network/_encoder/_postprocessing_layers/0"),
        (1, "agent/_q_network/_encoder/_postprocessing_layers/1"),
    ]
    if last_layer > 3:
        restore_plan.append((3, "agent/_q_network/_encoder/_postprocessing_layers/3"))
    if last_layer > 4:
        restore_plan.append((4, "agent/_q_network/_q_value_layer"))

    last_name_part = "/.ATTRIBUTES/VARIABLE_VALUE"
    for layer_index, weights_bias_name in restore_plan:
        weights = checkpoint_reader.get_tensor(weights_bias_name + "/kernel" + last_name_part)
        biases = checkpoint_reader.get_tensor(weights_bias_name + "/bias" + last_name_part)
        model.layers[layer_index].set_weights([weights, biases])

    return model

# Flatten the nested agent model into a plain Sequential, then restore the
# checkpoint into it. last_layer=5 makes load_weigths restore layers 0, 1, 3 and 4.
q_model = flatten_model(q_model_nested)
q_model = load_weigths(q_model, last_layer=5)
q_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
EncodingNetwork/conv2d (Conv (None, 12, 14, 16)        736       
_________________________________________________________________
EncodingNetwork/conv2d_1 (Co (None, 5, 6, 32)          4640      
_________________________________________________________________
flatten_1 (Flatten)          (None, 960)               0         
_________________________________________________________________
EncodingNetwork/dense (Dense (None, 64)                61504     
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 195       
Total params: 67,075
Trainable params: 67,075
Non-trainable params: 0
_________________________________________________________________


### Verify the agent performs inference correctly

In [0]:
def verify_model(model, output_index):
  """Checks that model reproduces the activations stored with the raw data.

  Builds a sub-model ending at model.layers[output_index], feeds it each
  observation of all_raw_data[0] one at a time, and asserts the result is
  close (rtol=0.1) to the matching entry of policy_info["activations"].

  Raises:
    AssertionError: from np.testing.assert_allclose on the first mismatch.
  """
  probe = tf.keras.models.Model(inputs=model.input, outputs=model.layers[output_index].output)
  recorded = all_raw_data[0]
  for step in range(len(recorded.observation)):
    # The model expects a batch dimension, so wrap the observation in a batch of one.
    batch_of_one = np.array([recorded.observation[step]])

    restored_activations = probe(batch_of_one)[0]
    expected_activations = recorded.policy_info["activations"][step]
    np.testing.assert_allclose(restored_activations, expected_activations, rtol=.1)

# -2 selects the layer just before the final one in q_model.
verify_model(q_model, output_index=-2)

### Get agent model

In [0]:
# @title Model for training on top of the agent
# @markdown Run this cell iff use_agent=True

def get_model(reg_amount, drop_rate, layer_sizes, q_net_last_cut, q_net_freeze, reduction_alg, n_components):
  """Builds a classifier on top of the first q_net_last_cut layers of q_model.

  Args:
    reg_amount: L2 regularization factor for every new dense kernel.
    drop_rate: dropout rate applied after each new hidden dense layer.
    layer_sizes: sizes of the new hidden dense layers, in order.
    q_net_last_cut: number of leading q_model layers to reuse.
    q_net_freeze: indices of reused layers to make non-trainable; reused layers
      whose index is not listed are made trainable.
    reduction_alg: unused; accepted so all get_model variants share hparam keys.
    n_components: unused; see reduction_alg.

  Returns:
    The compiled model after load_weigths has restored the agent weights into
    its reused layers.

  Note: the reused layers are the global q_model's own layer objects, so their
  `trainable` flags and weights are modified by this call.
  """
  del reduction_alg, n_components

  def l2_penalty():
    return tf.keras.regularizers.l2(reg_amount)

  # New trainable head: Dense + Dropout per requested size.
  head_layers = []
  for ix, layer_size in enumerate(layer_sizes):
    head_layers.append(tf.keras.layers.Dense(layer_size, activation='relu', kernel_regularizer=l2_penalty(), name='post_agent_{}'.format(ix)))
    head_layers.append(tf.keras.layers.Dropout(drop_rate))

  # Set trainability before compile so the optimizer sees the right variables.
  agent_layers = q_model.layers[:q_net_last_cut]
  for qix, agent_layer in enumerate(agent_layers):
    agent_layer.trainable = qix not in q_net_freeze

  output_layer = tf.keras.layers.Dense(1, activation='sigmoid', kernel_regularizer=l2_penalty(), name='output')
  model = tf.keras.models.Sequential([q_model.input] + agent_layers + head_layers + [output_layer])

  tracked_metrics = ['accuracy', tf.keras.metrics.AUC()]
  model.compile(optimizer=tf.keras.optimizers.Adam(.01),
                loss='binary_crossentropy',
                metrics=tracked_metrics)

  return load_weigths(model, last_layer=q_net_last_cut)

In [0]:
# @title Run this cell for hparams with unsupervised feature extraction.
# @markdown Run this cell iff use_agent=True and you want unsupervised feature extraction.

# Two independent configurations (one combination each). A non-None
# reduction_alg makes the training cell run the data through ChannelReducer first.
# NOTE(review): these dicts carry no 'q_net_last_cut' / 'q_net_freeze' keys, yet
# the agent get_model in this notebook takes both as required arguments and
# train_best_logs reads hparams['q_net_last_cut'] -- confirm this cell still works
# with the current get_model.
all_hparam_possibilities = [
  {'drop_rate': [0], 'reduction_alg': ['PCA'], 'layer_sizes': [()], 'reg_amount': [0.2], 'n_components': [2]},
  {'drop_rate': [0], 'reduction_alg': ['FastICA'], 'layer_sizes': [(16, 16)], 'reg_amount': [0], 'n_components': [8]},
]

In [0]:
# @title Run this cell for hparams without unsupervised feature extraction.
# @markdown Run this cell iff use_agent=True and you don't want unsupervised feature extraction.

# Two grids that differ only in dropout / L2 strength. Each expands to
# 1 * 1 * 2 * 3 * 4 = 24 combinations, 48 in total.
#   layer_sizes:    sizes of the new dense layers stacked after the agent layers.
#   q_net_last_cut: how many leading q_model layers get_model reuses.
#   q_net_freeze:   indices of reused layers made non-trainable. get_model only
#                   looks at indices below q_net_last_cut, so larger ones have
#                   no effect.
#   reduction_alg / n_components: None skips unsupervised feature extraction.
all_hparam_possibilities = [
   {
    "drop_rate": [.2],  
    "reg_amount": [.2],  
    "layer_sizes": [(32,), (64,32)],
    "q_net_last_cut": [3, 4, 5],
    "q_net_freeze": [(), (0,1,2), (0,1,2,3), (0,1,2,3,4)],
    "reduction_alg": [None], 
    "n_components": [None]
   },
    {
    "drop_rate": [.5],  
    "reg_amount": [.5],  
    "layer_sizes": [(32,), (64,32)],
    "q_net_last_cut": [3, 4, 5],
    "q_net_freeze": [(), (0,1,2), (0,1,2,3), (0,1,2,3,4)],
    "reduction_alg": [None], 
    "n_components": [None]
   }
]

# Training code

In [0]:
# Run this to train on 10k data instead.
# The training cell reads these four names as globals. The following cell assigns
# the same names, so whichever of the two cells ran last decides the values used.
num_train = 10000  # examples taken from the front of the shuffled data for training
num_val = 2000  # examples taken right after the training slice for validation
epochs = 1000  # upper bound passed to model.fit; early stopping may end sooner
num_repeat = 1  # training runs averaged per hyperparameter combination

In [0]:
# Run this to reproduce the original results.
# The training cell reads these four names as globals. The preceding cell assigns
# the same names, so whichever of the two cells ran last decides the values used.
num_train = 50  # examples taken from the front of the shuffled data for training
num_val = 400  # examples taken right after the training slice for validation
epochs = 400  # upper bound passed to model.fit; early stopping may end sooner
num_repeat = 5  # training runs averaged per hyperparameter combination

In [19]:
# @title Train the model
# @markdown This tries all the combinations of hparams and picks the best one.
# @markdown For each combination of hparams, it averages over num_repeat different train val splits.
# @markdown It re runs the best hyperparameters at the end.

print("use_agent:", use_agent, "num_train:", num_train, "epochs", epochs)
if num_train > 50:
  # Loud reminder that this is not the 50-example setting.
  print("More than 50 train data!!!!!!!!")

# Each item in all_hparam_possibilities is a grid: a dict from hyperparameter name
# to the values to try. Grids are expanded separately, so parameters that don't
# make sense together belong in separate items.
hparam_combinations = []
for hparam_possibilities in all_hparam_possibilities:
  hparam_keys, hparam_values = zip(*hparam_possibilities.items())
  for value_combo in itertools.product(*hparam_values):
    hparam_combinations.append(dict(zip(hparam_keys, value_combo)))

# Try the combinations in random order.
random.shuffle(hparam_combinations)
print("len(hparam_combinations)", len(hparam_combinations), "hparam_combinations", hparam_combinations)

def modify_x_for_reduce(xs):
  """Flattens each example to a vector and shifts the data so its minimum is 0.

  Args:
    xs: array whose first axis indexes examples.

  Returns:
    A new 2D array of shape (xs.shape[0], -1) whose global minimum is 0.
    `xs` itself is left unchanged.
  """
  reshaped_x = np.reshape(xs, [xs.shape[0], -1])
  # Make everything positive because some reductions don't work with negatives.
  # np.reshape may return a view of xs, so subtract out of place; an in-place
  # `-=` would silently shift the caller's array as well.
  return reshaped_x - np.min(reshaped_x)

def unsup_exstract(xs, reg_amount, drop_rate, layer_sizes, reduction_alg, n_components, **unused_hparams):
  """Reduces xs with an unsupervised sklearn.decomposition algorithm.

  Called with a whole hparams dict via **hparams, so it accepts the model-only
  hyperparameters and ignores them.

  Args:
    xs: array whose first axis indexes examples.
    reg_amount, drop_rate, layer_sizes: ignored (model hyperparameters).
    reduction_alg: algorithm name or class handed to ChannelReducer.
    n_components: number of output features per example.
    **unused_hparams: any further hyperparameters (e.g. q_net_last_cut); ignored
      so that extra keys in the hparams dict do not raise TypeError.

  Returns:
    The reduced data, as returned by ChannelReducer.fit_transform on the
    flattened, non-negative version of xs.
  """
  del reg_amount, drop_rate, layer_sizes, unused_hparams

  print("Using unsupervised feature extraction.")

  dim_reduct_model = ChannelReducer(reduction_alg=reduction_alg, n_components=n_components)
  xs = dim_reduct_model.fit_transform(modify_x_for_reduce(xs))
  return xs

def train_best_logs(xs, ys, num_val, do_summary, hparams, get_model):
  """Trains the model and returns the logs of the best epoch.

  Randomly re-splits the data before training: after shuffling, the first
  num_train examples are used for training and the next num_val for validation.
  num_train, epochs and use_agent are read from module globals.

  Args:
    xs: inputs; first axis indexes examples.
    ys: labels aligned with xs.
    num_val: number of validation examples.
    do_summary: when true, print the model summary, the best train accuracy and
      the number of epochs run.
    hparams: keyword arguments for get_model.
    get_model: callable building a compiled Keras model from **hparams.

  Returns:
    The logs recorded by BestStats for the best epoch (None if no epoch was scored).
  """
  tf.keras.backend.clear_session()
  model = get_model(**hparams)
  # Layer 3's activations can only be compared with the recorded ones when that
  # layer is part of the reused agent stack. Not every hparam set has
  # 'q_net_last_cut', so look it up without raising KeyError.
  q_net_last_cut = hparams.get('q_net_last_cut')
  if use_agent and q_net_last_cut is not None and q_net_last_cut > 3:
      verify_model(model, output_index=3)
  xs, ys = shuffle(xs, ys)

  xs_val = xs[num_train:num_train+num_val]
  ys_val = ys[num_train:num_train+num_val]
  early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=30, verbose=0)
  best_stats = BestStats()
  model.fit(xs[:num_train], ys[:num_train], epochs=epochs, batch_size=256, validation_freq=1, callbacks=[best_stats, early_stopping], validation_data=(xs_val, ys_val), verbose=0)
  if do_summary:
    model.summary()
    print("best train accuracy:", best_stats.bestTrain)
    print("Number of epochs:", best_stats.num_epochs)
  return best_stats.bestLogs

def multiple_train_ave(hparams):
  """Trains the model multiple times with the same parameters and returns the average metrics.

  Runs train_best_logs num_repeat times (a fresh random split each time) on the
  global xs / ys, after optional unsupervised feature extraction.

  Args:
    hparams: one hyperparameter combination. A non-None 'reduction_alg' turns on
      unsupervised feature extraction.

  Returns:
    (metric, print_data): metric is the mean of the average validation AUC and
    the average validation accuracy; print_data is a flat tuple of labels and
    values (means and standard deviations) for printing.

  Raises:
    RuntimeError: if a training run recorded no validation metrics.
  """
  start = time.time()
  all_val_auc = []
  all_val_accuracy = []

  if hparams.get('reduction_alg') is not None:
    xs_for_train = unsup_exstract(xs, **hparams)
  else:
    xs_for_train = xs

  for repeat in range(num_repeat):
    # Print the model summary for the first repeat only.
    do_summary = repeat == 0
    logs = train_best_logs(xs_for_train, ys, num_val, do_summary, hparams, get_model)
    if logs is None:
      raise RuntimeError("Training recorded no validation metrics for hparams %r" % (hparams,))
    all_val_auc.append(get_val_auc(logs))
    all_val_accuracy.append(logs.get('val_accuracy'))

  mean_val_auc = np.mean(all_val_auc)
  mean_val_accuracy = np.mean(all_val_accuracy)
  metric = (mean_val_auc + mean_val_accuracy) / 2.0
  print_data = ("mean_val_auc", mean_val_auc, "mean_val_accuracy", mean_val_accuracy, "metric", metric, "val_auc_std", np.std(all_val_auc), "val_accuracy_std", np.std(all_val_accuracy))

  end = time.time()
  print("Seconds per hyperparam config", end - start)
  # GPU: ('Seconds per hyperparam config', 16.970870971679688)

  return metric, print_data

# Track the best configuration seen. The None defaults keep the names defined even
# when no metric ever beats -inf (empty grid, or every metric is NaN).
best_metric = -float('inf')
best_print_data = None
best_hparams = None

run_num = 0
for run_num, hparams in enumerate(hparam_combinations, start=1):
  print("hparams", hparams)

  metric, print_data = multiple_train_ave(hparams)

  print(print_data)
  if metric > best_metric:
    best_metric = metric
    best_print_data = print_data
    best_hparams = hparams

  print("fract done", run_num/float(len(hparam_combinations)))
  # A bare `print` is a no-op expression in Python 3; call it to emit the blank line.
  print()
  print("==============================================================================================")
  print()
  sys.stdout.flush()

if best_hparams is None:
  print("No hyperparameter combination produced a usable metric; skipping the retrain.")
else:
  print("best_hparams", best_hparams)
  print("best results", best_print_data)
  print("Retraining on the best_hparams to make sure we didn't just get good results by random chance.")

  _, print_data = multiple_train_ave(best_hparams)
  print("Result of retrain on the best hyperparameters", print_data)


use_agent: True num_train: 50 epochs 400
len(hparam_combinations) 48 hparam_combinations [{'drop_rate': 0.5, 'reg_amount': 0.5, 'layer_sizes': (32,), 'q_net_last_cut': 4, 'q_net_freeze': (), 'reduction_alg': None, 'n_components': None}, {'drop_rate': 0.5, 'reg_amount': 0.5, 'layer_sizes': (64, 32), 'q_net_last_cut': 3, 'q_net_freeze': (0, 1, 2, 3), 'reduction_alg': None, 'n_components': None}, {'drop_rate': 0.5, 'reg_amount': 0.5, 'layer_sizes': (64, 32), 'q_net_last_cut': 3, 'q_net_freeze': (), 'reduction_alg': None, 'n_components': None}, {'drop_rate': 0.2, 'reg_amount': 0.2, 'layer_sizes': (64, 32), 'q_net_last_cut': 3, 'q_net_freeze': (0, 1, 2, 3), 'reduction_alg': None, 'n_components': None}, {'drop_rate': 0.5, 'reg_amount': 0.5, 'layer_sizes': (32,), 'q_net_last_cut': 3, 'q_net_freeze': (), 'reduction_alg': None, 'n_components': None}, {'drop_rate': 0.2, 'reg_amount': 0.2, 'layer_sizes': (64, 32), 'q_net_last_cut': 4, 'q_net_freeze': (0, 1, 2), 'reduction_alg': None, 'n_component