<a href="https://colab.research.google.com/github/arunraja-hub/Preference_Extraction/blob/adding_unsupervised/extract_preferences.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Click "open in colab" above to run. No need to download.
Change the runtime type to GPU or TPU to make it faster.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

# Data and import stuff

In [0]:
import concurrent.futures
import itertools
import os
import pickle
import random
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import PIL
from PIL import Image
import scipy
from scipy import ndimage
from sklearn import metrics
from sklearn.utils import shuffle
import io
import collections

import urllib.request
from urllib.error import HTTPError

In [0]:
# @title Hacks to make pickle work.

class Trajectory(
    collections.namedtuple(
        'Trajectory',
        'step_type observation action policy_info next_step_type reward discount',
    )):
  """Local stand-in for the `Trajectory` records stored in the pickled data.

  The notebook code visible here reads `observation` and `policy_info`; the
  remaining fields are declared so the record keeps its seven-field layout.
  """
  # No per-instance __dict__: instances stay plain tuples.
  __slots__ = ()

class ListWrapper:
  """Thin holder around a list, exposing it through `as_list()`."""

  def __init__(self, list_to_wrap):
    # Kept under `_list`, the attribute that `as_list` reads back.
    self._list = list_to_wrap

  def as_list(self):
    """Return the wrapped object itself (no copy is made)."""
    return self._list

class RenameUnpickler(pickle.Unpickler):
  """Unpickler that resolves two class names to the definitions in this notebook.

  Any pickled reference named "Trajectory" or "ListWrapper" is mapped to the
  local class of the same name, regardless of the module recorded in the
  pickle; every other name goes through the standard lookup.
  """

  def find_class(self, module, name):
    """Return the class for `module.name`, substituting the local stand-ins."""
    if name == "ListWrapper":
      return ListWrapper
    if name == "Trajectory":
      return Trajectory
    return super().find_class(module, name)

def rename_load(s):
    """Unpickle one object from the binary file-like `s` using RenameUnpickler.

    Works like pickle.load(), with the 'latin1' encoding passed to the
    unpickler.
    """
    unpickler = RenameUnpickler(s, encoding='latin1')
    return unpickler.load()

In [0]:
# @title Load all of the data
# If this takes more than a minute, stop and restart it.
# TODO: figure out why this cell always hangs the first time it's run.

def load_file(full_path):
  """Download and unpickle the file at URL `full_path`.

  Returns the unpickled object, or None when the request (or the read) raises
  HTTPError. Other exceptions propagate.

  NOTE(review): this unpickles remotely fetched bytes; pickle can execute
  arbitrary code, so only point it at sources you trust.
  """
  try:
    with urllib.request.urlopen(full_path) as response:
      return rename_load(response)
  except HTTPError:
    # Best effort: a failed request is reported to the caller as "no data".
    return None

def all_load_data(base_path, num_files=5000, max_workers=100):
  """Download `ts<i>.pickle` for i in [0, num_files) concurrently.

  Args:
    base_path: URL prefix under which the `ts<i>.pickle` files live.
    num_files: How many consecutive file indices to request.
    max_workers: Size of the thread pool used for the downloads.

  Returns:
    A list with every truthy result of `load_file`, in completion order (so
    the order is not deterministic). Files for which `load_file` returned
    None are skipped.
  """
  # Join with "/" explicitly: os.path.join uses the OS path separator, which
  # is not guaranteed to be "/" and therefore is not safe for URLs.
  if base_path and not base_path.endswith("/"):
    prefix = base_path + "/"
  else:
    prefix = base_path
  urls = [prefix + "ts" + str(i) + ".pickle?raw=true" for i in range(num_files)]

  raw_data = []
  # The context manager shuts the pool down once every download finished, so
  # the worker threads do not outlive this call.
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
    futures = [executor.submit(load_file, url) for url in urls]
    for future in concurrent.futures.as_completed(futures):
      result = future.result()
      if result:
        raw_data.append(result)

  return raw_data

# Warm-up request. Its result is discarded; according to the authors, without
# this single throwaway load all_load_data hangs forever the first time it is
# called (the cause is still unknown, see the TODO at the top of this cell).
load_file("https://github.com/arunraja-hub/Preference_Extraction/blob/master/data/simple_env_1/ts10.pickle?raw=true")

# Fetch every available ts<i>.pickle file under data/simple_env_1/.
all_raw_data = all_load_data("https://github.com/arunraja-hub/Preference_Extraction/blob/master/data/simple_env_1/")

In [7]:
# @title Preprocess the data into x,y training pairs
# @markdown The `use_activations` flag controls which data is used.
# @markdown All of the cells below use the data specified here.

# Rerun this cell after setting these to different values to train on a different dataset.
use_activations = True # @param

xs, ys = [], []

for data in all_raw_data:
  num_steps = data.observation.shape[0]
  for step in range(num_steps):
    # Input: the stored activations or the raw observation, depending on the flag.
    features = data.policy_info["activations"] if use_activations else data.observation
    xs.append(np.copy(features[step]))
    # Label: True when the recorded satisfaction is above -6.
    ys.append(data.policy_info['satisfaction'].as_list()[step] > -6)

xs = np.array(xs)
ys = np.array(ys).astype(int)

# Shuffle inputs and labels together so pairs stay aligned.
xs, ys = shuffle(xs, ys)

print("xs", xs.shape, "ys", ys.shape)
print("ys 1", np.sum(ys))

xs (3350, 64) ys (3350,)
ys 1 1320


In [8]:
# @title Visualize an example
# Pick the index from the real dataset size. A fixed 0..1000 range raises
# IndexError on smaller datasets and never shows samples past index 1000.
rand_index = random.randint(0, len(xs) - 1)

if use_activations:
  print("x", xs[rand_index])
else:
  # (caption, channel selection) for each view of the observation tensor.
  channel_views = [
      ("Color channels:", slice(0, 3)),
      ("Remaining time channel:", 3),
      ("A different value for each coordinate to help with convolution:", 4),
  ]
  for caption, channels in channel_views:
    print(caption)
    plt.imshow(xs[rand_index, :, :, channels], interpolation="none")
    plt.show()
print("y", ys[rand_index])

x [  0.         0.         0.         0.        13.35232    0.
   0.        18.336212   0.         0.         0.        17.459444
   0.        60.982452   0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
  16.84235    0.         0.         0.         0.         0.
   0.         0.       109.10112    0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.         0.         0.         0.         0.         0.
   0.        56.430775   0.         0.      ]
y 1


# ML models

In [9]:
# @title ChannelReducer from lucid
# Copied from https://github.com/tensorflow/lucid/blob/master/lucid/misc/channel_reducer.py

# Copyright 2018 The Lucid Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Helper for using sklearn.decomposition on high-dimensional tensors.

Provides ChannelReducer, a wrapper around sklearn.decomposition to help them
apply to arbitrary rank tensors. It saves lots of annoying reshaping.
"""

import numpy as np
import sklearn.decomposition

# Prefer the historical import location and fall back to sklearn.base.
# A submodule that does not exist raises ImportError (ModuleNotFoundError),
# which `except AttributeError` alone does not catch, so both are handled.
try:
    from sklearn.decomposition.base import BaseEstimator
except (ImportError, AttributeError):
    from sklearn.base import BaseEstimator


class ChannelReducer(object):
  """Helper for dimensionality reduction to the innermost dimension of a tensor.

  This class wraps sklearn.decomposition classes to help them apply to arbitrary
  rank tensors. It saves lots of annoying reshaping.

  See the original sklearn.decomposition documentation:
  http://scikit-learn.org/stable/modules/classes.html#module-sklearn.decomposition
  """

  def __init__(self, n_components=3, reduction_alg="NMF", **kwargs):
    """Constructor for ChannelReducer.

    Inputs:
      n_components: Number of dimensions to reduce the innermost dimension to.
      reduction_alg: A string or sklearn.decomposition class. Defaults to
        "NMF" (non-negative matrix factorization). Other options include:
        "PCA", "FastICA", and "MiniBatchDictionaryLearning". The name of any of
        the sklearn.decomposition classes will work, though.
      kwargs: Additional kwargs to be passed on to the reducer.

    Raises:
      ValueError: if n_components is not an int, or if reduction_alg is a
        string that does not name an estimator in sklearn.decomposition.
    """

    if not isinstance(n_components, int):
      # Wrap in a 1-tuple so a tuple-valued n_components formats correctly.
      raise ValueError("n_components must be an int, not '%s'." % (n_components,))

    # Defensively look up reduction_alg if it is a string and give useful errors.
    if isinstance(reduction_alg, str):
      algorithm_map = {}
      for name in dir(sklearn.decomposition):
        obj = getattr(sklearn.decomposition, name)
        if isinstance(obj, type) and issubclass(obj, BaseEstimator):
          algorithm_map[name] = obj
      if reduction_alg not in algorithm_map:
        raise ValueError("Unknown dimensionality reduction method '%s'." % reduction_alg)
      reduction_alg = algorithm_map[reduction_alg]

    self.n_components = n_components
    self._reducer = reduction_alg(n_components=n_components, **kwargs)
    self._is_fit = False

  @classmethod
  def _apply_flat(cls, f, acts):
    """Utility for applying f to inner dimension of acts.

    Flattens acts into a 2D tensor, applies f, then unflattens so that all
    dimensions except innermost are unchanged. If f does not return a numpy
    array, its result is returned as is.
    """
    orig_shape = acts.shape
    acts_flat = acts.reshape([-1, acts.shape[-1]])
    new_flat = f(acts_flat)
    if not isinstance(new_flat, np.ndarray):
      return new_flat
    shape = list(orig_shape[:-1]) + [-1]
    return new_flat.reshape(shape)

  def fit(self, acts):
    """Fit the wrapped reducer on acts; returns whatever the reducer's fit returns."""
    result = ChannelReducer._apply_flat(self._reducer.fit, acts)
    # Flag the instance as fitted only once fitting actually succeeded.
    self._is_fit = True
    return result

  def fit_transform(self, acts):
    """Fit the wrapped reducer on acts and return the reduced tensor."""
    result = ChannelReducer._apply_flat(self._reducer.fit_transform, acts)
    self._is_fit = True
    return result

  def transform(self, acts):
    """Reduce acts with the wrapped reducer (no fitting is done here)."""
    return ChannelReducer._apply_flat(self._reducer.transform, acts)

  def __call__(self, acts):
    """Transform acts if already fitted, otherwise fit and transform."""
    if self._is_fit:
      return self.transform(acts)
    else:
      return self.fit_transform(acts)

  def __getattr__(self, name):
    """Expose the reducer's attribute `<name>_` as `<name>` on this object.

    Python calls this only after normal attribute lookup failed. Unknown names
    raise AttributeError instead of silently evaluating to None, so hasattr()
    and getattr() with a default behave as expected.
    """
    # Go through __dict__ so an instance without `_reducer` (for example one
    # created with __new__) cannot re-enter __getattr__ endlessly.
    reducer = self.__dict__.get("_reducer")
    if reducer is not None:
      reducer_attrs = getattr(reducer, "__dict__", {})
      if name + "_" in reducer_attrs:
        return reducer_attrs[name + "_"]
    raise AttributeError(
        "'%s' object has no attribute '%s'" % (type(self).__name__, name))

  def __dir__(self):
    """List class and instance attributes plus the reducer's `<name>_` aliases."""
    dynamic_attrs = [name[:-1]
                     for name in dir(self._reducer)
                     if name.endswith("_") and not name.startswith("_")
                    ]

    return list(ChannelReducer.__dict__.keys()) + list(self.__dict__.keys()) + dynamic_attrs



In [10]:
%tensorflow_version 2.x
import tensorflow as tf

def get_val_auc(logs):
  """Return the value of the first key in `logs` that starts with 'val_auc'.

  The prefix match covers suffixed names such as 'val_auc_1'. Returns None
  when no key matches.
  """
  matches = (value for key, value in logs.items() if key.startswith('val_auc'))
  return next(matches, None)

class BestStats(tf.keras.callbacks.Callback):
  """A callback to keep track of the best val accuracy and auc seen so far.

  Attributes set during training:
    bestMetric: highest value of (val_accuracy + val_auc) / 2 seen so far.
    bestLogs: copy of the logs dict of the epoch that produced bestMetric, or
      None if no epoch reported validation metrics.
    bestTrain: highest training accuracy seen so far.
    num_epochs: number of epochs that have finished.
  """

  def on_train_begin(self, logs=None):
    self.bestMetric = -float('inf')
    self.bestLogs = None
    self.bestTrain = -float('inf')
    self.num_epochs = 0

  def on_epoch_end(self, epoch, logs=None):
    logs = logs or {}
    self.num_epochs += 1

    train_accuracy = logs.get('accuracy')
    if train_accuracy is not None:
      self.bestTrain = max(self.bestTrain, train_accuracy)

    val_accuracy = logs.get('val_accuracy')
    val_auc = get_val_auc(logs)
    # Epochs without validation metrics cannot be ranked; skip them.
    if val_accuracy is None or val_auc is None:
      return

    metric = (val_accuracy + val_auc) / 2.0

    if metric > self.bestMetric:
      self.bestMetric = metric
      # Keep a snapshot so later changes to the caller's dict cannot alter
      # the recorded best epoch.
      self.bestLogs = dict(logs)

TensorFlow 2.x selected.


The cells below set the model and hyperparameters to search through. Only run the ones that set the options you want.

The all_hparam_possibilities in the code below are the best ones found so far.

In [0]:
# @title Image model.
# @markdown Run this cell iff use_activations=False
# @markdown If you're trying to improve the accuracy of the model trained on activations, you won't care about this cell.

def get_model(reg_amount, drop_rate, reduction_alg, n_components):
  """Build and compile the convolutional classifier for image observations.

  Args:
    reg_amount: L2 regularisation factor applied to the conv and dense kernels.
    drop_rate: Dropout rate applied before the output layer.
    reduction_alg, n_components: Accepted so every model builder can be called
      with the same hyperparameter dict; not used here.

  Returns:
    A compiled tf.keras Sequential model with one sigmoid output, trained with
    binary cross-entropy and reporting accuracy and AUC.
  """
  del reduction_alg, n_components

  def l2_penalty():
    return tf.keras.regularizers.l2(reg_amount)

  def pick_channel(x):
    # This layer gets one of the color channels. It works better than using all of them.
    # NOTE(review): the index is drawn by tf.random.uniform from [0, 4) - confirm that range is intended.
    return tf.expand_dims(x[:,:,:,tf.random.uniform((), 0,4,tf.int32)], 3)

  model = tf.keras.models.Sequential()
  model.add(tf.keras.layers.Lambda(pick_channel, input_shape=xs.shape[1:]))
  model.add(tf.keras.layers.Conv2D(32, 2, activation='relu', kernel_regularizer=l2_penalty()))
  model.add(tf.keras.layers.Conv2D(16, 1, activation='relu', strides=1, kernel_regularizer=l2_penalty()))
  model.add(tf.keras.layers.GlobalAveragePooling2D())
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dropout(drop_rate))
  model.add(tf.keras.layers.Dense(1, activation='sigmoid', kernel_regularizer=l2_penalty()))

  model.compile(
      optimizer=tf.keras.optimizers.Adam(.01),
      loss='binary_crossentropy',
      metrics=['accuracy', tf.keras.metrics.AUC()],
  )
  return model
# Hyperparameter grid for the image model: a single configuration. The two
# reduction settings are None and are discarded by this cell's get_model.
all_hparam_possibilities = [{"reg_amount": [0.0], "drop_rate": [0.0], 'reduction_alg': [None], 'n_components': [None]}]

In [0]:
# @title Model for training on the network activations
# @markdown Run this cell iff use_activations=True

def get_model(reg_amount, drop_rate, layer_sizes, reduction_alg, n_components):
  """Build and compile the dense classifier used on network activations.

  Args:
    reg_amount: L2 regularisation factor applied to every dense kernel.
    drop_rate: Dropout rate applied after each hidden layer.
    layer_sizes: Iterable of hidden-layer widths; empty means no hidden layers.
    reduction_alg, n_components: Accepted so the function can be called with
      the full hyperparameter dict; not used here.

  Returns:
    A compiled tf.keras Sequential model with one sigmoid output, trained with
    binary cross-entropy and reporting accuracy and AUC.
  """
  del reduction_alg, n_components

  model = tf.keras.models.Sequential()
  for width in layer_sizes:
    # One hidden block: ReLU dense layer followed by dropout.
    model.add(tf.keras.layers.Dense(
        width, activation='relu',
        kernel_regularizer=tf.keras.regularizers.l2(reg_amount)))
    model.add(tf.keras.layers.Dropout(drop_rate))
  model.add(tf.keras.layers.Dense(
      1, activation='sigmoid',
      kernel_regularizer=tf.keras.regularizers.l2(reg_amount)))

  model.compile(
      optimizer=tf.keras.optimizers.Adam(.01),
      loss='binary_crossentropy',
      metrics=['accuracy', tf.keras.metrics.AUC()],
  )
  return model

In [0]:
# @title Run this cell for hparams without unsupervised feature extraction.
# @markdown Run this cell iff use_activations=True and you don't want unsupervised feature extraction.
# Two separate grids, one configuration each. 'reduction_alg' is None in both,
# so the training cell skips the unsupervised feature-extraction step.
all_hparam_possibilities = [{"drop_rate": [.2,], "layer_sizes": [(32,)], "reg_amount": [.2], 'reduction_alg': [None], 'n_components': [None]},
                            {"drop_rate": [.5,], "layer_sizes": [(32,)], "reg_amount": [.5], 'reduction_alg': [None], 'n_components': [None]}]

In [0]:
# @title Run this cell for hparams with unsupervised feature extraction.
# @markdown Run this cell iff use_activations=True and you want unsupervised feature extraction.
# Two separate grids, one configuration each. 'reduction_alg' names the
# sklearn.decomposition class that reduces the inputs to 'n_components'
# features before the classifier is trained.
all_hparam_possibilities = [
  {'drop_rate': [0], 'reduction_alg': ['PCA'], 'layer_sizes': [()], 'reg_amount': [0.2], 'n_components': [2]},
  {'drop_rate': [0], 'reduction_alg': ['FastICA'], 'layer_sizes': [(16, 16)], 'reg_amount': [0], 'n_components': [8]},
]

# Training code

In [13]:
# @title Train the model
# @markdown This tries all the combinations of hparams and picks the best one.
# @markdown For each combination of hparams, it averages over 5 different train val splits.
# @markdown It reruns the best hyperparameters at the end.

# Split sizes and the upper bound on training epochs used by the functions below.
num_train = 50
num_val = 1000
epochs = 400
print("use_activations:", use_activations, "num_train:", num_train, "epochs", epochs)
if num_train > 50:
  print("More than 50 train data!!!!!!!!")

# Every entry of all_hparam_possibilities is an independent grid mapping each
# hyperparameter to its candidate values. Parameters that don't make sense
# together belong in separate entries, because each grid is expanded on its own.
hparam_combinations = []
for hparam_possibilities in all_hparam_possibilities:
  hparam_keys, hparam_values = zip(*hparam_possibilities.items())
  for value_combo in itertools.product(*hparam_values):
    hparam_combinations.append(dict(zip(hparam_keys, value_combo)))
random.shuffle(hparam_combinations)
print("len(hparam_combinations)", len(hparam_combinations), "hparam_combinations", hparam_combinations)

def modify_x_for_reduce(xs):
  """Flatten each sample to one row and shift the data so its minimum is 0.

  Args:
    xs: numpy array whose first axis indexes samples.

  Returns:
    A new 2D array of shape (xs.shape[0], -1) equal to the flattened input
    minus its global minimum. The input array is left unmodified.
  """
  reshaped_x = np.reshape(xs, [xs.shape[0], -1])
  # Make everything non-negative because some reductions don't work with negatives.
  # Subtract out of place: np.reshape may return a view of `xs`, so an in-place
  # `-=` would silently modify the caller's array.
  return reshaped_x - np.min(reshaped_x)

def unsup_exstract(xs, reg_amount, drop_rate, layer_sizes, reduction_alg, n_components):
  """Reduce `xs` to `n_components` features with an unsupervised reducer.

  Args:
    xs: Input samples; passed through modify_x_for_reduce before reduction.
    reg_amount, drop_rate, layer_sizes: Accepted so the function can be called
      with the full hyperparameter dict; not used here.
    reduction_alg: Algorithm name or class handed to ChannelReducer.
    n_components: Number of output features per sample.

  Returns:
    The result of fitting the reducer on the prepared inputs and transforming
    them.
  """
  del reg_amount, drop_rate, layer_sizes

  print("Using unsupervised feature extraction.")

  reducer = ChannelReducer(reduction_alg=reduction_alg, n_components=n_components)
  prepared = modify_x_for_reduce(xs)
  return reducer.fit_transform(prepared)

def train_best_logs(xs, ys, num_val, do_summary, hparams, get_model):
  """Train a fresh model on a random split and return the logs of its best epoch.

  The data is shuffled; the first `num_train` samples are used for training and
  the next `num_val` for validation. `num_train` and `epochs` are read from
  module-level globals.

  Args:
    xs, ys: Inputs and labels.
    num_val: Number of validation samples.
    do_summary: If true, print the model summary, the best training accuracy
      and the number of epochs run.
    hparams: Keyword arguments passed to `get_model`.
    get_model: Callable returning a compiled Keras model.

  Returns:
    The `bestLogs` recorded by the BestStats callback.
  """
  tf.keras.backend.clear_session()
  model = get_model(**hparams)

  xs, ys = shuffle(xs, ys)
  val_end = num_train + num_val
  train_x, train_y = xs[:num_train], ys[:num_train]
  val_data = (xs[num_train:val_end], ys[num_train:val_end])

  # Stop once the validation loss has not improved for 30 epochs.
  early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=30, verbose=0)
  best_stats = BestStats()
  model.fit(
      train_x,
      train_y,
      epochs=epochs,
      batch_size=256,
      validation_freq=1,
      callbacks=[best_stats, early_stopping],
      validation_data=val_data,
      verbose=0,
  )

  if do_summary:
    model.summary()
    print("best train accuracy:", best_stats.bestTrain)
    print("Number of epochs:", best_stats.num_epochs)
  return best_stats.bestLogs

def multiple_train_ave(hparams, num_repeats=5):
  """Train with `hparams` several times and average the validation metrics.

  Each repeat calls train_best_logs, which draws its own random train/val
  split. When `hparams` names a reduction algorithm, the global `xs` is first
  reduced with unsup_exstract; otherwise it is used as is.

  Args:
    hparams: Hyperparameter dict passed to the model builder (and to
      unsup_exstract when 'reduction_alg' is set).
    num_repeats: Number of training runs to average over (at least 1).

  Returns:
    (metric, print_data): metric is the mean of the averaged validation AUC and
    the averaged validation accuracy; print_data is a flat tuple of labels and
    values (means, metric, standard deviations) meant for printing.

  Raises:
    ValueError: if num_repeats is smaller than 1.
  """
  if num_repeats < 1:
    raise ValueError("num_repeats must be at least 1, got %r" % (num_repeats,))

  start = time.time()
  all_val_auc = []
  all_val_accuracy = []

  # .get: a hyperparameter dict without the key means "no reduction".
  if hparams.get('reduction_alg') is not None:
    xs_for_train = unsup_exstract(xs, **hparams)
  else:
    xs_for_train = xs

  for run_index in range(num_repeats):
    # Only the first run prints the model summary.
    do_summary = run_index == 0
    logs = train_best_logs(xs_for_train, ys, num_val, do_summary, hparams, get_model)
    all_val_auc.append(get_val_auc(logs))
    all_val_accuracy.append(logs.get('val_accuracy'))

  mean_val_auc = np.mean(all_val_auc)
  mean_val_accuracy = np.mean(all_val_accuracy)
  metric = (mean_val_auc + mean_val_accuracy) / 2.0
  print_data = ("mean_val_auc", mean_val_auc, "mean_val_accuracy", mean_val_accuracy, "metric", metric, "val_auc_std", np.std(all_val_auc), "val_accuracy_std", np.std(all_val_accuracy))

  end = time.time()
  print("Seconds per hyperparam config", end - start)
  # GPU: ('Seconds per hyperparam config', 16.970870971679688)

  return metric, print_data

# Try every hyperparameter combination and remember the best-scoring one.
best_metric = -float('inf')
best_hparams = None
best_print_data = None

run_num = 0
for hparams in hparam_combinations:
  print("hparams", hparams)

  metric, print_data = multiple_train_ave(hparams)

  print(print_data)
  if metric > best_metric:
    best_metric = metric
    best_print_data = print_data
    best_hparams = hparams

  run_num += 1
  print("fract done", run_num/float(len(hparam_combinations)))
  # print() with parentheses: a bare `print` is a no-op expression in Python 3.
  print()
  print("==============================================================================================")
  print()
  sys.stdout.flush()

if best_hparams is None:
  # Happens when there was nothing to try or no run produced a comparable
  # (non-NaN) metric; fail here with a clear message instead of a NameError.
  raise RuntimeError("No hyperparameter combination produced a valid metric.")

print("best_hparams", best_hparams)
print("best results", best_print_data)
print("Retraining on the best_hparams to make sure we didn't just get good results by random chance.")

_, print_data = multiple_train_ave(best_hparams)
print("Result of retrain on the best hyperparameters", print_data)


use_activations: True num_train: 50 epochs 400
len(hparam_combinations) 2 hparam_combinations [{'drop_rate': 0, 'reduction_alg': 'FastICA', 'layer_sizes': (16, 16), 'reg_amount': 0, 'n_components': 8}, {'drop_rate': 0, 'reduction_alg': 'PCA', 'layer_sizes': (), 'reg_amount': 0.2, 'n_components': 2}]
hparams {'drop_rate': 0, 'reduction_alg': 'FastICA', 'layer_sizes': (16, 16), 'reg_amount': 0, 'n_components': 8}
Using unsupervised feature extraction.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  144       
_________________________________________________________________
dropout (Dropout)            multiple                  0         
_________________________________________________________________
dense_1 (Dense)              multiple                  272       
________________________________________________________________