<a href="https://colab.research.google.com/github/arunraja-hub/Preference_Extraction/blob/master/find_subnets_torch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Find subnets implemented using mnist

## Imports

In [0]:
import os

# One-time Colab bootstrap: the cloned repository directory doubles as the
# "already set up" marker, so this branch only runs on a fresh runtime.
if not os.path.isdir('Preference_Extraction'):
    print("Setting up colab environment")
    !pip uninstall -y -q pyarrow
    !pip install -q https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.8.0.dev5-cp36-cp36m-manylinux1_x86_64.whl
    !pip install -q ray[debug]
    !pip install 'ray[tune]' 
    !pip install bayesian-optimization

    !git clone https://github.com/arunraja-hub/Preference_Extraction.git
    # Hack: terminate the kernel process to force a runtime restart so that
    # the packages installed above are picked up. This is only reached on the
    # first run; afterwards the cloned directory exists and the branch is skipped.
    os._exit(0)

In [0]:
# Select TensorFlow 2.x when running on Google Colab.

try:
  # %tensorflow_version only exists in Colab; the except below is meant to
  # make this cell a no-op in other environments.
  %tensorflow_version 2.x
except Exception:
  pass

In [0]:
from __future__ import print_function
import argparse
import os
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.preprocessing import label_binarize

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms.functional as TF
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import datasets, transforms
import torch.autograd as autograd
from torchsummary import summary

from sklearn.utils import shuffle
import tensorflow as tf
import concurrent.futures
import itertools
import os
import random
import sys
import time
import re
import io
import itertools
import sys

import ray
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.bayesopt import BayesOptSearch

sys.path.append('Preference_Extraction')
from imports_data import all_load_data

In [0]:
# Fix the NumPy and torch RNG seeds and ask cuDNN for deterministic kernels
# (benchmark auto-tuning is switched off). Python's `random` module is not
# seeded in this cell.
np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [5]:
# Use the GPU when torch can see one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
device  # bare expression so the notebook echoes the selected device

device(type='cuda')

## Parameters

In [0]:
# Global experiment configuration read by the model, data and training
# helpers below. Some later cells overwrite 'num_epochs' in place.
params = {
    'num_train': 50,  # size of the training split (also caps the number of training batches per epoch)
    'num_tune': 25,  # size of the tuning split
    'num_val': 400,  # size of the validation split
    'batch_size': 10,  # batch size of the train and tune loaders
    'val_batch_size': 10,  # batch size of the validation loader
    'num_epochs': 100,
    'use_qnet_weights': True, # Flag for running models that use the weights of Qnet vs models that use random weights
    'use_mnist': False,  # Flag for running models on MNIST. If False uses RL Preference Extraction data
    'num_run': 5  # Number of runs (with different data sample) over which to average performance
}

## Subnets Methods

In [0]:
"""
    Original code from What's hidden in a randomly weighted neural network? paper
    Implemented at https://github.com/allenai/hidden-networks
    Weight initialisation was removed since it is not relevant for us
"""

class GetSubnet(autograd.Function):
    """Binary supermask with a straight-through gradient.

    ``forward(scores, k)`` returns a tensor shaped like ``scores`` that holds
    1 at the highest-scoring positions and 0 elsewhere. The number of zeroed
    entries is ``int((1 - k) * scores.numel())``, i.e. roughly the top ``k``
    fraction of scores is kept. ``backward`` passes the incoming gradient to
    ``scores`` unchanged and returns no gradient for ``k``.
    """

    @staticmethod
    def forward(ctx, scores, k):
        # Rank the scores in ascending order over the flattened tensor.
        flat_scores = scores.flatten()
        _, idx = flat_scores.sort()
        j = int((1 - k) * scores.numel())

        # Build the mask in a fresh contiguous buffer instead of writing
        # through `clone().flatten()`: for a non-contiguous `scores` that
        # flatten() is a copy, so the writes would be lost and the raw
        # scores would be returned instead of a 0/1 mask.
        flat_mask = flat_scores.new_zeros(flat_scores.shape)
        flat_mask[idx[j:]] = 1

        return flat_mask.reshape(scores.shape)

    @staticmethod
    def backward(ctx, g):
        # send the gradient g straight-through on the backward pass.
        return g, None

class SupermaskConv(nn.Conv2d):
    """Conv2d whose weights are frozen and gated by a learned score mask.

    ``k`` is the fraction handed to ``GetSubnet`` on every forward pass.
    ``scores_init`` picks the initialiser for the trainable ``scores``
    parameter; any unrecognised value falls back to ``nn.init.uniform_``.
    """

    def __init__(self, *args, k, scores_init='kaiming_uniform', **kwargs):
        super().__init__(*args, **kwargs)
        self.k = k
        self.scores_init = scores_init

        # One trainable score per weight, initialised via a lookup table.
        self.scores = nn.Parameter(torch.Tensor(self.weight.size()))
        score_initialisers = {
            'kaiming_normal': nn.init.kaiming_normal_,
            'kaiming_uniform': lambda t: nn.init.kaiming_uniform_(t, a=math.sqrt(5)),
            'xavier_normal': nn.init.xavier_normal_,
            'best_activation': nn.init.ones_,
        }
        initialise = score_initialisers.get(self.scores_init, nn.init.uniform_)
        initialise(self.scores)

        # Re-draw the weights uniformly, then freeze them: only the scores
        # (and the bias) remain trainable.
        nn.init.uniform_(self.weight)
        self.weight.requires_grad = False

    def forward(self, x):
        mask = GetSubnet.apply(self.scores.abs(), self.k)
        masked_weight = self.weight * mask
        return F.conv2d(
            x, masked_weight, self.bias, self.stride, self.padding, self.dilation, self.groups
        )

class SupermaskLinear(nn.Linear):
    """Linear layer whose weights are frozen and gated by a learned score mask.

    ``k`` is the fraction handed to ``GetSubnet`` on every forward pass.
    ``scores_init`` picks the initialiser for the trainable ``scores``
    parameter; any unrecognised value falls back to ``nn.init.uniform_``.
    """

    def __init__(self, *args, k, scores_init='kaiming_uniform', **kwargs):
        super().__init__(*args, **kwargs)
        self.k = k
        self.scores_init = scores_init

        # initialize the scores and weights
        self.scores = nn.Parameter(torch.Tensor(self.weight.size()))
        if self.scores_init == 'kaiming_normal':
            nn.init.kaiming_normal_(self.scores)
        elif self.scores_init == 'kaiming_uniform':
            nn.init.kaiming_uniform_(self.scores, a=math.sqrt(5))
        elif self.scores_init == 'xavier_normal':
            nn.init.xavier_normal_(self.scores)
        elif self.scores_init == 'best_activation':
            nn.init.ones_(self.scores)
        else:
            nn.init.uniform_(self.scores)

        nn.init.uniform_(self.weight)

        # NOTE: turn the gradient on the weights off
        self.weight.requires_grad = False

    def forward(self, x):
        # Mask the frozen weights with the current supermask. The
        # unreachable second `return` of the original was removed.
        subnet = GetSubnet.apply(self.scores.abs(), self.k)
        w = self.weight * subnet
        return F.linear(x, w, self.bias)

# NOTE: not used in this notebook. It is a NON-AFFINE batch normalization,
# so the normalization layer has no learned parameters.
class NonAffineBatchNorm(nn.BatchNorm2d):
    """BatchNorm2d over ``dim`` features with ``affine`` switched off."""

    def __init__(self, dim):
        super().__init__(dim, affine=False)

## Define architecture

In [0]:
class PrefQNet(nn.Module):
    """Q-network body with a preference-prediction head on top.

    The three Q-head outputs of ``fc2`` are normalised with
    ``q_means_stds`` (index 0 is subtracted, index 1 is the divisor).
    If ``q_head_index`` is None, ``fc3`` combines the normalised heads;
    otherwise only the head with that index is used. ``use_last_linear``
    adds a final 1->1 linear layer before the sigmoid.

    With ``fine_tune`` the layers are plain ``nn.Conv2d`` / ``nn.Linear``;
    otherwise they are the Supermask variants configured with ``k``.
    ``init_from_act_index`` (Supermask mode only) sets the ``fc2`` scores to
    1 for that ``fc1`` activation column and 0 elsewhere.

    Input geometry is chosen from the global ``params['use_mnist']``.
    """
    def __init__(self, fine_tune, k, q_head_index, q_means_stds, use_last_linear, init_from_act_index=None):
        super(PrefQNet, self).__init__()

        if not params['use_mnist']:
            channels_in = 5
            flattened_shape = 960
        else:
            channels_in = 1
            flattened_shape = 4608

        if fine_tune:
            conv_layer = nn.Conv2d
            dense_layer = nn.Linear
            additional_args = {}
            # Plain layers have no scores, so score initialisation is ignored.
            init_from_act_index = None
        else:
            conv_layer = SupermaskConv
            dense_layer = SupermaskLinear
            additional_args = {'k': k}
            if init_from_act_index is not None:
                additional_args['scores_init'] = 'best_activation'

        self.conv1 = conv_layer(in_channels=channels_in, out_channels=16, kernel_size=3, stride=1, bias=True, **additional_args)
        self.conv2 = conv_layer(in_channels=16, out_channels=32, kernel_size=3, stride=2, bias=True, **additional_args)
        self.fc1 = dense_layer(in_features=flattened_shape, out_features=64, bias=True, **additional_args)
        self.fc2 = dense_layer(in_features=64, out_features=3, bias=True, **additional_args)

        if init_from_act_index is not None:
            # Score 1 only for the chosen fc1 activation column of fc2.
            init_scores = np.zeros((3, 64))
            init_scores[:, init_from_act_index] = 1.0
            self.fc2.scores.data = torch.from_numpy(init_scores).float()

        self.fc3 = dense_layer(in_features=3, out_features=1, bias=True, **additional_args)
        self.linear = nn.Linear(1, 1, bias=True)

        self.qix = q_head_index
        self.qu_mu_s = q_means_stds
        self.use_last_linear = use_last_linear

    def fwd_conv1(self, x):
        x = self.conv1(x)
        return F.relu(x)

    def fwd_conv2(self, x):
        x = self.fwd_conv1(x)
        x = self.conv2(x)
        return F.relu(x)

    def fwd_flat(self, x):
        x = self.fwd_conv2(x)
        return torch.flatten(torch.transpose(x, 1, 3), 1) # Pre-flattening transpose is necessary for TF-Torch conversion

    def fwd_fc1(self, x):
        x = self.fwd_flat(x)
        x = self.fc1(x)
        return F.relu(x)

    def fwd_fc2(self, x):
        x = self.fwd_fc1(x)
        return self.fc2(x)

    def forward(self, x):
        x = self.fwd_fc2(x)

        # Normalise on the activations' own device/dtype rather than on the
        # notebook-global `device`, and out of place so the fc2 output is
        # not mutated.
        mu = torch.as_tensor(self.qu_mu_s[0], dtype=x.dtype, device=x.device)
        sigma = torch.as_tensor(self.qu_mu_s[1], dtype=x.dtype, device=x.device)
        x = (x - mu) / sigma

        if self.qix is None:
            x = self.fc3(x)
        else:
            # Slice (not index) so the head keeps its trailing dimension.
            x = x[:, self.qix:self.qix + 1]

        if self.use_last_linear:
            x = self.linear(x)

        x = torch.sigmoid(x)
        return x.flatten()

## Load Data

In [0]:
# Run this cell for training with original RL Preference Extraction data
if not params['use_mnist']:
    all_raw_data = all_load_data("Preference_Extraction/data/simple_env_1/")

    activations = []
    observations = []
    preferences = []

    for data in all_raw_data:
        for i in range(data.observation.shape[0]):
            observations.append(np.copy(data.observation[i]))
            activations.append(np.copy(data.policy_info["activations"][i]))
            preferences.append((data.policy_info['satisfaction'].as_list()[i] > -6).astype(int))

    activations = np.array(activations)

    xs = np.rollaxis(np.array(observations), 3, 1) # Torch wants channel-first
    ys = np.array(preferences)

In [0]:
# Run this cell for training with MNIST
if params['use_mnist']:
    tr_data_loader = torch.utils.data.DataLoader(
        datasets.MNIST('mnist', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])), batch_size=params['batch_size'], shuffle=True)

    val_data_loader = torch.utils.data.DataLoader(
        datasets.MNIST('mnist', train=False, download=True, 
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])), batch_size=params['val_batch_size'], shuffle=True)

## Loading Weights

In [11]:
# Restore the saved Keras model from the cloned repository and print its
# layer summary.
new_save_path = "Preference_Extraction/saved_model2"
restored_model = tf.keras.models.load_model(new_save_path)
restored_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
EncodingNetwork/conv2d (Conv (None, 12, 14, 16)        736       
_________________________________________________________________
EncodingNetwork/conv2d_1 (Co (None, 5, 6, 32)          4640      
_________________________________________________________________
flatten (Flatten)            (None, 960)               0         
_________________________________________________________________
EncodingNetwork/dense (Dense (None, 64)                61504     
_________________________________________________________________
dense (Dense)                (None, 3)                 195       
Total params: 67,075
Trainable params: 67,075
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Weight arrays of the restored model; load_weights() below indexes this
# list positionally (0..7).
original_weights=restored_model.get_weights()

In [0]:
def load_weights(model):
    """Copy the restored TF weights into ``model`` and move it to ``device``.

    Reads the global ``original_weights`` list positionally: 0/1 are the
    conv1 kernel/bias, 2/3 conv2, 4/5 fc1, 6/7 fc2. Kernels are transposed
    with ``np.transpose`` before being assigned. ``fc3`` is set to an
    all-ones weight and a zero bias.

    In MNIST mode only the first input channel of the conv1 kernel is used,
    and the fc1 weight is a random (64, 4608) matrix whose leading columns
    are overwritten with the restored fc1 weights.

    Every tensor is an independent copy. ``torch.from_numpy`` would alias
    the arrays in ``original_weights``, so on CPU any in-place parameter
    update during training would silently modify the saved weights and
    leak into the next ``load_weights`` call.
    """
    def _copy(array):
        # torch.tensor always copies (and handles transposed views).
        return torch.tensor(array)

    if not params['use_mnist']:
        model.conv1.weight.data = _copy(np.transpose(original_weights[0]))
        model.fc1.weight.data = _copy(np.transpose(original_weights[4]))
    else:
        model.conv1.weight.data = _copy(np.transpose(original_weights[0][:, :, :1, :]))
        mnist_flt_weights = np.random.rand(64, 4608)
        mnist_flt_weights[:, :original_weights[4].shape[0]] = np.transpose(original_weights[4])
        mnist_flt_weights = mnist_flt_weights.astype(np.float32)
        model.fc1.weight.data = _copy(mnist_flt_weights)

    model.conv1.bias.data = _copy(original_weights[1])
    model.conv2.weight.data = _copy(np.transpose(original_weights[2]))
    model.conv2.bias.data = _copy(original_weights[3])
    model.fc1.bias.data = _copy(original_weights[5])
    model.fc2.weight.data = _copy(np.transpose(original_weights[6]))
    model.fc2.bias.data = _copy(original_weights[7])
    model.fc3.weight.data = torch.ones(1, 3, dtype=torch.float32)
    model.fc3.bias.data = torch.zeros(1, dtype=torch.float32)
    model.to(device)

In [0]:
# Reference model for the parity checks below: Supermask layers with k=1,
# identity normalisation (zero means, unit stds) and all Q heads combined.
test_model = PrefQNet(k=1, fine_tune=False, q_head_index=None, q_means_stds=[[0, 0, 0], [1, 1, 1]], use_last_linear=True).to(device)

if params['use_qnet_weights']:
    load_weights(test_model)

## Test the weights loaded properly

In [0]:
# Comparing that the models have identical observations for identical images
# One Keras sub-model per intermediate layer (first four layers, in order),
# each mapping the restored model's input to that layer's output.
tf_conv1_fn, tf_conv2_fn, tf_flt_fn, tf_fc1_fn = (
    tf.keras.models.Model(inputs=restored_model.input, outputs=tf_layer.output)
    for tf_layer in restored_model.layers[:4]
)

def npsigmoid(x):
  """Return the logistic sigmoid of ``x``, computed with NumPy."""
  denominator = 1 + np.exp(-x)
  return 1 / denominator

def check_same(torch_layer, tf_layer):
    """Assert that a torch stage and a TF stage agree on the current sample.

    Both callables are fed the module-level ``single_observation_torch`` /
    ``single_observation``. The torch output is transposed and its trailing
    axis dropped before it is compared with the first TF output.
    """
    torch_result = torch_layer(single_observation_torch).detach().cpu().numpy()
    torch_result = np.transpose(torch_result)
    torch_result = torch_result.reshape(torch_result.shape[:-1])
    tf_result = tf_layer(single_observation)[0].numpy()
    np.testing.assert_allclose(torch_result, tf_result, rtol=.1, atol=5)

# due to shape of original TF model this test can be done only when use_mnist = False
if not params['use_mnist'] and params['use_qnet_weights']:
    # Walk over every observation of the first loaded trajectory and compare
    # the torch model against the restored TF model stage by stage.
    for i in range(len(all_raw_data[0].observation)):

        single_observation = np.array([all_raw_data[0].observation[i]])
        single_observation_torch = torch.Tensor(np.array([np.transpose(all_raw_data[0].observation[i])]))

        single_observation_torch = single_observation_torch.to(device)

        # (The original ran the fwd_flat check twice; once is enough.)
        check_same(test_model.fwd_conv1, tf_conv1_fn)
        check_same(test_model.fwd_conv2, tf_conv2_fn)
        check_same(test_model.fwd_flat, tf_flt_fn)

        # fc1 is checked by hand because its output is also compared with
        # the activations stored alongside the observations.
        fc1_torch_out = np.transpose(test_model.fwd_fc1(single_observation_torch).detach().cpu().numpy())
        fc1_torch_out = fc1_torch_out.reshape(fc1_torch_out.shape[:-1])
        fc1_tf_out = tf_fc1_fn(single_observation)[0].numpy()

        np.testing.assert_allclose(fc1_torch_out, fc1_tf_out, rtol=.1, atol=5)
        old_activations = all_raw_data[0].policy_info["activations"][i]
        np.testing.assert_allclose(fc1_torch_out, old_activations, rtol=.1, atol=5)
        np.testing.assert_allclose(old_activations, fc1_tf_out, rtol=.1, atol=5)

        check_same(test_model.fwd_fc2, restored_model)

        # Full forward pass: test_model was built with identity normalisation
        # and load_weights sets fc3 to all-ones weights with zero bias, so the
        # expected value is the sigmoid of the summed TF Q heads.
        torch_out = np.transpose(test_model.forward(single_observation_torch).detach().cpu().numpy())
        torch_out = torch_out.reshape(torch_out.shape[:-1])
        tf_out = npsigmoid(np.sum(restored_model(single_observation)[0].numpy()))
        np.testing.assert_allclose(torch_out, tf_out, rtol=.1, atol=5)

## Modelling

### Get data to normalize qHeads

In [16]:
def get_q_heads_mu_and_sigma(model, all_obs, num_obs):
    """Estimate per-head mean and std of the Q-head outputs.

    Shuffles ``all_obs``, feeds the first ``num_obs`` of them through
    ``model.fwd_fc2`` and returns ``np.array([mu, s])`` where ``mu`` and
    ``s`` are the mean and standard deviation over axis 0. The model is
    left in eval mode, and the statistics are printed.
    """
    model.eval()

    all_obs = shuffle(all_obs)
    obs_to_pass = all_obs[:num_obs]

    obs_tensor = torch.Tensor(obs_to_pass)
    obs_tensor = obs_tensor.to(device)

    # Statistics only: skip autograd so no graph is built for the whole
    # batch of observations.
    with torch.no_grad():
        qheads_values = model.fwd_fc2(obs_tensor).cpu().numpy()

    mu = qheads_values.mean(axis=0)
    s = qheads_values.std(axis=0)

    print("mu", mu, "s", s)

    return np.array([mu, s])

if params['use_mnist']:
    # Take a single training batch as the observation pool. The built-in
    # next() is used because iterator objects are not guaranteed to expose
    # a `.next()` method on Python 3.
    img_batch, label = next(iter(tr_data_loader))
    xs = img_batch

# Normalisation statistics for the Q heads, from up to 10000 observations.
q_mu_s = get_q_heads_mu_and_sigma(test_model, xs, 10000)

mu [ 92.63603  68.25072 138.24101] s [47.462715 50.686085 77.230034]


### Methods to inspect performance

In [0]:
def get_number_of_new_scores_in_top_k(new_scores, old_scores, k):
    """Count how many positions left the kept (top-``k``) set.

    ``new_scores`` and ``old_scores`` are index sequences sorted by
    ascending score (as produced by ``argsort``). The kept set is the tail
    starting at ``int((1 - k) * len(seq))``, the same cut-off that
    ``GetSubnet`` applies with ``idx[j:]``. The original sliced the head of
    the sequence, which is the lowest-scored fraction.

    Returns the number of indices in the old kept set that are absent from
    the new one.
    """
    def _kept(order):
        cut = int((1 - k) * len(order))
        return set(order[cut:])

    new_top_k_scores = _kept(new_scores)
    old_top_k_scores = _kept(old_scores)

    return len(old_top_k_scores) - len(new_top_k_scores & old_top_k_scores)

def model_scores_to_dict(model):
    """Return, per layer, the flat score indices sorted by ascending magnitude.

    The ranking uses ``scores.abs()`` because that is what the Supermask
    layers pass to ``GetSubnet`` in their forward pass. Ranking the raw
    signed scores (as the original did) orders large negative scores last
    in importance although they are kept by the mask.
    """
    layer_names = ('conv1', 'conv2', 'fc1', 'fc2', 'fc3')
    return {
        name: getattr(model, name).scores.detach().abs().cpu().numpy().flatten().argsort()
        for name in layer_names
    }

def get_no_of_changed_scores(model, previous_scores, k):
    """Compare the model's current score ranking with a previous snapshot.

    Returns ``(changes, snapshot)``: ``changes`` maps each layer name to
    the count reported by ``get_number_of_new_scores_in_top_k`` and
    ``snapshot`` is the fresh ``model_scores_to_dict(model)`` result.
    """
    current = model_scores_to_dict(model)
    changes = {
        layer: get_number_of_new_scores_in_top_k(current[layer], previous_scores[layer], k)
        for layer in current
    }
    return changes, current

def plot_metric(results_dict, metric):
    """Plot the per-epoch train and test curves of ``metric`` for one run.

    Reads ``results_dict['train<metric>']`` and ``results_dict['test<metric>']``.
    The x axis is derived from each series' own length rather than from the
    global ``params['num_epochs']``, which other cells change between runs
    and which would otherwise make x and y lengths disagree.
    """
    plt.title(metric)
    plt.xlabel('Epochs')
    train_series = results_dict[f'train{metric}']
    test_series = results_dict[f'test{metric}']
    plt.plot(list(range(1, len(train_series) + 1)), train_series, label=f'Train {metric}')
    plt.plot(list(range(1, len(test_series) + 1)), test_series, label=f'Test {metric}')
    plt.legend()
    plt.show()

def plot_metric_multiple_runs(results_items, metric, train=True):
    """Overlay one curve of ``metric`` per entry of ``results_items``.

    ``results_items`` maps a label to a results dict. With ``train`` true
    the ``'train<metric>'`` series is drawn, otherwise ``'test<metric>'``.
    The x axis follows each series' own length instead of the global
    ``params['num_epochs']``, so runs of different lengths can be plotted.
    """
    plt.title(metric)
    plt.xlabel('Epochs')
    for res_key, res_dict in results_items.items():
        if train:
            series = res_dict[f'train{metric}']
            label = f'Train {metric} - {res_key}'
        else:
            series = res_dict[f'test{metric}']
            label = f'Test {metric} - {res_key}'
        plt.plot(list(range(1, len(series) + 1)), series, label=label)
    plt.legend()
    plt.show()

def plot_score_changes(score_changes_dict):
    """Draw one line per layer showing its score changes per optimisation step."""
    plt.title('Layer-wise score changes')
    plt.xlabel('Optimisation steps (num_train / batch_size * epochs)')
    for layer, changes in score_changes_dict.items():
        steps = list(range(1, len(changes) + 1))
        plt.plot(steps, changes, label=f'{layer}')
    plt.legend()
    plt.show()

### Method to get data for one sample run

In [0]:
def get_data_sample(xs=None, ys=None):
    """Build the (train, tune, validation) data loaders for one run.

    RL data (``params['use_mnist']`` false): ``xs``/``ys`` are shuffled
    together and cut into consecutive windows of ``num_train``,
    ``num_tune`` and ``num_val`` samples.

    MNIST: the training split feeds the train loader; both the tune and
    the validation loader read the MNIST test split. ``xs``/``ys`` are
    ignored.
    """
    if params['use_mnist']:
        def mnist_loader(train, batch_size):
            dataset = datasets.MNIST(
                'mnist', train=train, download=True,
                transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.1307,), (0.3081,)),
                ]))
            return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

        return (
            mnist_loader(True, params['batch_size']),
            mnist_loader(False, params['batch_size']),
            mnist_loader(False, params['val_batch_size']),
        )

    xs, ys = shuffle(xs, ys)

    train_end = params['num_train']
    tune_end = train_end + params['num_tune']
    val_end = tune_end + params['num_val']

    def window_loader(start, stop, batch_size):
        window = slice(start, stop)
        dataset = torch.utils.data.TensorDataset(torch.Tensor(xs[window]), torch.Tensor(ys[window]))
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size)

    return (
        window_loader(None, train_end, params['batch_size']),
        window_loader(train_end, tune_end, params['batch_size']),
        window_loader(tune_end, val_end, params['val_batch_size']),
    )

### Single run train/test methods

In [0]:
"""
    Train/Test function for Randomly Weighted Hidden Neural Networks Techniques
    Adapted from https://github.com/NesterukSergey/hidden-networks/blob/master/demos/mnist.ipynb
"""

def compute_metrics(predictions, true_labels):
    """Return ``(accuracy, auc)`` for probabilistic binary predictions.

    Accuracy thresholds the predictions at 0.5; the AUC is the area under
    the ROC curve with 1 as the positive label.
    """
    probs = np.array(predictions)
    labels = np.array(true_labels)

    hard_calls = (probs > 0.5).astype(int)
    n_correct = np.sum(np.equal(hard_calls, labels))
    accuracy = n_correct / len(labels)

    fpr, tpr, _ = metrics.roc_curve(labels, probs, pos_label=1)
    return accuracy, metrics.auc(fpr, tpr)

def train(model, k, device, train_loader, optimizer, criterion):
    """Run one training epoch and return ``(loss, accuracy, auc, score_changes)``.

    At most ``params['num_train']`` batches are consumed. The summed batch
    losses are divided by ``len(train_loader.dataset)``. When ``k`` is not
    None, the per-layer number of score changes is recorded after every
    optimisation step; otherwise ``score_changes`` is an empty dict.

    The running loss is accumulated as a Python float (``loss.item()``) so
    the autograd graph of every batch is not kept alive for the whole
    epoch, and so an empty loader yields 0.0 instead of raising.
    """
    train_loss = 0.0
    true_labels = []
    predictions = []  # predicted probabilities

    model.train()
    train_score_changes = {}
    if k is not None:
        scores = model_scores_to_dict(model)
        train_score_changes = {layer: [] for layer in scores}

    for data, target in itertools.islice(train_loader, params['num_train']):

        data, target = data.to(device), target.to(device)
        if params['use_mnist']:
            # Binary task on MNIST: digit 0 versus every other digit.
            target = (target > 0).float()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if k is not None:
            score_changes, scores = get_no_of_changed_scores(model, scores, k)
            for layer, changed in score_changes.items():
                train_score_changes[layer].append(changed)

        train_loss += loss.item()
        predictions.extend(output.detach().cpu().numpy())
        true_labels.extend(target.detach().cpu().numpy())

    train_loss /= len(train_loader.dataset)
    accuracy, auc = compute_metrics(predictions, true_labels)

    return train_loss, accuracy, auc, train_score_changes


def test(model, device, criterion, test_loader, num_test):
    """Evaluate ``model`` and return ``(loss, accuracy, auc)``.

    At most ``num_test`` batches are consumed, without gradient tracking.
    The summed batch losses are divided by ``len(test_loader.dataset)``.
    """
    model.eval()

    seen_labels = []
    seen_outputs = []
    total_loss = 0

    with torch.no_grad():
        for batch_x, batch_y in itertools.islice(test_loader, num_test):
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            if params['use_mnist']:
                batch_y = (batch_y > 0).float()

            batch_out = model(batch_x)
            total_loss += criterion(batch_out, batch_y)

            seen_outputs.extend(batch_out.detach().cpu().numpy())
            seen_labels.extend(batch_y.detach().cpu().numpy())

    total_loss /= len(test_loader.dataset)
    accuracy, auc = compute_metrics(seen_outputs, seen_labels)

    return total_loss.item(), accuracy, auc

def run_model(model, k, learning_rate, weight_decay, num_epochs):
  """Train ``model`` for ``num_epochs`` on a fresh data sample.

  Draws loaders from the module-level ``xs``/``ys``, optimises every
  parameter that requires grad with Adam and BCE loss, and evaluates on
  the tune and validation loaders after each epoch.

  Returns a dict of per-epoch lists ('trainLoss', 'testLoss', 'tuneLoss',
  'trainAccuracy', 'testAccuracy', 'trainAUC', 'testAUC') plus
  'scoreChanges', the per-layer score changes concatenated over epochs.
  """
  tr_data_loader, tune_data_loader, val_data_loader = get_data_sample(xs, ys)

  trainable = [p for p in model.parameters() if p.requires_grad]
  optimizer = optim.Adam(trainable, lr=learning_rate, weight_decay=weight_decay)

  criterion = nn.BCELoss().to(device)
  # NOTE(review): T_max is the number of training batches while the
  # scheduler is stepped once per epoch; confirm this is intended.
  scheduler = CosineAnnealingLR(optimizer, T_max=len(tr_data_loader))

  history = {
      'trainLoss': [], 'testLoss': [], 'tuneLoss': [],
      'trainAccuracy': [], 'testAccuracy': [],
      'trainAUC': [], 'testAUC': [],
  }
  per_epoch_changes = []

  for _ in range(num_epochs):
      tr_loss, tr_acc, tr_auc, epoch_changes = train(model, k, device, tr_data_loader, optimizer, criterion)
      tune_loss, _, _ = test(model, device, criterion, tune_data_loader, params['num_tune'])
      val_loss, val_acc, val_auc = test(model, device, criterion, val_data_loader, params['num_val'])
      scheduler.step()

      per_epoch_changes.append(epoch_changes)
      history['trainLoss'].append(tr_loss)
      history['tuneLoss'].append(tune_loss)
      history['testLoss'].append(val_loss)
      history['trainAccuracy'].append(tr_acc)
      history['trainAUC'].append(tr_auc)
      history['testAccuracy'].append(val_acc)
      history['testAUC'].append(val_auc)

  # Concatenate the per-epoch change lists layer by layer.
  merged_changes = {layer: [] for layer in per_epoch_changes[0].keys()}
  for epoch_changes in per_epoch_changes:
      for layer, changes in epoch_changes.items():
          merged_changes[layer].extend(changes)

  history['scoreChanges'] = merged_changes
  return history

In [0]:
def multi_runs(fine_tune, K, q_head_index, q_means_stds, use_last_linear, init_from_act_index, 
               learning_rate, weight_decay, plots=False):
    """Train ``params['num_run']`` fresh models and collect final-epoch metrics.

    Each run builds a new ``PrefQNet``, optionally loads the Q-network
    weights (``params['use_qnet_weights']``) and trains it with
    ``run_model``. With ``plots`` true, debug charts are drawn for the
    first run only.

    Returns ``(per_run, averaged)``: ``per_run`` maps each metric name to
    the list of last-epoch values (one per run, 'scoreChanges' excluded),
    and ``averaged`` maps each metric name to their sum divided by
    ``params['num_run']``.
    """
    averaged_results = {}
    for run_ix in range(params['num_run']):

        # Always place the model on `device`: load_weights() does so too, but
        # it is skipped when use_qnet_weights is False, which would leave the
        # model on the CPU while the batches are moved to `device`.
        model = PrefQNet(fine_tune=fine_tune, k=K, q_head_index=q_head_index, q_means_stds=q_means_stds,
                         use_last_linear=use_last_linear, init_from_act_index=init_from_act_index).to(device)

        if params['use_qnet_weights']:
            load_weights(model)

        results = run_model(model, K, learning_rate=learning_rate, weight_decay=weight_decay, num_epochs=params['num_epochs'])

        print(f'Train pass no. {run_ix+1}')
        if (run_ix == 0) and plots:
            print('Debug charts for first training run')
            plot_metric(results, 'Loss')
            plot_metric(results, 'Accuracy')
            plot_metric(results, 'AUC')

        # Keep only the last-epoch value of every non-empty metric series.
        for val in results:
            if len(results[val]) > 0 and val != 'scoreChanges':
                averaged_results.setdefault(val, []).append(results[val][-1])

    return averaged_results, {x: sum(averaged_results[x]) / params['num_run'] for x in averaged_results}

## Initialise Subnets Search with activation that obtained optimal AUC in previous experiment

We do this both as a sanity check and as a potential improvement.

In [21]:
from sklearn import metrics 

# Collect, for every step of every loaded trajectory, the stored
# activations and the binary preference label (satisfaction above -6).
acts = []
prefs = []

for data in all_raw_data:
    for i in range(data.observation.shape[0]):
        acts.append(np.copy(data.policy_info["activations"][i]))
        prefs.append((data.policy_info['satisfaction'].as_list()[i] > -6).astype(int))

acts = np.array(acts)
prefs = np.array(prefs)

def display_auc_info(xs, ys, num_runs=50):
    """Print how well a single flattened feature of ``xs`` separates ``ys``.

    For each of ``num_runs`` random shuffles of ``(xs, ys)``, the ROC AUC of
    every flattened feature is computed on the first ``params['num_train']``
    examples. The per-feature AUCs are averaged over the shuffles and two
    lines are printed:

    * the feature with the lowest mean AUC, reported as ``1 - AUC`` (which is
      the AUC of the negated feature), and
    * the feature with the highest mean AUC.

    Each line shows the feature index, the mean "train" AUC and a "val" AUC
    computed on the examples after the first ``params['num_train']``.

    Args:
        xs: array whose first axis indexes examples; the remaining axes are
            flattened into features.
        ys: labels aligned with ``xs``; class ``1`` is treated as positive.
        num_runs: number of random shuffles to average over (default 50,
            the value previously hard-coded).

    Raises:
        ValueError: if ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")

    def calc_auc(features, labels, i):
        # ROC AUC obtained by using column i alone as the score.
        fpr, tpr, _ = metrics.roc_curve(labels, features[:, i], pos_label=1)
        return metrics.auc(fpr, tpr)

    num_train = params['num_train']
    per_run_aucs = []
    for _ in range(num_runs):
        xs, ys = shuffle(xs, ys)
        flat_xs = np.reshape(xs, (xs.shape[0], -1))
        train_xs, train_ys = flat_xs[:num_train], ys[:num_train]
        per_run_aucs.append(
            np.array([calc_auc(train_xs, train_ys, i) for i in range(flat_xs.shape[1])])
        )

    mean_aucs = np.array(per_run_aucs).mean(axis=0)
    worst_ix = np.argmin(mean_aucs)
    best_ix = np.argmax(mean_aucs)

    # NOTE(review): the "val" figures use the split left by the final shuffle
    # only, whereas the ranking averages over all shuffles; examples in this
    # validation slice may have been in the training slice of earlier shuffles.
    val_xs, val_ys = flat_xs[num_train:], ys[num_train:]

    print("AUC from only picking a single activation")
    print(worst_ix, "train", 1-np.min(mean_aucs), "val", 1-calc_auc(val_xs, val_ys, worst_ix))
    print(best_ix, "train", np.max(mean_aucs), "val", calc_auc(val_xs, val_ys, best_ix))
  
display_auc_info(acts, prefs)

AUC from only picking a single activation
34 train 0.8168389427248516 val 0.8203183087179845
13 train 0.6133060108723783 val 0.6179787990821131


In [22]:
# Hard-coded to the index printed on the lowest-mean-AUC line of
# display_auc_info in the previous cell's recorded output.
best_act_index = 34
K = 66774 / 67152  # Num of weights with all dense activations except one set to 0 / Number of total weights
# NOTE(review): the K computed above is never used — the call below passes
# K=1. Confirm which value is intended.

# A single epoch with zero learning rate and zero weight decay.
# NOTE: this mutates the shared params dict for every later cell.
params['num_epochs'] = 1
results = multi_runs(fine_tune=False, K=1, q_head_index=None, q_means_stds=q_mu_s, 
                     use_last_linear=False, init_from_act_index=best_act_index,
                     learning_rate=0.000, weight_decay=0.000, plots=False)

# multi_runs returns a tuple; element 0 is the dict of per-run final metrics.
# Prints 1 - (largest test AUC over the runs).
# NOTE(review): presumably inverted because activation 34 is the argmin
# (anti-correlated) activation reported by display_auc_info — confirm.
print(1-max(results[0]['testAUC']))

Train pass no. 1
Train pass no. 2
Train pass no. 3
Train pass no. 4
Train pass no. 5
0.8254807323986824


## Getting Results for Different Combinations

In [0]:
# Restore the full training length after the 1-epoch sanity check above.
params['num_epochs'] = 100

In [24]:
# Fine-tuning reference run: K=None, q_head_index=None (listed as "fc3" in the
# Results table), q_mu_s normalisation, lr=0.01, weight_decay=0.001.
multi_runs(fine_tune=True, K=None, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.01, weight_decay=0.001, plots=False)

Train pass no. 1
Train pass no. 2
Train pass no. 3
Train pass no. 4
Train pass no. 5


({'testAUC': [0.6906836808797593,
   0.8151207766592383,
   0.7207354027144712,
   0.8085122938064115,
   0.7416841396805676],
  'testAccuracy': [0.665, 0.745, 0.655, 0.7375, 0.71],
  'testLoss': [0.12993000447750092,
   0.15457528829574585,
   0.11470552533864975,
   0.09829843044281006,
   0.12614212930202484],
  'trainAUC': [1.0, 1.0, 1.0, 1.0, 1.0],
  'trainAccuracy': [1.0, 1.0, 1.0, 1.0, 1.0],
  'trainLoss': [0.0003728709416463971,
   0.00011889402230735868,
   0.0003117910528089851,
   0.0003292688634246588,
   0.00023946560395415872],
  'tuneLoss': [0.07673028111457825,
   0.13663232326507568,
   0.1911773979663849,
   0.24703745543956757,
   0.1049480214715004]},
 {'testAUC': 0.7553472587480897,
  'testAccuracy': 0.7025,
  'testLoss': 0.12473027557134628,
  'trainAUC': 1.0,
  'trainAccuracy': 1.0,
  'trainLoss': 0.0002744580968283117,
  'tuneLoss': 0.15130509585142135})

In [25]:
# Same arguments as the fine-tuning reference run (In [24]) except that
# q_means_stds is [[0,0,0],[1,1,1]] instead of q_mu_s.
# NOTE(review): presumably zero means / unit stds, i.e. no Q-value
# normalisation (norm_q_means=False in the Results table) — confirm.
multi_runs(fine_tune=True, K=None, q_head_index=None, q_means_stds=[[0,0,0],[1,1,1]], 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.01, weight_decay=0.001, plots=False)

Train pass no. 1
Train pass no. 2
Train pass no. 3
Train pass no. 4
Train pass no. 5


({'testAUC': [0.8995625275038182,
   0.8441666666666667,
   0.7726049027982707,
   0.7617784380305602,
   0.8433413536980856],
  'testAccuracy': [0.8, 0.8, 0.73, 0.715, 0.74],
  'testLoss': [0.12531328201293945,
   0.37798964977264404,
   0.3866333067417145,
   0.4151870012283325,
   0.10402786731719971],
  'trainAUC': [1.0, 0.9740259740259741, 1.0, 1.0, 1.0],
  'trainAccuracy': [1.0, 0.88, 1.0, 1.0, 1.0],
  'trainLoss': [3.7426249036798254e-05,
   0.0353105403482914,
   2.5645133064244874e-05,
   1.9554263417376205e-05,
   7.25729696569033e-05],
  'tuneLoss': [0.21842992305755615,
   0.4248124957084656,
   0.6506084203720093,
   0.697060227394104,
   0.32593879103660583]},
 {'testAUC': 0.8242907777394801,
  'testAccuracy': 0.757,
  'testLoss': 0.28183022141456604,
  'trainAUC': 0.9948051948051948,
  'trainAccuracy': 0.976,
  'trainLoss': 0.007093147792693344,
  'tuneLoss': 0.46336997151374815})

In [26]:
# Same arguments as the fine-tuning reference run (In [24]) except
# use_last_linear=False.
multi_runs(fine_tune=True, K=None, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=False, init_from_act_index=None,
           learning_rate=0.01, weight_decay=0.001, plots=False)

Train pass no. 1
Train pass no. 2
Train pass no. 3
Train pass no. 4
Train pass no. 5


({'testAUC': [0.7748549323017409,
   0.8385740801397531,
   0.755226480836237,
   0.7283735714826308,
   0.7619310734753669],
  'testAccuracy': [0.7275, 0.775, 0.6975, 0.6675, 0.71],
  'testLoss': [0.09886088222265244,
   0.08169195055961609,
   0.10054851323366165,
   0.12835179269313812,
   0.13617901504039764],
  'trainAUC': [1.0, 1.0, 1.0, 1.0, 1.0],
  'trainAccuracy': [1.0, 1.0, 1.0, 1.0, 1.0],
  'trainLoss': [0.0002572555677033961,
   0.00021848625328857452,
   0.00031790323555469513,
   0.00042096912511624396,
   0.0002733402361627668],
  'tuneLoss': [0.18685626983642578,
   0.1722647249698639,
   0.16332288086414337,
   0.07844734191894531,
   0.23171424865722656]},
 {'testAUC': 0.7717920276471457,
  'testAccuracy': 0.7155,
  'testLoss': 0.10912643074989319,
  'trainAUC': 1.0,
  'trainAccuracy': 1.0,
  'trainLoss': 0.0002975908835651353,
  'tuneLoss': 0.166521093249321})

In [27]:
# Fine-tuning with a smaller learning rate (0.005) and a larger weight decay
# (0.005) than the reference run (In [24]).
multi_runs(fine_tune=True, K=None, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.005, weight_decay=0.005, plots=False)

Train pass no. 1
Train pass no. 2
Train pass no. 3
Train pass no. 4
Train pass no. 5


({'testAUC': [0.7517241379310344,
   0.8531231697690408,
   0.8105423186810307,
   0.8253530478306392,
   0.8597676654798722],
  'testAccuracy': [0.7175, 0.8025, 0.7525, 0.7525, 0.7975],
  'testLoss': [0.12750987708568573,
   0.08972541242837906,
   0.12522943317890167,
   0.11508839577436447,
   0.10832610726356506],
  'trainAUC': [1.0, 1.0, 1.0, 1.0, 1.0],
  'trainAccuracy': [1.0, 1.0, 1.0, 1.0, 1.0],
  'trainLoss': [0.0006767626036889851,
   0.000630376860499382,
   0.0005270104738883674,
   0.0005537134129554033,
   0.00028094794834032655],
  'tuneLoss': [0.12058184295892715,
   0.04821425676345825,
   0.23647792637348175,
   0.04430011287331581,
   0.0719020813703537]},
 {'testAUC': 0.8201020679383234,
  'testAccuracy': 0.7645,
  'testLoss': 0.1131758451461792,
  'trainAUC': 1.0,
  'trainAccuracy': 1.0,
  'trainLoss': 0.0005337622598744929,
  'tuneLoss': 0.10429524406790733})

In [28]:
# Fine-tuning with q_head_index=0 instead of None; lr=0.01, weight_decay=0.005.
multi_runs(fine_tune=True, K=None, q_head_index=0, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.01, weight_decay=0.005, plots=False)

Train pass no. 1
Train pass no. 2
Train pass no. 3
Train pass no. 4
Train pass no. 5


({'testAUC': [0.8118566983164446,
   0.5558600172238315,
   0.789559748427673,
   0.8292198581560284,
   0.819068144499179],
  'testAccuracy': [0.7525, 0.5725, 0.7875, 0.7825, 0.73],
  'testLoss': [0.11455755680799484,
   0.14648042619228363,
   0.1544840782880783,
   0.06819981336593628,
   0.11620546132326126],
  'trainAUC': [1.0, 1.0, 1.0, 1.0, 1.0],
  'trainAccuracy': [1.0, 1.0, 1.0, 1.0, 1.0],
  'trainLoss': [0.0011434376938268542,
   0.0016285425517708063,
   0.00031225051498040557,
   0.0012264150427654386,
   0.0007125942502170801],
  'tuneLoss': [0.11026094108819962,
   0.1816493421792984,
   0.18949778378009796,
   0.12107405066490173,
   0.10686139762401581]},
 {'testAUC': 0.7611128933246312,
  'testAccuracy': 0.7249999999999999,
  'testLoss': 0.11998546719551087,
  'trainAUC': 1.0,
  'trainAccuracy': 1.0,
  'trainLoss': 0.0010046480107121169,
  'tuneLoss': 0.1418687030673027})

In [29]:
# Fine-tuning with a larger learning rate and weight decay (both 0.02).
multi_runs(fine_tune=True, K=None, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.02, weight_decay=0.02, plots=False)

Train pass no. 1
Train pass no. 2
Train pass no. 3
Train pass no. 4
Train pass no. 5


({'testAUC': [0.6674976227801908,
   0.758662724348999,
   0.7199561575197682,
   0.632303488860866,
   0.7362480909114442],
  'testAccuracy': [0.695, 0.685, 0.6775, 0.6325, 0.705],
  'testLoss': [0.14450573921203613,
   0.10684135556221008,
   0.15463560819625854,
   0.13950411975383759,
   0.08086144179105759],
  'trainAUC': [0.8596491228070176, 1.0, 1.0, 1.0, 0.9930555555555556],
  'trainAccuracy': [0.82, 1.0, 0.98, 1.0, 0.96],
  'trainLoss': [0.051631756126880646,
   0.004716536495834589,
   0.00461394852027297,
   0.0012791382614523172,
   0.01370335929095745],
  'tuneLoss': [0.25260892510414124,
   0.11049012839794159,
   0.16391296684741974,
   0.18502095341682434,
   0.12671466171741486]},
 {'testAUC': 0.7029336168842535,
  'testAccuracy': 0.679,
  'testLoss': 0.12526965290307998,
  'trainAUC': 0.9705409356725146,
  'trainAccuracy': 0.952,
  'trainLoss': 0.015188947739079594,
  'tuneLoss': 0.16774952709674834})

In [30]:
# No fine-tuning (fine_tune=False) with K=0.9; lr=0.01, weight_decay=0.001.
multi_runs(fine_tune=False, K=0.9, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.01, weight_decay=0.001, plots=False)

Train pass no. 1
Train pass no. 2
Train pass no. 3
Train pass no. 4
Train pass no. 5


({'testAUC': [0.39635416666666673,
   0.768603706324867,
   0.8624240551951394,
   0.8485924013874325,
   0.8491711540492027],
  'testAccuracy': [0.5875, 0.6775, 0.7875, 0.7975, 0.775],
  'testLoss': [0.07246517390012741,
   0.0593697614967823,
   0.053132064640522,
   0.05517274886369705,
   0.05698756128549576],
  'trainAUC': [0.6397058823529411,
   0.8269230769230769,
   0.8181818181818181,
   0.8833333333333333,
   0.965909090909091],
  'trainAccuracy': [0.62, 0.74, 0.74, 0.8, 0.92],
  'trainLoss': [0.06489463150501251,
   0.05627024918794632,
   0.055982112884521484,
   0.041007645428180695,
   0.025145143270492554],
  'tuneLoss': [0.1018923819065094,
   0.0648772194981575,
   0.04325742647051811,
   0.1011604443192482,
   0.07257822155952454]},
 {'testAUC': 0.7450290967246618,
  'testAccuracy': 0.725,
  'testLoss': 0.059425462037324905,
  'trainAUC': 0.8268106403400521,
  'trainAccuracy': 0.7639999999999999,
  'trainLoss': 0.04865995645523071,
  'tuneLoss': 0.07675313875079155})

In [31]:
# No fine-tuning with a smaller K (0.6); lr=0.01, weight_decay=0.001.
multi_runs(fine_tune=False, K=0.6, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.01, weight_decay=0.001, plots=False)

Train pass no. 1
Train pass no. 2
Train pass no. 3
Train pass no. 4
Train pass no. 5


({'testAUC': [0.5978670177748517,
   0.8302399627242371,
   0.8734820824881677,
   0.8253927242662258,
   0.7136785978249393],
  'testAccuracy': [0.5825, 0.705, 0.7825, 0.795, 0.61],
  'testLoss': [0.06931723654270172,
   0.05731850862503052,
   0.048672523349523544,
   0.05553197115659714,
   0.06524057686328888],
  'trainAUC': [0.7069243156199678,
   0.9259259259259259,
   0.8455882352941176,
   0.9322638146167558,
   0.8045977011494252],
  'trainAccuracy': [0.68, 0.88, 0.82, 0.88, 0.82],
  'trainLoss': [0.06171843782067299,
   0.03910035640001297,
   0.05351971462368965,
   0.03264171630144119,
   0.05738341063261032],
  'tuneLoss': [0.06355435401201248,
   0.06309343129396439,
   0.07828254252672195,
   0.06203703582286835,
   0.08579516410827637]},
 {'testAUC': 0.7681320770156842,
  'testAccuracy': 0.6950000000000001,
  'testLoss': 0.05921616330742836,
  'trainAUC': 0.8430599985212386,
  'trainAccuracy': 0.8160000000000001,
  'trainLoss': 0.04887272715568543,
  'tuneLoss': 0.07055

In [32]:
# Same as the previous K=0.6 run but with a ten times smaller learning rate
# (0.001).
multi_runs(fine_tune=False, K=0.6, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.001, weight_decay=0.001, plots=False)

Train pass no. 1
Train pass no. 2
Train pass no. 3
Train pass no. 4
Train pass no. 5


({'testAUC': [0.8916292003430443,
   0.8289628914628914,
   0.7654018767849857,
   0.8291145833333333,
   0.872281564589257],
  'testAccuracy': [0.5825, 0.48, 0.695, 0.7675, 0.78],
  'testLoss': [0.06362002342939377,
   0.06852685660123825,
   0.06734263896942139,
   0.05265888571739197,
   0.05053317919373512],
  'trainAUC': [0.7934027777777778,
   0.6983333333333334,
   0.833616298811545,
   0.9233333333333333,
   0.9504],
  'trainAccuracy': [0.68, 0.62, 0.82, 0.82, 0.88],
  'trainLoss': [0.06113786622881889,
   0.0658850371837616,
   0.048361096531152725,
   0.03763493150472641,
   0.03457426652312279],
  'tuneLoss': [0.08563832938671112,
   0.08356793969869614,
   0.0528334379196167,
   0.06560914218425751,
   0.056474268436431885]},
 {'testAUC': 0.8374780233027023,
  'testAccuracy': 0.6609999999999999,
  'testLoss': 0.0605363167822361,
  'trainAUC': 0.8398171486511978,
  'trainAccuracy': 0.764,
  'trainLoss': 0.04951863959431648,
  'tuneLoss': 0.06882462352514267})

In [33]:
# No fine-tuning, K=0.9 with use_last_linear=False; lr=0.005,
# weight_decay=0.001.
multi_runs(fine_tune=False, K=0.9, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=False, init_from_act_index=None,
           learning_rate=0.005, weight_decay=0.001, plots=False)

Train pass no. 1
Train pass no. 2
Train pass no. 3
Train pass no. 4
Train pass no. 5


({'testAUC': [0.3935055503682955,
   0.25936814958091553,
   0.45587912087912086,
   0.43270948202221016,
   0.38731640146878826],
  'testAccuracy': [0.5175, 0.38, 0.6175, 0.5725, 0.545],
  'testLoss': [0.12048670649528503,
   0.11617061495780945,
   0.0905999168753624,
   0.10561389476060867,
   0.10739358514547348],
  'trainAUC': [0.541871921182266,
   0.3397745571658615,
   0.5566502463054187,
   0.44871794871794873,
   0.3246527777777778],
  'trainAccuracy': [0.6, 0.48, 0.62, 0.52, 0.54],
  'trainLoss': [0.08359470963478088,
   0.1006491556763649,
   0.09294570982456207,
   0.10623269528150558,
   0.1131293773651123],
  'tuneLoss': [0.12242378294467926,
   0.14295832812786102,
   0.11155940592288971,
   0.1432107388973236,
   0.13935348391532898]},
 {'testAUC': 0.38575574086386605,
  'testAccuracy': 0.5265000000000001,
  'testLoss': 0.1080529436469078,
  'trainAUC': 0.4423334902298546,
  'trainAccuracy': 0.552,
  'trainLoss': 0.09931032955646515,
  'tuneLoss': 0.1319011479616165})

In [0]:
# No fine-tuning, K=0.95, initialised from activation 34
# (init_from_act_index=34); lr=0.01, weight_decay=0.0001.
multi_runs(fine_tune=False, K=0.95, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=34,
           learning_rate=0.01, weight_decay=0.0001, plots=False)

In [0]:
# No fine-tuning, K=0.95, activation-34 initialisation, now with
# q_head_index=0 and weight_decay=0.0005.
multi_runs(fine_tune=False, K=0.95, q_head_index=0, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=34,
           learning_rate=0.01, weight_decay=0.0005, plots=False)

In [0]:
# Same as the previous cell except q_means_stds is [[0,0,0], [1,1,1]]
# instead of q_mu_s.
# NOTE(review): presumably zero means / unit stds, i.e. no Q-value
# normalisation — confirm.
multi_runs(fine_tune=False, K=0.95, q_head_index=0, q_means_stds=[[0,0,0], [1,1,1]], 
           use_last_linear=True, init_from_act_index=34,
           learning_rate=0.01, weight_decay=0.0005, plots=False)

In [0]:
# No fine-tuning, q_head_index=0, activation-34 initialisation, with a much
# smaller K (0.5).
multi_runs(fine_tune=False, K=0.5, q_head_index=0, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=34,
           learning_rate=0.01, weight_decay=0.0005, plots=False)

In [0]:
# No fine-tuning, K=0.5, q_head_index=0, without activation-based
# initialisation (init_from_act_index=None).
multi_runs(fine_tune=False, K=0.5, q_head_index=0, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.01, weight_decay=0.0005, plots=False)

In [0]:
# No fine-tuning, K=0.95, q_head_index=0, without activation-based
# initialisation (init_from_act_index=None).
multi_runs(fine_tune=False, K=0.95, q_head_index=0, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.01, weight_decay=0.0005, plots=False)

In [0]:
# No fine-tuning, K=0.95, q_head_index=0, activation-34 initialisation, with
# a larger learning rate (0.1).
multi_runs(fine_tune=False, K=0.95, q_head_index=0, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=34,
           learning_rate=0.1, weight_decay=0.0005, plots=False)

In [0]:
# Same as the previous cell but initialised from activation 4 instead of 34.
multi_runs(fine_tune=False, K=0.95, q_head_index=0, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=4,
           learning_rate=0.1, weight_decay=0.0005, plots=False)

In [0]:
# No fine-tuning, q_head_index=0, activation-34 initialisation, lr=0.1, with
# K raised to 0.99.
multi_runs(fine_tune=False, K=0.99, q_head_index=0, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=34,
           learning_rate=0.1, weight_decay=0.0005, plots=False)

In [0]:
# Fine-tuning with a large learning rate (0.1) and weight_decay=0.0005.
multi_runs(fine_tune=True, K=None, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.1, weight_decay=0.0005, plots=False)

In [0]:
# This call is identical to the one in the previous cell.
# NOTE(review): the Results table lists this configuration twice with
# different optimisers, so presumably the optimiser was switched in the model
# code between the two runs — confirm.
multi_runs(fine_tune=True, K=None, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.1, weight_decay=0.0005, plots=False)

In [0]:
# Fine-tuning with lr=0.01 and a small weight decay (0.0001).
multi_runs(fine_tune=True, K=None, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.01, weight_decay=0.0001, plots=False)

In [0]:
# NOTE: this run was made with dropout temporarily added to the model code.
# That change is not part of this notebook, so the result cannot be reproduced
# by re-running the cell as-is.
multi_runs(fine_tune=True, K=None, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=None,
           learning_rate=0.005, weight_decay=0.0005, plots=False)

In [0]:
# NOTE: this run was made with dropout temporarily added to the model code.
# That change is not part of this notebook, so the result cannot be reproduced
# by re-running the cell as-is.
# Differs from the previous cell only in init_from_act_index=34.
multi_runs(fine_tune=True, K=None, q_head_index=None, q_means_stds=q_mu_s, 
           use_last_linear=True, init_from_act_index=34,
           learning_rate=0.005, weight_decay=0.0005, plots=False)

### Results

All runs use the following parameters:

```
{'batch_size': 10,
 'num_epochs': 100,
 'num_run': 5,
 'num_train': 50,
 'num_tune': 25,
 'num_val': 400,
 'use_mnist': False,
 'use_qnet_weights': True,
 'val_batch_size': 10}
 ```

| fine_tune      | q_head_index | norm_q_means | use_last_linear | init_from_act_index | K     | Optimiser | learning_rate | momentum | weight_decay | testAUC |
|----------------|--------------|--------------|-----------------|---------------------|-------|-----------|---------------|----------|--------------|---------|
| False          | None (fc3)   | True         | True            | Activation 34       | ~0.99 | Adam      | 0.000         | None     | 0.000        | 0.83    |
| True           | None (fc3)   | True         | True            | None                | 1     | Adam      | 0.01          | None     | 0.001        | 0.83    |
| True           | None (fc3)   | False        | True            | None                | 1     | Adam      | 0.01          | None     | 0.001        | 0.76    |
| True           | None (fc3)   | True         | False           | None                | 1     | Adam      | 0.01          | None     | 0.001        | 0.68    |
| True           | None (fc3)   | True         | False           | None                | 1     | Adam      | 0.005         | None     | 0.005        | 0.81    |
| True           | 0            | True         | True            | None                | 1     | Adam      | 0.01          | None     | 0.005        | 0.76    |
| True           | None (fc3)   | True         | True            | None                | 1     | Adam      | 0.1           | None     | 0.001        | 0.51    |
| True           | None (fc3)   | True         | True            | None                | 1     | Adam      | 0.02          | None     | 0.02         | 0.71    |
| False          | None (fc3)   | True         | True            | None                | 0.9   | Adam      | 0.01          | None     | 0.001        | 0.75    |
| False          | None (fc3)   | True         | True            | None                | 0.6   | Adam      | 0.01          | None     | 0.001        | 0.67    |
| False          | None (fc3)   | True         | True            | None                | 0.6   | Adam      | 0.001         | None     | 0.001        | 0.68    |
| False          | None (fc3)   | True         | False           | None                | 0.9   | Adam      | 0.005         | None     | 0.001        | 0.4     |
| False          | None (fc3)   | True         | True            | Activation 34       | 0.95  | Adam      | 0.01          | None     | 0.0001       | 0.77    |
| False          | 0            | True         | True            | Activation 34       | 0.95  | Adam      | 0.01          | None     | 0.0005       | 0.81    |
| False          | 0            | False        | True            | Activation 34       | 0.95  | Adam      | 0.01          | None     | 0.0005       | 0.71    |
| False          | 0            | True         | True            | Activation 34       | 0.5   | Adam      | 0.01          | None     | 0.0005       | 0.55    |
| False          | 0            | True         | True            | None                | 0.5   | SGD       | 0.1           | 0.9      | 0.0005       | 0.73    |
| False          | 0            | True         | True            | None                | 0.95  | SGD       | 0.1           | 0.9      | 0.0005       | 0.79    |
| False          | 0            | True         | True            | Activation 34       | 0.95  | SGD       | 0.1           | 0.9      | 0.0005       | 0.66    |
| False          | 0            | True         | True            | Activation 4        | 0.95  | SGD       | 0.1           | 0.9      | 0.0005       | 0.57    |
| False          | 0            | True         | True            | Activation 34       | 0.99  | SGD       | 0.1           | 0.9      | 0.0005       | 0.66    |
| True           | None (fc3)   | True         | True            | None                | 1     | SGD       | 0.1           | 0.9      | 0.0005       | 0.5     |
| True           | None (fc3)   | True         | True            | None                | 1     | Adam      | 0.1           | None     | 0.0005       | 0.51    |
| True w/DropOut | None (fc3)   | True         | True            | None                | 1     | Adam      | 0.005         | None     | 0.0005       | 0.81    |
| True w/DropOut | None (fc3)   | True         | True            | Activation 34       | 1     | Adam      | 0.005         | None     | 0.005        | 0.78    |

## Hyperparameter Tuning (not used)

In [0]:
def multi_runs_tune(config, reporter):
    """Train ``params['num_run']`` models for one hyper-parameter sample.

    Intended as the trainable passed to ``tune.run`` in ``launch_tune``.
    After every run, the running mean of the final ``'tuneLoss'`` values seen
    so far is reported through ``reporter``.

    Args:
        config: sampled hyper-parameters; ``'lr'`` and ``'decay'`` are always
            read, ``'k'`` only when ``params['fine_tune']`` is falsy.
        reporter: callback invoked as
            ``reporter(timesteps_total=run_ix, mean_loss=...)``; when ``None``
            the models are still trained but nothing is reported.
    """
    tune_loss_history = []
    for run_ix in range(params['num_run']):
        # K is only taken from the sampled config when not fine-tuning.
        K = None if params['fine_tune'] else config['k']

        model = PrefQNet(
            fine_tune=params['fine_tune'],
            k=K,
            q_head_index=None,
            q_means_stds=q_mu_s,
            use_last_linear=True,
            init_from_act_index=34,
        ).to(device)

        if params['use_qnet_weights']:
            load_weights(model)

        results = run_model(model, K, config['lr'], config['decay'], num_epochs=params['num_epochs'])

        if reporter is None:
            continue

        # Hyper-parameter tuning pass: report the mean tuning loss so far.
        tune_loss_history.append(results['tuneLoss'][-1])
        running_mean = sum(tune_loss_history) / len(tune_loss_history)
        reporter(timesteps_total=run_ix, mean_loss=running_mean)

def launch_tune():
    """Launch the Ray Tune hyper-parameter search and return its result.

    The search samples ``k``, ``lr`` and ``decay`` with ``BayesOptSearch``,
    minimising the ``mean_loss`` reported by ``multi_runs_tune``, under an
    ``AsyncHyperBandScheduler``. ``params['num_tune_iters']`` samples are
    drawn and each trial is stopped at ``timesteps_total ==
    params['num_run']``.

    Returns:
        Whatever ``tune.run`` returns for the experiment named ``"tune_exp"``.
    """
    # multi_runs_tune does not read 'k' when fine-tuning, so its range is
    # collapsed to a single point in that case.
    k_bounds = (1, 1) if params['fine_tune'] else (0.05, 0.95)
    search_space = {
        "k": k_bounds,
        "lr": (0.001, 0.1),
        'decay': (0.0001, 0.05),
    }

    run_options = {
        "num_samples": params['num_tune_iters'],
        "stop": {"timesteps_total": params['num_run']},
    }

    searcher = BayesOptSearch(
        search_space,
        metric="mean_loss",
        mode="min",
        utility_kwargs={"kind": "ucb", "kappa": 2.5, "xi": 0.0},
    )
    hyperband = AsyncHyperBandScheduler(metric="mean_loss", mode="min")

    return tune.run(
        multi_runs_tune,
        resources_per_trial={'gpu': 1, 'cpu': 2},
        verbose=1,
        name="tune_exp",
        search_alg=searcher,
        scheduler=hyperband,
        **run_options,
    )