In [37]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time
from absl import app
from absl import flags
import numpy as np
import os
import tensorflow as tf
import pandas as pd

from bandits.algorithms.bootstrapped_bnn_sampling import BootstrappedBNNSampling
from bandits.core.contextual_bandit import run_contextual_bandit
from bandits.data.data_sampler import sample_adult_data
from bandits.data.data_sampler import sample_census_data
from bandits.data.data_sampler import sample_covertype_data
from bandits.data.data_sampler import sample_jester_data

from bandits.data.data_sampler import sample_statlog_data
from bandits.data.data_sampler import sample_stock_data
from bandits.algorithms.fixed_policy_sampling import FixedPolicySampling
from bandits.algorithms.linear_full_posterior_sampling import LinearFullPosteriorSampling
from bandits.algorithms.neural_linear_sampling import NeuralLinearPosteriorSampling
from bandits.algorithms.parameter_noise_sampling import ParameterNoiseSampling
from bandits.algorithms.posterior_bnn_sampling import PosteriorBNNSampling
from bandits.data.synthetic_data_sampler import sample_linear_data
from bandits.data.synthetic_data_sampler import sample_sparse_linear_data
from bandits.data.synthetic_data_sampler import sample_wheel_bandit_data
from bandits.algorithms.uniform_sampling import UniformSampling


import numpy as np
import pandas as pd
import tensorflow as tf



In [45]:
# Set up your file routes to the data files.
base_route = os.getcwd()
data_route = 'contextual_bandits/datasets'

FLAGS = flags.FLAGS
FLAGS.set_default('alsologtostderr', True)
# flags.DEFINE_string('logdir', '/tmp/bandits/', 'Base directory to save output')
# flags.DEFINE_string(
#     'mushroom_data',
#     os.path.join(base_route, data_route, 'mushroom.data'),
#     'Directory where Mushroom data is stored.')
# flags.DEFINE_string(
#     'financial_data',
#     os.path.join(base_route, data_route, 'raw_stock_contexts'),
#     'Directory where Financial data is stored.')
# flags.DEFINE_string(
#     'jester_data',
#     os.path.join(base_route, data_route, 'jester_data_40jokes_19181users.npy'),
#     'Directory where Jester data is stored.')
# flags.DEFINE_string(
#     'statlog_data',
#     os.path.join(base_route, data_route, 'shuttle.trn'),
#     'Directory where Statlog data is stored.')
# flags.DEFINE_string(
#     'adult_data',
#     os.path.join(base_route, data_route, 'adult.full'),
#     'Directory where Adult data is stored.')
# flags.DEFINE_string(
#     'covertype_data',
#     os.path.join(base_route, data_route, 'covtype.data'),
#     'Directory where Covertype data is stored.')
# flags.DEFINE_string(
#     'census_data',
#     os.path.join(base_route, data_route, 'USCensus1990.data.txt'),
#     'Directory where Census data is stored.')

In [42]:


def one_hot(df, cols):
  """Returns one-hot encoding of DataFrame df including columns in cols."""
  for col in cols:
    dummies = pd.get_dummies(df[col], prefix=col, drop_first=False)
    df = pd.concat([df, dummies], axis=1)
    df = df.drop(col, axis=1)
  return df


def sample_mushroom_data(file_name,
                         num_contexts,
                         r_noeat=0,
                         r_eat_safe=5,
                         r_eat_poison_bad=-35,
                         r_eat_poison_good=5,
                         prob_poison_bad=0.5):
  """Samples bandit game from Mushroom UCI Dataset.

  Args:
    file_name: Route of file containing the original Mushroom UCI dataset.
    num_contexts: Number of points to sample, i.e. (context, action rewards).
    r_noeat: Reward for not eating a mushroom.
    r_eat_safe: Reward for eating a non-poisonous mushroom.
    r_eat_poison_bad: Reward for eating a poisonous mushroom if harmed.
    r_eat_poison_good: Reward for eating a poisonous mushroom if not harmed.
    prob_poison_bad: Probability of being harmed by eating a poisonous mushroom.

  Returns:
    dataset: Sampled matrix with n rows: (context, eat_reward, no_eat_reward).
    opt_vals: Vector of expected optimal (reward, action) for each context.

  We assume r_eat_safe > r_noeat, and r_eat_poison_good > r_eat_poison_bad.
  """

  # first two cols of df encode whether mushroom is edible or poisonous
  df = pd.read_csv(file_name, header=None)
  df = one_hot(df, df.columns)
  ind = np.random.choice(range(df.shape[0]), num_contexts, replace=True)

  contexts = df.iloc[ind, 2:]
  no_eat_reward = r_noeat * np.ones((num_contexts, 1))
  random_poison = np.random.choice(
      [r_eat_poison_bad, r_eat_poison_good],
      p=[prob_poison_bad, 1 - prob_poison_bad],
      size=num_contexts)
  eat_reward = r_eat_safe * df.iloc[ind, 0]
  eat_reward += np.multiply(random_poison, df.iloc[ind, 1])
  eat_reward = eat_reward.values.reshape((num_contexts, 1))

  # compute optimal expected reward and optimal actions
  exp_eat_poison_reward = r_eat_poison_bad * prob_poison_bad
  exp_eat_poison_reward += r_eat_poison_good * (1 - prob_poison_bad)
  opt_exp_reward = r_eat_safe * df.iloc[ind, 0] + max(
      r_noeat, exp_eat_poison_reward) * df.iloc[ind, 1]

  if r_noeat > exp_eat_poison_reward:
    # actions: no eat = 0 ; eat = 1
    opt_actions = df.iloc[ind, 0]  # indicator of edible
  else:
    # should always eat (higher expected reward)
    opt_actions = np.ones((num_contexts, 1))

  opt_vals = (opt_exp_reward.values, opt_actions.values)

  return np.hstack((contexts, no_eat_reward, eat_reward)), opt_vals


In [43]:
def sample_data(data_type, num_contexts=None):
  """Sample data from given 'data_type'.

  Args:
    data_type: Dataset from which to sample.
    num_contexts: Number of contexts to sample.

  Returns:
    dataset: Sampled matrix with rows: (context, reward_1, ..., reward_num_act).
    opt_rewards: Vector of expected optimal reward for each context.
    opt_actions: Vector of optimal action for each context.
    num_actions: Number of available actions.
    context_dim: Dimension of each context.
  """

  if data_type == 'linear':
    # Create linear dataset
    num_actions = 8
    context_dim = 10
    noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
    dataset, _, opt_linear = sample_linear_data(num_contexts, context_dim,
                                                num_actions, sigma=noise_stds)
    opt_rewards, opt_actions = opt_linear
  elif data_type == 'sparse_linear':
    # Create sparse linear dataset
    num_actions = 7
    context_dim = 10
    noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
    num_nnz_dims = int(context_dim / 3.0)
    dataset, _, opt_sparse_linear = sample_sparse_linear_data(
        num_contexts, context_dim, num_actions, num_nnz_dims, sigma=noise_stds)
    opt_rewards, opt_actions = opt_sparse_linear
  elif data_type == 'mushroom':
    # Create mushroom dataset
    num_actions = 2
    context_dim = 117
    file_name = 'contextual_bandits/datasets/mushroom.data'
    dataset, opt_mushroom = sample_mushroom_data(file_name, num_contexts)
    opt_rewards, opt_actions = opt_mushroom
  elif data_type == 'financial':
    num_actions = 8
    context_dim = 21
    num_contexts = min(3713, num_contexts)
    noise_stds = [0.01 * (i + 1) for i in range(num_actions)]
    file_name = FLAGS.financial_data
    dataset, opt_financial = sample_stock_data(file_name, context_dim,
                                               num_actions, num_contexts,
                                               noise_stds, shuffle_rows=True)
    opt_rewards, opt_actions = opt_financial
  elif data_type == 'jester':
    num_actions = 8
    context_dim = 32
    num_contexts = min(19181, num_contexts)
    file_name = FLAGS.jester_data
    dataset, opt_jester = sample_jester_data(file_name, context_dim,
                                             num_actions, num_contexts,
                                             shuffle_rows=True,
                                             shuffle_cols=True)
    opt_rewards, opt_actions = opt_jester
  elif data_type == 'statlog':
    file_name = FLAGS.statlog_data
    num_actions = 7
    num_contexts = min(43500, num_contexts)
    sampled_vals = sample_statlog_data(file_name, num_contexts,
                                       shuffle_rows=True)
    contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
    dataset = np.hstack((contexts, rewards))
    context_dim = contexts.shape[1]
  elif data_type == 'adult':
    file_name = FLAGS.adult_data
    num_actions = 14
    num_contexts = min(45222, num_contexts)
    sampled_vals = sample_adult_data(file_name, num_contexts,
                                     shuffle_rows=True)
    contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
    dataset = np.hstack((contexts, rewards))
    context_dim = contexts.shape[1]
  elif data_type == 'covertype':
    file_name = FLAGS.covertype_data
    num_actions = 7
    num_contexts = min(150000, num_contexts)
    sampled_vals = sample_covertype_data(file_name, num_contexts,
                                         shuffle_rows=True)
    contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
    dataset = np.hstack((contexts, rewards))
    context_dim = contexts.shape[1]
  elif data_type == 'census':
    file_name = FLAGS.census_data
    num_actions = 9
    num_contexts = min(150000, num_contexts)
    sampled_vals = sample_census_data(file_name, num_contexts,
                                      shuffle_rows=True)
    contexts, rewards, (opt_rewards, opt_actions) = sampled_vals
    dataset = np.hstack((contexts, rewards))
    context_dim = contexts.shape[1]
  elif data_type == 'wheel':
    delta = 0.95
    num_actions = 5
    context_dim = 2
    mean_v = [1.0, 1.0, 1.0, 1.0, 1.2]
    std_v = [0.05, 0.05, 0.05, 0.05, 0.05]
    mu_large = 50
    std_large = 0.01
    dataset, opt_wheel = sample_wheel_bandit_data(num_contexts, delta,
                                                  mean_v, std_v,
                                                  mu_large, std_large)
    opt_rewards, opt_actions = opt_wheel

  return dataset, opt_rewards, opt_actions, num_actions, context_dim


def display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, name):
  """Displays summary statistics of the performance of each algorithm."""

  print('---------------------------------------------------')
  print('---------------------------------------------------')
  print('{} bandit completed after {} seconds.'.format(
    name, time.time() - t_init))
  print('---------------------------------------------------')

  performance_pairs = []
  for j, a in enumerate(algos):
    performance_pairs.append((a.name, np.sum(h_rewards[:, j])))
  performance_pairs = sorted(performance_pairs,
                             key=lambda elt: elt[1],
                             reverse=True)
  for i, (name, reward) in enumerate(performance_pairs):
    print('{:3}) {:20}| \t \t total reward = {:10}.'.format(i, name, reward))

  print('---------------------------------------------------')
  print('Optimal total reward = {}.'.format(np.sum(opt_rewards)))
  print('Frequency of optimal actions (action, frequency):')
  print([[elt, list(opt_actions).count(elt)] for elt in set(opt_actions)])
  print('---------------------------------------------------')
  print('---------------------------------------------------')

In [44]:


# Problem parameters
num_contexts = 2000

# Data type in {linear, sparse_linear, mushroom, financial, jester,
#                 statlog, adult, covertype, census, wheel}
data_type = 'mushroom'

# Create dataset
sampled_vals = sample_data(data_type, num_contexts)
dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

# Define hyperparameters and algorithms
hparams = tf.contrib.training.HParams(num_actions=num_actions)

hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                           context_dim=context_dim,
                                           a0=6,
                                           b0=6,
                                           lambda_prior=0.25,
                                           initial_pulls=2)

hparams_nlinear = tf.contrib.training.HParams(num_actions=num_actions,
                                            context_dim=context_dim,
                                            init_scale=0.3,
                                            activation=tf.nn.relu,
                                            layer_sizes=[50],
                                            batch_size=512,
                                            activate_decay=True,
                                            initial_lr=0.1,
                                            max_grad_norm=5.0,
                                            show_training=False,
                                            freq_summary=1000,
                                            buffer_s=-1,
                                            initial_pulls=2,
                                            reset_lr=True,
                                            lr_decay_rate=0.5,
                                            training_freq=1,
                                            training_freq_network=50,
                                            training_epochs=100,
                                            a0=6,
                                            b0=6,
                                            lambda_prior=0.25)

hparams_nlinear2 = tf.contrib.training.HParams(num_actions=num_actions,
                                             context_dim=context_dim,
                                             init_scale=0.3,
                                             activation=tf.nn.relu,
                                             layer_sizes=[50],
                                             batch_size=512,
                                             activate_decay=True,
                                             initial_lr=0.1,
                                             max_grad_norm=5.0,
                                             show_training=False,
                                             freq_summary=1000,
                                             buffer_s=-1,
                                             initial_pulls=2,
                                             reset_lr=True,
                                             lr_decay_rate=0.5,
                                             training_freq=10,
                                             training_freq_network=50,
                                             training_epochs=100,
                                             a0=6,
                                             b0=6,
                                             lambda_prior=0.25)


algos = [
  UniformSampling('Uniform Sampling', hparams),
  UniformSampling('Uniform Sampling 2', hparams),
  NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
  NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
]

# Run contextual bandit problem
t_init = time.time()
results = run_contextual_bandit(context_dim, num_actions, dataset, algos)
_, h_rewards = results

# Display results
display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, data_type)





W0520 11:10:06.436576 63272 module_wrapper.py:139] From C:\Users\1\Documents\Github\models\research\deep_contextual_bandits\bandits\algorithms\neural_bandit_model.py:101: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.






W0520 11:10:07.131717 63272 module_wrapper.py:139] From C:\Users\1\Documents\Github\models\research\deep_contextual_bandits\bandits\algorithms\neural_bandit_model.py:105: The name tf.train.get_or_create_global_step is deprecated. Please use tf.compat.v1.train.get_or_create_global_step instead.






W0520 11:10:07.138698 63272 module_wrapper.py:139] From C:\Users\1\Documents\Github\models\research\deep_contextual_bandits\bandits\algorithms\neural_bandit_model.py:108: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.






W0520 11:10:07.141691 63272 module_wrapper.py:139] From C:\Users\1\Documents\Github\models\research\deep_contextual_bandits\bandits\algorithms\neural_bandit_model.py:74: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.






W0520 11:10:07.142688 63272 module_wrapper.py:139] From C:\Users\1\Documents\Github\models\research\deep_contextual_bandits\bandits\algorithms\neural_bandit_model.py:74: The name tf.AUTO_REUSE is deprecated. Please use tf.compat.v1.AUTO_REUSE instead.



Instructions for updating:
Please use `layer.__call__` method instead.


W0520 11:10:07.144683 63272 deprecation.py:323] From D:\anaconda\envs\tf_gpu_1\lib\site-packages\tensorflow_core\contrib\layers\python\layers\layers.py:1866: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.__call__` method instead.


Instructions for updating:
Use keras.layers.Dense instead.


W0520 11:10:07.158645 63272 deprecation.py:323] From C:\Users\1\Documents\Github\models\research\deep_contextual_bandits\bandits\algorithms\neural_bandit_model.py:83: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.Dense instead.





W0520 11:10:07.172635 63272 module_wrapper.py:139] From C:\Users\1\Documents\Github\models\research\deep_contextual_bandits\bandits\algorithms\neural_bandit_model.py:127: The name tf.squared_difference is deprecated. Please use tf.math.squared_difference instead.






W0520 11:10:07.176596 63272 module_wrapper.py:139] From C:\Users\1\Documents\Github\models\research\deep_contextual_bandits\bandits\algorithms\neural_bandit_model.py:132: The name tf.train.inverse_time_decay is deprecated. Please use tf.compat.v1.train.inverse_time_decay instead.






W0520 11:10:07.182581 63272 module_wrapper.py:139] From C:\Users\1\Documents\Github\models\research\deep_contextual_bandits\bandits\algorithms\neural_bandit_model.py:190: The name tf.summary.scalar is deprecated. Please use tf.compat.v1.summary.scalar instead.






W0520 11:10:07.186570 63272 module_wrapper.py:139] From C:\Users\1\Documents\Github\models\research\deep_contextual_bandits\bandits\algorithms\neural_bandit_model.py:193: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.






W0520 11:10:07.187567 63272 module_wrapper.py:139] From C:\Users\1\Documents\Github\models\research\deep_contextual_bandits\bandits\algorithms\neural_bandit_model.py:140: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.



UnparsedFlagAccessError: Trying to access flag --logdir before flags were parsed.

UnparsedFlagAccessError: Trying to access flag --mushroom_data before flags were parsed.