In [45]:
import numpy as np
import tensorflow as tf

class HashDND(object):
    """differentiable neural dictionary, using LSH for approximate
    nearest neighbour lookup. Assumes keys are vectors. Also assumes we only
    use float32 and doesn't handle batched operations :("""

    sentinel_value = np.inf

    @classmethod
    def _setup_variables(cls, hash_bits, max_neighbours, key_size, value_shape):
        """setup variables with appropriate initializers given the shapes"""
        init = tf.constant_initializer(cls.sentinel_value)
        if value_shape is None:
            value_shape = [2**hash_bits * max_neighbours,]
        else:
            value_shape = [2**hash_bits * max_neighbours,] + list(value_shape)
        values = tf.get_variable(name='values',
                                  shape=value_shape,
                                  initializer=init)
        keys = tf.get_variable(name='keys',
                           shape=[2**hash_bits * max_neighbours, key_size],
                           initializer=init)
        return keys, values

    def __init__(self, hash_bits, max_neighbours, key_size,
                 similarity_measure=None, value_shape=None,  scope_name='dnd'):
        """Set up the dnd.

        Args:
            hash_bits (int): how many bits for the hash. There will be
                `2**num_bits` individual buckets.
            max_neighbours (int): how many entries to store in each bucket.
                This controls the number of neighbours we check against.
                Operations will be linear in this value and it will likely
                effect learning performance significantly as well.
            key_size (int): size of the key vectors. We use the unhashed key
                vectors to compute similarities between keys we find from the
                nearest neighbour lookup.
            value_shapes (list): list of shapes for the values stored in the
                dictionary.
            similarity_measure (Optional[callable]): function which adds ops
                to compare a query key with all of the other keys in the
                bucket. If unspecified, the cosine similarity is used. Should
                be a callable which takes two input tensors: the query key
                (shaped `[key_size]`) and a  `[max_neighbours, key_size]`
                tensor  of keys to compare against. Should return a
                `[max_neighbours]` tensor of similarities, between 0 and 1
                where 1 means the two keys were identical.
            name (Optional[str]): a name under which to group ops and
                variables. Defaults to `dnd`.
        """
        self._name = scope_name
        self._hash_size = hash_bits
        self._key_size = key_size
        self._bucket_size = max_neighbours
        with tf.variable_scope(self._name):
            self._keys, self._values = HashDND._setup_variables(hash_bits,
                                                                max_neighbours,
                                                                key_size,
                                                                value_shape)
            self._hash_config = get_simhash_config(self._key_size, self._hash_size)

        if not similarity_measure:
            similarity_measure = cosine_similarity
        self._similarity_measure = similarity_measure

    def store(self, query, value):
        """Gets an op which will store the key-value pair.
        
        Args:
            query: tensor, shape=(key_size,)
            value: tensor, shape=(value_shape,)

        Returns:
            operation to store
        """
        with tf.name_scope(self._name + '/store'):
            bucket_keys, bucket_values, idx = self._get_bucket(tf.expand_dims(query, 0))
            bucket_keys = bucket_keys[0]
            bucket_values = bucket_values[0]
            idx = idx[0]
            # is there space?
            can_store = tf.reduce_any(tf.equal(bucket_keys[:, 0], self.sentinel_value))

            def _empty_store():
                return self._get_store_op_empty(query, value, idx, bucket_keys)

            def _full_store():
                return self._get_store_op_full(query, value, idx, bucket_keys)

            store_op = tf.cond(can_store, _empty_store, _full_store)
        return store_op

    def _flatten_index(self, index, bucket_index):
        """turn a bucket-level index into a global index"""
        return index + (bucket_index * self._bucket_size)

    def _update_at_index(self, index, new_key, new_val):
        """make update ops to insert at the appropriate (flattened) index"""
        # update the keys
        keys_update = tf.scatter_update(self._keys, index, new_key)
        # and update the values
        values_update = tf.scatter_update(self._values, index, new_val)
        # make sure they all happen at once
        return tf.group(keys_update, values_update)

    def _get_store_op_empty(self, store_key, store_val, bucket_index,
                            bucket_keys):
        """get an op to store given key and values in the first empty space.

        Returns an op with no output that will run all of the required updates.
        """
        # first find the first empty spot (assuming there is one)
        with tf.name_scope('empty_store'):
            empty_indices = tf.where(tf.equal(bucket_keys[:, 0], self.sentinel_value))
            empty_indices = tf.cast(empty_indices, tf.int32)
            store_idx = self._flatten_index(empty_indices[0][0], bucket_index)
            return self._update_at_index(store_idx, store_key, store_val)

    def _get_store_op_full(self, store_key, store_vals, bucket_index,
                           bucket_keys):
        """get an op to store given keys and values when there are no empty
        slots.

        Returns an op with no output that will run all of the require updates.
        """
        with tf.name_scope('store_full'):
            idx = tf.random_uniform([], minval=0, maxval=self._bucket_size,
                                    dtype=tf.int32)
            store_idx = self._flatten_index(idx, bucket_index)
            return self._update_at_index(store_idx, store_key, store_vals)

    def _get_averaged_value(self, values, similarities):
        """get a weighted sum of values."""
        weighted_values = similarities * values
        all_values = tf.reduce_sum(weighted_values, axis=1)
        return all_values

    def get(self, queries):
        """Get the values in the dictionary corresponding to a particular key,
        or zeros if the key is not present.

        The default similarity is the cosine distance.

        Args:
            queries: tensor, shape=(batch_size, key_size)

        Returns:
            value (tuple): associated values.
        """
        # TODO: what to return when the bucket is empty?
        # at the moment it is all zeros
        with tf.name_scope(self._name + '/get'):
            bucket_keys, bucket_values, indices = self._get_bucket(queries)
            # for the index where the keys are sentinel, mask it out, shape=(batch_size, bucket_size)
            used_positions = tf.not_equal(bucket_keys[:, :, 0], self.sentinel_value)
            # if used 1, otherwise 0
            used_mask = tf.cast(used_positions, tf.float32)
            # set 0 value for non stored key
            zero_keys = tf.zeros_like(bucket_keys)
            _used_positions = tf.tile(tf.expand_dims(used_positions, 2), [1, 1, self._key_size])
            masked_keys = tf.where(_used_positions, bucket_keys, zero_keys) # shape=(batch_size, bucket_size, key_size)
            similarities = self._similarity_measure(queries, masked_keys)
            # Take only True values
            zero_mask = tf.zeros_like(used_positions, dtype=tf.float32)
            values = tf.where(used_positions, bucket_values, zero_mask)
            # Get rid of zero masked positions, shape=(batch_size, bucket_size)
            similarities = tf.where(used_positions, similarities, zero_mask)
            sim_shape = tf.shape(similarities)
            # normalise them to sum to one, and maybe give them a kick\
            sum_tensor = tf.tile(tf.reduce_sum(similarities, axis=1, keep_dims=True), [1, sim_shape[1]])
            # avoid zero division
            sum_tensor += tf.constant(1e-8, dtype=tf.float32)
            similarities /= sum_tensor
            # self.used_positions = similarities
            self.values = values
            results = self._get_averaged_value(values, similarities)
            self.results = results
        return results
    
    def _get_bucket(self, queries):
        """look up the contents of a bucket by hash. Also return the bucket
        index so we can create updates to the storage variables.
        Args:
            queries: tensor, shape=[batch_size, key_size]
        Returns:
            keys: tensor, shape=(batch_size, key_size)
            values: tensor, shape=(batch_size, value shape)
            idx: tensor, shape=(batch_size,)
        """
        # shape = (batch_size,)
        idx = simhash(queries, self._hash_config)
        # Get idx th key and value 
        keys, values = self._get_keys_values_by_idx(idx)
        return keys, values, idx
    
    def _get_keys_values_by_idx(self, idx):
        """
        Args:
            idx: tensor, shape=(batch_size,)
        
        Returns:
            keys, values: tensors
        """
        bucket_start = idx * self._bucket_size
        bucket_end = (idx + 1) * self._bucket_size
        st_end = tf.stack((bucket_start, bucket_end), axis=1)
        key_idx = tf.map_fn(lambda x: tf.range(x[0], x[1]), st_end)
        # tensor with shape=(batch_size, bucket_size, key_size)
        keys = tf.gather(self._keys, key_idx)
        # tensor with shape=(batch_size, bucket_size, value_shape)
        values = tf.gather(self._values, key_idx)
        return keys, values
    
    
def cosine_similarity(query, bucket):
    """Cosine similarity: the cosine of the angle between two vectors.
    Also the dot product, if the vectors are normalised in the l2 norm,
    which is how it is implemented here.
    
    Args:
        query: tensor, shape=(batch_size, key_size)
        bucket: tensor, shape=(batch_size, bucket_size, key_size)
    Returns:
        similarities: tensor, shape=(batch_size, bucket_size)
    """
    query = tf.expand_dims(query, 2) #(batch_size, key_size, 1)
    query = tf.nn.l2_normalize(query, dim=0)
    bucket = tf.nn.l2_normalize(bucket, dim=1)
    return tf.squeeze(tf.matmul(bucket, query), 2, name='cos_sim')


def get_simhash_config(key_size, hash_bits):
    """Gets any necessary configuration and data structures necessary for
    consistent hashing.

    Args:
        key_size: int, size of the key size
        hash_bits: int, the number of bits we output.

    Returns:
        dict: dictionary with two keys: "matrix" corresponding to a variable
            used for the random projection and "bases" used in the conversion
            to integers.
    """
    with tf.variable_scope('simhash_config'):
        mat = tf.get_variable(
            'projection_matrix',
            shape=[key_size, hash_bits],
            initializer=tf.random_normal_initializer())
        bases = 2 ** tf.range(hash_bits)
        return {'matrix': mat, 'bases': bases}


def simhash(inputs, config):
    """SimHash the inputs into an integer with `num_bits` used bits.
    
    Args:
        inputs: tensor, shape=(batch_size, key_size)
        config: dict, config["matrix"] is tensor with shape=(key_size, hash_bits)
            config["bases"] is tensor with shape=(log2(num_bucket,))
    Returns:
        index, tensor, shape=(batch_size)
    """
    with tf.variable_scope('simhash'):
        #shape=(batch_size, hash_bits)
        projected = tf.matmul(inputs, config['matrix'])
        bits = costum_sign(projected) * 0.5 + 0.5
        # return bits
        bits = tf.cast(bits, tf.int32)
        # convert to single bits size integer
        # shape = (batch_size,)
        index = tf.reduce_sum(bits * tf.expand_dims(config['bases'], 0), axis=1)
        return index

def costum_sign(x, dtype=tf.float32):
    return tf.cast((x>=0), dtype=dtype)

In [46]:
import os
import time
import numpy as np
import tensorflow as tf
from logging import getLogger
import random
from collections import deque
from rltensor.networks.ff import MLPModel

from rltensor.agents.agent import Agent
from rltensor.utils import get_shape

logger = getLogger(__name__)


class NEC(Agent):
    def __init__(self, env, conf, controller_cls=MLPModel,
                 diff_memory_cls=HashDND, default_conf=None, sess=None):
        self.controller_cls = controller_cls
        self.diff_memory_cls = diff_memory_cls
        self.key_dim = conf["key_dim"]
        self.delay = conf["delay"]
        self.recent_rewards = deque(maxlen=self.delay)
        self.recent_terminals = deque(maxlen=self.delay)
        super(NEC, self).__init__(env, conf, default_conf, sess)
        
    def _build_graph(self):
        """Build all of the network and optimizations
        
        just for conveninece of trainig, seprate placehoder for train and target network
        critic network input: [raw_data, smoothed, downsampled]
        """
        # state shape has to be (batch, length,) + input_dim
        self.state = tf.placeholder(tf.float32,
                                     get_shape(self.state_dim, maxlen=self.window_length),
                                     name='state')
        _state = self.processor.tensor_process(self.state)
        # Employ maximal strategy
        self.controller = self.controller_cls(self.key_dim,
                                              self.conf["controller"],
                                              scope_name="controller")
        self.diff_memories = []
        for i in range(self.action_dim):
            memory = self.diff_memory_cls(key_size=self.key_dim,
                                          **self.conf["diff_memory"],
                                           scope_name="diff_memory_{}".format(i))
            self.diff_memories.append(memory)
        self.query = self.controller(_state, self.training)
        # returned q_val is tuple 
        self.q_val_list = [memory.get(self.query) for memory in self.diff_memories]
        self.q_val = tf.stack(self.q_val_list, axis=1)
        self.max_action = tf.argmax(self.q_val, axis=1)
        self.max_q_val = tf.reduce_max(self.q_val, axis=1)
        # Build action graph
        self.action = tf.placeholder(tf.int32, (None,), name='action')
        action_one_hot = tf.one_hot(self.action, depth=self.action_dim)
        self.action_q_val = tf.reduce_sum(self.q_val * action_one_hot, axis=1)
        # Build target
        self.target = tf.placeholder(tf.float32, (None,), name="target")
        self.terminal = tf.placeholder(tf.bool, (None,), name="terminal")
        # Store values to differentiable memory
        self.store_ops = [memory.store(self.query[0], self.target[0]) for memory in self.diff_memories]
        # Clip error to stabilize learning
        self.error = self.target - self.action_q_val
        clipped_error = tf.where(tf.abs(self.error) < self.error_clip,
                                    0.5 * tf.square(self.error),
                                    tf.abs(self.error), name='clipped_error')
        self.loss = tf.reduce_mean(clipped_error, name='loss')
        # Build optimization
        # self.update_op = self._get_update_op()
        self.learning_rate_op = self._get_learning_rate()
        self.optimizer = self._get_optimizer(self.optimizer_name,
                                             self.learning_rate_op,
                                             self.optimizer_conf)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            grads_vars = self.optimizer.compute_gradients(self.loss)
            if "grad_clip" in self.conf and self.conf["grad_clip"] is not None:
                grads_vars = [
                    (tf.clip_by_norm(gv[0], clip_norm=self.conf["grad_clip"]), gv[1]) 
                        for gv in grads_vars]
            self.q_optim = self.optimizer.apply_gradients(grads_vars)

    def observe(self, observation, action, reward, terminal, training):
        # clip reward into  (min_r, max_r)
        reward = max(self.min_r, min(self.max_r, reward))
        # assert len(self.memory.observations) == len(self.recent_rewards)
        # We always keep data
        self.recent_rewards.append(reward)
        self.recent_terminals.append(terminal)
        target_val = self._calc_target(observation)
        # we keep target value instead of reward directly
        self.memory.append(observation, action, target_val, terminal, is_store=True)
        delay_state = self.memory.get_delay_state()
        action = self.memory.delay_actions[0]
        self._diff_memories_append(delay_state, target_val, action)
        step = self.global_step.eval(session=self.sess)
        if (step + 1) % self.t_train_freq == 0:
            is_update = True
        else:
            is_update = False
        # print(is_update, step)
        if training:
            experiences = self.memory.sample(self.batch_size)
            weights = np.ones(self.batch_size)
            result = self.q_learning_minibatch(experiences, weights, is_update)
            return result
        else:
            return None

    def q_learning_minibatch(self, experiences, batch_weights, is_update=True):
        feed_dict = {
            self.state: [experience.state for experience in experiences],
            self.target: [experience.q_val for experience in experiences],
            self.action: [experience.action for experience in experiences],
            self.training: True,
        }
        query, q_val = self.sess.run([self.query, self.q_val], feed_dict=feed_dict)
        # print("length", len(experiences))
        # print("query", query[0])
        # print("query", query.shape)
        print("q_val", q_val[0][0])
        # print("q_val", q_val.shape)
        if is_update:
            self.sess.run(self.q_optim, feed_dict=feed_dict);
        q_t, loss, error = self.sess.run([self.action_q_val, self.loss, self.error],
                                     feed_dict=feed_dict)
        return q_t, loss, error, is_update
    
    def predict(self, state, ep=None):
        if ep is None:
            ep = self.epsilon.eval(session=self.sess)
        if random.random() < ep:
            action = np.random.randint(0, self.action_dim)
        else:
            action = self.sess.run(self.max_action, 
                                   feed_dict={self.state: [state],
                                              self.training: False})[0]
        return action
    
    def _calc_target(self, observation):
        target_val = 0
        backward = self.delay - 1
        for i in range(len(self.recent_rewards)):
            target_val += (self.gamma) ** i * self.recent_rewards[i]
            backward -= 1
            if self.recent_terminals[i]:
                break
        state = self.memory.get_delay_state(observation, backward)
        feed_dict = {
            self.state: [state],
            self.training: False}
        max_q_val = self.sess.run(self.max_q_val, feed_dict=feed_dict)[0]
        target_val += self.gamma**(self.delay - backward) * max_q_val
        return target_val
    
    def update_target_q_network(self):
        # We have no operations for updating target network
        pass
    
    def _get_update_op(self):
        pass
    
    def _get_memory(self, window_length, limit, *args, **kwargs):
        return DelayMemory(self.delay, window_length, limit)
    
    def _diff_memories_append(self, state, value, action):
        feed_dict = {
            self.state: [state],
            self.target:[value],
            self.training: True,
        }
        self.sess.run(self.store_ops[action], feed_dict=feed_dict)

In [47]:
from collections import namedtuple
from copy import deepcopy
from six.moves import xrange 

from rltensor.memories import SequentialMemory

DelayExperience = namedtuple('DelayExperience', 'state, action, q_val')

class DelayMemory(SequentialMemory):
    def __init__(self, delay, window_length, limit, *args, **kwargs):
        self.delay = delay
        # Take more observations to make state
        self.delay_observations = deque(maxlen=self.delay+window_length)
        self.delay_actions = deque(maxlen=self.delay)
        self.delay_terminals = deque(maxlen=self.delay)
        super(DelayMemory, self).__init__(window_length, limit, *args, **kwargs)
        
    def sample(self, batch_size, weights=None, batch_idxs=None):
        if batch_idxs is None:
            if weights is not None:
                _weights = weights[1:]
                _weights /= np.sum(_weights)
            else:
                _weights = None
            # Draw random indexes such that we have at least a single entry before each
            # index. Thus, draw samples from [1, self.nb_entries)
            batch_idxs = self._sample_batch_indexes(1, self.nb_entries, batch_size, _weights)
        assert np.min(batch_idxs) >= 1
        assert np.max(batch_idxs) < self.nb_entries
        assert len(batch_idxs) == batch_size

        # Create experiences
        if weights is not None:
            _weights = weights[:-1]
            _weights /= np.sum(_weights)
        else:
            _weights = None
        experiences = []
        # Each idx is index for state1
        for i, idx in enumerate(batch_idxs):
            # Observatio and terminal happens at the same time, so 
            # previous index has to keep terminal==False.
            s0_i = idx - 1
            terminal0 = self.terminals[s0_i]
            while terminal0:
                # Repeat sampling until getting proper idx
                s0_i  = self._sample_batch_indexes(0, self.nb_entries-1, 1, _weights)[0]
                terminal0 = self.terminals[s0_i]
                batch_idxs[i] = s0_i + 1
            assert 0 <= s0_i < self.nb_entries - 1

            # This code is slightly complicated by the fact that subsequent observations might be
            # from different episodes. We ensure that an experience never spans multiple episodes.
            # This is probably not that important in practice but it seems cleaner.
            state = [self.observations[s0_i],]
            for offset in xrange(1, self.window_length):
                current_idx = s0_i - offset
                current_terminal = self.terminals[current_idx] if current_idx >= 0 else False
                if current_idx < 0 or (not self.ignore_episode_boundaries and current_terminal):
                    # The previously handled observation was terminal, don't add the current one.
                    # Otherwise we would leak into a different episode.
                    break
                state.insert(0, self.observations[current_idx])
            # Complete unobserved state with 0
            while len(state) < self.window_length:
                state.insert(0, np.zeros_like(state[0]))
            action = self.actions[idx]
            if action is None:
                print("action", action, idx, self.nb_entries)
            q_val = self.rewards[idx]
            assert len(state) == self.window_length
            experiences.append(DelayExperience(state=state, action=action, q_val=q_val))
        assert len(experiences) == batch_size
        # Keep sampled sampled idx for prioritized sampling
        self.sampled_idx = batch_idxs
        return experiences

    
    def append(self, observation, action, reward, terminal, is_store=True):
        # Reward means Q value just for keeping compatibility
        self.recent_observations.append(observation)
        self.recent_terminals.append(terminal)
        if is_store:
            self.delay_observations.append(observation)
            self.delay_actions.append(action)
            self.delay_terminals.append(terminal)
            self.observations.append(self.delay_observations[0])
            self.actions.append(self.delay_actions[0])
            self.rewards.append(reward)
            self.terminals.append(self.delay_terminals[0])
            
    
    def get_delay_state(self, observation=None, backward=0):
        _observations = deepcopy(self.delay_observations)
        if observation is not None:
            _observations.append(observation)
        if backward > 1:
            _observations = deque(list(_observations)[:-backward])
        if observation is not None:
            padding = np.zeros_like(observation)
        else:
            padding = np.zeros_like(self.delay_observations[0])
        while len(_observations) < self.window_length:
            _observations.insert(0, padding)
        # Make sure window length observations
        return np.array(_observations)[-self.window_length:]

In [49]:
import tensorflow as tf
import gym

from rltensor.agents import DQN
from rltensor.processors import AtariProcessor
from rltensor.networks import MLPModel


conf = {"controller":[
            {"name": "conv2d", "kernel_size":(8, 8), "num_filter":32, "stride":4,
             "padding": 'SAME', "is_batch":False, 'activation': tf.nn.relu},
            {"name": "conv2d", "kernel_size":(5, 5), "num_filter":64, "stride":2,
             "padding": 'SAME', "is_batch":True, 'activation': tf.nn.relu},
           {"name": "conv2d", "kernel_size": (3, 3), "num_filter":64, "stride":1,
             "padding": 'SAME', "is_batch":True, 'activation': tf.nn.relu},
            {"name": "dense", "is_flatten":True, "is_batch":True, "num_hidden": 512, 'activation': tf.nn.relu},
        ],
        "diff_memory":{
            "hash_bits":10,
            "max_neighbours":50,
        },
        "key_dim":300,
        "delay":100,
        "memory_limit": 100000,
        "window_length": 4,
        "gamma": 0.99,
        "learning_rate": 2.5e-4,
        "learning_rate_minimum": 2.5e-4,
        "learning_rate_decay": 0.9,
        "learning_rate_decay_step": 100,
        "ep": 1e-3,
        "min_r": -1,
        "max_r": 1,
        "batch_size": 32,
        "error_clip": 1.0,
        "processor": AtariProcessor(84, 84),
        "t_learn_start": 100,
        "t_train_freq": 1,
        "t_target_q_update_freq": 10000,
        "ep_start": 1.0,
        "ep_end": 0.1,
        "t_ep_end": int(1e6),
        "model_dir": "./logs",
        "log_freq": 1000,
        "avg_length": 10000,
        "env_name": 'Breakout-v0',
        "processor": AtariProcessor(84, 84),
        "optimizer":"rmsp",
}

env = gym.make('Breakout-v0')
tf.reset_default_graph()
nec = NEC(env, conf, controller_cls=MLPModel, diff_memory_cls=HashDND)
nec.fit(int(1e7), render_freq=None, save_video_path="./videos")

[2017-08-20 18:29:13,778] Making new env: Breakout-v0
[2017-08-20 18:29:15,632] Finished writing results. You can upload them to the scoreboard via gym.upload('/home/tomoaki/work/Development/RL/videos')
[2017-08-20 18:29:19,415] Clearing 6 monitor files from previous run (because force=True was provided)
[2017-08-20 18:29:19,423] Starting new video recorder writing to /home/tomoaki/work/Development/RL/videos/openaigym.video.8.10153.video000000.mp4


  0%|          | 0/10000000 [00:00<?, ?it/s]

Model saved in file: params/model.ckpt


[A
  0%|          | 1/10000000 [00:00<342:41:39,  8.11it/s][A
  0%|          | 3/10000000 [00:00<292:46:14,  9.49it/s][A
  0%|          | 5/10000000 [00:00<259:35:43, 10.70it/s][A
  0%|          | 7/10000000 [00:00<237:04:55, 11.72it/s][A
  0%|          | 9/10000000 [00:00<222:23:00, 12.49it/s][A
  0%|          | 11/10000000 [00:00<218:05:44, 12.74it/s][A
  0%|          | 13/10000000 [00:00<209:27:55, 13.26it/s][A
  0%|          | 15/10000000 [00:01<202:14:52, 13.73it/s][A
  0%|          | 17/10000000 [00:01<198:37:37, 13.98it/s][A
  0%|          | 19/10000000 [00:01<198:37:21, 13.99it/s][A
  0%|          | 21/10000000 [00:01<199:42:30, 13.91it/s][A
  0%|          | 23/10000000 [00:01<197:18:20, 14.08it/s][A
  0%|          | 25/10000000 [00:01<201:01:46, 13.82it/s][A
  0%|          | 27/10000000 [00:01<201:11:27, 13.81it/s][A
  0%|          | 29/10000000 [00:02<197:39:20, 14.05it/s][A
  0%|          | 31/10000000 [00:02<197:58:54, 14.03it/s][A
  0%|          | 33/10000

q_val 0.0


  0%|          | 101/10000000 [00:07<362:45:14,  7.66it/s]

q_val 0.0


  0%|          | 103/10000000 [00:08<475:59:25,  5.84it/s]

q_val 0.0


  0%|          | 104/10000000 [00:08<558:28:35,  4.97it/s]

q_val 0.0


  0%|          | 105/10000000 [00:08<616:33:35,  4.51it/s]

q_val 0.0


  0%|          | 106/10000000 [00:08<668:01:43,  4.16it/s]

q_val 0.0


  0%|          | 107/10000000 [00:09<713:03:59,  3.90it/s]

q_val 0.0


  0%|          | 108/10000000 [00:09<757:35:30,  3.67it/s]

q_val 0.0


  0%|          | 109/10000000 [00:09<760:16:42,  3.65it/s]

q_val 0.0


  0%|          | 110/10000000 [00:09<733:22:31,  3.79it/s]

q_val 0.0


  0%|          | 111/10000000 [00:10<722:46:30,  3.84it/s]

q_val 0.0


  0%|          | 112/10000000 [00:10<716:29:30,  3.88it/s]

q_val 0.0


  0%|          | 113/10000000 [00:10<707:05:43,  3.93it/s]

q_val 0.0
q_val 0.0


  0%|          | 115/10000000 [00:11<862:38:05,  3.22it/s]

q_val 0.0


  0%|          | 116/10000000 [00:11<833:41:08,  3.33it/s]

q_val 0.0


  0%|          | 117/10000000 [00:12<864:12:34,  3.21it/s]

q_val 0.0


  0%|          | 118/10000000 [00:12<880:45:21,  3.15it/s]

q_val 0.0


  0%|          | 119/10000000 [00:12<926:29:59,  3.00it/s]

q_val 0.0
q_val 0.0


  0%|          | 121/10000000 [00:13<1023:06:55,  2.71it/s]

q_val 0.0


  0%|          | 122/10000000 [00:13<1001:18:22,  2.77it/s]

q_val 0.0


  0%|          | 123/10000000 [00:14<932:13:22,  2.98it/s] 

q_val 0.0
q_val 0.0


  0%|          | 124/10000000 [00:14<969:47:08,  2.86it/s]

q_val 0.0


  0%|          | 126/10000000 [00:15<945:00:43,  2.94it/s] 

q_val 0.0


  0%|          | 127/10000000 [00:15<890:40:56,  3.12it/s]

q_val 0.0
q_val 0.0


  0%|          | 128/10000000 [00:15<977:56:52,  2.84it/s]

q_val 0.0


  0%|          | 129/10000000 [00:16<1013:26:53,  2.74it/s]

q_val 0.0


  0%|          | 131/10000000 [00:17<1023:09:38,  2.71it/s]

q_val 0.0


  0%|          | 132/10000000 [00:17<974:38:27,  2.85it/s] 

q_val 0.0


  0%|          | 133/10000000 [00:17<980:03:12,  2.83it/s]

q_val 0.0
q_val 0.0


  0%|          | 135/10000000 [00:18<960:02:35,  2.89it/s]

q_val 0.0


  0%|          | 136/10000000 [00:18<897:26:15,  3.10it/s]

q_val 0.0


  0%|          | 137/10000000 [00:19<856:23:54,  3.24it/s]

q_val 0.0


  0%|          | 138/10000000 [00:19<823:34:24,  3.37it/s]

q_val 0.0


  0%|          | 139/10000000 [00:19<803:21:42,  3.46it/s]

q_val 0.0


  0%|          | 140/10000000 [00:19<782:08:05,  3.55it/s]

q_val 0.0


  0%|          | 141/10000000 [00:20<771:29:09,  3.60it/s]

q_val 0.0


  0%|          | 142/10000000 [00:20<766:55:24,  3.62it/s]

q_val 0.0


  0%|          | 143/10000000 [00:20<762:04:23,  3.64it/s]

q_val 0.0


  0%|          | 144/10000000 [00:20<788:52:20,  3.52it/s]

q_val 0.0


  0%|          | 145/10000000 [00:21<855:31:51,  3.25it/s]

q_val 0.0


  0%|          | 146/10000000 [00:21<895:23:01,  3.10it/s]

q_val 0.0
q_val 0.0
Model saved in file: params/model.ckpt


In [12]:
len(nec.store_ops)

4

In [157]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
x_val = np.random.randint(0, 2, (3, 4), dtype=bool)
x = tf.Variable(x_val)
y = tf.where(x)
tf.global_variables_initializer().run()
z = y.eval()

In [158]:
z

array([[0, 0],
       [0, 3],
       [1, 0],
       [1, 1],
       [1, 2],
       [2, 2]])

In [152]:
np.sum(x_val)

20

In [151]:
x_val[0][0][1]

False

In [50]:
2 ** 10

1024