In [1]:
%%time
import pandas as pd
# texts = clean_movie_data(data)
df = pd.read_csv("data/All-seasons.csv")

CPU times: user 296 ms, sys: 368 ms, total: 664 ms
Wall time: 298 ms


In [2]:
df.head()

Unnamed: 0,Season,Episode,Character,Line
0,10,1,Stan,"You guys, you guys! Chef is going away. \n"
1,10,1,Kyle,Going away? For how long?\n
2,10,1,Stan,Forever.\n
3,10,1,Chef,I'm sorry boys.\n
4,10,1,Stan,"Chef said he's been bored, so he joining a gro..."


In [3]:
texts = df["Line"].values

file = open("data/cornell movie-dialogs corpus/movie_lines.txt", "r",encoding='utf-8', errors='ignore')
data = file.read()

import re
from tqdm import tqdm

def clean_movie_data(data):
    pattern = " \+\+\+\$\+\+\+ u[0-9] \+\+\+\$\+\+\+ m[0-9] \+\+\+\$\+\+\+ \w* \+\+\+\$\+\+\+ "
    lines = re.split(pattern, data.lower())
    line_words = []
    for line in tqdm(lines):
        try:
            cleaned_line = clean_text(line, max_dist=0)
            if len(cleaned_line) >= 1:
                line_words.append(cleaned_line)
        except KeyboardInterrupt:
            break
    return line_words

In [4]:
texts[:10]

array(['You guys, you guys! Chef is going away. \n',
       'Going away? For how long?\n', 'Forever.\n', "I'm sorry boys.\n",
       "Chef said he's been bored, so he joining a group called the Super Adventure Club. \n",
       'Wow!\n',
       'Chef?? What kind of questions do you think adventuring around the world is gonna answer?!\n',
       "What's the meaning of life? Why are we here?\n",
       "I hope you're making the right choice.\n",
       "I'm gonna miss him.  I'm gonna miss Chef and I...and I don't know how to tell him! \n"], dtype=object)

import nltk
nltk.download()

In [11]:
%%time
from hedgeable_ai.functions.preprocessing.word2index import Word2IndexProcessor

processor = Word2IndexProcessor(texts[:], is_processed=False)

CPU times: user 41min 47s, sys: 25.3 s, total: 42min 13s
Wall time: 41min 49s


In [6]:
import tensorflow as tf

In [7]:
from __future__ import print_function
import tensorflow as tf

def layer_normalization(inputs, epsilon=1e-8, scope="layer_normalization", reuse=None):
    '''Applies layer normalization.
    
    Args:
      inputs: A tensor with 2 or more dimensions, where the first dimension has
        `batch_size`.
      epsilon: A floating number. A very small number for preventing ZeroDivision Error.
      scope: Optional scope for `variable_scope`.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.
      
    Returns:
      A tensor with the same shape and data dtype as `inputs`.
    '''
    with tf.variable_scope(scope, reuse=reuse):
        shape = inputs.get_shape().as_list()
        hidden_dim = shape[-1]
        mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
        gamma = tf.get_variable("gamma", hidden_dim, initializer=tf.ones_initializer(tf.float32))
        beta = tf.get_variable("beta", hidden_dim, initializer=tf.zeros_initializer(tf.float32))
        normalized_inputs = (inputs - mean) / tf.sqrt(variance + epsilon)
        outputs = gamma * normalized_inputs + beta
    return outputs

def embedding(inputs,vocab_size, num_units, 
              zero_pad=True, scale=True,
              scope="embedding", reuse=None):
    '''Embeds a given tensor.
    Args:
      inputs: A `Tensor` with type `int32` or `int64` containing the ids
         to be looked up in `lookup table`.
      vocab_size: An int. Vocabulary size.
      num_units: An int. Number of embedding hidden units.
      zero_pad: A boolean. If True, all the values of the fist row (id 0)
        should be constant zeros.
      scale: A boolean. If True. the outputs is multiplied by sqrt num_units.
      scope: Optional scope for `variable_scope`.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.
    Returns:
      A `Tensor` with one more rank than inputs's. The last dimensionality
        should be `num_units`.
        
    For example,
    
    ```
    import tensorflow as tf
    
    inputs = tf.to_int32(tf.reshape(tf.range(2*3), (2, 3)))
    outputs = embedding(inputs, 6, 2, zero_pad=True)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print sess.run(outputs)
    >>
    [[[ 0.          0.        ]
      [ 0.09754146  0.67385566]
      [ 0.37864095 -0.35689294]]
     [[-1.01329422 -1.09939694]
      [ 0.7521342   0.38203377]
      [-0.04973143 -0.06210355]]]
    ```
    
    ```
    import tensorflow as tf
    
    inputs = tf.to_int32(tf.reshape(tf.range(2*3), (2, 3)))
    outputs = embedding(inputs, 6, 2, zero_pad=False)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print sess.run(outputs)
    >>
    [[[-0.19172323 -0.39159766]
      [-0.43212751 -0.66207761]
      [ 1.03452027 -0.26704335]]
     [[-0.11634696 -0.35983452]
      [ 0.50208133  0.53509563]
      [ 1.22204471 -0.96587461]]]    
    ```    
    '''
    with tf.variable_scope(scope, reuse=reuse):
        lookup_table = tf.get_variable('lookup_table',
                                       dtype=tf.float32,
                                       shape=[vocab_size, num_units],
                                       initializer=tf.contrib.layers.xavier_initializer())
        if zero_pad:
            lookup_table = tf.concat((tf.zeros(shape=[1, num_units]),
                                      lookup_table[1:, :]), 0)
        outputs = tf.nn.embedding_lookup(lookup_table, inputs)
        
        if scale:
            outputs = outputs * tf.sqrt(num_units) 
            
    return outputs
    
def multihead_attention(queries, 
                        keys,
                        values,
                        num_units=None, 
                        num_heads=8, 
                        drop_rate=0,
                        training=True,
                        causality=False,
                        scope="multihead_attention", 
                        reuse=None):
    '''Applies multihead attention.
    
    Args:
      queries: A 3d tensor with shape of [N, T_q, C_q].
      keys: A 3d tensor with shape of [N, T_k, C_k].
      values: A 3d tensor with shape of [N, T_k, C_k].
      num_units: A scalar. Attention size.
      drop_rate: A floating point number.
      training: Boolean. Controller of mechanism for dropout.
      causality: Boolean. If true, units that reference the future are masked. 
      num_heads: An int. Number of heads.
      scope: Optional scope for `variable_scope`.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.
        
    Returns
      A 3d tensor with shape of (N, T_q, C)  
    '''
    with tf.variable_scope(scope, reuse=reuse):
        # Set the fall back option for num_units
        if num_units is None:
            num_units = queries.get_shape().as_list()[-1]
        
        # Linear projections
        Q = tf.layers.dense(queries, num_units, activation=None) # (N, T_q, C)
        K = tf.layers.dense(keys, num_units, activation=None) # (N, T_k, C)
        V = tf.layers.dense(values, num_units, activation=None) # (N, T_k, C)
        
        # Split and concat
        # The size will be [N * num_heads, T_k, C_k/num_heads]
        Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0) # (h*N, T_q, C/h) 
        K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0) # (h*N, T_k, C/h) 
        V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0) # (h*N, T_k, C/h) 

        # Multiplication
        alignments = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1])) # (h*N, T_q, T_k)
        
        # Scale
        d = tf.constant(K_.get_shape().as_list()[-1], tf.float32)
        alignments = tf.nn.softmax(alignments / tf.sqrt(d))
  
        # Causality = Future blinding
        if causality:
            shape = tf.shape(alignments)
            masks = tf.ones(shape[1:]) # (T_q, T_k)
            masks = tf.contrib.linalg.LinearOperatorTriL(masks).to_dense() # (T_q, T_k)
            masks = tf.tile(tf.expand_dims(masks, 0), [shape[0], 1, 1]) # (h*N, T_q, T_k)
            paddings = tf.ones_like(masks)*(-2**32+1) # minimum value for float
            alignments = tf.where(tf.equal(masks, 0), paddings, alignments) # (h*N, T_q, T_k)
          
        # Dropouts
        alignments = tf.layers.dropout(alignments, rate=drop_rate, training=training)
        # Weighted sum
        outputs = tf.matmul(alignments, V_) # ( h*N, T_q, C/h)
        # Restore shape
        outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2) # (N, T_q, C)
        # Residual connection
        outputs += queries 
        # Normalize
        outputs = layer_normalization(outputs) # (N, T_q, C)
    return outputs

def feedforward(inputs, 
                num_units=[2048, 512],
                scope="multihead_attention", 
                reuse=None):
    '''Point-wise feed forward net.
    
    Args:
      inputs: A 3d tensor with shape of [N, T, C].
      num_units: A list of two integers.
      scope: Optional scope for `variable_scope`.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.
        
    Returns:
      A 3d tensor with the same shape and dtype as inputs
    '''
    with tf.variable_scope(scope, reuse=reuse):
        # Inner layer
        params = {"inputs": inputs, "filters": num_units[0], "kernel_size": 1,
                  "activation": tf.nn.relu, "use_bias": True}
        outputs = tf.layers.conv1d(**params)
        
        # Readout layer
        params = {"inputs": outputs, "filters": num_units[1], "kernel_size": 1,
                  "activation": None, "use_bias": True}
        outputs = tf.layers.conv1d(**params)
        
        # Residual connection
        outputs += inputs
        
        # Normalize
        outputs = layer_normalization(outputs)
    
    return outputs

def label_smoothing(inputs, epsilon=0.1):
    '''Applies label smoothing. See https://arxiv.org/abs/1512.00567.
    
    Args:
      inputs: A 3d tensor with shape of [N, T, V], where V is the number of vocabulary.
      epsilon: Smoothing rate.
    
    For example,
    
    ```
    import tensorflow as tf
    inputs = tf.convert_to_tensor([[[0, 0, 1], 
       [0, 1, 0],
       [1, 0, 0]],
      [[1, 0, 0],
       [1, 0, 0],
       [0, 1, 0]]], tf.float32)
       
    outputs = label_smoothing(inputs)
    
    with tf.Session() as sess:
        print(sess.run([outputs]))
    
    >>
    [array([[[ 0.03333334,  0.03333334,  0.93333334],
        [ 0.03333334,  0.93333334,  0.03333334],
        [ 0.93333334,  0.03333334,  0.03333334]],
       [[ 0.93333334,  0.03333334,  0.03333334],
        [ 0.93333334,  0.03333334,  0.03333334],
        [ 0.03333334,  0.93333334,  0.03333334]]], dtype=float32)]   
    ```    
    '''
    K = inputs.get_shape().as_list()[-1] # number of channels
    return ((1-epsilon) * inputs) + (epsilon / K)

In [8]:
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected, conv2d
from tensorflow.contrib.layers import conv2d_transpose, flatten

from hedgeable_ai.models.nn import BaseModel
        
class FeedForward(BaseModel):
    def __init__(self, model_params, scope_name, *args, **kwargs):
        super().__init__(model_params, scope_name, *args, **kwargs)
    
    def __call__(self, x, training=True):
        with tf.variable_scope(self.scope_name, reuse=self.reuse):
            for i, params in enumerate(self.model_params):
                with tf.variable_scope('layer_' + str(i)):
                    if "is_flatten" in params and params["is_flatten"]:
                        x = flatten(x)
                    if "drop_rate" in params:
                        x = tf.layers.dropout(x, rate=params["drop_rate"], training=training)
                    # demtermine which layer to use
                    if params["name"] == "dense":
                        x = fully_connected(x, params["num_hidden"], activation_fn=None,
                                            reuse=self.reuse, scope="dense")
                    elif params["name"] == "conv2d":
                        x =  conv2d(x, params["num_filter"], params["kernel_size"],
                                    params["stride"], params["padding"], 
                                    scope="conv2d", reuse=self.reuse, activation_fn=None)
                    elif params["name"] == "deconv2d":
                        x =  conv2d_transpose(x, params["num_filter"], params["kernel_size"],
                                              params["stride"], params["padding"], 
                                              scope="deconv2d", reuse=self.reuse, activation_fn=None)
                    elif params["name"] == "reshape":
                        x = tf.reshape(x, (-1,) + params["reshape_size"])
                    elif params["name"] == "pooling":
                        del params["name"]
                        x = tf.nn.pool(x, **params)
                    elif params["name"] == None:
                        pass
                    else:
                        raise NotImplementedError("No implementation for 'name'={}".format(params["name"]))         
                    if "is_batch" in params and params["is_batch"]:
                        x = tf.layers.batch_normalization(x, training=training, momentum=0.9,
                                                          reuse=self.reuse, name="batch_norm")
                    if "activation" in params:
                        x = params["activation"](x)
            if self.reuse is False:
                self.global_scope_name = tf.get_variable_scope().name
                self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.global_scope_name)
        self.reuse = True
        return x

In [9]:
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
import tensorflow.contrib.seq2seq as seq2seq
from hedgeable_ai.models.nn import BaseModel, get_shape, get_length
import tensorflow.contrib.rnn as rnn

import tensorflow as tf

from hedgeable_ai.models.nn.params import nn_is_logit
from hedgeable_ai.models.nn import BaseNN, get_shape

from hedgeable_ai.models.nn.rnn import get_cell

count = 0
class DialogueAgent(BaseNN):
    def __init__(self, processor, maxlen=20,
                 conf=None, additional_length=3,
                 num_hidden=512, num_heads=8, num_blocks=6, drop_rate=0.1,
                 position_scale=1000, *args, **kwargs):
        self.num_blocks = num_blocks
        # leave index 0 for padding and 1 for  <eos>
        self.vocab_size = processor.vocab_size + 2
        self.maxlen = maxlen
        self.additional_length = additional_length
        self.dec_maxlen = maxlen + additional_length
        self.position_scale = position_scale
        self.drop_rate = drop_rate
        self.num_heads = num_heads
        self.num_hidden = num_hidden
        self.reuse = False
        super().__init__(processor=processor, conf=conf, *args, **kwargs)
        
    def _build_graph(self):
        """Build tensorflow graph
        
        Note:
            You build graphs for output and input, which will be used 
            for training and prediction.
        """
        # Build Basic Netwoiork
        self.enc_input = tf.placeholder(tf.int32, shape=(None, None), name="encoder_input")
        self.dec_input = tf.placeholder(tf.int32, shape=(None, None), name="decoder_input")
        batch_size = tf.shape(self.dec_input)[0]
        eos_padding = tf.ones((batch_size, 1), dtype=tf.int32)
        dec_target = self.dec_input
        _dec_input = tf.concat((eos_padding, self.dec_input[:, :-1]), axis=1)
        logits = self._get_output(self.enc_input, _dec_input, self.training)
        # predictions = tf.cast(tf.arg_max(self.logits, dimension=-1), tf.int32)
        target_smoothed = label_smoothing(tf.one_hot(dec_target, depth=self.vocab_size), epsilon=0.1)
        _loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=target_smoothed)
        masks = tf.cast(tf.sign(dec_target), tf.float32)
        self.loss = tf.reduce_mean(tf.reduce_sum(masks * _loss, [1]))
        
        self.learning_rate_op = self._get_learning_rate()
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # with tf.control_dependencies(update_ops):
        self.optimizer = self._get_optimizer(self.optimizer_name, self.learning_rate_op, self.optimizer_conf)
        grads_vars = self.optimizer.compute_gradients(self.loss)
        if "grad_clip" in self.conf and self.conf["grad_clip"] is not None:
            grads_vars = [
                (tf.clip_by_norm(gv[0], clip_norm=self.conf["grad_clip"]), gv[1]) 
                    for gv in grads_vars]
        self.train_step = self.optimizer.apply_gradients(grads_vars)
        # prediction flow
        # logits = self._get_output(self.enc_input, _dec_input, self.training)
        self.predictions = tf.cast(tf.arg_max(logits, dimension=-1), tf.int32)
        self.logits = logits
        
    def _get_output(self, enc_input, dec_input, training=True):
        tensor_batch_size = tf.shape(enc_input)[0]
        with tf.variable_scope("embedding", reuse=self.reuse):
            embeddings = tf.get_variable("embedding", [self.vocab_size, self.num_hidden],
                                     initializer=tf.contrib.layers.xavier_initializer())
            embeddings = tf.sqrt(float(self.num_hidden)) * embeddings
        # Encoder
        with tf.variable_scope("encoder", reuse=self.reuse):
            enc_input_embedded = tf.nn.embedding_lookup(embeddings, self.enc_input)
            ## Positional Encoding
            enc_length = tf.shape(enc_input)[1]
            position_idx = tf.range(tf.reduce_max(enc_length))
            position_idx = tf.tile(tf.expand_dims(position_idx, 0), [tensor_batch_size, 1])
            enc_position_embeddings = tf.get_variable("embedding_position",
                                                      [self.maxlen, self.num_hidden],
                                                      initializer=tf.contrib.layers.xavier_initializer())
            enc_position_embedded = tf.nn.embedding_lookup(enc_position_embeddings, position_idx)
            enc_x = enc_input_embedded + enc_position_embedded
            ## Dropout
            enc_x = tf.layers.dropout(enc_x, rate=self.drop_rate, training=training)
            # Encoder blocks    
            for i in range(self.num_blocks):
                with tf.variable_scope("num_blocks_{}".format(i)):
                    ### Multihead Attention
                    enc_x = multihead_attention(queries=enc_x, 
                                                keys=enc_x, 
                                                values=enc_x,
                                                num_units=self.num_hidden,
                                                num_heads=self.num_heads, 
                                                drop_rate=self.drop_rate,
                                                training=training,
                                                causality=False,
                                                reuse=self.reuse,
                                                scope="self_attention")
                    ### Feed Forward
                    enc_x = feedforward(enc_x, num_units=[4*self.num_hidden, self.num_hidden])
            
        # Decoder
        with tf.variable_scope("decoder", reuse=self.reuse):
            dec_input_embedded = tf.nn.embedding_lookup(embeddings, dec_input)
            ## Positional Encoding
            dec_length = tf.shape(dec_input)[1]
            position_idx = tf.range(dec_length)
            position_idx = tf.tile(tf.expand_dims(position_idx, 0), [tensor_batch_size, 1])
            dec_position_embeddings = tf.get_variable("embedding_position",
                                                      [self.dec_maxlen, self.num_hidden],
                                                      initializer=tf.contrib.layers.xavier_initializer())
            dec_position_embedded = tf.nn.embedding_lookup(dec_position_embeddings, position_idx)
            dec_x = dec_input_embedded + dec_position_embedded
            ## Dropout
            dec_x = tf.layers.dropout(dec_x, rate=self.drop_rate, training=training)
                
            ## Blocks
            for i in range(self.num_blocks):
                with tf.variable_scope("num_blocks_{}".format(i)):
                    ### Multihead Attention
                    dec_x = multihead_attention(queries=dec_x, 
                                                keys=dec_x, 
                                                values=dec_x,
                                                num_units=self.num_hidden,
                                                num_heads=self.num_heads, 
                                                drop_rate=self.drop_rate,
                                                training=training,
                                                causality=True,
                                                reuse=self.reuse,
                                                scope="self_attention")
                        
                    dec_x = multihead_attention(queries=dec_x, 
                                                keys=enc_x, 
                                                values=enc_x,
                                                num_units=self.num_hidden,
                                                num_heads=self.num_heads, 
                                                drop_rate=self.drop_rate,
                                                training=training,
                                                causality=False,
                                                reuse=self.reuse,
                                                scope="vanilla_attention")
                    ### Feed Forward
                    dec_x = feedforward(dec_x, num_units=[4*self.num_hidden, self.num_hidden],
                                       scope="fully_connected", reuse=self.reuse)
                
            # Final linear projection
            dim_size = dec_x.get_shape().as_list()[2]
            shape = tf.shape(dec_x)
            dec_x = tf.reshape(dec_x, [-1, shape[2]])
            logits = tf.matmul(dec_x, tf.transpose(embeddings))
            logits = tf.reshape(logits, [shape[0], shape[1], self.vocab_size])
        self.reuse = True
        return logits
        
    def _optimize(self, batch_X, batch_y, *args, **kwargs):
        global count
        batch_X = batch_X[0]
        batch_X, Xlen = self.processor.batch_padding(batch_X, self.maxlen)
        length = np.max(Xlen) + self.additional_length
        batch_y = self._batch_padding(batch_y, length)
        feed_dict = {self.enc_input: batch_X,
                     self.dec_input: batch_y,
                     self.training: True}
        
        _, loss = self.sess.run([self.train_step, self.loss], feed_dict=feed_dict)
        # print("loss", loss)
        if count % 100 == 0:
            # logits = self.sess.run(self.logits, feed_dict=feed_dict)
            # print("logits", logits)
            sentences = texts[:5]
            predictions = self.generate_sentences(sentences)
            print("******************************************")
            for prediction in predictions:
                print(prediction)
        count += 1
        return loss
    
    def generate_sentences(self, sentences):
        X = [self.processor.encode(sentence) for sentence in sentences]
        # X = sentences
        # X = [x_[::-1] for x_ in X]
        X, Xlen = self.processor.batch_padding(X, self.maxlen)
        batch_size = X.shape[0]
        y = np.array([[] for _ in range(batch_size)], dtype=int)
        not_finished = np.array([True for _ in range(batch_size)])
        for i in range(self.dec_maxlen):
            feed_dict = {self.enc_input: X,
                         self.dec_input: y,
                         self.training: False}
            predictions = self.sess.run(self.predictions, feed_dict=feed_dict)
            # print(predictions)
            new_y = predictions[:, -1]
            new_y = np.array([new_y[k] if not_finished[k] else 0 for k in range(batch_size)])
            y = np.concatenate((y, new_y[:, np.newaxis]), axis=1)
            # print(y)
            new_not_finished = np.array(new_y > 1, dtype=int)
            not_finished = np.array(
                np.array(not_finished, dtype=int) * np.array(new_not_finished, dtype=int),
                dtype=bool)
            if np.sum(not_finished) == 0:
                break
        return [self.processor.decode(i) for i in y]
    
    def _batch_padding(self, batch, length):
        EOS = 1
        PAD = 0
        padded_batch = []
        for x in batch:
            x = list(x)
            if len(x) > length:
                x = x[:length]
            elif len(x) < length:
                x.append(EOS)
            while len(x) < length:
                x.append(PAD)
            padded_batch.append(x)
        return np.array(padded_batch)

In [None]:
import tensorflow as tf
import numpy as np


conf = {
        "learning_rate": 1e-4,
        "learning_rate_minimum": 1e-4,
        "learning_rate_decay": 0.5,
        "learning_rate_decay_step": 1,
        "batch_size": 64,
        "model_dir": "./attention_logs",
        "load_file_path": None,
        "save_file_path": None,
        "log_freq": 1,
        "grad_clip":None,
        "optimizer":"adam",
}

train_X = processor.data[:-1]
train_y = processor.data[1:]
# with tf.device('/gpu:0'):
tf.reset_default_graph()
agent = DialogueAgent(processor, maxlen=20, conf=conf, additional_length=3)
agent.fit(train_X, train_y, num_epochs=100, batch_bar=False, log_freq=1, batch_log_freq=100)


  0%|          | 0/100 [00:00<?, ?it/s]

Model saved in file: params/model.ckpt


[A


******************************************
['out', 'out', 'you', 'you', 'you']
['out', 'out', 'you', 'you', 'you', 'you']
['out', 'out', 'you', 'you', 'you', 'you']
['out', 'out', 'you', 'you', 'you', 'you']
['out', 'out', 'i', 'you', 'you']
******************************************
['the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', '.', '.']
['the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', '.', '.', 'the', 'the', 'to', 'a', 'to', 'a', 'to', 'a', 'to', 'a', 'to', 'a']
['the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', '.', '.', 'the', 'the', 'to', 'to', 'to', 'to', 'to', 'to', 'to', 'a', 'a', 'a']
['the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', '.', '.', 'to', 'the', 'to', 'to', 'to', 'to', 'to', 'to', 'to', 'a', 'a', 'a']
['the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', 'the', '.', 'to', 'to', 'to', 'to', 'to', 'to', 'to', 'to', 'to', 'to', 'to', 'to', 'a']
******************************************
['men', 'men', '.', 

  1%|          | 1/100 [04:09<6:51:22, 249.32s/it]

******************************************
['a', 'a', '.', '.', 'a', '.', 'a', 'a', '.', '.', 'a', 'a', '.', '.']
['a', 'a', '.', '.', 'a', '.', 'a', 'a', '.', '.', 'a', 'a', '.', '.']
['a', 'a', '.', '.', 'a', '.', 'a', 'a', '.', '.', 'a', 'a', '.', '.']
['a', 'a', '.', '.', 'a', '.', 'a', 'a', '.', '.', 'a', 'a', '.', '.']
['a', 'a', '.', '.', 'a', '.', 'a', 'a', '.', '.', 'a', 'a', '.', '.']
******************************************
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
******************************************
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.'

  2%|▏         | 2/100 [08:18<6:47:15, 249.34s/it]

******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['the', 'the']
['the', 'the']
['the', 'the']
['.', '.']
******************************************
['the', 'the']
['the', 'the']
['the', 'the']
['the', 'the']
['the', 'the']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
[]
[]
[]
[]
[]
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
[]
[]
[]
[]
[]
******************************************
[]
[]
[]
[]
[]
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']


  3%|▎         | 3/100 [12:26<6:42:34, 249.02s/it]

******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['to', 'to']
['to', 'to']
['to', 'to']
['to', 'to']
['to', 'to']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
**********

  4%|▍         | 4/100 [16:29<6:35:13, 247.02s/it]

******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']
******************************************
['.', '.']
['.', '.']
['.', '.']
['.', '.']
['.', '.']


In [13]:
predictions = agent.generate_sentences(texts[:10])
for prediction in predictions:
    print(prediction)

['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
[]
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']


In [14]:
agent.fit(train_X, train_y, num_epochs=100, batch_bar=False, log_freq=1, batch_log_freq=100)


  0%|          | 0/100 [00:00<?, ?it/s]

Model saved in file: params/model.ckpt


[A


******************************************
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
******************************************
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
['.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.', '.']
*****************************

In [10]:
sess = tf.InteractiveSession()

In [197]:
np.array([True, True, True, False], dtype=int)

array([1, 1, 1, 0])

In [55]:
tf.reset_default_graph()
x = tf.Variable(np.ones((10, 10)))
with tf.variable_scope("hello", reuse=False):
    x = tf.contrib.layers.fully_connected(x, 10)
    # x = tf.contrib.layers.fully_connected(x, 10)

In [56]:
with tf.variable_scope("hello", reuse=True):
    x = tf.contrib.layers.fully_connected(x, 10)

In [51]:
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

In [52]:
x.eval()

array([[ 0.04597573,  0.        ,  0.        ,  0.02855326,  0.        ,
         0.30168584,  0.00910732,  0.        ,  0.90480631,  0.220675  ],
       [ 0.04597573,  0.        ,  0.        ,  0.02855326,  0.        ,
         0.30168584,  0.00910732,  0.        ,  0.90480631,  0.220675  ],
       [ 0.04597573,  0.        ,  0.        ,  0.02855326,  0.        ,
         0.30168584,  0.00910732,  0.        ,  0.90480631,  0.220675  ],
       [ 0.04597573,  0.        ,  0.        ,  0.02855326,  0.        ,
         0.30168584,  0.00910732,  0.        ,  0.90480631,  0.220675  ],
       [ 0.04597573,  0.        ,  0.        ,  0.02855326,  0.        ,
         0.30168584,  0.00910732,  0.        ,  0.90480631,  0.220675  ],
       [ 0.04597573,  0.        ,  0.        ,  0.02855326,  0.        ,
         0.30168584,  0.00910732,  0.        ,  0.90480631,  0.220675  ],
       [ 0.04597573,  0.        ,  0.        ,  0.02855326,  0.        ,
         0.30168584,  0.00910732,  0.        