In [None]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import random
import string
import tensorflow as tf
from tensorflow.python.ops.rnn_cell import _linear
import zipfile
from six.moves import range
from six.moves.urllib.request import urlretrieve
import collections
import matplotlib.pyplot as plt
import codecs
import time
import os
import gc
from six.moves import cPickle as pickle

from plot_module import text_plot
from plot_module import structure_vocabulary_plots
from plot_module import ComparePlots

from model_module import maybe_download
from model_module import read_data
from model_module import check_not_one_byte
from model_module import id2char
from model_module import char2id
from model_module import BatchGenerator
from model_module import characters
from model_module import batches2string
from model_module import logprob
from model_module import sample_distribution
from model_module import MODEL

In [None]:
# Build (or reload) `text`: the enwik8 corpus restricted to characters whose
# code point is below 256. The filtered corpus is cached on disk as UTF-8 in
# 'enwik8_filtered' so that the slow filtering pass only runs once.
if not os.path.exists('enwik8_filtered'):
    # Fix: `filename` used to be assigned only when 'enwik8' was missing, so
    # read_data(filename) raised NameError whenever 'enwik8' already existed.
    # NOTE(review): assumes maybe_download returns the local archive path and
    # is safe to call when the archive is already present - confirm against
    # model_module.
    filename = maybe_download('enwik8.zip', 36445475)
    full_text = read_data(filename)
    new_text_list = list()
    for i, character in enumerate(full_text):
        if (i+1) % 10000000 == 0:
            print("%s characters are filtered" % i)
        if ord(character) < 256:
            new_text_list.append(character)
    text = u"".join(new_text_list)
    del new_text_list
    del full_text

    # Binary mode: the payload is already encoded bytes, and the context
    # manager closes the handle even if the write fails.
    with open('enwik8_filtered', 'wb') as f:
        f.write(text.encode('utf8'))
else:
    with open('enwik8_filtered', 'rb') as f:
        text = f.read().decode('utf8')

# The statistics are needed by later cells whichever branch produced `text`,
# so they are computed once here instead of being duplicated per branch.
(not_one_byte_counter,
 min_character_order_index,
 max_character_order_index,
 number_of_characters,
 present_characters_indices) = check_not_one_byte(text)

print("number of not one byte characters: ", not_one_byte_counter)
print("min order index: ", min_character_order_index)
print("max order index: ", max_character_order_index)
print("total number of characters: ", number_of_characters)

In [None]:
#different
# Skip the first `offset` characters, take the next `valid_size` characters
# for validation and keep everything after them for training.
offset = 20000
valid_size = 22500
valid_end = offset + valid_size
valid_text = text[offset:valid_end]
train_text = text[valid_end:]
train_size = len(train_text)
# Show the size and the first 64 characters of each split.
for split_size, split_text in ((train_size, train_text), (valid_size, valid_text)):
    print(split_size, split_text[:64])

In [None]:
# Build the character vocabulary from the presence flags of code points
# 0..255: `vocabulary` lists the present characters in code-point order and
# `characters_positions_in_vocabulary[code]` is the character's index in
# `vocabulary`, or -1 when that code point is absent.
vocabulary_size = number_of_characters
vocabulary = []
characters_positions_in_vocabulary = []

for code_point in range(256):
    if present_characters_indices[code_point]:
        # The next free vocabulary slot is the current vocabulary length.
        characters_positions_in_vocabulary.append(len(vocabulary))
        vocabulary.append(unichr(code_point))
    else:
        characters_positions_in_vocabulary.append(-1)
character_position_in_vocabulary = len(vocabulary)

string_vocabulary = u"".join(vocabulary[idx] for idx in range(vocabulary_size))
print("Vocabulary: ", string_vocabulary)

# Spot-check the lookups in both directions.
sample_ids = tuple(char2id(sample_char, characters_positions_in_vocabulary)
                   for sample_char in (u'a', u'z', u' '))
print("char2id(u'a') = %s,  char2id(u'z') = %s,  char2id(u' ') = %s" % sample_ids)
sample_chars = tuple(id2char(sample_id, vocabulary)
                     for sample_id in (78, 156, 140))
print("id2char(78) = %s,  id2char(156) = %s,  id2char(140) = %s" % sample_chars)

In [None]:
# Smoke test of BatchGenerator: build one generator over the training text and
# one over the validation text, then print two consecutive batches from each.
batch_size_test = 64
num_unrollings_test = 10

train_batches_test = BatchGenerator(
    train_text,
    batch_size_test,
    vocabulary_size,
    characters_positions_in_vocabulary,
    num_unrollings_test,
)
valid_batches_test = BatchGenerator(
    valid_text,
    1,
    vocabulary_size,
    characters_positions_in_vocabulary,
    1,
)

for demo_generator in (train_batches_test, train_batches_test,
                       valid_batches_test, valid_batches_test):
    print(batches2string(demo_generator.next(), vocabulary))

In [None]:
# Maps each field name to its integer position (0..8), in the order listed.
# NOTE(review): presumably the record layout used for results of another
# model type - confirm where indices_GL is consumed.
indices_GL = dict(
    (field_name, position)
    for position, field_name in enumerate((
        "batch_size",
        "num_unrollings",
        "num_layers",
        "num_nodes",
        "half_life",
        "decay",
        "num_steps",
        "averaging_number",
        "type",
    ))
)


class delay_deterministic(MODEL):
    
    def analize(self, compressed_inp):
        """Run the analysis layers on the sampling memory plus the current input.

        The input to the first analysis layer is the column-wise concatenation
        of: every block of Sample_Memory[0], then `compressed_inp`, then every
        block of each higher memory layer in ascending layer order. Each
        analysis layer applies `relu(x * W + b)`.

        Args:
            compressed_inp: tensor for the current (already compressed) input.

        Returns:
            The output tensor of the last analysis layer.
        """
        pieces = list(self.Sample_Memory[0]) + [compressed_inp]
        for upper_layer in range(1, self._memory_layers):
            pieces += self.Sample_Memory[upper_layer]
        hidden = tf.concat(1, pieces)
        for weights_matrix, bias_vector in zip(self.Analisys_Matrices, self.Analisys_Biases):
            hidden = tf.nn.relu(tf.nn.xw_plus_b(hidden, weights_matrix, bias_vector))
        return hidden
    
    def update_below_including_layer(self, compressed_inp, layer_num):
        """Build the ops that refresh sampling memory layers 0..layer_num.

        Every refreshed layer is shifted by one block (block k takes the value
        of block k+1) and its last block is overwritten: layer 0 receives
        `compressed_inp`; a higher layer receives tanh(x * W + b) of the
        concatenated blocks of the layer directly below it.

        Each assignment is created under a control dependency on everything
        collected before it, so the assignments form a chain.

        Args:
            compressed_inp: tensor stored as the newest block of layer 0.
            layer_num: index of the highest memory layer to refresh.

        Returns:
            A list ordered from the highest memory layer down to layer 0 with
            one entry per memory block: the untouched variable for layers
            above `layer_num`, the assign op for refreshed layers. The length
            is therefore the same for every `layer_num`.
        """
        ops = []

        def chained_assign(target, value):
            # The new assignment waits for everything already collected.
            with tf.control_dependencies(ops):
                ops.append(tf.assign(target, value))

        # Layers above layer_num are passed through unchanged, highest first.
        for untouched_layer in range(self._memory_layers - 1, layer_num, -1):
            ops.extend(self.Sample_Memory[untouched_layer])

        # Refreshed layers above the bottom one, highest first.
        for layer in range(layer_num, 0, -1):
            blocks = self.Sample_Memory[layer]
            newest = self._memory_lengths[layer] - 1
            for slot in range(newest):
                chained_assign(blocks[slot], blocks[slot + 1])
            # The summary is built outside the dependency chain, as before.
            lower_summary = tf.concat(1, self.Sample_Memory[layer - 1])
            fresh_block = tf.tanh(tf.nn.xw_plus_b(lower_summary,
                                                  self.Compress_Matrices[layer],
                                                  self.Compress_Biases[layer]))
            chained_assign(blocks[newest], fresh_block)

        # The bottom layer is always refreshed with the current input.
        bottom_blocks = self.Sample_Memory[0]
        bottom_newest = self._memory_lengths[0] - 1
        for slot in range(bottom_newest):
            chained_assign(bottom_blocks[slot], bottom_blocks[slot + 1])
        chained_assign(bottom_blocks[bottom_newest], compressed_inp)
        return ops
    
    def update_memory(self, compressed_inp, layer_num):
        """Pick, at graph run time, the highest memory layer due for a refresh.

        Layer `layer_num` is refreshed (together with every layer below it)
        when `self.counter` is a multiple of `self.abs_freqs[layer_num]`;
        otherwise the same test is applied to the layer below. Layer 0 is
        refreshed unconditionally.

        Args:
            compressed_inp: tensor forwarded to update_below_including_layer.
            layer_num: index of the highest layer to consider.

        Returns:
            The result of update_below_including_layer for layer 0, or the
            output of the `tf.cond` that selects between the two branches.
        """
        if layer_num <= 0:
            return self.update_below_including_layer(compressed_inp, 0)

        period = tf.constant(self.abs_freqs[layer_num], dtype=tf.int32)
        phase = tf.mod(self.counter, period)
        layer_is_due = tf.equal(phase, tf.constant(0, dtype=tf.int32))

        def refresh_up_to_this_layer():
            return self.update_below_including_layer(compressed_inp, layer_num)

        def try_lower_layer():
            return self.update_memory(compressed_inp, layer_num - 1)

        return tf.cond(layer_is_due, refresh_up_to_this_layer, try_lower_layer)

    
    def __init__(self,
                 batch_size,
                 vocabulary,
                 characters_positions_in_vocabulary,
                 memory_layers,             # integer
                 memory_nodes,              # number of nodes in vector on memory layer (list of length memory_layers)
                 memory_lengths,            # list of memory lengths for all memory levels (list of length memory_layers)
                 analisys_layers,           # integer
                 analisys_nodes,             # list of numbers of nodes on each analisys layer
                 frequency,              # number of times the highest memory layer fully updates during one epoch
                 init_bias,
                 threshold,    #{'fixed': True/False, 'min':  , 'max':  ,'epochs':  }
                 normal_run_prob,
                 swap_prob,
                 support,
                 train_text,
                 valid_text,
                 mean=0.,
                 stddev='default',
                 init_learning_rate=1.):
        """Store the hyperparameters and build the training and sampling graphs.

        Args:
            batch_size: number of rows of every training placeholder and of
                every Saved_Memory block.
            vocabulary: sequence of characters; its length is the width of the
                network input and output.
            characters_positions_in_vocabulary: stored on the instance; not
                otherwise used by this constructor.
            memory_layers: number of memory layers.
            memory_nodes: per-layer width of one memory block.
            memory_lengths: per-layer number of memory blocks.
            analisys_layers: number of analysis (ReLU) layers; one layer is
                built even when this is 1 or less.
            analisys_nodes: per-layer width of the analysis layers.
            frequency: multiplier; the number of unrollings is
                frequency * product of the first `memory_layers` entries of
                memory_lengths.
            init_bias: initial value of `trigger_bias`.
            threshold, normal_run_prob, swap_prob, support: accepted for
                interface compatibility; not referenced by this constructor.
            train_text: stored on the instance.
            valid_text: stored on the instance together with its length.
            mean: mean of the truncated-normal initialisers of the compress
                and analysis matrices.
            stddev: base standard deviation of the initialisers ('default'
                means 1.4); each matrix divides it by the square root of its
                number of input rows.
            init_learning_rate: starting value of the exponential decay
                learning-rate schedule.
        """
        self._results = list()
        self._batch_size = batch_size
        self._vocabulary = vocabulary
        self._vocabulary_size = len(vocabulary)
        self._characters_positions_in_vocabulary = characters_positions_in_vocabulary

        # Fix: the graph below reads self._init_bias, which was never assigned
        # in this constructor.
        self._init_bias = init_bias

        self._num_unrollings = memory_lengths[0]
        for num_layer in range(1, memory_layers):
            self._num_unrollings *= memory_lengths[num_layer]
        self._num_unrollings *= frequency

        self._memory_layers = memory_layers                 # number of memory layers
        self._memory_nodes = memory_nodes                   # number of nodes in vector on memory layer
        self._memory_lengths = memory_lengths               # number of vectors on memory layer
        self._analisys_layers = analisys_layers
        self._analisys_nodes = analisys_nodes
        self._frequency = frequency
        self._train_text = train_text
        self._valid_text = valid_text
        self._valid_size = len(valid_text)

        self._mean = mean
        self._stddev = 1.4 if stddev == 'default' else stddev
        self._init_learning_rate = init_learning_rate

        # Position of every field in the list built by _generate_metadata.
        self._indices = {"batch_size": 0,
                         "num_unrollings": 1,
                         "memory_layers": 2,
                         "memory_nodes": 3,
                         "memory_lengths": 4,
                         "analisys_layers": 5,
                         "analisys_nodes": 6,
                         "frequency": 7,
                         "half_life": 8,
                         "decay": 9,
                         "num_steps": 10,
                         "averaging_number": 11,
                         "init_mean": 12,
                         "init_stddev": 13,
                         "init_learning_rate": 14,
                         "type": 15}
        self._graph = tf.Graph()

        self._last_num_steps = 0
        # NOTE: variables are created in exactly the original order so that
        # their auto-generated names (and thus existing checkpoints) still match.
        with self._graph.as_default():
            with self._graph.device('/gpu:0'):

                # Trigger parameters: one row per vocabulary entry plus one per
                # node of every memory block. They are only created here.
                second_dim_of_trigger_matrix = self._vocabulary_size
                for memory_layer_idx, memory_length in enumerate(memory_lengths):
                    second_dim_of_trigger_matrix += memory_length * memory_nodes[memory_layer_idx]
                self.trigger_matrix = tf.Variable(tf.truncated_normal([second_dim_of_trigger_matrix, 1], stddev = 0.1))
                self.trigger_bias = tf.Variable([self._init_bias])

                # Memory carried over from one training step to the next.
                self.Saved_Memory = list()
                for layer_num in range(self._memory_layers):
                    layer_memory = list()
                    for _ in range(self._memory_lengths[layer_num]):
                        layer_memory.append(tf.Variable(tf.zeros([self._batch_size, self._memory_nodes[layer_num]]),
                                                        trainable=False))
                    self.Saved_Memory.append(layer_memory)

                # Compress parameters: layer 0 maps a one-hot character to a
                # memory block, layer k maps all blocks of layer k-1 to one
                # block of layer k.
                self.Compress_Matrices = list()
                self.Compress_Biases = list()
                self.Compress_Matrices.append(
                    tf.Variable(tf.truncated_normal([self._vocabulary_size, self._memory_nodes[0]],
                                                    mean=self._mean,
                                                    stddev=self._stddev / self._vocabulary_size**0.5),
                                trainable=True))
                self.Compress_Biases.append(
                    tf.Variable(tf.zeros([self._memory_nodes[0]]),
                                trainable=True))
                for layer_num in range(1, self._memory_layers):
                    compress_inputs = self._memory_lengths[layer_num-1]*self._memory_nodes[layer_num-1]
                    self.Compress_Matrices.append(
                        tf.Variable(tf.truncated_normal([compress_inputs, self._memory_nodes[layer_num]],
                                                        mean=self._mean,
                                                        stddev=self._stddev / compress_inputs**0.5),
                                    trainable=True))
                    self.Compress_Biases.append(
                        tf.Variable(tf.zeros([self._memory_nodes[layer_num]]),
                                    trainable=True))

                # Analysis parameters. The first layer sees the current
                # compressed input plus every memory block of every layer.
                self.Analisys_Matrices = list()
                self.Analisys_Biases = list()
                first_dim = self._memory_nodes[0]
                for layer_num in range(self._memory_layers):
                    first_dim += self._memory_lengths[layer_num] * self._memory_nodes[layer_num]
                # The first layer is always built; the former if/else created
                # it identically in both branches.
                self.Analisys_Matrices.append(
                    tf.Variable(
                        tf.truncated_normal([first_dim, self._analisys_nodes[0]],
                                            mean=self._mean,
                                            stddev=self._stddev / first_dim**0.5),
                        trainable=True))
                self.Analisys_Biases.append(
                    tf.Variable(tf.zeros([self._analisys_nodes[0]]),
                                trainable=True))
                for layer_num in range(1, self._analisys_layers):
                    self.Analisys_Matrices.append(
                        tf.Variable(
                            tf.truncated_normal([self._analisys_nodes[layer_num-1], self._analisys_nodes[layer_num]],
                                                mean=self._mean,
                                                stddev=self._stddev / self._analisys_nodes[layer_num-1]**0.5),
                            trainable=True))
                    self.Analisys_Biases.append(
                        tf.Variable(tf.zeros([self._analisys_nodes[layer_num]]),
                                    trainable=True))

                # classifier
                weights = tf.Variable(tf.truncated_normal([self._analisys_nodes[-1], self._vocabulary_size], stddev = self._stddev / self._analisys_nodes[-1]**0.5))
                bias = tf.Variable(tf.zeros([self._vocabulary_size]))

                # PLACEHOLDERS train data
                self._train_data = list()
                for _ in range(self._num_unrollings + 1):
                    self._train_data.append(
                        tf.placeholder(tf.float32, shape=[self._batch_size, self._vocabulary_size]))
                train_inputs = self._train_data[: self._num_unrollings]
                train_labels = self._train_data[1:]  # labels are inputs shifted by one time step.

                # global step
                self._global_step = tf.Variable(0, trainable=False)

                # Per-layer block sequence: saved blocks first, then the
                # blocks produced during this training step.
                memory = list()
                for saved_memory_layer in self.Saved_Memory:
                    memory.append(list(saved_memory_layer))

                compressed_inputs = tf.nn.tanh(
                    tf.nn.xw_plus_b(tf.concat(0, train_inputs),
                                    self.Compress_Matrices[0],
                                    self.Compress_Biases[0]))

                new_memory = list()
                new_memory.append(tf.split(0, self._num_unrollings, compressed_inputs))
                current_num_blocks = self._num_unrollings
                for num_layer in range(1, self._memory_layers):
                    block_size = self._memory_lengths[num_layer-1]
                    # Fix: floor division keeps the block count an integer
                    # (range() and tf.split need one) under true division too.
                    current_num_blocks //= block_size
                    for_next_layer = list()
                    for i in range(current_num_blocks):
                        for_next_layer.append(tf.concat(1, new_memory[num_layer-1][i*block_size : (i+1)*block_size]))
                    layer_compressed = tf.tanh(tf.nn.xw_plus_b(tf.concat(0, for_next_layer),
                                                               self.Compress_Matrices[num_layer],
                                                               self.Compress_Biases[num_layer]))
                    new_memory.append(tf.split(0, current_num_blocks, layer_compressed))

                for idx, new_memory_layer in enumerate(new_memory):
                    memory[idx].extend(new_memory_layer)

                # For every time step gather the window of blocks visible at
                # that step and run it through the analysis layers.
                X = list()
                offsets = [0]*self._memory_layers
                for i in range(self._num_unrollings):
                    offsets[0] = i
                    for layer_num in range(1, self._memory_layers):
                        # Fix: floor division, the offsets are slice indices.
                        offsets[layer_num] = offsets[layer_num-1] // self._memory_lengths[layer_num-1]
                    X_t = list()
                    # Layer 0 takes one extra block: the current input itself.
                    X_t.extend(memory[0][offsets[0]:offsets[0]+self._memory_lengths[0]+1])
                    for layer_num in range(1, self._memory_layers):
                        X_t.extend(memory[layer_num][offsets[layer_num]:offsets[layer_num]+self._memory_lengths[layer_num]])
                    X_t = tf.concat(1, X_t)
                    for Analisys_Matrix, Analisys_Bias in zip(self.Analisys_Matrices, self.Analisys_Biases):
                        X_t = tf.nn.relu(tf.nn.xw_plus_b(X_t, Analisys_Matrix, Analisys_Bias))
                    X.append(X_t)

                self.X = tf.concat(0, X)

                # Once all outputs are computed, keep the most recent blocks
                # of every layer for the next training step.
                with tf.control_dependencies(X):
                    save_list = list()
                    for layer_num in range(self._memory_layers):
                        for i in range(self._memory_lengths[layer_num]):
                            save_list.append(tf.assign(self.Saved_Memory[layer_num][-1-i], memory[layer_num][-1-i]))

                # skip operation
                self._skip_operation = tf.group(*save_list)

                with tf.control_dependencies(save_list):
                    # Classifier.
                    logits = tf.nn.xw_plus_b(self.X, weights, bias)
                    # loss
                    self._loss = tf.reduce_mean(
                        tf.nn.softmax_cross_entropy_with_logits(
                            logits, tf.concat(0, train_labels)))

                # Optimizer.

                # PLACEHOLDERS half life and decay
                self._half_life = tf.placeholder(tf.int32)
                self._decay = tf.placeholder(tf.float32)
                # learning rate
                self._learning_rate = tf.train.exponential_decay(self._init_learning_rate,
                                                                 self._global_step,
                                                                 self._half_life,
                                                                 self._decay,
                                                                 staircase=True)
                optimizer = tf.train.GradientDescentOptimizer(self._learning_rate)
                gradients, v = zip(*optimizer.compute_gradients(self._loss))
                gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
                # optimizer
                self._optimizer = optimizer.apply_gradients(zip(gradients, v), global_step=self._global_step)
                # train prediction
                self._train_prediction = tf.nn.softmax(logits)

                # Memory used while sampling / validating: one row per block.
                self.Sample_Memory = list()
                for layer_num in range(self._memory_layers):
                    layer_memory = list()
                    for block_num in range(self._memory_lengths[layer_num]):
                        layer_memory.append(tf.Variable(tf.zeros([1, self._memory_nodes[layer_num]]), trainable=False))
                    self.Sample_Memory.append(layer_memory)

                # Sampling and validation eval: batch 1, no unrolling.
                # abs_freqs[k] is the number of steps between refreshes of
                # memory layer k (product of the lengths of the layers below).
                self.abs_freqs = [1]
                for i in range(1, self._memory_layers):
                    self.abs_freqs.append(self._memory_lengths[i-1] * self.abs_freqs[i-1])

                # PLACEHOLDER sample input
                self._sample_input = tf.placeholder(tf.float32, shape=[1, self._vocabulary_size])

                self.counter = tf.Variable(0, trainable=False)
                reset_list = list()
                for layer_num in range(self._memory_layers):
                    for block_num in range(self._memory_lengths[layer_num]):
                        reset_list.append(tf.assign(self.Sample_Memory[layer_num][block_num], tf.zeros([1, self._memory_nodes[layer_num]])))
                reset_list.append(self.counter.assign(tf.constant(0)))
                # reset sample state
                self._reset_sample_state = tf.group(*reset_list)
                self.compressed_inp = tf.tanh(tf.nn.xw_plus_b(self._sample_input, self.Compress_Matrices[0], self.Compress_Biases[0]))
                self.sample_output = self.analize(self.compressed_inp)

                # The memory is refreshed only after the output for the
                # current input has been computed from the old memory.
                with tf.control_dependencies([self.sample_output]):
                    sample_save_list = self.update_memory(self.compressed_inp, self._memory_layers-1)
                with tf.control_dependencies(sample_save_list):
                    sample_save_list.append(tf.assign_add(self.counter, tf.constant(1)))

                with tf.control_dependencies(sample_save_list):
                    # sample prediction
                    self._sample_prediction = tf.nn.softmax(tf.nn.xw_plus_b(self.sample_output, weights, bias))
                    self.saved_sample_memory = [memory_layer + 0.00000001 for memory_layer in self.Sample_Memory[0]]

                # saver
                self.saver = tf.train.Saver(max_to_keep=None)
                            
                        
    
    def _generate_metadata(self, half_life, decay, num_averaging_iterations):
        metadata = list()
        metadata.append(self._batch_size)
        metadata.append(self._num_unrollings)
        metadata.append(self._memory_layers)
        metadata.append(self._memory_nodes)
        metadata.append(self._memory_lengths)
        metadata.append(self._analisys_layers)
        metadata.append(self._analisys_nodes)   
        metadata.append(self._frequency)
        metadata.append(half_life)
        metadata.append(decay)
        metadata.append(self._last_num_steps)
        metadata.append(num_averaging_iterations)
        metadata.append(self._mean)
        metadata.append(self._stddev)
        metadata.append(self._init_learning_rate)
        metadata.append('delay_deterministic')
        return metadata
