In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import random
import string
import tensorflow as tf
from tensorflow.python.ops.rnn_cell import _linear
import zipfile
from six.moves import range
from six.moves.urllib.request import urlretrieve
import collections
import matplotlib.pyplot as plt
import codecs
import time
import os
import gc
from six.moves import cPickle as pickle

from plot_module import text_plot
from plot_module import structure_vocabulary_plots
from plot_module import ComparePlots

from model_module import maybe_download
from model_module import read_data
from model_module import check_not_one_byte
from model_module import id2char
from model_module import char2id
from model_module import BatchGenerator
from model_module import characters
from model_module import batches2string
from model_module import logprob
from model_module import sample_distribution
from model_module import MODEL

In [2]:
# Build the filtered corpus (only characters whose code point is below 256),
# or load it from the cache file 'enwik8_filtered' left by a previous run.
corpus_was_rebuilt = not os.path.exists('enwik8_filtered')
if corpus_was_rebuilt:
    # The archive name used to be assigned only when 'enwik8' was missing, so
    # an existing 'enwik8' next to a missing cache raised NameError on the
    # read_data call below.  Always obtain the name from maybe_download.
    # NOTE(review): presumably maybe_download skips the transfer when
    # 'enwik8.zip' is already on disk -- confirm in model_module.
    filename = maybe_download('enwik8.zip', 36445475)
    full_text = read_data(filename)
    kept_characters = list()
    for i, character in enumerate(full_text):
        # Progress report roughly every ten million characters.
        if (i+1) % 10000000 == 0:
            print("%s characters are filtered" % i)
        if ord(character) < 256:
            kept_characters.append(character)
    text = u"".join(kept_characters)
    # Drop the large intermediates; only `text` is needed from here on.
    del kept_characters
    del full_text
else:
    # `with` closes the handle even if reading or decoding fails.
    with open('enwik8_filtered', 'r') as f:
        text = f.read().decode('utf8')

# Character statistics are needed by later cells whichever way `text` was
# obtained, so they are computed once here instead of in both branches.
(not_one_byte_counter,
 min_character_order_index,
 max_character_order_index,
 number_of_characters,
 present_characters_indices) = check_not_one_byte(text)

print("number of not one byte characters: ", not_one_byte_counter)
print("min order index: ", min_character_order_index)
print("max order index: ", max_character_order_index)
print("total number of characters: ", number_of_characters)

if corpus_was_rebuilt:
    # Written after the statistics, as before, so the cache only appears once
    # the filtered text has been inspected.
    with open('enwik8_filtered', 'w') as f:
        f.write(text.encode('utf8'))

number of not one byte characters:  0
min order index:  9
max order index:  255
total number of characters:  196


In [3]:
# Hold out `valid_size` characters starting at `offset` for validation and
# train on everything after them; the first `offset` characters of `text`
# end up in neither split.
offset = 20000
valid_size = 10000
valid_end = offset + valid_size

valid_text = text[offset:valid_end]
train_text = text[valid_end:]
train_size = len(train_text)

# Show the size of each split together with its first 64 characters.
for split_size, split_text in ((train_size, train_text),
                               (valid_size, valid_text)):
    print(split_size, split_text[:64])

99350000 n in the February 1934 riots, anarchists divided over a 'united 
10000 ture in Mutual Aid: A Factor of Evolution (1897). Subsequent ana


In [4]:
# Build two lookup tables over code points 0..255:
#   vocabulary[k]                         -> character stored at vocabulary slot k
#   characters_positions_in_vocabulary[c] -> slot of code point c, or -1 when
#                                            that code point is not present
vocabulary_size = number_of_characters
vocabulary = list()
characters_positions_in_vocabulary = list()

for code_point in range(256):
    if present_characters_indices[code_point]:
        # The next free slot is the current length of the vocabulary.
        characters_positions_in_vocabulary.append(len(vocabulary))
        vocabulary.append(unichr(code_point))
    else:
        characters_positions_in_vocabulary.append(-1)
# Number of slots handed out in the loop above.
character_position_in_vocabulary = len(vocabulary)

string_vocabulary = u"".join(vocabulary[slot] for slot in range(vocabulary_size))
print("Vocabulary: ", string_vocabulary)

# Spot-check both directions of the mapping on a few fixed samples.
sample_ids = tuple(char2id(sample_char, characters_positions_in_vocabulary)
                   for sample_char in (u'a', u'z', u' '))
print("char2id(u'a') = %s,  char2id(u'z') = %s,  char2id(u' ') = %s" % sample_ids)

sample_chars = tuple(id2char(sample_id, vocabulary)
                     for sample_id in (78, 156, 140))
print("id2char(78) = %s,  id2char(156) = %s,  id2char(140) = %s" % sample_chars)


Vocabulary:  	
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
char2id(u'a') = 67,  char2id(u'z') = 92,  char2id(u' ') = 2
id2char(78) = l,  id2char(156) = Ø,  id2char(140) = È


In [5]:
# Smoke test for BatchGenerator: draw two consecutive batches from a training
# generator and from a validation generator and print them as strings.
batch_size_test = 64
num_unrollings_test = 10

train_batches_test = BatchGenerator(
    train_text,
    batch_size_test,
    vocabulary_size,
    characters_positions_in_vocabulary,
    num_unrollings_test)
# Validation generator: batch size 1, a single unrolling.
valid_batches_test = BatchGenerator(
    valid_text,
    1,
    vocabulary_size,
    characters_positions_in_vocabulary,
    1)

for generator_under_test in (train_batches_test, train_batches_test,
                             valid_batches_test, valid_batches_test):
    print(batches2string(generator_under_test.next(), vocabulary))

[u'n in the Fe', u".\n* ''[[Con", u"oldier's so", u'\xf6hm-Bawerk ', u'tification,', u' warrior, a', u'uot; would ', u' 115       ', u'orbata acid', u'>\n      <co', u'ate, the co', u'other natio', u'ing the his', u'et bromine;', u' Christ&quo', u' average]] ', u' their home', u'ks and a ri', u'on]]/[[Joel', u' new era fo', u'aph that th', u' known as t', u's from the ', u'ast majorit', u'trips, thou', u'ent of regi', u'metric aspe', u'd named by ', u'Z</timestam', u'tude of 1 c', u'!&quot; [ht', u'o ==\n\n* [[D', u'[[Belarusia', u'iton]], Rus', u'ccessful si', u'es his theo', u' explain th', u' the South.', u'sing with a', u'd ball is h', u'e could des', u'[Friedrich ', u'th virtuall', u' foreign ac', u'variant in ', u'd and watch', u"t; ''[[Foot", u' became Lea', u'stern Europ', u' </contribu', u'ese terms n', u'arting in t', u'gence of th', u'of the cons', u'uickly swit', u', thus star', u'lly develop', u'g the offic', u'esult, the ', u'red HMMWV. ', u'ament is de', u'University ', 

In [6]:
# Maps a hyperparameter name to its position in a run's metadata list; the
# numbering follows the order in which reverse_rnn._generate_metadata appends
# its entries.
indices_GL = dict(
    (field_name, position)
    for position, field_name in enumerate(["batch_size",
                                           "num_unrollings",
                                           "num_layers",
                                           "num_nodes",
                                           "half_life",
                                           "decay",
                                           "num_steps",
                                           "averaging_number",
                                           "type"]))


class reverse_rnn(MODEL):
    """Stacked tanh RNN whose prediction is read from the bottom layer.

    At every time step each layer is recomputed from the *previous-step*
    states of the layer below it (or the network input, for layer 0), of
    itself, and of the layer above it.  The top layer has no layer above and
    only sees the layer below and itself.  The new state of layer 0 is the
    vector handed to the softmax classifier.

    Training/evaluation entry points are not defined here; they are inherited
    from MODEL (model_module).
    """

    def layer(self,
              inp_t_or_state_down_t_minus_1,
              state_t_minus_1,
              state_up_t_minus_1,
              layer_num):
        """Compute the new state of a layer that has a layer above it.

        Args:
            inp_t_or_state_down_t_minus_1: network input (for layer 0) or the
                previous-step state of the layer below.
            state_t_minus_1: previous-step state of this layer.
            state_up_t_minus_1: previous-step state of the layer above.
            layer_num: index into self.Matrices / self.Biases.

        Returns:
            tanh(concat(inputs) * Matrices[layer_num] + Biases[layer_num]).
        """
        X_t = tf.concat(1, [inp_t_or_state_down_t_minus_1,
                            state_t_minus_1,
                            state_up_t_minus_1])
        RES = tf.matmul(X_t, self.Matrices[layer_num]) + self.Biases[layer_num]
        state_t = tf.tanh(RES)
        return state_t

    def last_layer(self,
                   state_down_t_minus_1,
                   state_t_minus_1):
        """Compute the new state of the top layer (no layer above it).

        Args:
            state_down_t_minus_1: previous-step state of the layer below.
            state_t_minus_1: previous-step state of the top layer.

        Returns:
            tanh(concat(inputs) * Matrices[-1] + Biases[-1]).
        """
        X_t = tf.concat(1, [state_down_t_minus_1,
                            state_t_minus_1])
        RES = tf.matmul(X_t, self.Matrices[-1]) + self.Biases[-1]
        state_t = tf.tanh(RES)
        return state_t

    def iteration(self, inp, state):
        """Advance every layer by one time step.

        All layers are updated from the states in `state` (the previous
        step); no layer sees a state computed during this call.

        Args:
            inp: input tensor for this time step.
            state: list with one state tensor per layer (at least two).

        Returns:
            (out, new_state): `out` is the new state of layer 0, `new_state`
            is the list of new states for all layers.
        """
        num_layers = len(state)
        new_state = list()
        inter_state = self.layer(inp,
                                 state[0],
                                 state[1],
                                 0)
        # The network output is taken from the bottom layer.
        out = inter_state
        new_state.append(inter_state)
        # Middle layers (none when num_layers == 2: the range is empty).
        for i in range(num_layers-2):
            inter_state = self.layer(state[i],
                                     state[i+1],
                                     state[i+2],
                                     i+1)
            new_state.append(inter_state)
        inter_state = self.last_layer(state[-2],
                                      state[-1])
        new_state.append(inter_state)
        return out, new_state

    def __init__(self,
                 batch_size,
                 vocabulary,
                 characters_positions_in_vocabulary,
                 num_unrollings,
                 num_layers,
                 num_nodes,
                 train_text,
                 valid_text,
                 mean=0.,
                 stddev=0.1,
                 shift=0.):
        """Store the hyperparameters and build the TensorFlow graph.

        Args:
            batch_size: number of sequences per training batch.
            vocabulary: list of characters; its length is the one-hot width.
            characters_positions_in_vocabulary: lookup table stored on the
                instance (not used inside this class).
            num_unrollings: number of time steps unrolled for training.
            num_layers: number of recurrent layers; must be at least 2.
            num_nodes: list with the state width of each layer.
            train_text: training text, stored on the instance.
            valid_text: validation text, stored on the instance.
            mean: mean of the truncated normal used for the layer matrices.
            stddev: standard deviation of that truncated normal.
            shift: initial value of every layer bias.

        Raises:
            ValueError: if num_layers is smaller than 2.
        """
        # Layer 0 reads the state of layer 1 and the top layer reads the layer
        # below it, so a single-layer network cannot be built.  Previously this
        # surfaced as an IndexError in the middle of graph construction.
        if num_layers < 2:
            raise ValueError("reverse_rnn needs at least 2 layers, got %s" % num_layers)
        self._results = list()
        self._batch_size = batch_size
        self._vocabulary = vocabulary
        self._vocabulary_size = len(vocabulary)
        self._characters_positions_in_vocabulary = characters_positions_in_vocabulary
        self._num_unrollings = num_unrollings
        self._num_layers = num_layers
        self._num_nodes = num_nodes
        self._train_text = train_text
        self._valid_text = valid_text
        self._valid_size = len(valid_text)
        # Position of each entry in the list built by _generate_metadata.
        self._indices = {"batch_size": 0,
                         "num_unrollings": 1,
                         "num_layers": 2,
                         "num_nodes": 3,
                         "half_life": 4,
                         "decay": 5,
                         "num_steps": 6,
                         "averaging_number": 7,
                         "type": 8}
        self._graph = tf.Graph()

        self._last_num_steps = 0
        # NOTE: variables below are created in a fixed order; keep it, since
        # they carry auto-generated names that the Saver stores in checkpoints.
        with self._graph.as_default():
            with self._graph.device('/gpu:0'):
                self.Matrices = list()
                self.Biases = list()
                # Layer 0: [input, own state, state of layer 1] -> own state.
                self.Matrices.append(tf.Variable(tf.truncated_normal([self._vocabulary_size + self._num_nodes[0] + self._num_nodes[1],
                                                                      self._num_nodes[0]],
                                                                     mean=mean, stddev=stddev)))
                self.Biases.append(tf.Variable([shift for _ in range(self._num_nodes[0])]))
                # Middle layers i+1: [state below, own state, state above] -> own state.
                if self._num_layers > 2:
                    for i in range(self._num_layers - 2):
                        self.Matrices.append(tf.Variable(tf.truncated_normal([self._num_nodes[i] + self._num_nodes[i+1] + self._num_nodes[i+2],
                                                                              self._num_nodes[i+1]],
                                                                             mean=mean, stddev=stddev)))
                        self.Biases.append(tf.Variable([shift for _ in range(self._num_nodes[i+1])]))
                # Top layer: [state below, own state] -> own state.
                self.Matrices.append(tf.Variable(tf.truncated_normal([self._num_nodes[-1] + self._num_nodes[-2],
                                                                      self._num_nodes[-1]],
                                                                     mean=mean, stddev=stddev)))
                self.Biases.append(tf.Variable([shift for _ in range(self._num_nodes[-1])]))

                # classifier
                # The classifier consumes the state of layer 0 (see iteration),
                # so its input width is num_nodes[0].  It used to be sized with
                # num_nodes[-1], which only matched when the first and last
                # layers had the same width.
                weights = tf.Variable(tf.truncated_normal([self._num_nodes[0], self._vocabulary_size], stddev = 0.1))
                bias = tf.Variable(tf.zeros([self._vocabulary_size]))

                # PLACEHOLDERS train data
                self._train_data = list()
                for _ in range(self._num_unrollings + 1):
                    self._train_data.append(
                        tf.placeholder(tf.float32, shape=[self._batch_size, self._vocabulary_size]))
                train_inputs = self._train_data[: self._num_unrollings]
                train_labels = self._train_data[1:]  # labels are inputs shifted by one time step.

                # Unrolled recurrent loop; the state is kept in non-trainable
                # variables so it can be carried over between training steps.
                saved_state = list()
                for i in range(self._num_layers):
                    saved_state.append(tf.Variable(tf.zeros([self._batch_size, self._num_nodes[i]]), trainable=False))

                outputs = list()
                state = saved_state
                for inp in train_inputs:
                    output, state = self.iteration(inp, state)
                    outputs.append(output)

                save_list = list()
                for i in range(self._num_layers):
                    save_list.append(saved_state[i].assign(state[i]))

                # skip operation: advances the saved state without computing the loss
                self._skip_operation = tf.group(*save_list)

                # The final state is written back before the loss is evaluated.
                with tf.control_dependencies(save_list):
                    # Classifier.
                    logits = tf.nn.xw_plus_b(tf.concat(0, outputs), weights, bias)
                    # loss
                    self._loss = tf.reduce_mean(
                        tf.nn.softmax_cross_entropy_with_logits(
                        logits, tf.concat(0, train_labels)))
                # Optimizer.
                self._global_step = tf.Variable(0)
                # PLACEHOLDERS half life and decay
                self._half_life = tf.placeholder(tf.int32)
                self._decay = tf.placeholder(tf.float32)
                # learning rate: starts at 0.5 and is multiplied by `decay`
                # once per `half_life` steps (staircase schedule).
                self._learning_rate = tf.train.exponential_decay(0.5,
                                                                 self._global_step,
                                                                 self._half_life,
                                                                 self._decay,
                                                                 staircase=True)
                optimizer = tf.train.GradientDescentOptimizer(self._learning_rate)
                gradients, v = zip(*optimizer.compute_gradients(self._loss))
                # Clip by global norm before the update is applied.
                gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
                # optimizer
                self._optimizer = optimizer.apply_gradients(zip(gradients, v), global_step=self._global_step)
                # train prediction
                self._train_prediction = tf.nn.softmax(logits)

                # Sampling and validation eval: batch 1, no unrolling.
                saved_sample_state = list()
                for i in range(self._num_layers):
                    saved_sample_state.append(tf.Variable(tf.zeros([1, self._num_nodes[i]]), trainable=False))
                # PLACEHOLDER sample input
                self._sample_input = tf.placeholder(tf.float32, shape=[1, self._vocabulary_size])

                reset_list = list()
                for i in range(self._num_layers):
                    reset_list.append(saved_sample_state[i].assign(tf.zeros([1, self._num_nodes[i]])))

                # reset sample state
                self._reset_sample_state = tf.group(*reset_list)

                sample_output, sample_state = self.iteration(self._sample_input, saved_sample_state)

                sample_save_list = list()
                for i in range(self._num_layers):
                    sample_save_list.append(saved_sample_state[i].assign(sample_state[i]))

                # The sample state is stored before the prediction is produced.
                with tf.control_dependencies(sample_save_list):
                    # sample prediction
                    self._sample_prediction = tf.nn.softmax(tf.nn.xw_plus_b(sample_output, weights, bias))

                # saver
                self.saver = tf.train.Saver(max_to_keep=None)

    def _generate_metadata(self, half_life, decay, num_averaging_iterations):
        """Return the run description as a list ordered as in self._indices.

        Args:
            half_life: stored at position "half_life".
            decay: stored at position "decay".
            num_averaging_iterations: stored at position "averaging_number".

        Returns:
            [batch_size, num_unrollings, num_layers, num_nodes, half_life,
             decay, last number of steps, num_averaging_iterations,
             'reverse_rnn'].
        """
        metadata = list()
        metadata.append(self._batch_size)
        metadata.append(self._num_unrollings)
        metadata.append(self._num_layers)
        metadata.append(self._num_nodes)
        metadata.append(half_life)
        metadata.append(decay)
        metadata.append(self._last_num_steps)
        metadata.append(num_averaging_iterations)
        metadata.append('reverse_rnn')
        return metadata
  

In [23]:
# Three-layer network, 256 units per layer, trained on batches of 64
# sequences unrolled for 10 steps.
model = reverse_rnn(batch_size=64,
                    vocabulary=vocabulary,
                    characters_positions_in_vocabulary=characters_positions_in_vocabulary,
                    num_unrollings=10,
                    num_layers=3,
                    num_nodes=[256, 256, 256],
                    train_text=train_text,
                    valid_text=valid_text)

In [24]:
# Train the model built in the previous cell, printing intermediate results.
# The positional arguments follow the signature of `run`, which is inherited
# from MODEL (model_module) and is not visible in this notebook.
model.run(
    80,
    0.95,
    8000,
    500,
    1.5,
    50,
    200,
    print_intermediate_results=True,
)

Initialized
Average loss at step 0: 6.167276 learning rate: 0.500000
Percentage_of correct: 0.00%
´ô
: àt4£) ¢ëå^wïªzZÝ¥E ï0":îiÔ0Wh8å?÷]ELx ÿÎÿÏX^¼¾·Ò îbPöø¯?CÕ%Îø\çîMÊtzå¢vaaÚ
CºíØ]«ï2Ö²¾,Åkzf³¼Òêm?å>àwtÃt¬xy .[ßÐ	Ióá³h$ãÛoíZð/HÍÑ¼Þ½BRîµBgó.ËØc×Ôt-ªA®zoìæ
Xòæïöýæs0¹çîªe´Á¥¡{+Þ¦é¾(Ñ§Þ¸GºyonvûùÂgKG²0SètÈæÁ1±Íý§&æ4.G4îþï¸Tq[¼àwÁ«Í¼àV¾_Ë
½ítuáWçìÌ%oïÎÁÌ¢qT\¯3ä·Q´Öü-qQÍ,,äqêÅ\tY(íèTØB}I úæÊþï7gå0W¬DçÌ^ri:¤ÄHo)ÍÊÓåL²»ý
£¶R$1`$¾ÉÇq-æË ItWPzdÞ7ÇÉñ÷ïªâØ$@»O	õ§/¥@YqRTðL¹þY;§¯jlÔÍ? TßÁfuSÒoÑÃêNðñfyyt¤VT
Validation percentage of correct: 3.22%

Average loss at step 400000: 4.261978 learning rate: 0.331710
Percentage_of correct: 7.95%
Validation percentage of correct: 6.83%

Average loss at step 800000: 3.827640 learning rate: 0.220063
Percentage_of correct: 9.20%
Validation percentage of correct: 9.60%

Average loss at step 1200000: 3.644975 learning rate: 0.145994
Percentage_of correct: 10.22%
Validation percentage of correct: 10.53%

Average loss at step 1600000: 3.562257 learning rate: 0.0

In [7]:
# Train `iter_num` independently initialised two-layer models with identical
# settings and gather their results in results_GL.
iter_num = 3
results_GL = list()
for run_index in range(iter_num):
    model = reverse_rnn(batch_size=64,
                        vocabulary=vocabulary,
                        characters_positions_in_vocabulary=characters_positions_in_vocabulary,
                        num_unrollings=30,
                        num_layers=2,
                        num_nodes=[128, 128],
                        train_text=train_text,
                        valid_text=valid_text)
    # Positional arguments follow MODEL.simple_run (model_module).
    model.simple_run(200,
                     'reverse_rnn/estimate_variables/average#%s' % run_index,
                     20000,
                     4000,
                     5000,        #learning has a chance to be stopped after every block of steps
                     30,
                     0.9,
                     3,
                     fixed_num_steps=True)
    results_GL += model._results
    # Tear the model down before the next one is built.
    model.destroy()
    del model

Number of steps = 20000     Percentage = 46.98%     Time = 656s     Learning rate = 0.0212
Number of steps = 20000     Percentage = 47.07%     Time = 656s     Learning rate = 0.0212
Number of steps = 20000     Percentage = 47.06%     Time = 664s     Learning rate = 0.0212
