In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so that edits to the imported project packages
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [39]:
import random

import numpy as np
import tensorflow as tf

from helpers.dataset import TensorFlowDataset, import_zip_file
from helpers.evaluate import TensorFlowModelEvaluator
from models.rnn import LSTM
from vanilla_neural_nets.recurrent_neural_network.training_data import WordLevelRNNTrainingDataBuilder

# Load the corpus and build the training, validation and test datasets

In [3]:
# Path of the zip archive handed to import_zip_file.
PATH = '../data/text8.zip'

# The number of output classes is tied to the vocabulary size.
VOCABULARY_SIZE = 1000
N_CLASSES = VOCABULARY_SIZE

# One mini-batch holds BATCH_SIZE sequences of TIME_STEPS tokens each.
TIME_STEPS = 10
BATCH_SIZE = 128

# Total number of sequences cut from the corpus: 50 mini-batches' worth.
TRAINING_EXAMPLES = 50 * BATCH_SIZE

In [4]:
# Read the corpus from the zip archive; `n_characters` is passed as one
# million (presumably a cap on how much text is read -- confirm in
# helpers.dataset.import_zip_file).
corpus = import_zip_file(
    path=PATH,
    n_characters=10 ** 6,
)

In [5]:
# Build word-level training data from the corpus. Each stage gets its own
# name so that `training_data` only ever refers to the final 2-D index matrix.
built_training_data = WordLevelRNNTrainingDataBuilder.build(
    corpus=corpus,
    vocabulary_size=VOCABULARY_SIZE,
)

# Every row carries TIME_STEPS + 1 tokens: the downstream split uses the first
# TIME_STEPS columns as inputs and columns 1.. as labels (inputs shifted by one).
tokens_per_row = TIME_STEPS + 1
n_tokens_needed = TRAINING_EXAMPLES * tokens_per_row

# Take the leading tokens of the first index sequence produced by the builder.
token_indices = np.array(
    built_training_data.training_data_as_indices[0][:n_tokens_needed]
)

# Fail with an actionable message instead of numpy's generic reshape error
# when the corpus did not yield enough tokens (same exception type: ValueError).
if token_indices.size != n_tokens_needed:
    raise ValueError(
        'Expected {} token indices to fill {} rows of {} tokens, but got {}; '
        'use a larger corpus or lower TRAINING_EXAMPLES / TIME_STEPS.'.format(
            n_tokens_needed, TRAINING_EXAMPLES, tokens_per_row, token_indices.size
        )
    )

# Final matrix: one row per example, shape (TRAINING_EXAMPLES, TIME_STEPS + 1).
training_data = token_indices.reshape(TRAINING_EXAMPLES, tokens_per_row)

In [6]:
def _dataset_from_rows(sequences, first_row, last_row):
    """Wrap rows [first_row, last_row) of `sequences` in a TensorFlowDataset.

    The inputs are the first TIME_STEPS columns of each row and the labels are
    the same row from column 1 onwards, i.e. the inputs shifted by one token.
    Passing None for either bound leaves that side of the slice open.
    """
    rows = sequences[first_row:last_row]
    return TensorFlowDataset(data=rows[:, :TIME_STEPS], labels=rows[:, 1:])


# Split boundaries, expressed in whole mini-batches: the first 48 batches are
# used for training, the next one for validation and the remainder for testing.
train_end = BATCH_SIZE * 48
validation_end = BATCH_SIZE * 49

training_dataset = _dataset_from_rows(training_data, None, train_end)

validation_dataset = _dataset_from_rows(training_data, train_end, validation_end)

test_dataset = _dataset_from_rows(training_data, validation_end, None)

In [7]:
# Model / optimiser hyperparameters passed to the LSTM constructor.
HIDDEN_STATE_SIZE = 100
LEARNING_RATE = 0.1

# Upper bound of the training-loop counter. The training cell only acts on
# every 10**7-th counter value, so this amounts to 10 optimisation steps.
N_EPOCHS = 10 ** 8

# Define graph

In [40]:
# Build the model inside its own graph so it can be bound to a dedicated session.
graph = tf.Graph()

with graph.as_default():

    # Inputs and labels are both fed as int32 tensors of shape
    # [BATCH_SIZE, TIME_STEPS]; the inputs placeholder is created first.
    batch_shape = [BATCH_SIZE, TIME_STEPS]
    inputs_placeholder = tf.placeholder(dtype=tf.int32, shape=batch_shape)
    labels_placeholder = tf.placeholder(dtype=tf.int32, shape=batch_shape)

    # Symbolic dataset: same container type as the numpy-backed datasets, but
    # holding placeholders instead of concrete arrays.
    dataset = TensorFlowDataset(
        data=inputs_placeholder,
        labels=labels_placeholder,
    )

    model = LSTM(
        dataset=dataset,
        n_classes=N_CLASSES,
        hidden_state_size=HIDDEN_STATE_SIZE,
        learning_rate=LEARNING_RATE,
    )

In [41]:
# Work is only performed on every EPOCH_STRIDE-th value of the loop counter.
# Stepping the range by that stride visits exactly those values, instead of
# spinning through N_EPOCHS iterations and discarding all but a handful of them.
EPOCH_STRIDE = int(1e7)

with tf.Session(graph=graph) as session:

    # NOTE(review): tf.initialize_all_variables is deprecated in later
    # TensorFlow releases in favour of tf.global_variables_initializer;
    # switch once the installed version is confirmed to provide it.
    session.run(tf.initialize_all_variables())

    evaluator = TensorFlowModelEvaluator(
        model=model,
        session=session,
        validation_dataset=validation_dataset,
        test_dataset=test_dataset
    )

    for epoch in range(0, N_EPOCHS, EPOCH_STRIDE):

        # Draw one mini-batch from the training set and hand it to the
        # evaluator (presumably a single optimisation step -- see
        # helpers.evaluate.TensorFlowModelEvaluator.optimize).
        mini_batch_data, mini_batch_labels = training_dataset.sample(BATCH_SIZE)
        mini_batch_dataset = TensorFlowDataset(data=mini_batch_data, labels=mini_batch_labels)
        evaluator.optimize(mini_batch_dataset)

        # `epoch` is always a multiple of EPOCH_STRIDE here, so the integer
        # division yields the running step number 0, 1, 2, ...
        print('Epoch: {}'.format(epoch // EPOCH_STRIDE))
        print('Train Loss: {:.3f}'.format(evaluator.training_loss))
        print('Validation Loss: {:.3f}\n'.format(evaluator.validation_loss))

    # Report the held-out loss once, after the last optimisation step.
    print('Test Loss: {0:.3f}\n'.format(evaluator.test_loss))

Epoch: 0
Train Loss: 7.772
Validation Loss: 7.773

Epoch: 1
Train Loss: 7.754
Validation Loss: 7.745

Epoch: 2
Train Loss: 7.672
Validation Loss: 7.721

Epoch: 3
Train Loss: 7.730
Validation Loss: 7.698

Epoch: 4
Train Loss: 7.691
Validation Loss: 7.675

Epoch: 5
Train Loss: 7.623
Validation Loss: 7.653

Epoch: 6
Train Loss: 7.613
Validation Loss: 7.631

Epoch: 7
Train Loss: 7.570
Validation Loss: 7.607

Epoch: 8
Train Loss: 7.564
Validation Loss: 7.581

Epoch: 9
Train Loss: 7.546
Validation Loss: 7.554

Test Loss: 7.479

