In [1]:
import os
import numpy as np
import import_ipynb
import Project
np.random.seed(1234)
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from utils import CharacterTable, transform
from utils import batch, datagen, decode_sequences
from utils import read_text, tokenize
from model import seq2seq
# --- Experiment configuration ------------------------------------------
# Every value below is read by the training driver further down the file.

# Model / optimisation settings.
hidden_size = 512        # first argument to seq2seq()
nb_epochs = 100          # number of iterations of the outer training loop
train_batch_size = 128   # batch size for the training generators
val_batch_size = 256     # batch size for the validation generators

# Data corruption and decoding behaviour.
error_rate = 0.8         # forwarded to transform(); presumably the typo-injection rate -- TODO confirm in utils
sample_mode = 'argmax'   # forwarded to decode_sequences()
reverse = True           # forwarded to batch() for encoder inputs and to decode_sequences()

# Corpus location.
# NOTE(review): machine-specific absolute Windows path; adjust per machine.
data_path = r'C:\Users\ganes\Music\data'
train_books = [
    'nietzsche.txt',
    'pride_and_prejudice.txt',
    'shakespeare.txt',
    'war_and_peace.txt',
]
val_books = ['wonderland.txt']
if __name__ == '__main__':
    # Training driver: builds the character vocabularies from the training
    # books, compiles the seq2seq model, then trains one epoch at a time,
    # printing a few decoded validation samples and saving a checkpoint
    # after every epoch.
    # The code is intentionally kept flat (not wrapped in a function) so
    # that every name stays at module level, as in the original script.

    # Prepare training data.
    text = read_text(data_path, train_books)
    vocab = tokenize(text)
    # De-duplicate the tokens and drop empty ones.
    vocab = list(filter(None, set(vocab)))
    # NOTE(review): the +2 presumably leaves room for start/end marker
    # characters added by transform() -- confirm in utils.
    maxlen = max(len(token) for token in vocab) + 2
    # Unshuffled pass: used for the preview below and to collect the
    # input/target character sets.
    train_encoder, train_decoder, train_target = transform(
        vocab, maxlen, error_rate=error_rate, shuffle=False)
    print(train_encoder[:10])
    print(train_decoder[:10])
    print(train_target[:10])

    input_chars = set(' '.join(train_encoder))
    target_chars = set(' '.join(train_decoder))
    nb_input_chars = len(input_chars)
    nb_target_chars = len(target_chars)

    print('Size of training vocabulary =', len(vocab))
    print('Number of unique input characters:', nb_input_chars)
    print('Number of unique target characters:', nb_target_chars)
    print('Max sequence length in the training set:', maxlen)

    # Prepare validation data.
    text = read_text(data_path, val_books)
    val_tokens = tokenize(text)
    # Unlike the training vocabulary, duplicates are kept here; only empty
    # tokens are removed.
    val_tokens = list(filter(None, val_tokens))

    val_maxlen = max(len(token) for token in val_tokens) + 2
    # NOTE(review): validation sequences are encoded with the *training*
    # maxlen; val_maxlen is only reported. Verify how transform()/batch()
    # behave if val_maxlen ever exceeds maxlen.
    val_encoder, val_decoder, val_target = transform(
        val_tokens, maxlen, error_rate=error_rate, shuffle=False)
    print(val_encoder[:10])
    print(val_decoder[:10])
    print(val_target[:10])
    print('Number of non-unique validation tokens =', len(val_tokens))
    print('Max sequence length in the validation set:', val_maxlen)

    # Define training and evaluation configuration.
    input_ctable = CharacterTable(input_chars)
    target_ctable = CharacterTable(target_chars)

    train_steps = len(vocab) // train_batch_size
    val_steps = len(val_tokens) // val_batch_size

    # Compile the model.
    model, encoder_model, decoder_model = seq2seq(
        hidden_size, nb_input_chars, nb_target_chars)
    # summary() prints the table itself and returns None, so wrapping it
    # in print() would emit a stray "None" line.
    model.summary()

    # Checkpoint directory is the same for every epoch.
    save_dir = 'checkpoints'

    # Train and evaluate.
    for epoch in range(nb_epochs):
        print('Main Epoch {:d}/{:d}'.format(epoch + 1, nb_epochs))

        # Re-run transform() with shuffle=True at the start of every epoch
        # to obtain fresh training triples.
        train_encoder, train_decoder, train_target = transform(
            vocab, maxlen, error_rate=error_rate, shuffle=True)

        train_encoder_batch = batch(train_encoder, maxlen, input_ctable,
                                    train_batch_size, reverse)
        train_decoder_batch = batch(train_decoder, maxlen, target_ctable,
                                    train_batch_size)
        train_target_batch = batch(train_target, maxlen, target_ctable,
                                   train_batch_size)

        # Validation batches are rebuilt each epoch from the fixed
        # validation triples (presumably because batch() returns a
        # one-shot generator -- confirm in utils).
        val_encoder_batch = batch(val_encoder, maxlen, input_ctable,
                                  val_batch_size, reverse)
        val_decoder_batch = batch(val_decoder, maxlen, target_ctable,
                                  val_batch_size)
        val_target_batch = batch(val_target, maxlen, target_ctable,
                                 val_batch_size)

        train_loader = datagen(train_encoder_batch,
                               train_decoder_batch, train_target_batch)
        val_loader = datagen(val_encoder_batch,
                             val_decoder_batch, val_target_batch)

        # One Keras epoch per outer iteration.
        model.fit(train_loader,
                  steps_per_epoch=train_steps,
                  epochs=1,
                  verbose=1,
                  validation_data=val_loader,
                  validation_steps=val_steps)

        # On epoch end - decode a batch of misspelled tokens from the
        # validation set to visualize speller performance.
        nb_tokens = 5
        input_tokens, target_tokens, decoded_tokens = decode_sequences(
            val_encoder, val_target, input_ctable, target_ctable,
            maxlen, reverse, encoder_model, decoder_model, nb_tokens,
            sample_mode=sample_mode, random=True)

        print('-')
        print('Input tokens:  ', input_tokens)
        print('Decoded tokens:', decoded_tokens)
        print('Target tokens: ', target_tokens)
        print('-')

        # Save the model at end of each epoch.
        model_file = 'seq2seq_epoch_{:d}.h5'.format(epoch + 1)
        # exist_ok=True avoids the check-then-create race and also
        # recreates the directory if it was removed mid-training.
        os.makedirs(save_dir, exist_ok=True)

        save_path = os.path.join(save_dir, model_file)
        print('Saving full model to {:s}'.format(save_path))
        model.save(save_path)


importing Jupyter notebook from utils.ipynb
importing Jupyter notebook from model.ipynb
['semm******************', 'Ynoder****************', 'roepened**************', 'unforeeen*************', "ress'd****************", "jAdversity's**********", 'grants****************', 'avWnging**************', 'forevIer**************', 'tunc******************']
['\tseem*****************', '\tYonder***************', '\treopened*************', '\tunforeseen***********', "\tpress'd**************", "\tAdversity's**********", '\tgrants***************', '\tavenging*************', '\tforever**************', '\ttunic****************']
['seem******************', 'Yonder****************', 'reopened**************', 'unforeseen************', "press'd***************", "Adversity's***********", 'grants****************', 'avenging**************', 'forever***************', 'tunic*****************']
Size of training vocabulary = 33047
Number of unique input characters: 55
Number of unique target characters: 56
Max se

-
Input tokens:   ['snathc', 'stUill', 'of', 'me', 'satid']
Decoded tokens: ['snatch', 'still', 'of', 'me', 'satid']
Target tokens:  ['snatch', 'still', 'of', 'me', 'said']
-
Saving full model to checkpoints\seq2seq_epoch_25.h5
Main Epoch 26/100
Shuffling data.
-
Input tokens:   ['liRe', 'as', 'to', 'fijnd', 'tPe']
Decoded tokens: ['lime', 'as', 'to', 'fined', 'tee']
Target tokens:  ['like', 'as', 'to', 'find', 'the']
-
Saving full model to checkpoints\seq2seq_epoch_26.h5
Main Epoch 27/100
Shuffling data.
-
Input tokens:   ['finsihed', 'agani', 'DinHah', 'HAVE', 'a']
Decoded tokens: ['finished', 'agani', 'Dinah', 'HAVE', 'a']
Target tokens:  ['finished', 'again', 'Dinah', 'HAVE', 'a']
-
Saving full model to checkpoints\seq2seq_epoch_27.h5
Main Epoch 28/100
Shuffling data.
-
Input tokens:   ["'Who'", 'cold', "'juy", 'Last', 'of']
Decoded tokens: ["'Who'", 'cold', "'juy", 'Last', 'of']
Target tokens:  ["'Who's", 'could', "'jury", 'Last', 'of']
-
Saving full model to checkpoints\seq2seq_e

-
Input tokens:   ['sZe', 'antd', "'I", 'saif', 'gnder']
Decoded tokens: ['se', 'anted', "'I", 'sail', 'gonder']
Target tokens:  ['see', 'and', "'I", 'said', 'under']
-
Saving full model to checkpoints\seq2seq_epoch_42.h5
Main Epoch 43/100
Shuffling data.
-
Input tokens:   ['frying', 'rst', 'a', 'cotent', 'anoqther']
Decoded tokens: ['frying', 'rust', 'a', 'content', 'another']
Target tokens:  ['frying', 'rest', 'a', 'content', 'another']
-
Saving full model to checkpoints\seq2seq_epoch_43.h5
Main Epoch 44/100
Shuffling data.
-
Input tokens:   ['skurried', 'the', 'aws', 'odwn', 'yuor']
Decoded tokens: ['skurried', 'the', 'laws', 'down', 'your']
Target tokens:  ['skurried', 'the', 'was', 'down', 'your']
-
Saving full model to checkpoints\seq2seq_epoch_44.h5
Main Epoch 45/100
Shuffling data.
-
Input tokens:   ['alwayx', 'teh', 'cuold', 'lXrge', 'up']
Decoded tokens: ['alway', 'the', 'could', 'lorge', 'up']
Target tokens:  ['always', 'the', 'could', 'large', 'up']
-
Saving full model to c

-
Input tokens:   ['he', 'ought', 'throgh', 'einto', 'was']
Decoded tokens: ['he', 'ought', 'through', 'ention', 'was']
Target tokens:  ['the', 'ought', 'through', 'into', 'was']
-
Saving full model to checkpoints\seq2seq_epoch_58.h5
Main Epoch 59/100
Shuffling data.
-
Input tokens:   ['if', 'ou', 'in', 'anixous', 'instead']
Decoded tokens: ['if', 'ou', 'in', 'anxious', 'instead']
Target tokens:  ['if', 'you', 'in', 'anxious', 'instead']
-
Saving full model to checkpoints\seq2seq_epoch_59.h5
Main Epoch 60/100
Shuffling data.
-
Input tokens:   ['lyTng', 'a', "'Tha's", 'cDowd', 'abut']
Decoded tokens: ['lying', 'a', "'That's", 'crow', 'abut']
Target tokens:  ['lying', 'a', "'That's", 'crowd', 'about']
-
Saving full model to checkpoints\seq2seq_epoch_60.h5
Main Epoch 61/100
Shuffling data.
-
Input tokens:   ['Seen', 'jury', 'hid', 'it', 'boor']
Decoded tokens: ['Seen', 'jury', 'hid', 'it', 'boor']
Target tokens:  ['been', 'jury', 'had', 'it', 'door']
-
Saving full model to checkpoints\seq

-
Input tokens:   ['gettSng', 'ssee', 'Vhat', 'ytell', 'to']
Decoded tokens: ['getting', 'sees', 'What', 'yell', 'to']
Target tokens:  ['getting', 'see', 'that', 'tell', 'to']
-
Saving full model to checkpoints\seq2seq_epoch_75.h5
Main Epoch 76/100
Shuffling data.
-
Input tokens:   ['exactly', 'Aay', 'a', 'Mthe', 'hoem']
Decoded tokens: ['exactly', 'Aay', 'a', 'Mothe', 'home']
Target tokens:  ['exactly', 'way', 'a', 'the', 'home']
-
Saving full model to checkpoints\seq2seq_epoch_76.h5
Main Epoch 77/100
Shuffling data.
-
Input tokens:   ['itr', 'as', 'nCw', 'the', 'it']
Decoded tokens: ['tir', 'as', 'new', 'thee', 'it']
Target tokens:  ["it'", 'as', 'now', 'the', 'it']
-
Saving full model to checkpoints\seq2seq_epoch_77.h5
Main Epoch 78/100
Shuffling data.
-
Input tokens:   ['lookend', 'minute', 'abotu', "'yWu", 'hte']
Decoded tokens: ['lookened', 'minute', 'about', "'yeu", 'hate']
Target tokens:  ['looked', 'minute', 'about', "'you", 'the']
-
Saving full model to checkpoints\seq2seq_ep

-
Input tokens:   ['Peat', 'hVm', 'in', 'my', 'it']
Decoded tokens: ['Peat', 'him', 'in', 'my', 'it']
Target tokens:  ['beat', 'him', 'in', 'my', 'it']
-
Saving full model to checkpoints\seq2seq_epoch_91.h5
Main Epoch 92/100
Shuffling data.
-
Input tokens:   ['th', 'well', 'Turtle', 'sHt', 'in']
Decoded tokens: ['th', 'well', 'Turtle', 'set', 'in']
Target tokens:  ['the', 'well', 'Turtle', 'set', 'in']
-
Saving full model to checkpoints\seq2seq_epoch_92.h5
Main Epoch 93/100
Shuffling data.
-
Input tokens:   ['fit', 'ought', 'if', 'yuo', 'szid']
Decoded tokens: ['fit', 'ought', 'if', 'you', 'sized']
Target tokens:  ['fit', 'ought', 'if', 'you', 'said']
-
Saving full model to checkpoints\seq2seq_epoch_93.h5
Main Epoch 94/100
Shuffling data.
-
Input tokens:   ['thY', 'were', 'went', 'anwser', 'a']
Decoded tokens: ['thy', 'were', 'went', 'answer', 'a']
Target tokens:  ['the', 'were', 'went', 'answer', 'a']
-
Saving full model to checkpoints\seq2seq_epoch_94.h5
Main Epoch 95/100
Shuffling d