In [1]:
import os
import fasttext
import fasttext.util
import torch
import math
import random
from model import gan
from early_stopping import EarlyStopping
from sklearn.utils import shuffle
from sklearn.neighbors import NearestNeighbors
import numpy as np

# Unique Naming
from datetime import datetime
import random, string
import importlib

In [2]:
def random_string(length=10):
    """
        Generate a random string of given length. For safely storing produced images.
    """
    letters = string.ascii_lowercase
    return ''.join(random.choice(letters) for i in range(length))


def get_model_id():
    """
        Creates folder with unique ID in which everything related to a particular testrun can be saved.
    :return: Unique folder identifier
    """
    # Construct testrun identifier
    TIME_STAMP = datetime.now().strftime("%Y_%d_%m__%H_%M_%S__%f_")
    model_folder_id = TIME_STAMP + '_' + random_string() + '/'

    try:
        os.makedirs(model_folder_id)
    except Exception as e:
        print('Exception occurred: ', e)

    return model_folder_id

In [36]:
### VARIABLES & ADMINISTRATIVE STUFF ###
# System
#dataset_path = '/media/daniel/Elements/FastText_Data/'  # In case dataset is stored somewhere else, e.g. on hard-drive
dataset_path = '/media/daniel/Elements/FastText_Data/'  # Data in same directory
dictionary_path = '/media/daniel/Elements/FastText_Data/'  # Dictionaries in same directory
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# Network
embedding_dim = 300
internal_dim = 300
output_dim = 2

# Train hyperparameters
epochs = 1000
batch_size = 32
vocab_size = 50
num_minibatches = vocab_size // batch_size
real_label, fake_label = 1, 0
languages = {'src': ['de', 'nl'], 'trgt': ['en']}  # Target language to be indicated in last position
checkpoint_frequency = 0  # 0 == Off; i > 0 == actual checkpoint frequency in epochs
avg_grads = False  # Boolean indicating whether to average the grads of decoder & discriminator accumulated over nr of source languages by nr of source langs
early_stop = False # Boolean indicating whether to stop early if loss won't decrease for a certain threshold
eval_frequency = 200

#testing parameters
N = [1] # List of n nearest neighbors that will be performed in evaluation


In [4]:
# Changed the way languages are stored. 
# For easy access to complete set of all included languages, just concatenate lists
languages = {'src': ['de', 'nl'], 'trgt': ['en']}

for lang in languages['src']+languages['trgt']:
    print(lang)
print(languages)

de
nl
en
{'trgt': ['en'], 'src': ['de', 'nl']}


In [5]:
# Set up saving paths
data_storage_path = './'
model_id = get_model_id()
checkpoint_path = data_storage_path + model_id + 'Checkpoint/'
final_state_path = data_storage_path + model_id + 'Final/'

try:
    if checkpoint_frequency > 0:
        os.makedirs(checkpoint_path)
        print('Created:', checkpoint_path)
    os.makedirs(final_state_path)
    print('Created:', final_state_path)
except Exception as e:
    raise Warning('Exception occurred: Cound not create dirs! Exception:', e)
    
print('Model ID:', model_id)

Created: ./2020_08_06__22_06_24__630394__qyzxfdpjtn/Final/
Model ID: 2020_08_06__22_06_24__630394__qyzxfdpjtn/


# Get vocab

In [6]:
def full_vocab(vocab):
    # Returns the word embeddings and matching labels for the full vocabulary
    words = vocab.words
    vectors = [vocab[word] for word in words]
    return vectors, words

In [7]:
def cleaned_vocab(vocab, vocab_size):
    # Remove all punctuation tokens while valid nr of tokens is insufficient yet for having full vocab size
    # TODO & possibly reserve testing vocab
    # Return clean & restricted vocab
    words = vocab.words[:vocab_size]              # Y (labels)
    vects = [vocab[word] for word in words]       # X (input data)

    return vects, words

In [8]:
def add_lang_to_vocab(lang_id, vocab_size, vocabs, full_vocabs):
    # Get dataset
    if dataset_path == './':
        fasttext.util.download_model(lang_id)  # Download word embedding vector data if not available
    vocab = fasttext.load_model(dataset_path + 'cc.' + lang_id + '.300.bin')  # Load language data

    # Add train data (embedding-vectors) and labels (words) to vocab
    X, Y = cleaned_vocab(vocab,500000)
    x, y = cleaned_vocab(vocab, vocab_size)
    vocabs[lang_id] = {'x': torch.tensor(x), 'y': y}
    full_vocabs[lang_id] = {'X': X, 'Y': Y}

    return vocabs, full_vocabs

In [9]:
def load_vocab(languages):
    vocabs = {}
    full_vocabs = {}
    
    for language in languages['src']+languages['trgt']:
        vocabs, full_vocabs = add_lang_to_vocab(language, vocab_size, vocabs, full_vocabs)

    print('Successfully loaded language models.')
    return vocabs, full_vocabs

In [10]:
#load vocab (keep in independent cell for bugfixing purposes)
vocabs, full_vocabs = load_vocab(languages)



Successfully loaded language models.


In [11]:
source_vocabs = {}
source_full_vocabs = {}

for source_language in languages['src']:
    source_vocabs[source_language] = vocabs[source_language]
    source_full_vocabs[source_language] = full_vocabs[source_language]
target_full_vocabs = full_vocabs[languages['trgt'][0]]


# Dictionary related functions

In [12]:
def convert_dictionary(dictionary_text):
    # Converts an input dictionary text file to a python dictionary
    dictionary = {}
    source = True
    source_word = ''
    target_word = ''
    
    for character in dictionary_text:
        if source is True:
            if character is '\t' or character is ' ':
                source = False
            else:
                source_word = source_word + character
        else:
            if character is '\n':
                source = True
                if source_word in dictionary:
                    dictionary[source_word].append(target_word)
                else:
                    dictionary[source_word] = [target_word]
                source_word = ''
                target_word = ''
            else:
                target_word = target_word + character
                
    return dictionary

In [13]:
def load_dictionaries(languages):
    # Loads in the bilingual dictionaries
    dictionaries = {}
    
    for source_language in languages['src']:
        file = open(dictionary_path + source_language + '-' + languages['trgt'][0] + '.txt', 'r', errors='ignore')
        dictionary_text = file.read()
        dictionaries[source_language] = convert_dictionary(dictionary_text)
    
    return dictionaries

In [14]:
def split_translation_task(languages, source_full_vocabs, dictionaries):
    # Creates a split in eval and training translation task
    eval_words = {}
    test_words = {}
    
    for source_language in languages['src']:
        source_words = list(dictionaries[source_language].keys())
#         random.shuffle(source_words)
        eval_list = []
        for source_word in source_words:
            if source_word in source_full_vocabs[source_language]['Y']:
                eval_list.append(source_word)
            if len(eval_list) is 200:
                eval_words[source_language] = eval_list
                break
#             eval_words[source_language] = source_words[0:50]
#             test_words[source_language] = source_words[50:150]        
#         eval_words[source_language] = source_words[0:int(len(source_words)/2)]
#         test_words[source_language] = source_words[int(len(source_words)/2):len(source_words)]
        
    return eval_words, test_words

In [15]:
#load in dictionaries (independent cell for bugfixing)
dictionaries = load_dictionaries(languages)

# split in train and evaluation
eval_words, test_words = split_translation_task(languages, source_full_vocabs, dictionaries)

# Nearest neighbor fitting

In [16]:
def fit_neighbors(N, languages, full_vocabs):
    target_neighbors = {}
    
    for n in N:
        target_neighbors[n] = NearestNeighbors(n_neighbors=n, metric='cosine').fit(full_vocabs[languages['trgt'][0]]['X'])
        
    return target_neighbors

In [17]:
neighbors = fit_neighbors(N, languages, full_vocabs)

# Evaluation functions

In [18]:
def compute_cosine(vector1, vector2):
    # Computes the cosine simularity between two vectors
    dot_product = np.dot(vector1, vector2)
    norm_vector1 = np.linalg.norm(vector1)
    norm_vector2 = np.linalg.norm(vector2)
    return dot_product/(norm_vector1*norm_vector2)

In [19]:
def get_n_translations(generator, language, source_vector, target_vocab, neighbors):
    # Gets n possible translations, as given by the n nearest neighbors of the transformed
    # source vector in the target embeddings space, we will use a package for this for optimization
    # purposes. n is given in the nearest neighbor fitting stage.
    #print(source_vector.numpy()[0])
    transformed_source_embedding = generator(torch.as_tensor(source_vector), language).detach().numpy()

    # only takes 2D arrays, hence the extra bracket [1][0] stands for select indices of
    # the first input vector (the only one in this case)

    vocab_indices = neighbors.kneighbors(np.array([transformed_source_embedding]))[1][0]
    target_vectors = []
    target_words = []
    for index in vocab_indices:
        target_vectors.append(target_vocab['X'][index])
        target_words.append(target_vocab['Y'][index])

    return target_vectors, target_words

In [20]:
def get_n_translations_batch(generator, language, source_vectors, target_vocab, neighbors):
    # Gets n possible translations, as given by the n nearest neighbors of the transformed
    # source vector in the target embeddings space, we will use a package for this for optimization
    # purposes. n is given in the nearest neighbor fitting stage.
    #print(source_vector.numpy()[0])
    transformed_source_embedding = generator(torch.as_tensor(source_vectors), language).detach().numpy()

    # only takes 2D arrays, hence the extra bracket [1][0] stands for select indices of
    # the first input vector (the only one in this case)

    vocab_indices = neighbors.kneighbors(np.array(transformed_source_embedding))[1]
    target_vectors = []
    target_words = []
    for target_indices in vocab_indices:
        vectors = []
        words = []
        for index in target_indices:
            vectors.append(target_vocab['X'][index])
            words.append(target_vocab['Y'][index])
        target_vectors.append(vectors)
        target_words.append(words)

    return target_vectors, target_words

In [21]:
def get_average_cosine(generator, language, source_word_vectors, target_vocab, neighbors):
    # Computes the average cosine simularity between the source words and their translations
    sum_of_cosines = 0
    for source_word_vector in source_word_vectors[:30]:
        translated_word_vector = get_n_translations(generator, language, source_word_vector, target_vocab, neighbors[1])[0][0]
        sum_of_cosines += compute_cosine(source_word_vector, translated_word_vector)
    return sum_of_cosines/len(source_word_vectors)

In [22]:
def get_average_cosine_batch(generator, language, source_word_vectors, target_vocab, neighbors):
    # Computes the average cosine simularity between the source words and their translations
    sum_of_cosines = 0
    translated_word_vectors = get_n_translations_batch(generator, language, source_word_vectors, target_vocab, neighbors[1])[0]
    for source_word_vector, translated_word_vector in zip(source_word_vectors, translated_word_vectors):
        sum_of_cosines += compute_cosine(source_word_vector, translated_word_vector[0])
    return sum_of_cosines/len(source_word_vectors)

In [23]:
def get_translation_accuracy(generator, language, source_words, source_vocab, target_vocab, dictionary, neighbors):
    # Compute the accuracy of translation over the given set of source words
    correct_translations = 0
    for source_word in source_words:
        source_word_index = source_vocab['Y'].index(source_word)
        source_word_vector = source_vocab['X'][source_word_index]
        n_target_words = get_n_translations(generator, language, source_word_vector, target_vocab, neighbors)[1]
        for target_word in n_target_words:
            if target_word in dictionary[source_word]:
                correct_translations += 1
                break
    return correct_translations/len(source_words)

In [24]:
def get_translation_accuracy_batch(generator, language, source_words, source_vocab, target_vocab, dictionary, neighbors):
    # Compute the accuracy of translation over the given set of source words
    correct_translations = 0
    source_word_vectors = []
    for source_word in source_words:
        source_word_index = source_vocab['Y'].index(source_word)
        source_word_vectors.append(source_vocab['X'][source_word_index])
    target_words = get_n_translations_batch(generator, language, source_word_vectors, target_vocab, neighbors)[1]
    for n_target_words in target_words:
        for target_word in n_target_words:
            if target_word in dictionary[source_word]:
                correct_translations += 1
                break
    return correct_translations/len(source_words)

In [25]:
def evaluation(generator, languages, source_training_vocabs, source_eval_words, source_full_vocabs, target_full_vocabs, dictionaries, neighbors, N):
    # Evaluates the current model by using both an unsupervised cosine similiraty metric and a 
    # supervised translation accuracy metric. We have included both to see how they compare.
    for source_language in languages['src']:
        cosine_metric =  get_average_cosine_batch(generator, source_language, source_training_vocabs[source_language]['x'], target_full_vocabs, neighbors) #experimental
#         cosine_metric = ''
        accuracy_text = 'accuracies are '
        for n in N:
            accuracy = get_translation_accuracy_batch(generator, source_language, source_eval_words[source_language], source_full_vocabs[source_language], target_full_vocabs, dictionaries[source_language], neighbors[n])
            accuracy_text = str(accuracy_text) + 'p@' + str(n) + '=' + str(accuracy) + ', '
        
        print('evaluation of source language ' + source_language + ': average cosine=',cosine_metric, accuracy_text)
    

In [26]:
def testing(generator, languages, source_test_words, source_full_vocabs, target_full_vocabs, dictionaries, neighbors, N):
    # Testing based on translation accuracy on testing set
    for source_language in languages['src']:
        accuracy_text = ''
        for n in N:
            accuracy = get_translation_accuracy_batch(generator, source_language, source_test_words[source_language], source_full_vocabs[source_language], target_full_vocabs, dictionaries[source_language], neighbors[n])
            accuracy_text = accuracy_text + 'p@' + n + '=' + accuracy + ', '
        
        print('Testing accuracies of source language ' + source_language + ": " + accuracy_text)

# Define training related functions

In [27]:
def save_checkpoint(data, save):
    if save:
        torch.save(data, checkpoint_path + 'checkpoint_%d.pt' % data['epoch'])

In [28]:
def mean_param(model):
    return torch.mean(torch.cat([param.data.view(-1) for param in model.parameters()], 0))

In [29]:
def get_dataset_sample(lang, vocab, batch_size, include_y=False):
    """
    This function draws batch_size-many training samples at random 
    from a vocab corresponding to queried language.  
    """
    indices = torch.LongTensor(batch_size).random_(0, len(vocab))
    if include_y:
        return vocab['x'][indices], vocab['y'][indices]
    return vocab['x'][indices]


def get_train_data(languages, vocabs, batch_size, include_y=False):
    """
    Returns one set of samples datapoints form a vocabulary for each provided language.
    """
    x, y = {}, {}
    
    # Source languages
    for lang in languages['src']+languages['trgt']:
        if include_y:
            x[lang], y[lang] = get_dataset_sample(lang, vocabs[lang], batch_size, include_y)
        else:
            x[lang] = get_dataset_sample(lang, vocabs[lang], batch_size)
    
    # Return
    if include_y:
        return x, y
    return x

In [30]:
# For debugging - Compute sum of abs(gradients) of model
def get_summed_abs_grads(model):
#     summed_abs = torch.tensor(0)
    summed_abs = 0
    for p in model.parameters():
        summed_abs += torch.sum(torch.abs(p))
    return summed_abs

In [37]:
def main():
    
    importlib.reload(gan)
    
    NLLLoss = torch.nn.NLLLoss()
    nr_src_langs = len(languages['src'])
    nr_trgt_langs = len(languages['trgt'])
    nr_langs = nr_src_langs + nr_trgt_langs
    print('Nr source languages:', nr_src_langs)
    print('Nr target languages:', len(languages['trgt'])) 
    print('\n', languages)
    
    if avg_grads:
        avg_factor = 1/nr_src_langs
        print('Decoder gradient averaging factor:', avg_factor, "\n")
    
    # Get bilingual dictionary for evaluating train loss or at least testing
    dicts = dict()
    #TODO

    # Set up model architecture
    net = gan.GAN(embedding_dim, internal_dim, output_dim, languages['src'])

    # Get optimizers; 1 per source language of encoder and 1 for discriminator
    optimizers = {'gen': {}}
    for lang in languages['src']:
        optimizers['gen'][lang] = torch.optim.Adam([{'params': net.generator.encoders[lang].parameters()},
                                                    {'params': net.generator.decoder.parameters()}],
                                                    lr=0.000001, betas=(0.9, 0.999), eps=1e-08, 
                                                    weight_decay=0, amsgrad=False)
    optimizers['dis'] = torch.optim.Adam(net.discriminator.parameters(),
                                         lr=0.0001, betas=(0.9, 0.999), eps=1e-08, 
                                         weight_decay=0, amsgrad=False)
    
    # Train
    train_loss_gen, train_loss_dis = [], []
    eval_loss = [] # TODO: To be populated...
    last_loss = -1
    
    es = EarlyStopping(patience=10) #patience = amount of epochs the loss has to stop decreasing in a row for it to early stop
    
    for epoch in range(epochs):
        print('Epoch ', epoch, '/', epochs)
        loss_gen, loss_dis = 0., 0.

        # Train #
        for batch in range(num_minibatches):
            #print('Epoch ', epoch, ', Batch ', batch, '/', num_minibatches)
            
            # Update discriminator #
            net.discriminator.train()
            net.generator.eval()
            net.discriminator.zero_grad()
            
            # Retrieve data
            x = get_train_data(languages, vocabs, batch_size)#.to(device) 

            # Init data-storage
            y_preds = torch.zeros([nr_langs*batch_size, 2])
            y_true = torch.zeros([nr_langs*batch_size]).long()
            
            y_true[0:batch_size] = real_label  # First elements are target embeddings

            
            # All-real minibatch
            x_real = x[languages['trgt'][0]]  # Extract all-real data
            y_preds[0:batch_size] = net.discriminator(x_real)
            
            # All-fake minibatches - One minibatch per source language
            for i, language in enumerate(languages['src']):
                idx_from = batch_size*i+batch_size*nr_trgt_langs
                idx_to = batch_size*(i+1)+batch_size*nr_trgt_langs
                x_trans = net.generator(x[language], language)  # Generate fake data aka translate
                y_preds[idx_from:idx_to] = net.discriminator(x_trans)
            #print('Preds:', y_preds)
            
            # Loss proportional to discriminator's probability of correctly distinguishing TP and FP
            loss = NLLLoss(torch.log(y_preds+0.0000001), y_true)  # NLLLoss needs log(prob_distribution); adding small amount to avoid log(0)
            loss.backward()    # Compute gradients only for discriminator
            loss_dis += loss
            
            # Weight update for discriminator
            optimizers['dis'].step() 

            
            # Update generator #
            net.generator.train()
            net.discriminator.eval()
            net.generator.zero_grad()
            
            # Retrieve data
            x = get_train_data(languages, vocabs, batch_size)#.to(device)
            
            # All-real minibatch
            x_real = x[languages['trgt'][0]]  # Extract all-real data
            y_true = torch.full((batch_size,), fake_label).long()#.to(device)  # Pretend true targets were fake
            y_pred = net.discriminator(x_real)
            # Loss proportional to discriminator's probability of misclassifying TP and FP
            loss_real = NLLLoss(torch.log(y_pred+0.0000001), y_true)  # NLLLoss needs log(prob_distribution); adding small amount to avoid log(0)
            
            # All-fake minibatches - One minibatch per source language
            y_true = torch.full((batch_size,), real_label).long()#.to(device) # Try to fool the discriminator
            for language in languages['src']:
                x_src = x[language]
                x_trans = net.generator(x_src, language)
                y_pred = net.discriminator(x_trans)
                # Loss proportional to discriminator's probability of misclassifying TP and FP
                loss = NLLLoss(torch.log(y_pred+0.0000001), y_true) + loss_real  # Add loss for real-misclassification here
                loss.backward(retain_graph=True)    # Compute gradients only for discriminator
                loss_gen += loss
            
            # Perform weight updates
            for language in languages['src']:
                optimizers['gen'][language].step()
        
        # Document accumulated losses per epoch
        train_loss_gen.append(loss_gen.detach().numpy())
        train_loss_dis.append(loss_dis.detach().numpy())
        
        #print('Mean: ', mean_param(net.generator.decoder))
        print('Progress: ', loss_gen.detach().numpy(), 
                            loss_dis.detach().numpy())
        
        # Evaluation step
        if epoch > 50 and epoch % eval_frequency is 0:
            evaluation(net.generator, languages, source_vocabs, eval_words, source_full_vocabs, target_full_vocabs, dictionaries, neighbors, N)

        if early_stop: # if early stopping is enabled or not
            if es.step(loss_gen.detach()): # using the real loss of the generator for now, maybe use something else later? e.g. evaluation loss?
                print('early stopping')
                break  # early stop criterion is met, stop the loop now
        
        # Save checkpoints
        #print(loss_real_total_g.detach().numpy(), loss_fake_total_g.detach().numpy())
        
#        save = checkpoint_frequency > 0 and epoch % checkpoint_frequency == 0 and \
#            last_loss > loss_real_total_g+loss_fake_total_g  # Provisional: save when loss of generator has improved
#        last_loss = loss_real_total_g+loss_fake_total_g
#        save_checkpoint({'epoch': epoch,
#                         'model_state_dict': net.state_dict(),
#                         'optimizer_state_dicts': 
#                             {**{lang: optimizers['gen'][lang].state_dict() for lang in languages['src']}, 
#                              **{languages['trgt'][0]: optimizers['dis']}
#                            },
#                         'losses': {'train_loss_real_d': train_loss_real_d[-1],
#                                    'train_loss_fake_d': train_loss_fake_d[-1],
#                                    'train_loss_real_g': train_loss_real_g[-1],
#                                    'train_loss_fake_g': train_loss_fake_g[-1],},
#                         }, save)

    # Final testing
#     testing(net.generator, languages, test_words, source_full_vocabs, target_full_vocabs, dictionaries, neighbors, N)

    # Store model
    torch.save(net.state_dict(), final_state_path + 'final_model%d.pt' % epoch)

if __name__ == "__main__":
    # execute only if run as a script
    main()
    print('Done.')



Nr source languages: 2
Nr target languages: 1

 {'trgt': ['en'], 'src': ['de', 'nl']}
Epoch  0 / 1000
Progress:  2.7978802 0.6904523
Epoch  1 / 1000
Progress:  2.8078442 0.68881506
Epoch  2 / 1000
Progress:  2.8237314 0.68502575
Epoch  3 / 1000
Progress:  2.8262534 0.68289846
Epoch  4 / 1000
Progress:  2.8251662 0.67986727
Epoch  5 / 1000
Progress:  2.8482802 0.67673093
Epoch  6 / 1000
Progress:  2.8592534 0.67234105
Epoch  7 / 1000
Progress:  2.8823152 0.67123485
Epoch  8 / 1000
Progress:  2.871921 0.67038965
Epoch  9 / 1000
Progress:  2.915812 0.6679189
Epoch  10 / 1000
Progress:  2.9034603 0.6616455
Epoch  11 / 1000
Progress:  2.9221044 0.6578126
Epoch  12 / 1000
Progress:  2.940775 0.6579758
Epoch  13 / 1000
Progress:  2.9399943 0.65598077
Epoch  14 / 1000
Progress:  2.9722185 0.65462744
Epoch  15 / 1000
Progress:  2.9615703 0.65223897
Epoch  16 / 1000
Progress:  2.9952235 0.6491081
Epoch  17 / 1000
Progress:  2.978819 0.64463735
Epoch  18 / 1000
Progress:  2.9991984 0.6442275
Epoc

Progress:  6.052578 0.28343984
Epoch  169 / 1000
Progress:  5.9444914 0.2855443
Epoch  170 / 1000
Progress:  5.971283 0.28337416
Epoch  171 / 1000
Progress:  5.8606863 0.2776939
Epoch  172 / 1000
Progress:  5.653369 0.2758764
Epoch  173 / 1000
Progress:  6.14488 0.27191833
Epoch  174 / 1000
Progress:  6.0321326 0.27756166
Epoch  175 / 1000
Progress:  6.0121136 0.27383286
Epoch  176 / 1000
Progress:  6.1823635 0.2682749
Epoch  177 / 1000
Progress:  6.208598 0.25876993
Epoch  178 / 1000
Progress:  6.137473 0.2645544
Epoch  179 / 1000
Progress:  6.168501 0.260738
Epoch  180 / 1000
Progress:  5.9991894 0.26277477
Epoch  181 / 1000
Progress:  6.5124826 0.25537118
Epoch  182 / 1000
Progress:  6.199836 0.25001326
Epoch  183 / 1000
Progress:  6.3220134 0.25745758
Epoch  184 / 1000
Progress:  6.3502474 0.25180915
Epoch  185 / 1000
Progress:  6.6305704 0.24277382
Epoch  186 / 1000
Progress:  6.308431 0.24655692
Epoch  187 / 1000
Progress:  6.385908 0.24688073
Epoch  188 / 1000
Progress:  6.41147

Progress:  10.420068 0.10157868
Epoch  336 / 1000
Progress:  10.130104 0.09682845
Epoch  337 / 1000
Progress:  9.8980255 0.098262966
Epoch  338 / 1000
Progress:  10.174983 0.10079909
Epoch  339 / 1000
Progress:  10.198662 0.09602233
Epoch  340 / 1000
Progress:  10.355782 0.09762322
Epoch  341 / 1000
Progress:  10.309566 0.096177764
Epoch  342 / 1000
Progress:  10.577058 0.095574945
Epoch  343 / 1000
Progress:  10.659878 0.096191965
Epoch  344 / 1000
Progress:  10.373066 0.094225325
Epoch  345 / 1000
Progress:  10.454596 0.095009156
Epoch  346 / 1000
Progress:  10.282398 0.09399998
Epoch  347 / 1000
Progress:  10.362335 0.091837436
Epoch  348 / 1000
Progress:  10.198611 0.092046976
Epoch  349 / 1000
Progress:  10.290011 0.093071334
Epoch  350 / 1000
Progress:  10.745592 0.08977934
Epoch  351 / 1000
Progress:  10.458462 0.09211901
Epoch  352 / 1000
Progress:  10.7716675 0.09380721
Epoch  353 / 1000
Progress:  10.365324 0.092080444
Epoch  354 / 1000
Progress:  10.580066 0.094896995
Epoch 

Progress:  13.187643 0.053464785
Epoch  502 / 1000
Progress:  12.935348 0.053700354
Epoch  503 / 1000
Progress:  13.157127 0.054158423
Epoch  504 / 1000
Progress:  12.976543 0.054386634
Epoch  505 / 1000
Progress:  12.979776 0.051262215
Epoch  506 / 1000
Progress:  13.008888 0.05343077
Epoch  507 / 1000
Progress:  12.923284 0.052905962
Epoch  508 / 1000
Progress:  12.866987 0.05314349
Epoch  509 / 1000
Progress:  13.20101 0.05129783
Epoch  510 / 1000
Progress:  13.106085 0.051640317
Epoch  511 / 1000
Progress:  13.036948 0.051633347
Epoch  512 / 1000
Progress:  13.185152 0.052738983
Epoch  513 / 1000
Progress:  13.004602 0.051901776
Epoch  514 / 1000
Progress:  13.085854 0.052028134
Epoch  515 / 1000
Progress:  13.322951 0.05232362
Epoch  516 / 1000
Progress:  12.968729 0.052332472
Epoch  517 / 1000
Progress:  12.987437 0.05178757
Epoch  518 / 1000
Progress:  12.876876 0.05173297
Epoch  519 / 1000
Progress:  13.141197 0.050658237
Epoch  520 / 1000
Progress:  13.2702465 0.051685583
Epoc

Progress:  13.928995 0.04391007
Epoch  666 / 1000
Progress:  14.352275 0.044425685
Epoch  667 / 1000
Progress:  14.680357 0.04357821
Epoch  668 / 1000
Progress:  14.783454 0.043067455
Epoch  669 / 1000
Progress:  14.373749 0.04406683
Epoch  670 / 1000
Progress:  14.186407 0.04362668
Epoch  671 / 1000
Progress:  14.249035 0.04319488
Epoch  672 / 1000
Progress:  14.461113 0.04426982
Epoch  673 / 1000
Progress:  14.527557 0.042992648
Epoch  674 / 1000
Progress:  14.640118 0.044877574
Epoch  675 / 1000
Progress:  14.526541 0.043826133
Epoch  676 / 1000
Progress:  14.601173 0.043299768
Epoch  677 / 1000
Progress:  14.445819 0.043397326
Epoch  678 / 1000
Progress:  14.511651 0.044402476
Epoch  679 / 1000
Progress:  14.72421 0.044145893
Epoch  680 / 1000
Progress:  14.484047 0.043211695
Epoch  681 / 1000
Progress:  14.501548 0.044250738
Epoch  682 / 1000
Progress:  14.341645 0.043059345
Epoch  683 / 1000
Progress:  14.577433 0.042795684
Epoch  684 / 1000
Progress:  14.458569 0.04442283
Epoch 

Progress:  15.32365 0.041222494
Epoch  832 / 1000
Progress:  15.33811 0.04125854
Epoch  833 / 1000
Progress:  15.10354 0.04249735
Epoch  834 / 1000
Progress:  15.135586 0.041321587
Epoch  835 / 1000
Progress:  15.271593 0.04254968
Epoch  836 / 1000
Progress:  15.080326 0.04128231
Epoch  837 / 1000
Progress:  15.254029 0.042000085
Epoch  838 / 1000
Progress:  15.205343 0.041406497
Epoch  839 / 1000
Progress:  15.250182 0.042302653
Epoch  840 / 1000
Progress:  15.229713 0.042199194
Epoch  841 / 1000
Progress:  15.065575 0.040415067
Epoch  842 / 1000
Progress:  15.298212 0.041946907
Epoch  843 / 1000
Progress:  15.302725 0.04050635
Epoch  844 / 1000
Progress:  15.080395 0.04273894
Epoch  845 / 1000
Progress:  15.318881 0.04086825
Epoch  846 / 1000
Progress:  15.287977 0.04053329
Epoch  847 / 1000
Progress:  15.271564 0.042799342
Epoch  848 / 1000
Progress:  15.091356 0.0420447
Epoch  849 / 1000
Progress:  15.104284 0.04077611
Epoch  850 / 1000
Progress:  15.17013 0.041511696
Epoch  851 / 

Progress:  15.933777 0.03596059
Epoch  997 / 1000
Progress:  16.234394 0.03587774
Epoch  998 / 1000
Progress:  15.918175 0.03585711
Epoch  999 / 1000
Progress:  15.9748125 0.034994043
Done.
