# Keras Install

In [0]:
!pip install keras --upgrade

# Clone the GitHub Repository

In [0]:
import os
# Move to the `content` directory located next to the current working directory
# (presumably Colab's /content workspace - confirm when running elsewhere).
os.chdir('../content')
# List the directory so the cell output shows what is already there.
!ls

In [0]:
!rm -R syllable-aware/
!ls

In [0]:
# Fetch the project sources; git creates the `syllable-aware` directory here.
!git clone https://github.com/nlpchile/syllable-aware.git
# Show the directory contents after cloning.
!ls

In [0]:
import os
# Make the cloned repository the working directory for the cells that follow.
os.chdir('syllable-aware')
# List the repository contents.
!ls

# Run LSTM.py

In [0]:
# Run LSTM.py from the current working directory in a separate python3 process.
!python3 LSTM.py

# Setting Seed for Reproducibility

In [0]:
################################################################################
## Setting Seed for Reproducibility
##
## Seeds Python's `random`, NumPy and TensorFlow with the same value and installs
## a single-threaded TensorFlow session as the Keras backend session.
## The statements are order-sensitive: seeds are set before the session exists.

import os
import numpy as np
import random

import tensorflow as tf

# Setting PYTHONHASHSEED for determinism was not listed anywhere for TensorFlow,
# but apparently it is necessary for the Theano backend
# (https://github.com/fchollet/keras/issues/850).
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so assigning
# it here presumably only affects child processes, not this kernel - confirm.

os.environ['PYTHONHASHSEED'] = '0'
seed = 0 # must be the same as PYTHONHASHSEED

# Seed NumPy's global generator and Python's `random` module.
np.random.seed(seed)
random.seed(seed)

# Limit operation to 1 thread for deterministic results.
# NOTE(review): tf.ConfigProto, tf.set_random_seed and tf.Session look like
# TensorFlow 1.x entry points - confirm the installed TensorFlow still exposes them.

session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                              inter_op_parallelism_threads=1
                             )

from keras import backend as K

# Seed TensorFlow, then create the session on the default graph with the
# single-threaded configuration and hand it to Keras.
tf.set_random_seed(seed)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

################################################################################

# LSTM CLASS

In [0]:
import keras

class Model:
  """Two-layer LSTM network assembled from Keras layers.

  The layers are created in ``__init__``, stacked into a ``Sequential`` model and
  compiled by ``build``, and trained from a batch generator by ``fit``.
  The compiled Keras model is available as ``self.model`` after ``build``.
  """

  def __init__(self,
               vocab_size,
               embedding_dim,
               hidden_dim,
               input_length,
               recurrent_dropout,
               dropout,
               seed):
    """Store the hyperparameters and create the (not yet connected) layers.

    Args:
      vocab_size: number of units of the softmax output layer; the embedding
        table is created with ``vocab_size + 1`` rows.
      embedding_dim: output dimension of the embedding layer.
      hidden_dim: number of units of each of the two LSTM layers.
      input_length: ``input_length`` passed to the embedding layer.
      recurrent_dropout: ``recurrent_dropout`` rate of both LSTM layers.
      dropout: rate of the Dropout layer placed between the two LSTM layers.
      seed: seed passed to the Dropout layer.
    """
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.input_length = input_length
    self.recurrent_dropout = recurrent_dropout
    self.dropout = dropout
    self.seed = seed

    # Set by build(); kept as None so fit() can tell that build() was skipped.
    self.model = None

    # mask_zero=True goes together with the extra (+1) row of the embedding table.
    self.word_embeddings = keras.layers.Embedding(input_dim = self.vocab_size+1,
                                                  output_dim = self.embedding_dim,
                                                  input_length = self.input_length,
                                                  mask_zero = True)

    # First LSTM returns the full sequence so the second LSTM can consume it.
    self.lstm_1 = keras.layers.LSTM(units = self.hidden_dim,
                                    recurrent_dropout = self.recurrent_dropout,
                                    return_sequences = True,
                                    unroll = False,
                                    implementation = 2)

    self.dropout_1 = keras.layers.Dropout(rate = self.dropout,
                                          seed = self.seed)

    # Second LSTM returns only its last output, which feeds the softmax layer.
    self.lstm_2 = keras.layers.LSTM(units = self.hidden_dim,
                                    recurrent_dropout = self.recurrent_dropout,
                                    return_sequences = False,
                                    unroll = False,
                                    implementation = 2)

    self.dense = keras.layers.Dense(units = self.vocab_size,
                                    activation = 'softmax')


  def build(self, optimizer, metrics):
    """Stack the layers into a Sequential model and compile it.

    Args:
      optimizer: optimizer passed unchanged to ``compile``.
      metrics: metrics passed unchanged to ``compile``.

    The model is compiled with the ``categorical_crossentropy`` loss and stored
    in ``self.model``.
    """
    self.optimizer = optimizer
    self.metrics = metrics

    self.model = keras.models.Sequential([self.word_embeddings,
                                          self.lstm_1,
                                          self.dropout_1,
                                          self.lstm_2,
                                          self.dense])

    self.model.compile(loss = 'categorical_crossentropy',
                       optimizer = self.optimizer,
                       metrics = self.metrics)


  def fit(self, generator, epochs, workers, callbacks=None):
    """Train the compiled model from a batch generator object.

    Args:
      generator: object exposing a ``generator()`` method (the batch iterator
        handed to Keras) and a ``steps_per_epoch`` attribute.
      epochs: number of epochs passed to ``fit_generator``.
      workers: number of workers passed to ``fit_generator``.
      callbacks: optional list of Keras callbacks; ``None`` (the default) trains
        without callbacks, as before this parameter existed.

    Returns:
      The value returned by ``fit_generator`` (the Keras training history).

    Raises:
      RuntimeError: if ``build`` has not been called yet.
    """
    # getattr() also covers instances on which __init__ never ran.
    if getattr(self, 'model', None) is None:
      raise RuntimeError('Model.build() must be called before Model.fit()')

    self.g = generator
    self.epochs = epochs
    self.workers = workers
    self.callbacks = callbacks

    # shuffle=False: batch order is left to the generator.
    return self.model.fit_generator(generator = self.g.generator(),
                                    steps_per_epoch = self.g.steps_per_epoch,
                                    epochs = self.epochs,
                                    workers = self.workers,
                                    callbacks = self.callbacks,
                                    shuffle = False)

# Test LSTM Class
## Note: the `fit()` method has not been tested yet

In [0]:
import keras


## Hyperparameters

D = 512                    # used for both the embedding and the LSTM hidden size

T = 1000                   # vocabulary size handed to Model

Lprima = 100               # input sequence length

recurrent_dropout = 0.3

dropout = 0.3

seed = 0                   # for the Dropout layer


## Train Generator

batch_size = 128

# TODO: assign the indexed training corpus and the vocabulary. The original cell
# left both right-hand sides empty, which made the whole cell a SyntaxError.
ind_corpus_train = None

#ind_val_tokens =

vocab = None


## Fit Model

epochs = 300

workers = 2

# https://keras.io/callbacks/
#callbacks =


## Model

model = Model(vocab_size = T,
              embedding_dim = D,
              hidden_dim = D,
              input_length = Lprima,
              recurrent_dropout = recurrent_dropout,
              dropout = dropout,
              seed = seed)


optimizer = 'rmsprop' #'adam'

metrics = ['top_k_categorical_accuracy', 'categorical_accuracy']


model.build(optimizer = optimizer,
            metrics = metrics)

# Stop with an actionable message instead of failing somewhere inside the generator.
if ind_corpus_train is None or vocab is None:
  raise NotImplementedError('Set ind_corpus_train and vocab before training')

# NOTE(review): GeneralGenerator is neither defined nor imported in the cells
# shown here - TODO import it (presumably from the cloned repository).
train_generator = GeneralGenerator(batch_size = batch_size,
                                   ind_tokens = ind_corpus_train,
                                   voc = vocab,
                                   max_len = Lprima)

#val_gen = GeneralGenerator(batch_size = batch_size,
#                           ind_tokens = ind_val_tokens, #
#                           voc = vocab,
#                           max_len = Lprima)

model.fit(generator = train_generator,
          epochs = epochs,
          workers = workers)#,
          #callbacks = callbacks)


## Test

# `model` is the wrapper defined above; summary(), loss and get_config() live on
# the compiled Keras model it stores in `model.model`.

print('\n')
model.model.summary()      # prints the table itself and returns None

print('\n')
print(model.model.loss)

print('\n')
print(model.optimizer)

print('\n')
print(model.metrics)

print('\n')
model.model.get_config()
