In [1]:
import pandas as pd, numpy as np, random
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# !pip install tensorflow==2.0.0-alpha0
import tensorflow as tf
from tensorflow import keras
tf.__version__,keras.__version__,tf.test.is_gpu_available()

('2.0.0-alpha0', '2.2.4-tf', False)

# Test Integer Sequences

https://machinelearningmastery.com/encoder-decoder-attention-sequence-to-sequence-prediction-keras/

- Input: sequence of 5 integers
- Output: the first two integers, then padded with 0; or other criterions

In [218]:
seqlen = 5
maxint = 60
batchsize = 32

## Generate Data

In [260]:
one_hot_decode = lambda X: np.argmax(X,axis=X.ndim-1)
 
def gen_pairs(batchsize=2, onehot=True):
    while True:
        X = np.random.randint(0, maxint-1,(batchsize,seqlen),dtype=int)
        y = X.copy(); 
#         y = np.hstack((y[:,:2],np.zeros((batchsize,3),dtype=int)))  # original: first 2 integers
#         y = np.clip(y,maxint//3,2*maxint//3)
#         y.sort(axis=1)
        y = np.cumsum(y,1)/np.tile(y.sum(1),(seqlen,1)).T*(maxint-1); y = y.astype(int)
        
        if onehot: #https://stackoverflow.com/a/49217762
            I = np.eye(maxint)
            X = I[X.reshape(-1)].reshape(*X.shape,maxint)
            y = I[y.reshape(-1)].reshape(*y.shape,maxint)
        
        yield X,y

X,y = next(gen_pairs(2,False))
X,y

(array([[ 3, 57, 19,  5, 28],
        [47, 31, 34, 21, 54]]), array([[ 1, 31, 41, 44, 59],
        [14, 24, 35, 41, 59]]))

## Keras Model w/o Attention

In [264]:
# define 
cellSize = 100
model = tf.keras.models.Sequential()
# model.add(keras.layers.LSTM(cellSize, input_shape=(seqlen, maxint)))  #original
# model.add(keras.layers.RepeatVector(seqlen))                          #original
# model.add(keras.layers.LSTM(cellSize, return_sequences=True))         #original
model.add(keras.layers.Bidirectional(keras.layers.LSTM(cellSize, return_sequences=True, input_shape=(seqlen, maxint))))
model.add(keras.layers.Bidirectional(keras.layers.LSTM(cellSize, return_sequences=True)))
model.add(keras.layers.TimeDistributed(keras.layers.Dense(maxint,
                                                          activation='softmax')
                                      ))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

In [266]:
# Train
import tempfile
_,weightsfile=tempfile.mkstemp()
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
es=EarlyStopping(monitor='val_acc', patience=3, verbose=1)
mc=ModelCheckpoint(weightsfile,monitor='val_acc',save_best_only=True,save_weights_only=True,verbose=0)

model.fit_generator(gen_pairs(32, True), steps_per_epoch=1000, epochs=1000,callbacks=[es,mc],
                    validation_data=gen_pairs(32, True),validation_steps=100)
model.load_weights(weightsfile)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 00007: early stopping


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7ff3a10a9320>

In [267]:
# Evaluate
X,y = next(gen_pairs(10000,True))
yhat = model.predict(X, verbose=0)
wrong = list(map(np.any, one_hot_decode(y)-one_hot_decode(yhat))) #np.any returns true (1) if any element is nonzero
print(f'Accuracy: {1-sum(wrong)/y.shape[0]}%') # different from acc during training as we count the WHOLE sequence as right/wrong here

for i in np.where(wrong)[0][:10]:   # output examples that are wrong
    print(f'True:{one_hot_decode(y[i])}, Prediction:{one_hot_decode(yhat[i])}')

Accuracy: 0.5846%
True:[ 0  4 20 43 59], Prediction:[ 0  5 21 43 59]
True:[ 2 11 30 58 59], Prediction:[ 2 12 30 58 59]
True:[11 31 37 49 59], Prediction:[10 31 38 49 59]
True:[21 22 39 42 59], Prediction:[20 22 39 43 59]
True:[14 25 40 52 59], Prediction:[14 25 40 51 59]
True:[23 37 38 53 59], Prediction:[23 36 37 53 59]
True:[10 23 49 50 59], Prediction:[10 22 50 50 59]
True:[12 19 35 53 59], Prediction:[12 18 35 53 59]
True:[ 5 13 35 53 59], Prediction:[ 4 13 35 53 59]
True:[ 2 29 29 48 59], Prediction:[ 2 29 29 49 59]


## Keras Model w/ Attention

# Translate dates to standard format

https://www.coursera.org/learn/nlp-sequence-models/notebook/npjGi/neural-machine-translation-with-attention

# language translation 

https://www.tensorflow.org/alpha/tutorials/text/nmt_with_attention

In [4]:
import urllib, os, tempfile, zipfile
os.chdir(tempfile.gettempdir())
urllib.request.urlretrieve('http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip','spa-eng.zip')
zipfile.ZipFile('spa-eng.zip').extractall()
# ./spa-eng/spa.txt

In [11]:
import unicodedata, re

def unicode_to_ascii(s):
    return ''.join(c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')

def preprocess_sentence(w):
    w = unicode_to_ascii(w.lower().strip())

    # "he is a boy." => "he is a boy ." (https://stackoverflow.com/questions/3645931/python-padding-punctuation-with-white-spaces-keeping-punctuation)
    w = re.sub(r"([?.!,¿])", r" \1 ", w)
    w = re.sub(r'[" "]+', " ", w)

    w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)
    w = w.rstrip().strip()
    w = '<start> ' + w + ' <end>'
    return w

print(preprocess_sentence("May I borrow this book?"))
print(preprocess_sentence("¿Puedo tomar prestado este libro?").encode('utf-8'))

<start> may i borrow this book ? <end>
b'<start> \xc2\xbf puedo tomar prestado este libro ? <end>'
