In [1]:
from IPython.display import HTML
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import random
import json
import os
import time

from faker import Faker
import babel
from babel.dates import format_date

import tensorflow as tf

import tensorflow.contrib.legacy_seq2seq as seq2seq
# from utilities import show_graph

from sklearn.model_selection import train_test_split

  from ._conv import register_converters as _register_converters


In [2]:
fake = Faker()
fake.seed(42)
random.seed(42)

FORMATS = ['short',
           'medium',
           'long',
           'full',
           'd MMM YYY',
           'd MMMM YYY',
           'dd MMM YYY',
           'd MMM, YYY',
           'd MMMM, YYY',
           'dd, MMM YYY',
           'd MM YY',
           'd MMMM YYY',
           'MMMM d YYY',
           'MMMM d, YYY',
           'dd.MM.YY',
           ]

# change this if you want it to work with only a single language
LOCALES = babel.localedata.locale_identifiers()
LOCALES = [lang for lang in LOCALES if 'en' in str(lang)]

In [3]:
def create_date():
    """
        Creates some fake dates 
        :returns: tuple containing 
                  1. human formatted string
                  2. machine formatted string
                  3. date object.
    """
    dt = fake.date_object()

    # wrapping this in a try catch because
    # the locale 'vo' and format 'full' will fail
    try:
        human = format_date(dt,
                            format=random.choice(FORMATS),
                            locale=random.choice(LOCALES))

        case_change = random.randint(0,3) # 1/2 chance of case change
        if case_change == 1:
            human = human.upper()
        elif case_change == 2:
            human = human.lower()

        machine = dt.isoformat()
    except AttributeError as e:
        return None, None, None

    return human, machine #, dt

data = [create_date() for _ in range(50000)]

In [4]:
data[:5]

[('7 07 13', '2013-07-07'),
 ('30 JULY 1977', '1977-07-30'),
 ('Tuesday, 14 September 1971', '1971-09-14'),
 ('18 09 88', '1988-09-18'),
 ('31, Aug 1986', '1986-08-31')]

In [5]:
x = [x for x, y in data]
y = [y for x, y in data]

u_characters = set(' '.join(x))
char2numX = dict(zip(u_characters, range(len(u_characters))))

u_characters = set(' '.join(y))
char2numY = dict(zip(u_characters, range(len(u_characters))))

In [6]:
char2numX['<PAD>'] = len(char2numX)
num2charX = dict(zip(char2numX.values(), char2numX.keys()))
max_len = max([len(date) for date in x])

x = [[char2numX['<PAD>']]*(max_len - len(date)) +[char2numX[x_] for x_ in date] for date in x]
print(''.join([num2charX[x_] for x_ in x[4]]))
x = np.array(x)

<PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD><PAD>31, Aug 1986


In [7]:
char2numY['<GO>'] = len(char2numY)
num2charY = dict(zip(char2numY.values(), char2numY.keys()))

y = [[char2numY['<GO>']] + [char2numY[y_] for y_ in date] for date in y]
print(''.join([num2charY[y_] for y_ in y[4]]))
y = np.array(y)

<GO>1986-08-31


In [8]:
x_seq_length = len(x[0])
y_seq_length = len(y[0])- 1

In [9]:
def batch_data(x, y, batch_size):
    shuffle = np.random.permutation(len(x))
    start = 0
#     from IPython.core.debugger import Tracer; Tracer()()
    x = x[shuffle]
    y = y[shuffle]
    while start + batch_size <= len(x):
        yield x[start:start+batch_size], y[start:start+batch_size]
        start += batch_size

In [10]:
epochs = 2
batch_size = 128
nodes = 32
embed_size = 10
bidirectional = False

tf.reset_default_graph()
sess = tf.InteractiveSession()

# Tensor where we will feed the data into graph
inputs = tf.placeholder(tf.int32, (None, x_seq_length), 'inputs')
outputs = tf.placeholder(tf.int32, (None, None), 'output')
targets = tf.placeholder(tf.int32, (None, None), 'targets')

# Embedding layers
input_embedding = tf.Variable(tf.random_uniform((len(char2numX), embed_size), -1.0, 1.0), name='enc_embedding')
output_embedding = tf.Variable(tf.random_uniform((len(char2numY), embed_size), -1.0, 1.0), name='dec_embedding')
date_input_embed = tf.nn.embedding_lookup(input_embedding, inputs)
date_output_embed = tf.nn.embedding_lookup(output_embedding, outputs)

with tf.variable_scope("encoding") as encoding_scope:

    if not bidirectional:
        
        # Regular approach with LSTM units
        lstm_enc = tf.contrib.rnn.LSTMCell(nodes)
        _, last_state = tf.nn.dynamic_rnn(lstm_enc, inputs=date_input_embed, dtype=tf.float32)

    else:
        
        # Using a bidirectional LSTM architecture instead
        enc_fw_cell = tf.contrib.rnn.LSTMCell(nodes)
        enc_bw_cell = tf.contrib.rnn.LSTMCell(nodes)

        ((enc_fw_out, enc_bw_out) , (enc_fw_final, enc_bw_final)) = tf.nn.bidirectional_dynamic_rnn(cell_fw=enc_fw_cell,
                                                        cell_bw=enc_bw_cell, inputs=date_input_embed, dtype=tf.float32)
        enc_fin_c = tf.concat((enc_fw_final.c , enc_bw_final.c),1)
        enc_fin_h = tf.concat((enc_fw_final.h , enc_bw_final.h),1)
        last_state = tf.contrib.rnn.LSTMStateTuple(c=enc_fin_c , h=enc_fin_h)
    
    
with tf.variable_scope("decoding") as decoding_scope:
    
    if not bidirectional:      
        lstm_dec = tf.contrib.rnn.LSTMCell(nodes)    
    else:
        lstm_dec = tf.contrib.rnn.LSTMCell(2*nodes)
    
    dec_outputs, _ = tf.nn.dynamic_rnn(lstm_dec, inputs=date_output_embed, initial_state=last_state)

        

logits = tf.layers.dense(dec_outputs, units=len(char2numY), use_bias=True) 
    
    
#connect outputs to 
with tf.name_scope("optimization"):
    # Loss function
    loss = tf.contrib.seq2seq.sequence_loss(logits, targets, tf.ones([batch_size, y_seq_length]))
    # Optimizer
    optimizer = tf.train.RMSPropOptimizer(1e-3).minimize(loss)

In [11]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

In [12]:
sess.run(tf.global_variables_initializer())
epochs = 10
for epoch_i in range(epochs):
    start_time = time.time()
    for batch_i, (source_batch, target_batch) in enumerate(batch_data(X_train, y_train, batch_size)):
        _, batch_loss, batch_logits = sess.run([optimizer, loss, logits],
            feed_dict = {inputs: source_batch,
             outputs: target_batch[:, :-1],
             targets: target_batch[:, 1:]})
    accuracy = np.mean(batch_logits.argmax(axis=-1) == target_batch[:,1:])
    print('Epoch {:3} Loss: {:>6.3f} Accuracy: {:>6.4f} Epoch duration: {:>6.3f}s'.format(epoch_i, batch_loss, 
                                                                      accuracy, time.time() - start_time))

Epoch   0 Loss:  1.340 Accuracy: 0.5484 Epoch duration:  8.784s
Epoch   1 Loss:  0.805 Accuracy: 0.7039 Epoch duration:  8.627s
Epoch   2 Loss:  0.661 Accuracy: 0.7539 Epoch duration:  8.569s
Epoch   3 Loss:  0.566 Accuracy: 0.7937 Epoch duration:  8.572s
Epoch   4 Loss:  0.510 Accuracy: 0.8195 Epoch duration:  8.580s
Epoch   5 Loss:  0.431 Accuracy: 0.8484 Epoch duration:  8.592s
Epoch   6 Loss:  0.367 Accuracy: 0.8797 Epoch duration:  8.663s
Epoch   7 Loss:  0.334 Accuracy: 0.8734 Epoch duration:  8.707s
Epoch   8 Loss:  0.294 Accuracy: 0.8938 Epoch duration:  8.704s
Epoch   9 Loss:  0.261 Accuracy: 0.9203 Epoch duration:  8.617s


In [13]:
source_batch, target_batch = next(batch_data(X_test, y_test, batch_size))

dec_input = np.zeros((len(source_batch), 1)) + char2numY['<GO>']
for i in range(y_seq_length):
    batch_logits = sess.run(logits,
                feed_dict = {inputs: source_batch,
                 outputs: dec_input})
    prediction = batch_logits[:,-1].argmax(axis=-1)
    dec_input = np.hstack([dec_input, prediction[:,None]])
    
print('Accuracy on test set is: {:>6.3f}'.format(np.mean(dec_input == target_batch)))

Accuracy on test set is:  0.888


In [14]:
num_preds = 2
source_chars = [[num2charX[l] for l in sent if num2charX[l]!="<PAD>"] for sent in source_batch[:num_preds]]
dest_chars = [[num2charY[l] for l in sent] for sent in dec_input[:num_preds, 1:]]

for date_in, date_out in zip(source_chars, dest_chars):
    print(''.join(date_in)+' => '+''.join(date_out))

2 Oct. 1970 => 1970-10-23
22, Dec 1993 => 1993-12-22


In [106]:
x_ = tf.placeholder(dtype=tf.float32, shape=[None, 2], name='x')
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 2], name='y')

W_ = tf.Variable(tf.random_uniform([2,2], -1, 1), name='W')
b_ = tf.Variable(tf.zeros([2,]), name='b')

logits_ = tf.add(tf.matmul(x_, W_), b_)

unit_logits_ = tf.nn.l2_normalize(logits_, axis=1)
unit_x_ = tf.nn.l2_normalize(x_, axis=1)
unit_y_ =  tf.nn.l2_normalize(y_, axis=1)

loss_ = tf.losses.cosine_distance(unit_logits_, unit_y_, axis=1, reduction=tf.losses.Reduction.MEAN)
opt_ = tf.train.AdamOptimizer(1e-3).minimize(loss_)

In [110]:
x = np.random.random([100,2])
y = np.matmul(x, np.array([[0,-1], [1,0]]))

In [84]:
def cos_dist(a,b):
    return 1-np.dot(a,b) / (np.linalg.norm(a)*np.linalg.norm(b))

In [115]:
cos_dist([0,1], [1,0])

1.0

In [120]:
sess.run(tf.global_variables_initializer())
epochs = 100
batch_size = 10
for epoch_i in range(epochs):
    start_time = time.time()
    for batch_i, (source_batch, target_batch) in enumerate(batch_data(x, y, batch_size)):
        _, batch_loss, batch_logits, W, b = sess.run([opt_, loss_, logits_, W_, b_],
            feed_dict = {x_: source_batch, y_: target_batch})
    dist = np.mean([cos_dist(batch_logits[i], target_batch[i]) for i in range(batch_size)])
    if not epoch_i % 10:
        print('Epoch {:3} Loss: {:>6.3f} Dist: {:>6.4f} Epoch duration: {:>6.3f}s'.format(epoch_i, batch_loss, 
                                                                      dist, time.time() - start_time))
sess.run([W_, b_])

Epoch   0 Loss:  0.938 Dist: 0.9377 Epoch duration:  0.008s
Epoch  10 Loss:  0.181 Dist: 0.1809 Epoch duration:  0.007s
Epoch  20 Loss:  0.138 Dist: 0.1381 Epoch duration:  0.007s
Epoch  30 Loss:  0.040 Dist: 0.0400 Epoch duration:  0.007s
Epoch  40 Loss:  0.049 Dist: 0.0495 Epoch duration:  0.007s
Epoch  50 Loss:  0.027 Dist: 0.0265 Epoch duration:  0.007s
Epoch  60 Loss:  0.017 Dist: 0.0172 Epoch duration:  0.007s
Epoch  70 Loss:  0.016 Dist: 0.0156 Epoch duration:  0.007s
Epoch  80 Loss:  0.016 Dist: 0.0157 Epoch duration:  0.007s
Epoch  90 Loss:  0.005 Dist: 0.0052 Epoch duration:  0.007s


[array([[-0.3071613 , -0.54311997],
        [ 0.68342453,  0.42378855]], dtype=float32),
 array([ 0.17894115, -0.2693293 ], dtype=float32)]

In [123]:
sess.run(logits_, feed_dict={x_:[[1,0], [0,1]]})

array([[-0.12822016, -0.8124493 ],
       [ 0.86236566,  0.15445924]], dtype=float32)

In [114]:
a = tf.nn.l2_normalize(tf.constant([[1.0, 1.0], [0, 1.0]]), axis=1)
b = tf.nn.l2_normalize(tf.constant([[1.0, 1.0], [1.0, 0]]), axis=1)
cd = tf.losses.cosine_distance(a, b, axis=1, reduction=tf.losses.Reduction.NONE)
sess.run([a,b,cd])

[array([[0.70710677, 0.70710677],
        [0.        , 1.        ]], dtype=float32),
 array([[0.70710677, 0.70710677],
        [1.        , 0.        ]], dtype=float32),
 array([[5.9604645e-08],
        [1.0000000e+00]], dtype=float32)]

In [118]:
cos_dist([0.70710677, 0.70710677], [0.70710677, 0.70710677])

-2.220446049250313e-16

In [119]:
cos_dist([0, 1], [1, 0])

1.0