# Day 6: Sequence Models in Deep Learning

### Exercise 6.1 
Convince yourself that an RNN is just a feed-forward (FF) network unfolded in time. Run the NumpyRNN code. Set breakpoints and compare with what you learned about backpropagation on the previous day.

Start by loading the part-of-speech data and configuring it for the exercises.

### WSJ Data

In [None]:
# Load the Part-of-Speech data through the lxmls corpus reader
from lxmls.readers.pos_corpus import PostagCorpusData
# Built with default arguments; `data` later provides the model input/output
# sizes and the train/dev/test batches
data = PostagCorpusData()

Model configuration

In [None]:
from lxmls.deep_learning.numpy_models.rnn import NumpyRNN

In [None]:
# RNN configuration (all three values are passed to NumpyRNN below)
embedding_size = 50   # Size of the word embeddings
hidden_size = 20     # Size of the hidden layer
learning_rate = 0.1  # Learning rate handed to the model

In [None]:
# Build the numpy RNN: input and output sizes are read from the loaded data,
# the remaining hyper-parameters come from the configuration values set above
model = NumpyRNN(
    input_size=data.input_size,
    embedding_size=embedding_size,
    hidden_size=hidden_size,
    output_size=data.output_size,
    learning_rate=learning_rate
)

In [None]:
# Training split, requested with batch_size=1
train_batches = data.batches('train', batch_size=1)

#### Milestone 1:

Check gradients using the empirical gradient computation

In [None]:
from lxmls.deep_learning.rnn import rnn_parameter_handlers
from lxmls.deep_learning.numpy_models.rnn import cross_entropy_loss, backpropagation
# Use the first training batch (batch_size=1) as the fixed example for the
# gradient check
batch = data.batches('train', batch_size=1)[0]

In [None]:
# Run backpropagation on the study batch with the current model parameters;
# the result is compared further below against the empirically measured loss
gradient = backpropagation(batch['input'], batch['output'], model.parameters)

In [None]:
# Show the shape of every parameter array in the model.
# Uses the print() function (Python 3 compatible), like the rest of the notebook.
print([x.shape for x in model.parameters])

In [None]:
# Select one parameter from the network
# NOTE(review): layer_index presumably indexes model.parameters (-1 = last
# entry); 0 is kept as an alternative value to try
layer_index = -1  # alternative: 0
row = 2
# Second index reported by nonzero() on the batch input.
# NOTE(review): presumably chosen so that the selected parameter is actually
# involved in this batch -- TODO confirm
column = batch['input'].nonzero()[0][1]

In [None]:
# Build the two accessor functions for the selected parameter: one reads its
# value from a set of parameters, the other writes a new value into it
get_parameter, set_parameter = rnn_parameter_handlers(
    layer_index=layer_index, row=row, column=column
)

In [None]:
import numpy as np
from copy import deepcopy

# Offsets applied to the studied weight: `resolution` evenly spaced values
# in [-scale, scale]
scale = 10
resolution = 1000
perturbations = np.linspace(-scale, scale, resolution)

# Reference point: loss at the unperturbed model parameters
study_loss = cross_entropy_loss(batch['input'], batch['output'], model.parameters)

# Work on a deep copy so the sweep never touches the model's own parameters
parameters = deepcopy(model.parameters)
study_weight = float(get_parameter(parameters))

# Sweep the studied weight over the perturbation range, recording the loss
# obtained for each shifted value
loss_range = [
    cross_entropy_loss(
        batch['input'],
        batch['output'],
        set_parameter(parameters, study_weight + offset)
    )
    for offset in perturbations
]

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
# Empirical curve: loss measured for each perturbed value of the weight
weight_range = study_weight + perturbations
plt.plot(weight_range, loss_range)
# Mark the current weight and its loss
plt.plot(study_weight, study_loss, 'xr')
# Straight line through the current point whose slope is the value that
# backpropagation computed for this parameter
slope = get_parameter(gradient)
tangent = slope*(weight_range - study_weight) + study_loss
h = plt.plot(weight_range, tangent, 'r--')
plt.title('layer %d parameter (%d, %d)' % (layer_index, row, column))
plt.xlabel('parameter value')
plt.ylabel('loss value')

#### Milestone 2:
Train an RNN

In [None]:
# Number of passes over the training batches in the epoch loop below
num_epochs = 20

In [None]:
# Training split, requested with batch_size=1
train_batches = data.batches('train', batch_size=1)

In [None]:
# Get the batches for train, dev and test (batch_size=1 for each split)
train_batches = data.batches('train', batch_size=1)
dev_set = data.batches('dev', batch_size=1)
test_set = data.batches('test', batch_size=1)

# Epoch loop
for epoch in range(num_epochs):

    # Batch loop: one model update per training batch
    for batch in train_batches:
        model.update(input=batch['input'], output=batch['output'])

    # Evaluation dev: collect, per batch, whether each prediction matches
    # the reference output
    is_hit = []
    for batch in dev_set:
        is_hit.extend(model.predict(input=batch['input']) == batch['output'])
    accuracy = 100*np.mean(is_hit)

    # Inform user
    print("Epoch %d: dev accuracy %2.2f %%" % (epoch+1, accuracy))

# Evaluation test: same measure, computed once after training has finished
is_hit = []
for batch in test_set:
    is_hit.extend(model.predict(input=batch['input']) == batch['output'])
accuracy = 100*np.mean(is_hit)

# Inform user. This is the test-set result, so it is labelled as such and
# does not depend on the epoch counter.
print("Test accuracy %2.2f %%" % accuracy)