In [5]:
import cntk as C
import numpy as np

#Import local modules
import os
import sys
modulesPath = "scripts"
modulesPath = os.path.abspath(os.path.join(modulesPath))
if modulesPath not in sys.path: sys.path.append(modulesPath)
from bicorpus import Bicorpus

C.cntk_py.set_fixed_random_seed(0)

#Model hyperparameters
my_dtype = np.float32
hidden_dim = 512
num_layers = 2
attention_dim = 128
use_attention = True
use_embedding = True
embedding_dim = 200

vocabSize = 30000
sourceVocabSize = vocabSize
destVocabSize = vocabSize

In [6]:
files = {}

sourceTraining = "corpora/europarl-v7.es-en.es"
destTraining = "corpora/europarl-v7.es-en.en"



with open(sourceTraining, "r", encoding = "utf-8") as sourceFile:
    sourceLines = sourceFile.readlines()
with open(destTraining, "r", encoding = "utf-8") as destFile:
    destLines = destFile.readlines()

trainingCorp = Bicorpus(sourceLines, destLines, vocabSize = vocabSize, numSequences = 10000)


500 sequences read.
1000 sequences read.
1500 sequences read.
2000 sequences read.
2500 sequences read.
3000 sequences read.
3500 sequences read.
4000 sequences read.
4500 sequences read.
5000 sequences read.
5500 sequences read.
6000 sequences read.
6500 sequences read.
7000 sequences read.
7500 sequences read.
8000 sequences read.
8500 sequences read.
9000 sequences read.
9500 sequences read.
10000 sequences read.


In [7]:
training_lines = trainingCorp.training_lines()
sourceWordToI, destWordToI = trainingCorp.getIndexDicts()
sourceVocabSize, destVocabSize = len(sourceWordToI), len(destWordToI)

seq_start_index = sourceWordToI[Bicorpus.start_token()]
seq_end_index = sourceWordToI[Bicorpus.end_token()]
seq_start = C.constant(np.asarray([i == seq_start_index for i in range(destVocabSize)], dtype = my_dtype))

In [9]:
# Source and target inputs to the model
sourceAxis = C.Axis("sourceAxis")
destAxis = C.Axis("destAxis")
sourceSequence = C.layers.SequenceOver[sourceAxis]
destSequence = C.layers.SequenceOver[destAxis]

In [12]:
#Returns a general sequence-to-sequence model
def create_model():
    
    embed = C.layers.Embedding_dim, name = "embed" if use_embedding else identity #Where is "identity defined?
    
    with C.layers.default_options(enable_self_stabilization = True, go_backwards = not use_attention):
        LastRecurrence = C.layers.Fold if not use_attention else C.layers.Recurrence,
        encode = C.layers.Sequential([
            embed,
            C.layers.Stabilizer(),
            C.layers.For(range(num_layers - 1), lambda: C.layers.Recurrence(C.layers.GRU(hidden_dim))),
            LastRecurrence(hidden_dim, return_full_state = True),
            C.layers.Label("encoded_h")                                  
        ])
        
    with C.layers.default_options(enable_self_stabilization = True):
        stab_in = C.layers.Stabilizer()
        rec_blocks = [C.layers.GRU for i in range(num_hidden_layers)]
        stab_out = C.layers.Stabilizer()
        proj_out = C.layers.Dense(destVocabSize, name = "out_proj")
        if use_attention:
            attention_model = C.layers.AttentionModel(attention_dim, name = "attention_model")
            
        @C.Function
        def decode(history, input):
            encoded_input = encode(input)
            r = history
            r = embed(r)
            r = stab_in(r)
            for i in range(num_layers):
                rec_block = rec_blocks[i]
                if i == 0:
                    if use_attention:
                        @C.Function
                        def gru_with_attention(dh, x):
                            h_att = attention_model(encoded_input.outputs[0], dh)
                            x = C.splice(h_att)
                            return rec_block(dh, x)
                        r = C.layers.Recurrence(gru_with_attention)(r)
                    else:
                        r = C.layers.Recurrence(rec_block)(r)
                else:
                    r = C.layers.RecurrenceFrom(rec_block)( *(encoded_input.outputs + (r,)) )
            r = stab_out(r)
            r = proj_out(r)
            r = C.layers.Label("out_proj_out")(r)
            return r
        
        return decode

In [13]:
def create_model_train(s2smodel):
    @C.Function
    def model_train(input, labels):
        past_labels = C.layers.Delay(initial_state = sequence_start)(labels)
        return s2smodel(past_labels, input)

In [None]:
#Model used in testing
def create_model_greedy(s2smodel):
    @C.Function
    @C.layers.Signature(InputSequence[C.layers.Tensor[input_vocab_dim]])
    def model_greedy(input):
        unfold = C.layers.UnfoldFrom(lambda history: s2smodel(history, input) >> C.hardmax,
                                    until_predicate = lambda w: w[..., sentence_end_index],
                                    length_increase = length_increase)
        return unfold(initial_state = sentence_start, dynamic_axes_like = input)
    return model_greedy

In [15]:
a = [0, 2, 3]
x = lambda b: b[..., 3]
x(a)

TypeError: list indices must be integers or slices, not tuple