In [None]:
#default_exp core

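Below I add an embedding layer that learns its embeddings under normalization: each vector is length-normalized, every dimension is mean-centered, and each vector is length-normalized again. The normalization method is taken from [A robust self-learning method for fully unsupervised cross-lingual mappings of word embeddings](https://arxiv.org/abs/1805.06297) by Mikel Artetxe, Gorka Labaka and Eneko Agirre.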

In [None]:
#export
class PermuteEmbeddingNorm(nn.Module):
    """Embedding layer that looks tokens up in a normalized, row-mixed table.

    On every forward pass the raw `weight` matrix is length-normalized,
    mean-centered per dimension and length-normalized again, then
    left-multiplied by the fixed square matrix `p` (identity on construction)
    before the lookup.

    Args:
        num_embeddings: number of rows in the embedding table.
        embedding_dim: size of each embedding vector.
        padding_idx: stored on the module as an attribute; `forward` does not
            pass it on to `F.embedding`.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super().__init__()

        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.padding_idx = padding_idx

        # to conform to nn.Embedding api
        self.max_norm = None
        self.norm_type = 2.0
        self.scale_grad_by_freq = False
        self.sparse = False

        # Raw (un-normalized) table; normalization is applied on the fly.
        self.weight = nn.Parameter(torch.empty(num_embeddings, embedding_dim))
        nn.init.kaiming_uniform_(self.weight)
        # Square matrix applied to the rows of the normalized table. It starts
        # as the identity and is excluded from gradient computation.
        self.p = nn.Parameter(torch.eye(self.num_embeddings), requires_grad=False)

        self.reset_parameters()

    def forward(self, words):
        """Return the rows of `p @ normalized_weight()` selected by `words`.

        The (num_embeddings x num_embeddings) @ (num_embeddings x embedding_dim)
        product is recomputed on every call.
        """
        return F.embedding(words, self.p @ self.normalized_weight())

    def normalized_weight(self):
        """Return `weight` length-normalized, mean-centered, length-normalized.

        `F.normalize` divides each row by `max(norm, eps)`, so an all-zero row
        stays zero instead of producing NaNs (0/0) that the column mean would
        then spread to every row of the table.
        """
        unit_rows = F.normalize(self.weight, p=2, dim=1)
        centered = unit_rows - unit_rows.mean(0)
        return F.normalize(centered, p=2, dim=1)

    def reset_parameters(self):
        """No-op hook; initialization is done in `__init__`."""
        pass

We need to define a new architecture that uses our new encoder.

In [None]:
#export
class p_normAWD_LSTM(AWD_LSTM):
    """`AWD_LSTM` subclass whose token encoder is a `PermuteEmbeddingNorm`.

    The constructor builds the encoder, the stacked RNN layers and the dropout
    modules itself. The encoder is also exposed as `encoder_dp`, so no extra
    module wraps the embedding lookup and `embed_p` is accepted but not used
    in this constructor.
    """
    def __init__(self, vocab_sz, emb_sz, n_hid, n_layers, pad_token=1, hidden_p=0.2, input_p=0.6, embed_p=0.1,
                 weight_p=0.5, bidir=False, packed=False):
        store_attr(self, 'emb_sz,n_hid,n_layers,pad_token,packed')
        self.bs = 1
        self.n_dir = 2 if bidir else 1

        # The same module is registered under both names.
        self.encoder = PermuteEmbeddingNorm(vocab_sz, emb_sz, padding_idx=pad_token)
        self.encoder_dp = self.encoder

        # First layer reads embeddings, last layer writes embedding-sized
        # output; the output width is divided by the number of directions.
        last_idx = n_layers - 1
        stacked = []
        for idx in range(n_layers):
            n_in = emb_sz if idx == 0 else n_hid
            n_out = (emb_sz if idx == last_idx else n_hid) // self.n_dir
            stacked.append(self._one_rnn(n_in, n_out, bidir, weight_p, idx))
        self.rnns = nn.ModuleList(stacked)

        # Overwrite the encoder's own initialization with a uniform one.
        bound = self.initrange
        self.encoder.weight.data.uniform_(-bound, bound)

        self.input_dp = RNNDropout(input_p)
        self.hidden_dps = nn.ModuleList([RNNDropout(hidden_p) for _ in range(n_layers)])

I repeated the experiments done up to this point, this time with normalization. There was no significant improvement on the translation task.

The next thing to try is aligning the embeddings using [vecmap](https://github.com/artetxem/vecmap). The results can be telling when it comes to understanding the quality of our embeddings and of the evaluation framework. Do our embeddings lend themselves to the translation task at all? (Maybe the vocab is too small, or maybe the embeddings are not of high enough quality.) Is the evaluation framework that uses the English-to-Spanish dictionary working at all? Maybe there is a bug preventing it from doing anything useful.

In [1]:
#hide
# Run nbdev's notebook2script; the cell output below lists each notebook it converted.
from nbdev.export import notebook2script
notebook2script()

Converted 00_data.ipynb.
Converted 01_train_LM_en.ipynb.
Converted 02_train_LM_es.ipynb.
Converted 03_translate_en_to_es.ipynb.
Converted 04_LM_with_normalized_embeddings.ipynb.
Converted 05_aligning_the_embeddings_using_vecmap.ipynb.
Converted 99_index.ipynb.
