This repository has been archived by the owner on Apr 22, 2022. It is now read-only.

Commit ef38f32
This seems more appropriate.
gugarosa committed Jun 29, 2020
1 parent b1edee8 commit ef38f32
Showing 12 changed files with 53 additions and 53 deletions.
@@ -34,7 +34,7 @@
                           ignore_token=target_pad_index, init_weights=None, device=device)
 
 # Training the model
-att_seq2seq.fit(train_iterator, val_iterator, epochs=10)
+att_seq2seq.fit(train_iterator, val_iterator, epochs=1)
 
 # Evaluating the model
 att_seq2seq.evaluate(test_iterator)

@@ -36,7 +36,7 @@
                             init_weights=None, device=device)
 
 # Training the model
-conv_seq2seq.fit(train_iterator, val_iterator, epochs=10)
+conv_seq2seq.fit(train_iterator, val_iterator, epochs=1)
 
 # Evaluating the model
 conv_seq2seq.evaluate(test_iterator)

textformer/models/att_seq2seq.py (3 additions & 3 deletions)

@@ -75,7 +75,7 @@ def forward(self, x, y, teacher_forcing_ratio=0.5):
         # For every possible token in the sequence
         for t in range(1, y.shape[0]):
             # Decodes the tensor
-            pred, hidden, _ = self.D(x, hidden, outputs)
+            pred, hidden, _ = self.D(x, outputs, hidden)
 
             # Gathers the prediction of current token
             preds[t] = pred

@@ -134,7 +134,7 @@ def generate_text(self, start, field, length=10, temperature=1.0):
             # Inhibits the gradient from updating the parameters
             with torch.no_grad():
                 # Decodes only the last token, i.e., last sampled token
-                preds, hidden, _ = self.D(tokens[-1], hidden, outputs)
+                preds, hidden, _ = self.D(tokens[-1], outputs, hidden)
 
                 # Regularize the prediction with the temperature
                 preds /= temperature

@@ -195,7 +195,7 @@ def translate_text(self, start, src_field, trg_field, max_length=10):
             # Inhibits the gradient from updating the parameters
             with torch.no_grad():
                 # Decodes only the last token, i.e., last sampled token
-                preds, hidden, att = self.D(tokens[-1], hidden, outputs)
+                preds, hidden, att = self.D(tokens[-1], outputs, hidden)
 
                 # Retrieving current token attention values
                 atts[i] = att

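Note: all three decoder call sites above now pass the encoder outputs before the hidden state, matching the reordered forward(self, x, o, h) signature introduced in textformer/models/decoders/att_bi_gru.py below.
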
textformer/models/decoders/att_bi_gru.py (5 additions & 5 deletions)

@@ -57,13 +57,13 @@ def __init__(self, n_output=128, n_hidden_enc=128, n_hidden_dec=128, n_embedding
         logger.debug(
             f'Size: ({self.n_output}, {self.n_hidden}) | Embeddings: {self.n_embedding} | Core: {self.rnn} | Attention: {self.a} | Output: {self.fc}.')
 
-    def forward(self, x, h, y):
+    def forward(self, x, o, h):
         """Performs a forward pass over the architecture.
         Args:
-            x (torch.Tensor): Tensor containing the data.
+            x (torch.Tensor): Tensor containing the input data.
+            o (torch.Tensor): Tensor containing the encoded outputs.
             h (torch.Tensor): Tensor containing the hidden states.
-            y (torch.Tensor): Tensor containing the encoder outputs.
         Returns:
             The prediction and hidden state.

@@ -74,10 +74,10 @@ def forward(self, x, h, y):
         # Calculates the embedded layer
         embedded = self.dropout(self.embedding(x.unsqueeze(0)))
 
         # Calculates the attention
-        attention = self.a(h, y).unsqueeze(1)
+        attention = self.a(o, h).unsqueeze(1)
 
         # Permutes the encoder outputs
-        encoder_outputs = y.permute(1, 0, 2)
+        encoder_outputs = o.permute(1, 0, 2)
 
         # Calculates the weights from the attention-based layer
         weighted = torch.bmm(attention, encoder_outputs).permute(1, 0, 2)

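For context, the two rewritten lines feed the attention weights into a batched matrix multiply. A minimal, self-contained sketch of that step (all shapes here are illustrative assumptions, not the library's defaults):

import torch

batch, src_len, enc_dim = 4, 7, 256

# Attention weights over the source positions, with an added middle dimension
attention = torch.softmax(torch.randn(batch, src_len), dim=1).unsqueeze(1)

# Encoder outputs permuted from (src_len, batch, dim) to (batch, src_len, dim)
encoder_outputs = torch.randn(src_len, batch, enc_dim).permute(1, 0, 2)

# Each batch item's weights pool its encoder states: (batch, 1, dim),
# permuted back to (1, batch, dim) for the recurrent step
weighted = torch.bmm(attention, encoder_outputs).permute(1, 0, 2)
print(weighted.shape)  # torch.Size([1, 4, 256])
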
textformer/models/decoders/conv.py (6 additions & 6 deletions)

@@ -89,13 +89,13 @@ def __init__(self, n_output=128, n_hidden=128, n_embedding=128, n_layers=1,
         logger.debug(f'Size: ({self.n_output}, {self.n_hidden}) | Embeddings: {self.n_embedding} | Core: {self.conv}.')
 
-    def forward(self, y, enc_c, enc_o):
+    def forward(self, y, c, o):
         """Performs a forward pass over the architecture.
         Args:
             y (torch.Tensor): Tensor containing the true labels.
-            enc_c (torch.Tensor): Tensor containing the convolutional features.
-            enc_o (torch.Tensor): Tensor containing combined outputs.
+            c (torch.Tensor): Tensor containing the convolutional features.
+            o (torch.Tensor): Tensor containing combined outputs.
         Returns:
             The output and attention values.

@@ -116,7 +116,7 @@ def forward(self, y, enc_c, enc_o):
         hidden = self.fc1(embedded).permute(0, 2, 1)
 
         # For every convolutional layer
-        for c in self.conv:
+        for layer in self.conv:
             # Applying dropout
             hidden = self.dropout(hidden)

@@ -132,13 +132,13 @@ def forward(self, y, enc_c, enc_o):
             conv = torch.cat((pad, hidden), dim=2)
 
             # Pass down through convolutional layer
-            conv = c(conv)
+            conv = layer(conv)
 
             # Activates with a GLU function
             conv = nn.functional.glu(conv, dim=1)
 
             # Calculating attention
-            attention, conv = self.a(embedded, conv, enc_c, enc_o)
+            attention, conv = self.a(embedded, conv, c, o)
 
             # Applying residual connections
             conv = (conv + hidden) * self.scale

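The loop-variable rename is not cosmetic: under the new signature, c is the convolutional-features argument, and keeping for c in self.conv would have shadowed it before the attention call. A toy illustration of the hazard, with hypothetical names rather than library code:

conv_layers = [lambda v: v * 2, lambda v: v + 1]

def forward(y, c, o):
    hidden = y
    for layer in conv_layers:    # renamed from `c`; that name would hide the argument
        hidden = layer(hidden)
        hidden = hidden + c + o  # `c` must still mean the features argument here
    return hidden

print(forward(1.0, 10.0, 100.0))  # 223.0
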
textformer/models/decoders/gru.py (3 additions & 3 deletions)

@@ -52,11 +52,11 @@ def __init__(self, n_output=128, n_hidden=128, n_embedding=128, dropout=0.5):
         logger.debug(
             f'Size: ({self.n_output}, {self.n_hidden}) | Embeddings: {self.n_embedding} | Core: {self.rnn} | Output: {self.fc}.')
 
-    def forward(self, x_enc, h, c):
+    def forward(self, x, h, c):
         """Performs a forward pass over the architecture.
         Args:
-            x_enc (torch.Tensor): Tensor containing the encoded data.
+            x (torch.Tensor): Tensor containing the input data.
             h (torch.Tensor): Tensor containing the hidden states.
             c (torch.Tensor): Tensor containing the context.

@@ -66,7 +66,7 @@ def forward(self, x_enc, h, c):
         """
 
         # Calculates the embedded layer
-        embedded = self.dropout(self.embedding(x_enc.unsqueeze(0)))
+        embedded = self.dropout(self.embedding(x.unsqueeze(0)))
 
         # Concatenating the embedding and context tensors
         concat_embedded = torch.cat((embedded, c), dim=2)

textformer/models/decoders/lstm.py (3 additions & 3 deletions)

@@ -55,11 +55,11 @@ def __init__(self, n_output=128, n_hidden=128, n_embedding=128, n_layers=1, drop
         logger.debug(
             f'Size: ({self.n_output}, {self.n_hidden}) | Embeddings: {self.n_embedding} | Core: {self.rnn} | Output: {self.fc}.')
 
-    def forward(self, x_enc, h, c):
+    def forward(self, x, h, c):
         """Performs a forward pass over the architecture.
         Args:
-            x_enc (torch.Tensor): Tensor containing the encoded data.
+            x (torch.Tensor): Tensor containing the input data.
             h (torch.Tensor): Tensor containing the hidden states.
             c (torch.Tensor): Tensor containing the cell.

@@ -69,7 +69,7 @@ def forward(self, x_enc, h, c):
         """
 
         # Calculates the embedded layer
-        embedded = self.dropout(self.embedding(x_enc.unsqueeze(0)))
+        embedded = self.dropout(self.embedding(x.unsqueeze(0)))
 
         # Calculates the RNN layer
         output, (hidden, cell) = self.rnn(embedded, (h, c))

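In both recurrent decoders the rename is clarifying rather than functional: as the forward passes of joint_seq2seq.py and seq2seq.py below show, the tensor handed to the decoder is the previous target token (a true label under teacher forcing, otherwise the best prediction), not encoded data, so x documented as "input data" fits better than x_enc.
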
textformer/models/decoders/multi_head.py (1 addition & 1 deletion)

@@ -68,7 +68,7 @@ def __init__(self, n_output=128, n_hidden=128, n_forward=256, n_layers=1,
         # Output layer
         self.out = nn.Linear(n_hidden, n_output)
 
-    def forward(self, y, y_mask, x_enc, x_mask):
+    def forward(self, y, y_mask, x, x_mask):
         """Performs a forward pass over the architecture.
         Args:

textformer/models/encoders/bi_gru.py (2 additions & 2 deletions)

@@ -70,8 +70,8 @@ def forward(self, x):
         # Calculates the RNN outputs
         outputs, hidden = self.rnn(embedded)
 
-        # Initial Decoder hidden layer is the final hidden state of the Encoder forward and backward RNNs
-        # Also, they are fed through a Linear layer
+        # Calculates the final hidden state of the encoder forward and backward RNNs
+        # Also, they are fed through a linear layer
         hidden = torch.tanh(self.fc(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)))
 
         return outputs, hidden

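The rewritten comment describes combining the last forward and backward hidden states of the bidirectional GRU. A self-contained sketch of that combination, with illustrative sizes (assumptions, not the library's defaults):

import torch
import torch.nn as nn

n_embedding, n_hidden_enc, n_hidden_dec, src_len, batch = 16, 32, 32, 7, 4
rnn = nn.GRU(n_embedding, n_hidden_enc, bidirectional=True)
fc = nn.Linear(n_hidden_enc * 2, n_hidden_dec)

embedded = torch.randn(src_len, batch, n_embedding)

# hidden stacks the directions: hidden[-2] is the last forward state,
# hidden[-1] the last backward state
outputs, hidden = rnn(embedded)

# Concatenates both directions and squashes them through the linear layer
hidden = torch.tanh(fc(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)))
print(outputs.shape, hidden.shape)  # torch.Size([7, 4, 64]) torch.Size([4, 32])
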
textformer/models/joint_seq2seq.py (12 additions & 12 deletions)

@@ -69,12 +69,12 @@ def forward(self, x, y, teacher_forcing_ratio=0.5):
         hidden = context = self.E(x)
 
         # Make sure that the first decoding will come from the true labels
-        x_enc = y[0, :]
+        x = y[0, :]
 
         # For every possible token in the sequence
         for t in range(1, y.shape[0]):
             # Decodes the tensor
-            pred, hidden = self.D(x_enc, hidden, context)
+            pred, hidden = self.D(x, hidden, context)
 
             # Gathers the prediction of current token
             preds[t] = pred

@@ -85,12 +85,12 @@ def forward(self, x, y, teacher_forcing_ratio=0.5):
             # If teacher forcing should be used
             if teacher_forcing:
                 # Gathers the new input from the true labels
-                x_enc = y[t]
+                x = y[t]
 
             # If teacher forcing should not be used
             else:
                 # Gathers the new input from the best prediction
-                x_enc = pred.argmax(1)
+                x = pred.argmax(1)
 
         return preds
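
For readers new to the pattern these hunks rename, a self-contained sketch of the teacher-forcing loop itself, with toy stand-ins for the embedding and decoder (every size and module here is an illustrative assumption):

import random

import torch
import torch.nn as nn

n_vocab, n_hidden, batch, trg_len = 10, 8, 2, 5
embed = nn.Embedding(n_vocab, n_hidden)  # toy embedding
decoder = nn.Linear(n_hidden, n_vocab)   # toy decoder head

y = torch.randint(0, n_vocab, (trg_len, batch))  # true labels, (trg_len, batch)
hidden = torch.zeros(batch, n_hidden)            # pretend encoder state
preds = torch.zeros(trg_len, batch, n_vocab)

# The first decoding input comes from the true labels (the <sos> row)
x = y[0, :]
for t in range(1, trg_len):
    # Toy decoding step standing in for self.D(x, hidden, context)
    pred = decoder(embed(x) + hidden)
    preds[t] = pred
    # With probability teacher_forcing_ratio, feed the true token back in;
    # otherwise feed the best prediction
    x = y[t] if random.random() < 0.5 else pred.argmax(1)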

@@ -126,14 +126,14 @@ def generate_text(self, start, field, length=10, temperature=1.0):
         hidden = context = self.E(tokens)
 
         # Removes the batch dimension from the tokens
-        tokens_enc = tokens.squeeze(0)
+        tokens = tokens.squeeze(0)
 
         # For every possible length
         for i in range(length):
             # Inhibits the gradient from updating the parameters
             with torch.no_grad():
                 # Decodes only the last token, i.e., last sampled token
-                preds, hidden = self.D(tokens_enc[-1], hidden, context)
+                preds, hidden = self.D(tokens[-1], hidden, context)
 
                 # Regularize the prediction with the temperature
                 preds /= temperature

@@ -142,10 +142,10 @@ def generate_text(self, start, field, length=10, temperature=1.0):
             sampled_token = distributions.Categorical(logits=preds).sample()
 
             # Concatenate the sampled token with the input tokens
-            tokens_enc = torch.cat((tokens_enc, sampled_token.unsqueeze(0)))
+            tokens = torch.cat((tokens, sampled_token.unsqueeze(0)))
 
         # Decodes the tokens into text
-        sampled_text = [field.vocab.itos[t] for t in tokens_enc]
+        sampled_text = [field.vocab.itos[t] for t in tokens]
 
         return sampled_text

@@ -184,28 +184,28 @@ def translate_text(self, start, src_field, trg_field, max_length=10):
         hidden = context = self.E(tokens)
 
         # Creating a tensor with `<sos>` token from target vocabulary
-        tokens_enc = torch.LongTensor([trg_field.vocab.stoi[trg_field.init_token]]).unsqueeze(0).to(self.device)
+        tokens = torch.LongTensor([trg_field.vocab.stoi[trg_field.init_token]]).unsqueeze(0).to(self.device)
 
         # For every possible token in maximum length
         for i in range(max_length):
             # Inhibits the gradient from updating the parameters
             with torch.no_grad():
                 # Decodes only the last token, i.e., last sampled token
-                preds, hidden = self.D(tokens_enc[-1], hidden, context)
+                preds, hidden = self.D(tokens[-1], hidden, context)
 
             # Samples a token using argmax
             sampled_token = preds.argmax(1)
 
             # Concatenate the sampled token with the input tokens
-            tokens_enc = torch.cat((tokens_enc, sampled_token.unsqueeze(0)))
+            tokens = torch.cat((tokens, sampled_token.unsqueeze(0)))
 
             # Check if has reached the end of string
             if sampled_token == trg_field.vocab.stoi[trg_field.eos_token]:
                 # If yes, breaks the loop
                 break
 
         # Decodes the tokens into text
-        translated_text = [trg_field.vocab.itos[t] for t in tokens_enc]
+        translated_text = [trg_field.vocab.itos[t] for t in tokens]
 
         return translated_text[1:]

textformer/models/layers/attention.py (4 additions & 4 deletions)

@@ -30,23 +30,23 @@ def __init__(self, n_hidden_enc, n_hidden_dec):
         # Defining the weight-based layer
         self.v = nn.Linear(n_hidden_dec, 1, bias=False)
 
-    def forward(self, h, y):
+    def forward(self, o, h):
         """Performs a forward pass over the layer.
         Args:
+            o (torch.Tensor): Tensor containing the encoded outputs.
             h (torch.Tensor): Tensor containing the hidden states.
-            y (torch.Tensor): Tensor containing the encoder outputs.
         Returns:
             The attention-based weights.
         """
 
         # Repeating the decoder hidden states as its smaller than the encoder ones
-        hidden = h.unsqueeze(1).repeat(1, y.shape[0], 1)
+        hidden = h.unsqueeze(1).repeat(1, o.shape[0], 1)
 
         # Permuting the outputs
-        encoder_outputs = y.permute(1, 0, 2)
+        encoder_outputs = o.permute(1, 0, 2)
 
         # Calculating the energy between decoder hidden state and encoder hidden states
         energy = torch.tanh(self.e(torch.cat((hidden, encoder_outputs), dim=2)))

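Putting the reordered signature together, a minimal, self-contained sketch of the whole layer; the size of self.e and the final softmax are not visible in this hunk, so both are assumptions based on the surrounding code:

import torch
import torch.nn as nn

class Attention(nn.Module):
    def __init__(self, n_hidden_enc, n_hidden_dec):
        super().__init__()
        # Energy layer over bidirectional encoder outputs plus the decoder state
        # (assumed size; only self.v appears in the hunk above)
        self.e = nn.Linear(n_hidden_enc * 2 + n_hidden_dec, n_hidden_dec)
        # Defining the weight-based layer
        self.v = nn.Linear(n_hidden_dec, 1, bias=False)

    def forward(self, o, h):
        # Repeats the decoder hidden state across the source length
        hidden = h.unsqueeze(1).repeat(1, o.shape[0], 1)
        # Permutes the encoder outputs to (batch, src_len, dim)
        encoder_outputs = o.permute(1, 0, 2)
        # Energy between the decoder state and every encoder output
        energy = torch.tanh(self.e(torch.cat((hidden, encoder_outputs), dim=2)))
        # Softmax over the source positions (assumed; it falls below this hunk)
        return torch.softmax(self.v(energy).squeeze(2), dim=1)

o = torch.randn(7, 4, 256)  # (src_len, batch, 2 * n_hidden_enc), n_hidden_enc = 128
h = torch.randn(4, 128)     # (batch, n_hidden_dec)
print(Attention(128, 128)(o, h).shape)  # torch.Size([4, 7])
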
textformer/models/seq2seq.py (12 additions & 12 deletions)

@@ -70,12 +70,12 @@ def forward(self, x, y, teacher_forcing_ratio=0.5):
         hidden, cell = self.E(x)
 
         # Make sure that the first decoding will come from the true labels
-        x_enc = y[0, :]
+        x = y[0, :]
 
         # For every possible token in the sequence
         for t in range(1, y.shape[0]):
             # Decodes the tensor
-            pred, hidden, cell = self.D(x_enc, hidden, cell)
+            pred, hidden, cell = self.D(x, hidden, cell)
 
             # Gathers the prediction of current token
             preds[t] = pred

@@ -86,12 +86,12 @@ def forward(self, x, y, teacher_forcing_ratio=0.5):
             # If teacher forcing should be used
             if teacher_forcing:
                 # Gathers the new input from the true labels
-                x_enc = y[t]
+                x = y[t]
 
             # If teacher forcing should not be used
             else:
                 # Gathers the new input from the best prediction
-                x_enc = pred.argmax(1)
+                x = pred.argmax(1)
 
         return preds

@@ -127,14 +127,14 @@ def generate_text(self, start, field, length=10, temperature=1.0):
         hidden, cell = self.E(tokens)
 
         # Removes the batch dimension from the tokens
-        tokens_enc = tokens.squeeze(0)
+        tokens = tokens.squeeze(0)
 
         # For every possible length
         for i in range(length):
             # Inhibits the gradient from updating the parameters
             with torch.no_grad():
                 # Decodes only the last token, i.e., last sampled token
-                preds, hidden, cell = self.D(tokens_enc[-1], hidden, cell)
+                preds, hidden, cell = self.D(tokens[-1], hidden, cell)
 
                 # Regularize the prediction with the temperature
                 preds /= temperature

@@ -143,10 +143,10 @@ def generate_text(self, start, field, length=10, temperature=1.0):
             sampled_token = distributions.Categorical(logits=preds).sample()
 
             # Concatenate the sampled token with the input tokens
-            tokens_enc = torch.cat((tokens_enc, sampled_token.unsqueeze(0)))
+            tokens = torch.cat((tokens, sampled_token.unsqueeze(0)))
 
         # Decodes the tokens into text
-        sampled_text = [field.vocab.itos[t] for t in tokens_enc]
+        sampled_text = [field.vocab.itos[t] for t in tokens]
 
         return sampled_text
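
The preds /= temperature line above implements temperature sampling: dividing the logits sharpens the distribution for temperatures below 1.0 and flattens it above 1.0. A minimal illustration:

import torch
from torch import distributions

logits = torch.tensor([2.0, 1.0, 0.1])
for temperature in (0.5, 1.0, 2.0):
    # Lower temperature -> peakier distribution -> more conservative samples
    probs = torch.softmax(logits / temperature, dim=0)
    sampled = distributions.Categorical(logits=logits / temperature).sample()
    print(temperature, probs, int(sampled))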

@@ -185,28 +185,28 @@ def translate_text(self, start, src_field, trg_field, max_length=10):
         hidden, cell = self.E(tokens)
 
         # Creating a tensor with `<sos>` token from target vocabulary
-        tokens_enc = torch.LongTensor([trg_field.vocab.stoi[trg_field.init_token]]).unsqueeze(0).to(self.device)
+        tokens = torch.LongTensor([trg_field.vocab.stoi[trg_field.init_token]]).unsqueeze(0).to(self.device)
 
         # For every possible token in maximum length
         for i in range(max_length):
             # Inhibits the gradient from updating the parameters
             with torch.no_grad():
                 # Decodes only the last token, i.e., last sampled token
-                preds, hidden, cell = self.D(tokens_enc[-1], hidden, cell)
+                preds, hidden, cell = self.D(tokens[-1], hidden, cell)
 
             # Samples a token using argmax
             sampled_token = preds.argmax(1)
 
             # Concatenate the sampled token with the input tokens
-            tokens_enc = torch.cat((tokens_enc, sampled_token.unsqueeze(0)))
+            tokens = torch.cat((tokens, sampled_token.unsqueeze(0)))
 
             # Check if has reached the end of string
             if sampled_token == trg_field.vocab.stoi[trg_field.eos_token]:
                 # If yes, breaks the loop
                 break
 
         # Decodes the tokens into text
-        translated_text = [trg_field.vocab.itos[t] for t in tokens_enc]
+        translated_text = [trg_field.vocab.itos[t] for t in tokens]
 
         return translated_text[1:]
