
Continuing to implement the Convolutional Seq2Seq.
gugarosa committed Jun 2, 2020
1 parent 5dd3067 commit fa60a40
Showing 5 changed files with 58 additions and 15 deletions.
36 changes: 36 additions & 0 deletions examples/applications/generation/conv_seq2seq_generation.py
@@ -0,0 +1,36 @@
from torchtext.data import BPTTIterator, Field

from textformer.datasets.generative import GenerativeDataset
from textformer.models import ConvSeq2Seq

# Defines the device which should be used, e.g., `cpu` or `cuda`
device = 'cpu'

# Defines the input file
file_path = 'data/generative/chapter1_harry.txt'

# Defines a datatype for further tensor conversion
source = Field(lower=True)

# Creates the GenerativeDataset
dataset = GenerativeDataset(file_path, source)

# Builds the vocabulary
source.build_vocab(dataset, min_freq=1)

# Creates an iterator that backpropagates through time
train_iterator = BPTTIterator(dataset, batch_size=16, bptt_len=10, device=device)

# Creating the ConvSeq2Seq model
conv_seq2seq = ConvSeq2Seq(n_input=len(source.vocab), n_output=len(source.vocab),
                           n_hidden=512, n_embedding=256, n_layers=1, kernel_size=3,
                           ignore_token=None, init_weights=None, device=device)

# Training the model
conv_seq2seq.fit(train_iterator, epochs=10)

# Generating artificial text
text = conv_seq2seq.generate_text(
    'Mr. Dursley', source, length=100, temperature=0.5)

print(' '.join(text))
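
As a hedged aside (not part of the commit): in legacy torchtext, every batch yielded by the BPTTIterator above carries a `text` tensor and a `target` tensor, where the target is the text shifted by one token; both default to shape (bptt_len, batch_size). Continuing the script above, a quick way to confirm what `fit` will iterate over:

# Peeks at a single batch from the iterator (assumes the script above has run)
batch = next(iter(train_iterator))
print(batch.text.shape, batch.target.shape)  # both (bptt_len, batch_size)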
1 change: 1 addition & 0 deletions textformer/models/__init__.py
@@ -2,5 +2,6 @@
"""

from textformer.models.att_seq2seq import AttSeq2Seq
from textformer.models.conv_seq2seq import ConvSeq2Seq
from textformer.models.joint_seq2seq import JointSeq2Seq
from textformer.models.seq2seq import Seq2Seq
6 changes: 3 additions & 3 deletions textformer/models/conv_seq2seq.py
@@ -4,7 +4,7 @@

import textformer.utils.logging as l
from textformer.core.model import Model
from textformer.models.decoders import LSTMDecoder
from textformer.models.decoders import ConvDecoder
from textformer.models.encoders import ConvEncoder

logger = l.get_logger(__name__)
@@ -44,7 +44,7 @@ def __init__(self, n_input=128, n_output=128, n_hidden=128, n_embedding=128, n_l
E = ConvEncoder(n_input, n_hidden, n_embedding, n_layers, kernel_size, dropout, max_length)

# Creating the decoder network
D = LSTMDecoder(n_output, n_hidden, n_embedding, n_layers, dropout)
D = ConvDecoder(n_output, n_embedding, n_hidden, n_layers, kernel_size, dropout, ignore_token, device, max_length)

# Overrides its parent class with any custom arguments if needed
super(ConvSeq2Seq, self).__init__(E, D, ignore_token, init_weights, device)
@@ -69,6 +69,6 @@ def forward(self, x, y, teacher_forcing_ratio=0.0):
conv, output = self.E(x)

# Decodes the encoded inputs
preds, _ = self.decoder(y, conv, output)
preds, _ = self.D(y, conv, output)

return preds
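
For context, `ignore_token` is presumably wired into the loss as a padding index by the parent Model class; the following is a hypothetical sketch (names and shapes are illustrative, not the library's actual training step) of how the returned predictions could feed a cross-entropy objective:

import torch
from torch import nn

# Illustrative shapes: (batch, seq_len, vocab) logits and (batch, seq_len) targets
batch_size, seq_len, vocab_size, pad_idx = 2, 5, 100, 1
preds = torch.randn(batch_size, seq_len, vocab_size)
y = torch.randint(0, vocab_size, (batch_size, seq_len))

# Flattens logits and targets to match nn.CrossEntropyLoss' expected shapes;
# `ignore_index` would play the role of the model's `ignore_token`
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
loss = criterion(preds.reshape(-1, vocab_size), y.reshape(-1))
print(loss.item())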
1 change: 1 addition & 0 deletions textformer/models/decoders/__init__.py
@@ -2,5 +2,6 @@
"""

from textformer.models.decoders.att_bi_gru import AttBiGRUDecoder
from textformer.models.decoders.conv import ConvDecoder
from textformer.models.decoders.gru import GRUDecoder
from textformer.models.decoders.lstm import LSTMDecoder
29 changes: 17 additions & 12 deletions textformer/models/encoders/conv.py
@@ -1,3 +1,6 @@
import math

import torch
from torch import nn

import textformer.utils.logging as l
@@ -42,15 +45,16 @@ def __init__(self, n_input=128, n_hidden=128, n_embedding=128, n_layers=1, kerne
# Number of layers
self.n_layers = n_layers

# Kernel size
# Checks if the kernel size is even
if kernel_size % 2 == 0:
    # If so, adds one to make it odd
    self.kernel_size = kernel_size + 1
else:
    # Otherwise, keeps the provided odd kernel size
    self.kernel_size = kernel_size

# Maximum length of positional embeddings
self.max_length = max_length

#
self.scale = torch.sqrt(torch.FloatTensor([0.5]))
# Scaling factor for the residual connections
self.scale = math.sqrt(0.5)

# Embedding layers
self.embedding = nn.Embedding(n_input, n_embedding)
@@ -65,12 +69,13 @@ def __init__(self, n_input=128, n_hidden=128, n_embedding=128, n_layers=1, kerne
out_channels=2 * n_hidden,
kernel_size=kernel_size,
padding=(kernel_size - 1) // 2)
for _ in range(n_layers)])
for _ in range(n_layers)])

# Dropout layer
self.dropout = nn.Dropout(dropout)

logger.debug(f'Size: ({self.n_input}, {self.n_hidden}) | Embeddings: {self.n_embedding} | Core: {self.conv}.')
logger.debug(
f'Size: ({self.n_input}, {self.n_hidden}) | Embeddings: {self.n_embedding} | Core: {self.conv}.')

def forward(self, x):
"""Performs a forward pass over the architecture.
@@ -94,26 +99,26 @@ def forward(self, x):
embedded = self.dropout(x_embedded + pos_embedded)

# Passing down to the first linear layer and permuting its dimension
conv = self.fc1(embedded).permute(0, 2, 1)
hidden = self.fc1(embedded).permute(0, 2, 1)

# For every convolutional layer
for i, c in enumerate(self.conv):
# Pass down through convolutional layer
conv = c(self.dropout(hidden))

#
conv = F.glu(conv, dim=1)
# Activates with a GLU function
conv = nn.functional.glu(conv, dim=1)

#
# Sums the activation with its residual connection and scales the result
conv = (conv + hidden) * self.scale

#
# Feeds the result back as the next layer's input
hidden = conv

#
# Projects back to the embedding size
conv = self.fc2(conv.permute(0, 2, 1))

#
# Sums the embedded features with the convolution-extracted ones
output = (conv + embedded) * self.scale

return conv, output
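
To make the loop above easier to follow, here is a small standalone sketch (with made-up sizes, not the library's defaults) of one GLU residual block as applied by the encoder: convolve to twice the hidden size, gate with a GLU to halve it back, then add the residual scaled by sqrt(0.5).

import math

import torch
from torch import nn

# Made-up sizes for illustration only
n_hidden, kernel_size, batch_size, seq_len = 8, 3, 2, 10

# Same layer shape as the encoder's ModuleList entries
conv_layer = nn.Conv1d(in_channels=n_hidden,
                       out_channels=2 * n_hidden,
                       kernel_size=kernel_size,
                       padding=(kernel_size - 1) // 2)

hidden = torch.randn(batch_size, n_hidden, seq_len)

conv = conv_layer(hidden)                # (batch, 2 * n_hidden, seq_len)
conv = nn.functional.glu(conv, dim=1)    # gated down to (batch, n_hidden, seq_len)
conv = (conv + hidden) * math.sqrt(0.5)  # residual connection with scaling

print(conv.shape)  # torch.Size([2, 8, 10])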
