Skip to content
Permalink
Browse files

Use PackedSequence for ConvRNN (#126)

* Use PackedSequence in ConvRNNs instead
- Zero-padding is incompatible with the current model

* Update test script

* Clean up comments and logic

* Add TQDM to requirements

* Add results using default hyperparameters
  • Loading branch information...
daemon committed Jun 24, 2018
1 parent 2f7731b commit e4149ff40079555ee40b2d7b1f4bb2c226359407
Showing with 45 additions and 64 deletions.
  1. +1 −1 conv_rnn/README.md
  2. +27 −34 conv_rnn/model.py
  3. +2 −2 conv_rnn/test.py
  4. +14 −27 conv_rnn/train.py
  5. +1 −0 requirements.txt
@@ -17,7 +17,7 @@ You may then run `python train.py` and `python test.py` for training and testing
### Empirical results
Best dev | Test
-- | --
52.04359673024523 | 50.85972850678733
48.1 | 48.9

### References
[1] Chenglong Wang, Feijun Jiang, and Hongxia Yang. 2017. A Hybrid Framework for Text Modeling with Convolutional RNN. In Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD '17).
@@ -3,10 +3,12 @@
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as nn_func
import torch.nn.functional as F
import torch.nn.utils.rnn as rnn_utils

import data


class ConvRNNModel(nn.Module):
def __init__(self, word_model, **config):
super().__init__()
@@ -17,14 +19,7 @@ def __init__(self, word_model, **config):
self.batch_size = config["mbatch_size"]
n_fmaps = config["n_feature_maps"]
self.rnn_type = config["rnn_type"]

self.h_0_cache = torch.autograd.Variable(torch.zeros(2, self.batch_size, self.hidden_size))
self.c_0_cache = torch.autograd.Variable(torch.zeros(2, self.batch_size, self.hidden_size))

self.no_cuda = config["no_cuda"]
if not self.no_cuda:
self.h_0_cache = self.h_0_cache.cuda()
self.c_0_cache = self.c_0_cache.cuda()

if self.rnn_type.upper() == "LSTM":
self.bi_rnn = nn.LSTM(embedding_dim, self.hidden_size, 1, batch_first=True, bidirectional=True)
@@ -40,43 +35,40 @@ def convert_dataset(self, dataset):
dataset = np.stack(dataset)
model_in = dataset[:, 1].reshape(-1)
model_out = dataset[:, 0].flatten().astype(np.int)
model_out = torch.autograd.Variable(torch.from_numpy(model_out))
model_in = self.preprocess(model_in)
model_in = torch.autograd.Variable(model_in)
model_out = torch.from_numpy(model_out)
indices, lengths = self.preprocess(model_in)
if not self.no_cuda:
model_out = model_out.cuda()
model_in = model_in.cuda()
return (model_in, model_out)
indices = indices.cuda()
lengths = lengths.cuda()
lengths, sort_idx = torch.sort(lengths, descending=True)
indices = indices[sort_idx]
model_out = model_out[sort_idx]
return ((indices, lengths), model_out)

def preprocess(self, sentences):
    """Convert raw sentences into index tensors plus their true lengths.

    :param sentences: iterable of raw sentences accepted by
        ``self.word_model.lookup`` — presumably strings; confirm against caller.
    :return: tuple ``(indices, lengths)`` of ``torch.LongTensor``s, where
        ``indices`` are the padded word-index rows and ``lengths`` the
        pre-padding length of each sentence (as produced by ``lookup``).
    """
    # NOTE(review): the scraped diff left the stale pre-refactor line
    # (`return torch.from_numpy(np.array(...))`) interleaved above this body,
    # which made the live two-value return unreachable; only the post-commit
    # PackedSequence-era form is kept here.
    indices, lengths = self.word_model.lookup(sentences)
    return torch.LongTensor(indices), torch.LongTensor(lengths)

def forward(self, x, lengths):
    """Run the ConvRNN over one padded batch.

    :param x: LongTensor of word indices, shape (batch, max sent len);
        rows must be sorted by descending length, as required by
        ``pack_padded_sequence`` (``convert_dataset`` performs this sort).
    :param lengths: true (pre-padding) length of each row, descending.
    :return: unnormalized class scores from the final linear layer.
    """
    # NOTE(review): this block arrived as diff residue with the old
    # (h_0/c_0-cache) and new (PackedSequence) versions of `forward`
    # interleaved; this is the reconstructed additions-only version.
    x = self.word_model(x)  # shape: (batch, max sent len, embed dim)
    # Pack so the RNN skips the zero padding instead of consuming it.
    x = rnn_utils.pack_padded_sequence(x, lengths, batch_first=True)
    rnn_seq, rnn_out = self.bi_rnn(x)
    if self.rnn_type.upper() == "LSTM":
        # LSTM returns (h_n, c_n); keep only h_n so both RNN types agree.
        rnn_out = rnn_out[0]
    rnn_seq, _ = rnn_utils.pad_packed_sequence(rnn_seq, batch_first=True)
    # (2, batch, hidden) -> (batch, 2, hidden) so chunk(2, 1) below splits
    # the two directions per example.
    rnn_out.data = rnn_out.data.permute(1, 0, 2)
    x = self.conv(rnn_seq.unsqueeze(1)).squeeze(3)  # (batch, channels, seq len)
    x = F.relu(x)
    x = F.max_pool1d(x, x.size(2))  # (batch, channels, 1)
    # Concatenate both final hidden directions with the pooled conv features.
    out = [t.squeeze(1) for t in rnn_out.chunk(2, 1)]
    out.append(x.squeeze(-1))
    x = torch.cat(out, 1)
    x = F.relu(self.fc1(x))
    return self.fc2(x)


class WordEmbeddingModel(nn.Module):
def __init__(self, id_dict, weights, unknown_vocab=[], static=True, padding_idx=0):
super().__init__()
@@ -123,9 +115,10 @@ def lookup(self, sentences):
indices_list.append(indices)
if len(indices) > max_len:
max_len = len(indices)
lengths = [len(x) for x in indices_list]
for indices in indices_list:
indices.extend([self.padding_idx] * (max_len - len(indices)))
return indices_list
return indices_list, lengths

def set_seed(seed=0, no_cuda=False):
np.random.seed(seed)
@@ -27,8 +27,8 @@ def main():

conv_rnn.eval()
for test_in, test_out in test_loader:
scores = conv_rnn(test_in)
n_correct = (torch.max(scores, 1)[1].view(-1).data == test_out.data).sum()
scores = conv_rnn(*test_in)
n_correct = (torch.max(scores, 1)[1].view(-1).data == test_out.data).float().sum()
accuracy = n_correct / len(test_set)
print("Test set accuracy: {}".format(accuracy))

@@ -3,7 +3,7 @@
import random

from torch import utils
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
@@ -79,24 +79,21 @@ def train(**kwargs):
conv_rnn.train()
criterion = nn.CrossEntropyLoss()
parameters = list(filter(lambda p: p.requires_grad, conv_rnn.parameters()))
optimizer = torch.optim.SGD(parameters, lr=lr, weight_decay=weight_decay, momentum=0.9)
scheduler = ReduceLROnPlateau(optimizer, patience=kwargs["dev_per_epoch"] * 4)
optimizer = torch.optim.Adam(parameters, lr=lr, weight_decay=weight_decay)
train_set, dev_set, test_set = data.SSTDataset.load_sst_sets("data")

collate_fn = conv_rnn.convert_dataset
train_loader = utils.data.DataLoader(train_set, shuffle=True, batch_size=mbatch_size, drop_last=True,
collate_fn=collate_fn)
train_loader = utils.data.DataLoader(train_set, shuffle=True, batch_size=mbatch_size, collate_fn=collate_fn)
dev_loader = utils.data.DataLoader(dev_set, batch_size=len(dev_set), collate_fn=collate_fn)
test_loader = utils.data.DataLoader(test_set, batch_size=len(test_set), collate_fn=collate_fn)

def evaluate(loader, dev=True):
conv_rnn.eval()
for m_in, m_out in loader:
scores = conv_rnn(m_in)
loss = criterion(scores, m_out).cpu().data[0]
n_correct = (torch.max(scores, 1)[1].view(m_in.size(0)).data == m_out.data).float().sum().item()
accuracy = n_correct / m_in.size(0)
scheduler.step(accuracy)
scores = conv_rnn(*m_in)
loss = criterion(scores, m_out).item()
n_correct = (torch.max(scores, 1)[1].view(m_in[0].size(0)).data == m_out.data).float().sum().item()
accuracy = n_correct / m_in[0].size(0)
if dev and accuracy >= evaluate.best_dev:
evaluate.best_dev = accuracy
print("Saving best model ({})...".format(accuracy))
@@ -111,23 +108,13 @@ def evaluate(loader, dev=True):
if verbose:
print()
i = 0
for j, (train_in, train_out) in enumerate(train_loader):
for (j, (train_in, train_out)), _ in zip(enumerate(train_loader), tqdm(range(len(train_loader)))):
optimizer.zero_grad()

if not kwargs["no_cuda"]:
train_in.cuda()
train_out.cuda()

scores = conv_rnn(train_in)
scores = conv_rnn(*train_in)
loss = criterion(scores, train_out)
loss.backward()
optimizer.step()
accuracy = (torch.max(scores, 1)[1].view(-1).data == train_out.data).float().sum() / mbatch_size
if verbose and i % (mbatch_size * 10) == 0:
print("accuracy: {}, {} / {}".format(accuracy, j * mbatch_size, len(train_set)))
i += mbatch_size
if i % (len(train_set) // kwargs["dev_per_epoch"]) < mbatch_size:
evaluate(dev_loader)
evaluate(dev_loader)
evaluate(test_loader, dev=False)
return evaluate.best_dev

@@ -153,10 +140,10 @@ def main():
parser.add_argument("--dev_per_epoch", default=9, type=int)
parser.add_argument("--fc_size", default=200, type=int)
parser.add_argument("--gpu_number", default=0, type=int)
parser.add_argument("--hidden_size", default=200, type=int)
parser.add_argument("--hidden_size", default=150, type=int)
parser.add_argument("--input_file", default="saves/model.pt", type=str)
parser.add_argument("--lr", default=1E-1, type=float)
parser.add_argument("--mbatch_size", default=64, type=int)
parser.add_argument("--lr", default=5E-4, type=float)
parser.add_argument("--mbatch_size", default=16, type=int)
parser.add_argument("--n_epochs", default=30, type=int)
parser.add_argument("--n_feature_maps", default=200, type=float)
parser.add_argument("--n_labels", default=5, type=int)
@@ -167,7 +154,7 @@ def main():
parser.add_argument("--rnn_type", choices=["lstm", "gru"], default="lstm", type=str)
parser.add_argument("--seed", default=3, type=int)
parser.add_argument("--quiet", action="store_true", default=False)
parser.add_argument("--weight_decay", default=1E-4, type=float)
parser.add_argument("--weight_decay", default=1E-3, type=float)
args = parser.parse_args()
if args.random_search:
do_random_search(vars(args))
@@ -8,3 +8,4 @@ pyjnius==1.1.1
scikit-learn==0.19.1
scipy==1.0.0
torchtext==0.2.3
tqdm>=4.23,<4.23.99

0 comments on commit e4149ff

Please sign in to comment.
You can’t perform that action at this time.