Commit: Merge remote-tracking branch 'vdpwi/master'
Showing 10 changed files with 583 additions and 0 deletions.
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018 Ralph Tang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -0,0 +1,8 @@
{
    "folders":
    [
        {
            "path": "vdpwi"
        }
    ]
}
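This is an editor workspace file (the "folders"/"path" schema used by Sublime Text projects and VS Code workspace files); it simply points the editor at the vdpwi subdirectory and has no effect on training.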
@@ -0,0 +1,139 @@
from collections import namedtuple

from tqdm import tqdm
import numpy as np
import scipy.stats as stats
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torch.utils as utils

from utils.log import LogWriter
import data
import model as mod

Context = namedtuple("Context", "model, train_loader, dev_loader, test_loader, optimizer, criterion, params, log_writer")
EvaluateResult = namedtuple("EvaluateResult", "pearsonr, spearmanr")

def create_context(config):
    def collate_fn(batch):
        emb1 = []
        emb2 = []
        labels = []
        cmp_labels = []
        pad_cube = []
        max_len1 = 0; max_len2 = 0

        for s1, s2, l, cl in batch:
            emb1.append(s1)
            emb2.append(s2)
            max_len1 = max(max_len1, len(s1))
            max_len2 = max(max_len2, len(s2))
            labels.append(l)
            cmp_labels.append(cl)

        # Pad both sentences in place with the reserved padding index (the last
        # embedding row) and record a max_len1 x max_len2 mask per pair that
        # marks the padded cells of the word-interaction grid.
        for s1, s2 in zip(emb1, emb2):
            pad1 = max_len1 - len(s1)
            pad2 = max_len2 - len(s2)
            pad_mask = np.ones((max_len1, max_len2))
            pad_mask[:len(s1), :len(s2)] = 0
            pad_cube.append(pad_mask)
            s1.extend([embedding.weight.size(0) - 1] * pad1)
            s2.extend([embedding.weight.size(0) - 1] * pad2)

        pad_cube = np.array(pad_cube)
        emb1 = torch.LongTensor(emb1)
        emb2 = torch.LongTensor(emb2)
        labels = torch.Tensor(labels)
        emb1 = torch.autograd.Variable(emb1, requires_grad=False)
        emb2 = torch.autograd.Variable(emb2, requires_grad=False)
        labels = torch.autograd.Variable(labels, requires_grad=False)
        pad_cube = torch.autograd.Variable(torch.from_numpy(pad_cube).float(), requires_grad=False)
        if not config.cpu:
            emb1 = emb1.cuda()
            emb2 = emb2.cuda()
            labels = labels.cuda()
            pad_cube = pad_cube.cuda()
        return emb1, emb2, labels, pad_cube, cmp_labels

    embedding, (train_set, dev_set, test_set) = data.load_dataset(config.dataset)
    model = mod.VDPWIModel(embedding, config)
    if config.restore:
        model.load(config.input_file)
    if not config.cpu:
        model = model.cuda()

    train_loader = utils.data.DataLoader(train_set, shuffle=True, batch_size=config.mbatch_size, collate_fn=collate_fn)
    dev_loader = utils.data.DataLoader(dev_set, batch_size=1, collate_fn=collate_fn)
    test_loader = utils.data.DataLoader(test_set, batch_size=1, collate_fn=collate_fn)

    params = list(filter(lambda x: x.requires_grad, model.parameters()))
    if config.optimizer == "adam":
        optimizer = optim.Adam(params, lr=config.lr, weight_decay=config.weight_decay)
    elif config.optimizer == "sgd":
        optimizer = optim.SGD(params, lr=config.lr, momentum=config.momentum, weight_decay=config.weight_decay)
    elif config.optimizer == "rmsprop":
        optimizer = optim.RMSprop(params, lr=config.lr, alpha=config.decay, momentum=config.momentum, weight_decay=config.weight_decay)
    criterion = nn.KLDivLoss()
    log_writer = LogWriter()
    return Context(model, train_loader, dev_loader, test_loader, optimizer, criterion, params, log_writer)

def test(config):
    context = create_context(config)
    result = evaluate(context, context.test_loader)
    print("Final test result: {}".format(result))

def evaluate(context, data_loader):
    model = context.model
    model.eval()
    predictions = []
    true_labels = []
    for sent1, sent2, _, pad_cube, truth in data_loader:
        scores = model(sent1, sent2, pad_cube)
        scores = F.softmax(scores).cpu().data.numpy()[0]
        # The model outputs a distribution over the 1..n_labels similarity
        # ratings; the scalar prediction is its expected value.
        prediction = np.dot(np.arange(1, len(scores) + 1), scores)
        predictions.append(prediction); true_labels.append(truth[0][0])

    pearsonr = stats.pearsonr(predictions, true_labels)[0]
    spearmanr = stats.spearmanr(predictions, true_labels)[0]
    context.log_writer.log_dev_metrics(pearsonr, spearmanr)
    return EvaluateResult(pearsonr, spearmanr)

def train(config):
    context = create_context(config)
    context.log_writer.log_hyperparams()
    best_dev_pr = 0
    for epoch_no in range(config.n_epochs):
        print("Epoch number: {}".format(epoch_no + 1))
        loader_wrapper = tqdm(context.train_loader, total=len(context.train_loader), desc="Loss")
        context.model.train()
        loss = 0
        for sent1, sent2, label_pmf, pad_cube, _ in loader_wrapper:
            context.optimizer.zero_grad()
            scores = F.log_softmax(context.model(sent1, sent2, pad_cube))

            # KL divergence between the predicted log-distribution and the
            # sparse target distribution over rating classes.
            loss = context.criterion(scores, label_pmf)
            loss.backward()
            nn.utils.clip_grad_norm(context.params, config.clip_norm)
            context.optimizer.step()

            loss = loss.cpu().data[0]
            loader_wrapper.set_description("Loss: {:<8}".format(round(loss, 5)))
            context.log_writer.log_train_loss(loss)
        result = evaluate(context, context.dev_loader)
        print("Dev result: {}".format(result))
        if best_dev_pr < result.pearsonr:
            best_dev_pr = result.pearsonr
            print("Saving best model...")
            context.model.save(config.output_file)

def main():
    config = data.Configs.base_config()
    if config.mode == "train":
        train(config)
    elif config.mode == "test":
        test(config)

if __name__ == "__main__":
    main()
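A note on the label format this trainer assumes: KLDivLoss compares the model's log-softmax scores against label_pmf, a probability mass function over the 1..n_labels similarity ratings, and evaluate() decodes a scalar prediction as the expectation of the predicted distribution. The sketch below shows the encoding these targets presumably follow; the helper name to_sparse_pmf is hypothetical and not part of this commit, and the scheme is the sparse target distribution of Tai et al. (2015), which the sim_sparse.txt files read by the data module appear to hold.

import math
import numpy as np

def to_sparse_pmf(y, n_labels=5):
    # Spread a gold score y in [1, n_labels] over the two integer
    # ratings that bracket it, so that its expectation equals y.
    pmf = np.zeros(n_labels)
    floor_y = int(math.floor(y))
    if floor_y == y:
        pmf[floor_y - 1] = 1.0              # integer score: all mass on one rating
    else:
        pmf[floor_y - 1] = floor_y + 1 - y
        pmf[floor_y] = y - floor_y
    return pmf

pmf = to_sparse_pmf(3.6)
print(pmf)                                  # [0.  0.  0.4 0.6 0. ] (up to float error)
print(np.dot(np.arange(1, 6), pmf))         # 3.6, the expectation decode used in evaluate()

Because the expectation exactly inverts this encoding, evaluate() computes Pearson's and Spearman's correlations against the raw scores from sim.txt rather than the sparse targets.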
@@ -0,0 +1,104 @@
import argparse
import os

import torch
import torch.nn as nn
import torch.utils.data as data

class Configs(object):
    @staticmethod
    def base_config():
        parser = argparse.ArgumentParser()
        parser.add_argument("--classifier", type=str, default="vdpwi", choices=["vdpwi", "resnet"])
        parser.add_argument("--clip_norm", type=float, default=50)
        parser.add_argument("--cpu", action="store_true", default=False)
        parser.add_argument("--dataset", type=str, default="sick", choices=["sick"])
        parser.add_argument("--decay", type=float, default=0.95)
        parser.add_argument("--input_file", type=str, default="local_saves/model.pt")
        parser.add_argument("--lr", type=float, default=5E-4)
        parser.add_argument("--mbatch_size", type=int, default=16)
        parser.add_argument("--mode", type=str, default="train", choices=["train", "test"])
        parser.add_argument("--momentum", type=float, default=0.1)
        parser.add_argument("--n_epochs", type=int, default=35)
        parser.add_argument("--n_labels", type=int, default=5)
        parser.add_argument("--optimizer", type=str, default="rmsprop", choices=["adam", "sgd", "rmsprop"])
        parser.add_argument("--output_file", type=str, default="local_saves/model.pt")
        parser.add_argument("--res_fmaps", type=int, default=32)
        parser.add_argument("--res_layers", type=int, default=16)
        parser.add_argument("--restore", action="store_true", default=False)
        parser.add_argument("--rnn_hidden_dim", type=int, default=250)
        parser.add_argument("--weight_decay", type=float, default=1E-5)
        parser.add_argument("--wordvecs_file", type=str, default="local_data/glove/glove.840B.300d.txt")
        return parser.parse_known_args()[0]

    @staticmethod
    def sick_config():
        parser = argparse.ArgumentParser()
        parser.add_argument("--n_labels", type=int, default=5)
        parser.add_argument("--sick_cache", type=str, default="local_data/sick/.vec-cache")
        parser.add_argument("--sick_data", type=str, default="local_data/sick")
        return parser.parse_known_args()[0]

class LabeledEmbeddedDataset(data.Dataset):
    def __init__(self, sentence_indices1, sentence_indices2, labels, compare_labels=None):
        assert len(sentence_indices1) == len(labels) == len(sentence_indices2)
        self.sentence_indices1 = sentence_indices1
        self.sentence_indices2 = sentence_indices2
        self.labels = labels
        self.compare_labels = compare_labels

    def __getitem__(self, idx):
        cmp_lbl = None if self.compare_labels is None else self.compare_labels[idx]
        return self.sentence_indices1[idx], self.sentence_indices2[idx], self.labels[idx], cmp_lbl

    def __len__(self):
        return len(self.labels)

def load_sick():
    config = Configs.sick_config()
    def fetch_indices(name):
        # `dataset` is resolved from the enclosing scope when this is called
        # inside the split loop below.
        sentence_indices = []
        filename = os.path.join(config.sick_data, dataset, name)
        with open(filename) as f:
            for line in f:
                # Map each token to its embedding row, dropping out-of-vocabulary words.
                indices = [embed_ids.get(word, -1) for word in line.strip().split()]
                indices = list(filter(lambda x: x >= 0, indices))
                sentence_indices.append(indices)
        return sentence_indices

    def read_labels(filename):
        labels = []
        with open(filename) as f:
            for line in f:
                labels.append([float(val) for val in line.split()])
        return labels

    sets = []
    embeddings = []
    embed_ids = {}
    with open(os.path.join(config.sick_cache)) as f:
        for i, line in enumerate(f):
            word, vec = line.split(" ", 1)
            vec = list(map(float, vec.strip().split()))
            embed_ids[word] = i
            embeddings.append(vec)
    # Reserve one extra all-zero row as the padding vector.
    padding_idx = len(embeddings)
    embeddings.append([0.0] * 300)

    for dataset in ("train", "dev", "test"):
        sparse_filename = os.path.join(config.sick_data, dataset, "sim_sparse.txt")
        truth_filename = os.path.join(config.sick_data, dataset, "sim.txt")
        sparse_labels = read_labels(sparse_filename)
        cmp_labels = read_labels(truth_filename)
        indices1 = fetch_indices("a.toks")
        indices2 = fetch_indices("b.toks")
        sets.append(LabeledEmbeddedDataset(indices1, indices2, sparse_labels, cmp_labels))
    embedding = nn.Embedding(len(embeddings), 300)
    embedding.weight.data.copy_(torch.Tensor(embeddings))
    embedding.weight.requires_grad = False
    return embedding, sets

def load_dataset(dataset):
    return _loaders[dataset]()

_loaders = dict(sick=load_sick)
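Assuming the first Python file above is the entry point and is saved as train.py (file names are not visible in this diff view; the name is a guess from the imports), typical invocations would be:

python train.py --mode train --dataset sick --optimizer rmsprop --lr 5e-4 --n_epochs 35
python train.py --mode test --restore --input_file local_saves/model.pt

Note that test mode loads saved weights only when --restore is given; without it, create_context() evaluates a freshly initialized model.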