diff --git a/neuralcoref/train/model.py b/neuralcoref/train/model.py
index 4cdb25b..5519f02 100644
--- a/neuralcoref/train/model.py
+++ b/neuralcoref/train/model.py
@@ -11,6 +11,7 @@ import torch.nn as nn
 import torch.utils.data
 
+
 class Model(nn.Module):
     def __init__(self, vocab_size, embedding_dim, H1, H2, H3, D_pair_in, D_single_in, dropout=0.5):
         super(Model, self).__init__()
@@ -70,6 +71,8 @@ def forward(self, inputs, concat_axis=1):
         else:
             spans, words, single_features = inputs
         words = words.type(torch.LongTensor)
+        if self.cuda:
+            words = words.cuda()
         embed_words = self.drop(self.word_embeds(words).view(words.size()[0], -1))
         single_input = torch.cat([spans, embed_words, single_features], 1)
         single_scores = self.single_top(single_input)
@@ -77,6 +80,9 @@ def forward(self, inputs, concat_axis=1):
             batchsize, pairs_num, _ = ana_spans.size()
             ant_words_long = ant_words.view(batchsize, -1).type(torch.LongTensor)
             ana_words_long = ana_words.view(batchsize, -1).type(torch.LongTensor)
+            if self.cuda:
+                ant_words_long = ant_words_long.cuda()
+                ana_words_long = ana_words_long.cuda()
             ant_embed_words = self.drop(self.word_embeds(ant_words_long).view(batchsize, pairs_num, -1))
             ana_embed_words = self.drop(self.word_embeds(ana_words_long).view(batchsize, pairs_num, -1))
             pair_input = torch.cat([ant_spans, ant_embed_words, ana_spans, ana_embed_words, pair_features], 2)
diff --git a/neuralcoref/train/training.md b/neuralcoref/train/training.md
index 82de7fc..7e702f7 100644
--- a/neuralcoref/train/training.md
+++ b/neuralcoref/train/training.md
@@ -14,12 +14,12 @@ python -m spacy download en
 ````
 
 ## Get the data
-The following assumes you want to train on English, Arabic or Chinese.
+The following assumes you want to train on English, Arabic or Chinese. 
 If you want to train on another language, see the section [train on a new language](#train-on-a-new-language) below.
 
 First, download the [OntoNotes 5.0 dataset](https://catalog.ldc.upenn.edu/LDC2013T19) from LDC.
 
-Then, download the [CoNLL-2012 skeleton files](http://conll.cemantix.org/2012/data.html) from the CoNLL 2012 shared task site,
+Then, download the [CoNLL-2012 skeleton files](http://conll.cemantix.org/2012/data.html) from the CoNLL 2012 shared task site, 
 and combine these skeleton files with the OntoNotes files to get the `*._conll` text files which can be used as inputs for the training.
 This can be done by executing the script [compile_coref_data.sh](/neuralcoref/train/conll_processing_script/compile_coref_data.sh)
@@ -43,15 +43,15 @@ or by following these steps:
 * `cat conll-2012/v4/data/train/data/my_lang/annotations/*/*/*/*.v4_gold_conll >> train.my_lang.v4_gold_conll`
 * `cat conll-2012/v4/data/development/data/my_lang/annotations/*/*/*/*.v4_gold_conll >> dev.my_lang.v4_gold_conll`
 * `cat conll-2012/v4/data/test/data/my_lang/annotations/*/*/*/*.v4_gold_conll >> test.my_lang.v4_gold_conll`
-
+  
 ## Prepare the data
-Once you have the set of `*.v4_gold_conll` files, you can prepare the training data by running
+Once you have the set of `*.v4_gold_conll` files, move these files into separate (`train`, `test`, `dev`) subdirectories inside a new directory. You can use the already present `data` directory or create another directory anywhere you want. 
+Now, you can prepare the training data by running
 [conllparser.py](/neuralcoref/train/conllparser.py) on each split of the data set (`train`, `test`, `dev`) as
 ````bash
-python -m neuralcoref.train.conllparser --path ./data/train/
-python -m neuralcoref.train.conllparser --path ./data/test/
-python -m neuralcoref.train.conllparser --path ./data/dev/
+python -m neuralcoref.train.conllparser --path ./$path_to_data_directory/train/
+python -m neuralcoref.train.conllparser --path ./$path_to_data_directory/test/
+python -m neuralcoref.train.conllparser --path ./$path_to_data_directory/dev/
 ````
 
 Conllparser will:
@@ -61,8 +61,8 @@ Conllparser will:
 - gather the mention features in a set of numpy arrays to be used as input for the neural net model.
 
 ## Train the model
-Once the files have been pre-processed
-(you should have a set of `*.npy` files in a sub-directory `/numpy` in each of your (`train`|`test`|`dev`) data folder),
+Once the files have been pre-processed 
+(you should have a set of `*.npy` files in a sub-directory `/numpy` in each of your (`train`|`test`|`dev`) data folder), 
 you can start the training process using [learn.py](/neuralcoref/train/learn.py), for example as
 ````bash
 python -m neuralcoref.train.learn --train ./data/train/ --eval ./data/dev/
 ````
@@ -73,13 +73,13 @@ There many parameters and options for the training. You can list them with the u
 python -m neuralcoref.train.learn --help
 ````
 
-You can follow the training by running [Tensorboard for pyTorch](https://github.com/lanpa/tensorboard-pytorch)
+You can follow the training by running [Tensorboard for pyTorch](https://github.com/lanpa/tensorboard-pytorch) 
 (it requires a version of Tensorflow, any version will be fine).
 Run it with `tensorboard --logdir runs`.
 
 ## Some details on the training
-The model and the training as thoroughfully described in our
-[very detailed blog post](https://medium.com/huggingface/how-to-train-a-neural-coreference-model-neuralcoref-2-7bb30c1abdfe).
-The training process is similar to the mention-ranking training described in
+The model and the training as thoroughfully described in our 
+[very detailed blog post](https://medium.com/huggingface/how-to-train-a-neural-coreference-model-neuralcoref-2-7bb30c1abdfe). 
+The training process is similar to the mention-ranking training described in 
 [Clark and Manning (2016)](http://cs.stanford.edu/people/kevclark/resources/clark-manning-emnlp2016-deep.pdf), namely:
 - A first step of training uses a standard cross entropy loss on the mention pair labels,
 - A second step of training uses a cross entropy loss on the top pairs only, and
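
For context only, not part of the patch: a minimal, self-contained sketch of the device-handling pattern the `model.py` hunks apply, where word-index tensors are cast to `LongTensor` and moved to the GPU before the embedding lookup. The class and member names here (`TinyScorer`, `top`) are illustrative, and the sketch reads the target device from the embedding weights instead of a `self.cuda` flag, so the same forward pass runs on CPU-only machines.

````python
import torch
import torch.nn as nn


class TinyScorer(nn.Module):
    """Hypothetical toy module, not the repository's Model class."""

    def __init__(self, vocab_size=100, embedding_dim=8):
        super().__init__()
        self.word_embeds = nn.Embedding(vocab_size, embedding_dim)
        self.top = nn.Linear(embedding_dim, 1)

    def forward(self, words):
        # Index tensors must be LongTensors on the same device as the embedding
        # weights; here the device is read from the parameters rather than a flag.
        device = self.word_embeds.weight.device
        words = words.long().to(device)
        return self.top(self.word_embeds(words))


model = TinyScorer()
if torch.cuda.is_available():
    model = model.cuda()
scores = model(torch.tensor([[1, 2, 3]]))  # same call on CPU or GPU
````

Reading the device from the parameters keeps the forward pass agnostic to where the module was moved, which is a common alternative to threading a CUDA flag through the model.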