Skip to content

Commit

Permalink
Merge b662950 into 9d24bdc
Browse files Browse the repository at this point in the history
  • Loading branch information
michelole committed Jun 24, 2019
2 parents 9d24bdc + b662950 commit 47d8286
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 13 deletions.
2 changes: 1 addition & 1 deletion scripts/train_embeddings.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

# Parameters follow https://github.com/ncbi-nlp/BioSentVec/blob/master/src/train_biowordvec.sh
# `sentences.txt` should first be generated by `SentenceDumper.java`
fasttext skipgram -input sentences.txt -output n2c2-fasttext -dim 200 -t 0.001 -minCount 0 -neg 10 -wordNgrams 6 -ws 30
fasttext skipgram -input sentences.txt -output n2c2-fasttext -dim 200 -t 0.001 -minCount 0 -neg 10 -wordNgrams 6 -ws 20
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ public class PerceptronClassifier extends CriterionBasedClassifier {
* Location of precalculated vectors, extracted from the huge BioWordVec `.bin` file using `print_vectors.sh`.
*/
private static final File PRETRAINED_VECTORS = new File(PerceptronClassifier.class.getClassLoader().getResource("BioWordVec-vectors.vec").getFile());
private static final File SELFTRAINED_VECTORS = new File(PerceptronClassifier.class.getClassLoader().getResource("self-trained-vectors.vec").getFile());

private final boolean preTrained;
private final File vectors;

/**
 * Creates a classifier for the given criterion and picks the vectors file that is later passed to training.
 *
 * @param c          criterion forwarded to the superclass constructor
 * @param preTrained {@code true} to use {@code PRETRAINED_VECTORS}, {@code false} to use {@code SELFTRAINED_VECTORS}
 */
public PerceptronClassifier(Criterion c, boolean preTrained) {
super(c);
// NOTE(review): this listing appears to interleave pre- and post-change diff lines; this assignment looks like the removed one — confirm.
this.preTrained = preTrained;
// Resolve the flag to a concrete vectors file once, at construction time.
this.vectors = preTrained ? PRETRAINED_VECTORS : SELFTRAINED_VECTORS;
}

private String preprocess(String text) {
Expand Down Expand Up @@ -54,10 +55,6 @@ public void train(List<Patient> examples) {
trainData.put(preprocess(p.getText()), p.getEligibility(criterion).name());
}

if (preTrained) {
FastTextFacade.train(trainData, PRETRAINED_VECTORS);
} else {
FastTextFacade.train(trainData);
}
FastTextFacade.train(trainData, vectors);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
import org.apache.logging.log4j.Logger;

import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -133,7 +130,7 @@ private static List<String> getTrainCommand(File inputFile, File modelFile) {
command.add("-epoch");
command.add(String.valueOf(EPOCHS));
command.add("-lr");
command.add(String.format("%.2f", LEARNING_RATE));
command.add(String.format(Locale.ROOT, "%.2f", LEARNING_RATE));
command.add("-dim");
command.add(String.valueOf(DIMENSIONS));
return command;
Expand Down

0 comments on commit 47d8286

Please sign in to comment.