Skip to content

Commit

Permalink
Release: 3.1.1.
Browse files Browse the repository at this point in the history
  • Loading branch information
jdchoi77 committed May 7, 2015
1 parent 10b2851 commit 5893df8
Show file tree
Hide file tree
Showing 28 changed files with 330 additions and 678 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,14 @@ protected String getFeature(CommonFeatureToken token, NERState state, DEPNode no
default: return super.getFeature(token, state, node);
}
}

// @Override
// protected String[] getFeatures(CommonFeatureToken token, NERState state, DEPNode node)
// {
// switch (token.getField())
// {
// case as: return state.getAmbiguityClasses(node);
// default: return super.getFeatures(token, state, node);
// }
// }
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,52 @@ public NERState(DEPTree tree, CFlag flag, PrefixTree<String,NERInfoSet> namedEnt
/**
 * Stores the given named entity dictionary in {@code ne_dictionary}, queries it
 * with the node array of {@code d_tree} to fill {@code info_list}, and stores the
 * result of {@code getAmbiguityClasses()} in {@code ambiguity_classes}.
 *
 * @param namedEntityDictionary the prefix tree to keep and to query via {@code getAll}.
 */
public void init(PrefixTree<String,NERInfoSet> namedEntityDictionary)
{
ne_dictionary = namedEntityDictionary;
// NOTE(review): info_list is assigned here and again two lines below, so this
// result (keyed by DEPNode::getWordForm) is overwritten before anything in this
// method reads it. Presumably this is the removed side of a diff shown next to
// the added side -- confirm which lookup is intended.
info_list = ne_dictionary.getAll(d_tree.toNodeArray(), 1, DEPNode::getWordForm, true, false);
// info_list = ne_dictionary.getAll(d_tree.toNodeArray(), 1, DEPNode::getWordForm, true, false);
// Effective lookup: keyed by DEPNode::getLowerSimplifiedWordForm.
info_list = ne_dictionary.getAll(d_tree.toNodeArray(), 1, DEPNode::getLowerSimplifiedWordForm, true, false);
ambiguity_classes = getAmbiguityClasses();
}

// private void initAmbiguityClasses()
// {
// List<Set<String>> sets = IntStream.range(0, t_size).mapToObj(k -> new HashSet<String>()).collect(Collectors.toList());
// StringJoiner[] joiners = new StringJoiner[t_size];
// int i, j, size = info_list.size();
// ObjectIntIntTriple<NERInfoSet> t;
// String tag;
//
// for (i=1; i<t_size; i++)
// joiners[i] = new StringJoiner("-");
//
// for (i=0; i<size; i++)
// {
// t = info_list.get(i);
// tag = t.o.joinTags(StringConst.COLON);
//
// if (t.i1 == t.i2)
// joiners[t.i1].add(NERLib.toBILOUTag(BILOU.U, tag));
// else
// {
// joiners[t.i1].add(NERLib.toBILOUTag(BILOU.B, tag));
// joiners[t.i2].add(NERLib.toBILOUTag(BILOU.L, tag));
//
// for (j=t.i1+1; j<t.i2; j++)
// joiners[j].add(NERLib.toBILOUTag(BILOU.I, tag));
// }
//
// for (j=t.i1; j<=t.i2; j++)
// sets.get(j).addAll(t.o.getCategorySet());
// }
//
// ambiguity_class_set = new String[t_size][];
// ambiguity_classes = new String[t_size];
//
// for (i=1; i<t_size; i++)
// {
// ambiguity_classes [i] = joiners.length == 0 ? null : joiners[i].toString();
// ambiguity_class_set[i] = DSUtils.toArray(sets.get(i));
// }
// }

private String[] getAmbiguityClasses()
{
StringJoiner[] joiners = new StringJoiner[t_size];
Expand Down Expand Up @@ -116,6 +158,11 @@ public String getAmbiguityClass(DEPNode node)
return ambiguity_classes[node.getID()];
}

// public String[] getAmbiguityClasses(DEPNode node)
// {
// return ambiguity_class_set[node.getID()];
// }
//
// public String[] getCooccuranceFeatures(DEPNode node)
// {
// String[] categories = {"PER", "LOC", "ORG", "MISC"};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ static public AbstractNERecognizer getNERecognizer(TLanguage language, String mo
@SuppressWarnings("unchecked")
static public PrefixTree<String,NERInfoSet> getNERDictionary(ObjectInputStream in)
{
BinUtils.LOG.info("Loading named entity dictionary.\n");
BinUtils.LOG.info("Loading named entity gazetteers.\n");
PrefixTree<String,NERInfoSet> tree = null;

try
Expand Down
68 changes: 68 additions & 0 deletions src/main/java/edu/emory/clir/clearnlp/constituent/CTLib.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
package edu.emory.clir.clearnlp.constituent;

import java.util.List;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;

/**
* @author Jinho D. Choi ({@code jinho.choi@emory.edu})
Expand All @@ -35,4 +38,69 @@ static public String toForms(List<CTNode> tokens, int beginIndex, int endIndex,

return build.substring(delim.length());
}

/**
 * Builds a predicate that accepts a node when
 * {@code isConstituentTag(constituentTag)} returns {@code true} for it.
 *
 * @param constituentTag the constituent tag handed to {@code CTNode.isConstituentTag}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchC(String constituentTag)
{
	return candidate -> candidate.isConstituentTag(constituentTag);
}

/**
 * Builds a predicate that accepts a node when
 * {@code isConstituentTagAny(constituentTags)} returns {@code true} for it.
 *
 * @param constituentTags the tags handed to {@code CTNode.isConstituentTagAny}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchCo(String... constituentTags)
{
	return candidate -> candidate.isConstituentTagAny(constituentTags);
}

/**
 * Builds a predicate that accepts a node when the given set contains the value
 * returned by the node's {@code getConstituentTag()}.
 * The set is captured by reference, not copied.
 *
 * @param constituentTags the set consulted on every test.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchCo(Set<String> constituentTags)
{
	return candidate -> constituentTags.contains(candidate.getConstituentTag());
}

/**
 * Builds a predicate that accepts a node when the value returned by its
 * {@code getConstituentTag()} starts with the given prefix.
 *
 * @param constituentPrefix the prefix passed to {@code startsWith}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchCp(String constituentPrefix)
{
	return candidate -> candidate.getConstituentTag().startsWith(constituentPrefix);
}

/**
 * Builds a predicate that accepts a node when both
 * {@code isConstituentTag(constituentTag)} and {@code hasFunctionTag(functionTag)}
 * return {@code true}; the function-tag check runs only if the first check passes.
 *
 * @param constituentTag the tag handed to {@code CTNode.isConstituentTag}.
 * @param functionTag the tag handed to {@code CTNode.hasFunctionTag}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchCF(String constituentTag, String functionTag)
{
	Predicate<CTNode> byConstituent = n -> n.isConstituentTag(constituentTag);
	Predicate<CTNode> byFunction = n -> n.hasFunctionTag(functionTag);

	// Predicate.and short-circuits exactly like the && operator.
	return byConstituent.and(byFunction);
}

/**
 * Builds a predicate that accepts a node when both
 * {@code isConstituentTag(constituentTag)} and {@code hasFunctionTagAll(functionTags)}
 * return {@code true}; the function-tag check runs only if the first check passes.
 *
 * @param constituentTag the tag handed to {@code CTNode.isConstituentTag}.
 * @param functionTags the tags handed to {@code CTNode.hasFunctionTagAll}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchCFa(String constituentTag, String... functionTags)
{
	Predicate<CTNode> byConstituent = n -> n.isConstituentTag(constituentTag);
	Predicate<CTNode> byAllFunctions = n -> n.hasFunctionTagAll(functionTags);

	// Predicate.and short-circuits exactly like the && operator.
	return byConstituent.and(byAllFunctions);
}

/**
 * Builds a predicate that accepts a node when both
 * {@code isConstituentTag(constituentTag)} and {@code hasFunctionTagAny(functionTags)}
 * return {@code true}; the function-tag check runs only if the first check passes.
 *
 * @param constituentTag the tag handed to {@code CTNode.isConstituentTag}.
 * @param functionTags the tags handed to {@code CTNode.hasFunctionTagAny}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchCFo(String constituentTag, String... functionTags)
{
	Predicate<CTNode> byConstituent = n -> n.isConstituentTag(constituentTag);
	Predicate<CTNode> byAnyFunction = n -> n.hasFunctionTagAny(functionTags);

	// Predicate.and short-circuits exactly like the && operator.
	return byConstituent.and(byAnyFunction);
}

/**
 * Builds a predicate that accepts a node when
 * {@code hasFunctionTag(functionTag)} returns {@code true} for it.
 *
 * @param functionTag the tag handed to {@code CTNode.hasFunctionTag}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchF(String functionTag)
{
	return candidate -> candidate.hasFunctionTag(functionTag);
}

/**
 * Builds a predicate that accepts a node when
 * {@code hasFunctionTagAll(functionTags)} returns {@code true} for it.
 *
 * @param functionTags the tags handed to {@code CTNode.hasFunctionTagAll}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchFa(String... functionTags)
{
	return candidate -> candidate.hasFunctionTagAll(functionTags);
}

/**
 * Builds a predicate that accepts a node when
 * {@code hasFunctionTagAny(functionTags)} returns {@code true} for it.
 *
 * @param functionTags the tags handed to {@code CTNode.hasFunctionTagAny}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchFo(String... functionTags)
{
	return candidate -> candidate.hasFunctionTagAny(functionTags);
}

/**
 * Builds a predicate that accepts a node when
 * {@code matchesConstituentTag(constituentPattern)} returns {@code true} for it.
 *
 * @param constituentPattern the pattern handed to {@code CTNode.matchesConstituentTag}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchP(Pattern constituentPattern)
{
	return candidate -> candidate.matchesConstituentTag(constituentPattern);
}

/**
 * Builds a predicate that accepts a node when both
 * {@code matchesConstituentTag(constituentPattern)} and
 * {@code hasFunctionTagAll(functionTags)} return {@code true}; the function-tag
 * check runs only if the pattern check passes.
 *
 * @param constituentPattern the pattern handed to {@code CTNode.matchesConstituentTag}.
 * @param functionTags the tags handed to {@code CTNode.hasFunctionTagAll}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchPFa(Pattern constituentPattern, String... functionTags)
{
	Predicate<CTNode> byPattern = n -> n.matchesConstituentTag(constituentPattern);
	Predicate<CTNode> byAllFunctions = n -> n.hasFunctionTagAll(functionTags);

	// Predicate.and short-circuits exactly like the && operator.
	return byPattern.and(byAllFunctions);
}

/**
 * Builds a predicate that accepts a node when both
 * {@code matchesConstituentTag(constituentPattern)} and
 * {@code hasFunctionTagAny(functionTags)} return {@code true}; the function-tag
 * check runs only if the pattern check passes.
 *
 * @param constituentPattern the pattern handed to {@code CTNode.matchesConstituentTag}.
 * @param functionTags the tags handed to {@code CTNode.hasFunctionTagAny}.
 * @return the predicate over {@code CTNode}.
 */
static public Predicate<CTNode> matchPFo(Pattern constituentPattern, String... functionTags)
{
	Predicate<CTNode> byPattern = n -> n.matchesConstituentTag(constituentPattern);
	Predicate<CTNode> byAnyFunction = n -> n.hasFunctionTagAny(functionTags);

	// Predicate.and short-circuits exactly like the && operator.
	return byPattern.and(byAnyFunction);
}
}
45 changes: 20 additions & 25 deletions src/main/java/edu/emory/clir/clearnlp/constituent/CTLibEn.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,9 @@

import java.util.List;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;

import edu.emory.clir.clearnlp.constituent.matcher.CTNodeMatcher;
import edu.emory.clir.clearnlp.constituent.matcher.CTNodeMatcherC;
import edu.emory.clir.clearnlp.constituent.matcher.CTNodeMatcherCF;
import edu.emory.clir.clearnlp.constituent.matcher.CTNodeMatcherF;
import edu.emory.clir.clearnlp.constituent.matcher.CTNodeMatcherPrefix;
import edu.emory.clir.clearnlp.constituent.matcher.CTNodeMatcherSet;
import edu.emory.clir.clearnlp.pos.POSLibEn;
import edu.emory.clir.clearnlp.pos.POSTagEn;
import edu.emory.clir.clearnlp.util.DSUtils;
Expand All @@ -41,26 +36,26 @@ public class CTLibEn extends CTLib implements CTTagEn, POSTagEn
{
static final public Pattern P_PASSIVE_NULL = PatternUtils.createClosedORPattern("\\*","\\*-\\d+");

static final public CTNodeMatcher M_NP = new CTNodeMatcherC(C_NP);
static final public CTNodeMatcher M_VP = new CTNodeMatcherC(C_VP);
static final public CTNodeMatcher M_QP = new CTNodeMatcherC(C_QP);
static final public CTNodeMatcher M_ADVP = new CTNodeMatcherC(C_ADVP);
static final public CTNodeMatcher M_SBAR = new CTNodeMatcherC(C_SBAR);
static final public CTNodeMatcher M_EDITED = new CTNodeMatcherC(C_EDITED);
static final public Predicate<CTNode> M_NP = CTLib.matchC(C_NP);
static final public Predicate<CTNode> M_VP = CTLib.matchC(C_VP);
static final public Predicate<CTNode> M_QP = CTLib.matchC(C_QP);
static final public Predicate<CTNode> M_ADVP = CTLib.matchC(C_ADVP);
static final public Predicate<CTNode> M_SBAR = CTLib.matchC(C_SBAR);
static final public Predicate<CTNode> M_EDITED = CTLib.matchC(C_EDITED);

static final public CTNodeMatcher M_NOM = new CTNodeMatcherF(F_NOM);
static final public CTNodeMatcher M_PRD = new CTNodeMatcherF(F_PRD);
static final public Predicate<CTNode> M_NOM = CTLib.matchF(F_NOM);
static final public Predicate<CTNode> M_PRD = CTLib.matchF(F_PRD);

static final public CTNodeMatcher M_NP_SBJ = new CTNodeMatcherCF(C_NP, F_SBJ);
static final public Predicate<CTNode> M_NP_SBJ = CTLib.matchCF(C_NP, F_SBJ);

static final public CTNodeMatcher M_NNx = new CTNodeMatcherPrefix(POS_NN);
static final public CTNodeMatcher M_VBx = new CTNodeMatcherPrefix(POS_VB);
static final public CTNodeMatcher M_WHx = new CTNodeMatcherPrefix("WH");
static final public CTNodeMatcher M_SBARx = new CTNodeMatcherPrefix(C_SBAR);
static final public Predicate<CTNode> M_NNx = CTLib.matchCp(POS_NN);
static final public Predicate<CTNode> M_VBx = CTLib.matchCp(POS_VB);
static final public Predicate<CTNode> M_WHx = CTLib.matchCp("WH");
static final public Predicate<CTNode> M_SBARx = CTLib.matchCp(C_SBAR);

static final public CTNodeMatcher M_NP_NML = new CTNodeMatcherSet(DSUtils.toHashSet(C_NP, C_NML));
static final public CTNodeMatcher M_VBD_VBN = new CTNodeMatcherSet(DSUtils.toHashSet(POS_VBD, POS_VBN));
static final public CTNodeMatcher M_VP_RRC_UCP = new CTNodeMatcherSet(DSUtils.toHashSet(C_VP, C_RRC, C_UCP));
static final public Predicate<CTNode> M_NP_NML = CTLib.matchCo(DSUtils.toHashSet(C_NP, C_NML));
static final public Predicate<CTNode> M_VBD_VBN = CTLib.matchCo(DSUtils.toHashSet(POS_VBD, POS_VBN));
static final public Predicate<CTNode> M_VP_RRC_UCP = CTLib.matchCo(DSUtils.toHashSet(C_VP, C_RRC, C_UCP));

static final private Set<String> S_LGS_PHRASE = DSUtils.toHashSet(C_PP, C_SBAR);
static final private Set<String> S_MAIN_CLAUSE = DSUtils.toHashSet(C_S, C_SQ, C_SINV);
Expand Down Expand Up @@ -466,7 +461,7 @@ static public boolean isWhPhraseLink(CTNode node)

/**
 * Applies the {@code M_WHx} matcher to the given node and returns its verdict.
 *
 * @param node the node handed to {@code M_WHx}.
 * @return the boolean produced by {@code M_WHx} for {@code node}.
 */
static public boolean isWhPhrase(CTNode node)
{
// NOTE(review): two return statements appear back to back, so the second one
// cannot be reached as written. Presumably these are the removed (matches) and
// added (test) sides of a diff shown together -- confirm which call is intended.
return M_WHx.matches(node);
return M_WHx.test(node);
}

static public boolean isEditedPhrase(CTNode node)
Expand All @@ -485,9 +480,9 @@ static public boolean isRNR(CTNode node)
return node.getWordForm().startsWith(E_RNR);
}

static public CTNode getNode(CTNode node, CTNodeMatcher matcher, boolean recursive)
static public CTNode getNode(CTNode node, Predicate<CTNode> matcher, boolean recursive)
{
if (matcher.matches(node))
if (matcher.test(node))
return node;

if (recursive && node.getChildrenSize() == 1)
Expand Down
Loading

0 comments on commit 5893df8

Please sign in to comment.