Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Note for optimal behavior, morph column should include 'person' if a token is the head of a person entity * Currently assumes lemmas are available for copula handling (this can probably be fixed)
- Loading branch information
1 parent
db84683
commit 96ad44c
Showing
1 changed file
with
120 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,56 +1,153 @@ | ||
;;;Configuration file to convert Stanford Typed Dependencies to Universal Dependencies | ||
;;;Note not all aspects of universal dependencies are covered, especially handling of the 'name' label | ||
;;;Note not all aspects of universal dependencies are covered, especially handling of the 'flat' label requires NER information assumed below | ||
|
||
;handle free relatives such as "an expectation of what to do" (change pcomp to pobj+rcmod) | ||
func=/prep/;pos=/^W.*/;func=/pcomp/ #1>#3;#3>#2 #2:func=pobj;#1>#2;#2>#3;#3:func=rcmod | ||
|
||
;simple rename | ||
func=/^acomp/ none #1:func=xcomp | ||
func=/^npadvmod/ none #1:func=nmod:npmod | ||
func=/^num/ none #1:func=nummod | ||
func=/^rcmod/ none #1:func=acl:relcl | ||
func=/^vmod/ none #1:func=nfincl | ||
func=/^dobj/ none #1:func=obj | ||
pos=/DT/&func=/^neg/ none #1:func=det | ||
func=/^neg/ none #1:func=advmod | ||
func=/preconj/ none #1:func=cc:preconj | ||
func=/^quantmod/ none #1:func=advmod | ||
func=/^tmod/ none #1:func=npmod:tmod | ||
func=/^tmod/ none #1:func=nmod:tmod | ||
func=/^predet/ none #1:func=det:predet | ||
func=/^possessive/ none #1:func=case | ||
func=/^poss$/ none #1:func=nmod:poss | ||
func=/^prt/ none #1:func=compound:prt | ||
func=/^auxpass/ none #1:func=aux:pass | ||
func=/^csubjpass/ none #1:func=csubj:pass | ||
func=/^nsubjpass/ none #1:func=nsubj:pass | ||
|
||
;disambiguate nn | ||
func=/nn/&pos=/JJ/ none #1:func=amod | ||
func=/nn/&pos=/DT/ none #1:func=det | ||
|
||
; Logan's try of flat (example: actor Bob A B C D Smith) | ||
; First deal with person entities based on 'person' in the morph field | ||
; step 1 let Bob dominate all ABCD | ||
; non-sentence-initial | ||
pos!=/N?NP.*/;pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&morph=/(.*person.*)/ #1.#2.*#3.*#4;#4>#2;#4>#3 #2>#3;#3:func=flat | ||
; sentence-initial | ||
pos=/N?NP.*/&func=/nn/&position=/first/;pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&morph=/(.*person.*)/ #1.*#2.*#3;#3>#1;#3>#2 #1>#2;#2:func=flat | ||
|
||
; step 2 let anything previously dominated by Smith now by Bob | ||
pos=/.*/;pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&morph=/(.*person.*)/ #1.*#2.*#3;#3>#1;#3>#2 #2>#1 | ||
|
||
; step 3 let Bob dominate Smith and receive func, morph information from Smith | ||
; head has parent | ||
func=/.*/;pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&func=/(.*)/&morph=/(.*person.*)/ #1.*#2.*#3;#3>#2;#1>#3 #1>#2;#2>#3;#2:func=$1;#2:morph=$2;#3:func=flat;#3:morph=_ | ||
; head has no parent | ||
pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&func=/(root)/&morph=/(.*person.*)/ #1.*#2;#2>#1 #1>#2;#1:func=$1;#1:morph=$1;#1:head=0;#2:func=flat;#2:morph=_ | ||
|
||
; step 4 move head of flat fountain to actor if the entity is a person, morph and func info also moved to actor | ||
func=/nn/;pos=/N?NP.*/&morph=/(.*person.*)/&func=/(.*)/&head=/(.*)/;func=/flat/ #1.#2.*#3;#2>#1;#2>#3 #1>#2;#1>#3;#1:morph=$1;#1:func=$2;#1:head=$3;#2:func=flat;#2:morph=_ | ||
|
||
; step 4.5: add old flat analysis for non-person | ||
func=/nn/;func=/nn/;text=/.*/ #1.#2;#3>#2;#3>#1 #1>#2;#2:func=flat | ||
pos=/N?NP.*|FW/&func=/nn/;pos=/N?NP.*|FW/&func=/(.*)/;text=/.*/ #2>#1;#3>#2 #1>#2;#2:func=flat;#3>#1;#1:func=$1 | ||
pos=/N?NP.*|FW/&func=/nn/;pos=/N?NP.*|FW/&func=/(root)/ #2>#1 #1>#2;#2:func=flat;#1:head=0;#1:func=$1 | ||
|
||
; step 5 cleaning: whatever dominated by flat will be by its parent | ||
; precedence shouldn't matter for this step, since flat never dominates anything | ||
;func=/.*/;func=/.*/;func=/flat/ #1.*#2.*#3;#3>#1;#2>#3 #2>#1 | ||
func=/.*/;func=/.*/;func=/flat/ #3>#1;#2>#3 #2>#1 | ||
|
||
; all rest nn become compound | ||
func=/nn/ none #1:func=compound | ||
;handle names HERE | ||
;The following undergenerates, probably best to leave off: | ||
;text=/^(Mr|Mrs|Ms|Dr|Prof(essor)?|Doctor|King|Queen|Prince(ss)?|Attorney|Governer)\.?/&func=/^nn/;pos=/^NN?P/ #1.#2;#2>#1 #1>#2;#2:func=name | ||
|
||
|
||
;to-infinitive | ||
text=/^[Tt]o$/&func=/aux/ none #1:func=mark | ||
|
||
;adverbial clause modifying a nominal | ||
pos=/^(N.*|P.*)$/;pos=/[NV].*/&func=/advcl/ #1>#2 #2:func=acl | ||
|
||
;clauses (handling vmod) | ||
pos=/^[NJ].*$/;pos=/^V[VBH]$|V.N|V.G/&func=/vmod/ #1>#2 #2:func=acl | ||
pos=/^V.*/;pos=/^V.[GN]$/&func=/vmod/ #1>#2 #2:func=advcl | ||
pos=/^V.*/;pos=/^V.$/&func=/vmod/ #1>#2 #2:func=advcl | ||
pos=/^V.*/;func=/vmod/;func=/cop/ #1>#2>#3 #2:func=advcl | ||
|
||
;reverse fixed | ||
func=/^mwe$/;func=/(.*)/;func=/.*/ #3>#2>#1;#1.1,3#2 #3>#1;#1>#2;#2:func=fixed;#1:func=$1 | ||
func=/^mwe$/ none #1:func=fixed | ||
|
||
;handle copula | ||
;protect expletive existential first | ||
func=/expl/;lemma=/be/ #2>#1 #2:morph=EXIST | ||
;now convert copula cases | ||
func=/nsubj|csubj/;lemma=/be/&morph!=/EXIST/&func=/(ccomp|parataxis|advcl|rcmod)/;func=/prep|pcomp/;func=/pobj/;text=/.*/ #2>#1;#2>#3>#4;#5>#2 #4>#1;#4>#2;#4>#3;#3:func=case;#4:func=$1;#2:func=cop;#5>#4 | ||
func=/nsubj|csubj/;lemma=/be/&morph!=/EXIST/&func=/root/;func=/prep|pcomp/;func=/pobj/ #2>#1;#2>#3>#4 #4>#1;#4>#2;#4>#3;#3:func=case;#2:func=cop;#4:func=root;#4:head=0 | ||
;restore existential case | ||
morph=/EXIST/ none #1:morph=_ | ||
|
||
;TODO: handle copula without lemma (for corpora without gold lemmas) | ||
|
||
;prepositional phrases | ||
;take care of adnominal pcomp, should become acl (e.g. "mirrors for correcting your pose") | ||
pos=/^N.*|FW|DT/;func=/prep/;func=/pcomp/ #1>#2>#3 #1>#3;#3>#2;#2:func=mark;#3:func=acl | ||
;simple | ||
func=/.*/;func=/^prep$/;func=/pobj/ #1>#2>#3 #3>#2;#2:func=case;#3:func=nmod;#1>#3 | ||
;pcomp without mark | ||
func=/.*/;func=/^prep$/;func=/pcomp/ #1>#2>#3>#4 #3>#2;#2:func=_temp;#3:func=ncmod;#1>#3 | ||
;pcomp with mark | ||
func=/.*/;func=/^prep$/;func=/pcomp/;func=/mark/ #1>#2>#3>#4 #3>#2;#2:func=case;#3:func=ncmod;#1>#3 | ||
func=/_temp/ none #1:func=mark | ||
;coordinated pobj | ||
func=/.*/;func=/conj/;func=/pobj/ #1>#2>#3 #2:func=case;#3>#2;#1>#3;#3:func=conj | ||
|
||
|
||
;reverse conjunction | ||
func=/.*/;func=/^cc$/;func=/^conj$/ #1>#2;#1>#3 #3>#2 | ||
|
||
;fix double coordination results in cc attaching to crossed third conjunct | ||
func=/.*/;func=/^cc$/;func=/^conj$/;func=/^conj$/ #1.*#2.*#3.*#4;#1>#3;#1>#4;#4>#2 #3>#2 | ||
|
||
|
||
;prep for verbal predicate with pcomp | ||
pos=/^V.*|^J.*/;func=/prep/;func=/pcomp/ #1>#2>#3 #1>#3;#3>#2;#2:func=mark;#3:func=advcl | ||
|
||
|
||
;handle obj & xcomp (instead of nsubj of subclause in SD) | ||
func=/.*/;func=/nsubj/;func=/xcomp/ #1>#3;#3>#2 #1>#2;#2:func=obj | ||
|
||
|
||
;Promote remaining preps, which probably don't have a pobj, to nmod (which subsequently goes to obl if necessary) | ||
func=/.*/;func=/^prep$/ #1>#2 #2:func=nmod | ||
|
||
|
||
;handle nmod of predicates (should be obl) | ||
pos=/^(V.*|J.*)$/;func=/nmod/ #1>#2 #2:func=obl | ||
|
||
;complex numbers analyzed as compound | ||
func=/^number/ none #1:func=compound | ||
|
||
;universal POS tags | ||
cpos=/JJ[RS]?/ none #1:pos=ADJ | ||
cpos=/WRB/&func=/advmod|mark|obl/ none #1:pos=SCONJ | ||
cpos=/RB[RS]?/&lemma!=/n.?t/ none #1:pos=ADV | ||
cpos=/RB/&lemma=/n.?t/ none #1:pos=PART | ||
cpos=/UH/ none #1:pos=INTJ | ||
cpos=/CC/ none #1:pos=CCONJ | ||
cpos=/CD/ none #1:pos=NUM | ||
cpos=/NNS?/ none #1:pos=NOUN | ||
text=/%/ none #1:pos=SYM | ||
cpos=/NNPS?/ none #1:pos=PROPN | ||
cpos=/V.*/ none #1:pos=VERB | ||
func=/.*aux.*|cop/ none #1:pos=AUX | ||
cpos=/IN|RP|TO/&func!=/mark/ none #1:pos=ADP | ||
func=/mark/ none #1:pos=SCONJ | ||
func=/(pre)?det/ none #1:pos=DET | ||
cpos=/POS|TO/&func!=/case/ none #1:pos=PART | ||
cpos=/PRP\$?|WP\$?|EX|W?DT/&func!=/det/ none #1:pos=PRON | ||
func=/punct/ none #1:pos=PUNCT | ||
cpos=/FW|LS/ none #1:pos=X | ||
|
||
;clear morph | ||
morph=/.*/ none #1:morph=_ | ||
|
||
;2word mwes | ||
;"apart_from", "as_from", "aside_from", "away_from", "close_by", "close_to", "contrary_to", "far_from", "next_to", "near_to", "out_of", "outside_of", "pursuant_to", "regardless_of", "together_with" | ||
text=/^([Aa]part|[Aa]s|[Aa]side|[Aa]way|[Ff]ar)$/;text=/^from$/ #1.#2 #1>#2;#2:func=mwe | ||
text=/^[Cc]lose$/;text=/^(by|to)$/ #1.#2 #1>#2;#2:func=mwe | ||
text=/^[Cc]ontrary$/;text=/^(to)$/ #1.#2 #1>#2;#2:func=mwe | ||
text=/^([Nn]ext|[Nn]ear|[Pp]ersuant)$/;text=/^(to)$/ #1.#2 #1>#2;#2:func=mwe | ||
text=/^([Oo]ut|[Oo]utside|[Rr]egardless)$/;text=/^(of)$/ #1.#2 #1>#2;#2:func=mwe | ||
text=/^[Tt]ogether$/;text=/^with$/ #1.#2 #1>#2;#2:func=mwe | ||
|
||
;3word mwes | ||
;"by_means_of", "in_accordance_with", "in_addition_to", "in_case_of", "in_front_of", "in_lieu_of", "in_place_of", "in_spite_of", "on_account_of", "on_behalf_of", "on_top_of", "with_regard_to", "with_respect_to" | ||
text=/^[Bb]y$/;text=/^means$/;text=/^of$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe | ||
text=/^[Ii]n$/;text=/^accordance$/;text=/^with$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe | ||
text=/^[Ii]n$/;text=/^addition$/;text=/^to$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe | ||
text=/^[Ii]n$/;text=/^(case|front|lieu|place|spite)$/;text=/^of$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe | ||
text=/^[Oo]n$/;text=/^(account|behalf|top)$/;text=/^of$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe | ||
text=/^[Ww]ith$/;text=/^(regard|respect)$/;text=/^to$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe |