Skip to content

Commit

Permalink
Updated Stanford to UD converter
Browse files Browse the repository at this point in the history
  * Note for optimal behavior, morph column should include 'person' if a token is the head of a person entity
  * Currently assumes lemmas are available for copula handling (this can probably be fixed)
  • Loading branch information
amir-zeldes committed Dec 19, 2017
1 parent db84683 commit 96ad44c
Showing 1 changed file with 120 additions and 23 deletions.
143 changes: 120 additions & 23 deletions examples/stan2uni.ini
@@ -1,56 +1,153 @@
;;;Configuration file to convert Stanford Typed Dependencies to Universal Dependencies
;;;Note not all aspects of universal dependencies are covered, especially handling of the 'name' label
;;;Note not all aspects of universal dependencies are covered, especially handling of the 'flat' label requires NER information assumed below

;handle free relatives such as "an expectation of what to do" (change pcomp to pobj+rcmod)
func=/prep/;pos=/^W.*/;func=/pcomp/ #1>#3;#3>#2 #2:func=pobj;#1>#2;#2>#3;#3:func=rcmod

;simple rename
func=/^acomp/ none #1:func=xcomp
func=/^npadvmod/ none #1:func=nmod:npmod
func=/^num/ none #1:func=nummod
func=/^rcmod/ none #1:func=acl:relcl
func=/^vmod/ none #1:func=nfincl
func=/^dobj/ none #1:func=obj
pos=/DT/&func=/^neg/ none #1:func=det
func=/^neg/ none #1:func=advmod
func=/preconj/ none #1:func=cc:preconj
func=/^quantmod/ none #1:func=advmod
func=/^tmod/ none #1:func=npmod:tmod
func=/^tmod/ none #1:func=nmod:tmod
func=/^predet/ none #1:func=det:predet
func=/^possessive/ none #1:func=case
func=/^poss$/ none #1:func=nmod:poss
func=/^prt/ none #1:func=compound:prt
func=/^auxpass/ none #1:func=aux:pass
func=/^csubjpass/ none #1:func=csubj:pass
func=/^nsubjpass/ none #1:func=nsubj:pass

;disambiguate nn
func=/nn/&pos=/JJ/ none #1:func=amod
func=/nn/&pos=/DT/ none #1:func=det

; Logan's try of flat (example: actor Bob A B C D Smith)
; First deal with person entities based on 'person' in the morph field
; step 1 let Bob dominate all ABCD
; non-sentence-initial
pos!=/N?NP.*/;pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&morph=/(.*person.*)/ #1.#2.*#3.*#4;#4>#2;#4>#3 #2>#3;#3:func=flat
; sentence-initial
pos=/N?NP.*/&func=/nn/&position=/first/;pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&morph=/(.*person.*)/ #1.*#2.*#3;#3>#1;#3>#2 #1>#2;#2:func=flat

; step 2 let anything previously dominated by Smith now by Bob
pos=/.*/;pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&morph=/(.*person.*)/ #1.*#2.*#3;#3>#1;#3>#2 #2>#1

; step 3 let Bob dominate Smith and receive func, morph information from Smith
; head has parent
func=/.*/;pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&func=/(.*)/&morph=/(.*person.*)/ #1.*#2.*#3;#3>#2;#1>#3 #1>#2;#2>#3;#2:func=$1;#2:morph=$2;#3:func=flat;#3:morph=_
; head has no parent
pos=/N?NP.*/&func=/nn/;pos=/N?NP.*/&func=/(root)/&morph=/(.*person.*)/ #1.*#2;#2>#1 #1>#2;#1:func=$1;#1:morph=$1;#1:head=0;#2:func=flat;#2:morph=_

; step 4 move head of flat fountain to actor if the entity is a person, morph and func info also moved to actor
func=/nn/;pos=/N?NP.*/&morph=/(.*person.*)/&func=/(.*)/&head=/(.*)/;func=/flat/ #1.#2.*#3;#2>#1;#2>#3 #1>#2;#1>#3;#1:morph=$1;#1:func=$2;#1:head=$3;#2:func=flat;#2:morph=_

; step 4.5: add old flat analysis for non-person
func=/nn/;func=/nn/;text=/.*/ #1.#2;#3>#2;#3>#1 #1>#2;#2:func=flat
pos=/N?NP.*|FW/&func=/nn/;pos=/N?NP.*|FW/&func=/(.*)/;text=/.*/ #2>#1;#3>#2 #1>#2;#2:func=flat;#3>#1;#1:func=$1
pos=/N?NP.*|FW/&func=/nn/;pos=/N?NP.*|FW/&func=/(root)/ #2>#1 #1>#2;#2:func=flat;#1:head=0;#1:func=$1

; step 5 cleaning: whatever dominated by flat will be by its parent
; precedence shouldn't matter for this step, since flat never dominates anything
;func=/.*/;func=/.*/;func=/flat/ #1.*#2.*#3;#3>#1;#2>#3 #2>#1
func=/.*/;func=/.*/;func=/flat/ #3>#1;#2>#3 #2>#1

; all rest nn become compound
func=/nn/ none #1:func=compound
;handle names HERE
;The following undergenerates, probably best to leave off:
;text=/^(Mr|Mrs|Ms|Dr|Prof(essor)?|Doctor|King|Queen|Prince(ss)?|Attorney|Governer)\.?/&func=/^nn/;pos=/^NN?P/ #1.#2;#2>#1 #1>#2;#2:func=name


;to-infinitive
text=/^[Tt]o$/&func=/aux/ none #1:func=mark

;adverbial clause modifying a nominal
pos=/^(N.*|P.*)$/;pos=/[NV].*/&func=/advcl/ #1>#2 #2:func=acl

;clauses (handling vmod)
pos=/^[NJ].*$/;pos=/^V[VBH]$|V.N|V.G/&func=/vmod/ #1>#2 #2:func=acl
pos=/^V.*/;pos=/^V.[GN]$/&func=/vmod/ #1>#2 #2:func=advcl
pos=/^V.*/;pos=/^V.$/&func=/vmod/ #1>#2 #2:func=advcl
pos=/^V.*/;func=/vmod/;func=/cop/ #1>#2>#3 #2:func=advcl

;reverse fixed
func=/^mwe$/;func=/(.*)/;func=/.*/ #3>#2>#1;#1.1,3#2 #3>#1;#1>#2;#2:func=fixed;#1:func=$1
func=/^mwe$/ none #1:func=fixed

;handle copula
;protect expletive existential first
func=/expl/;lemma=/be/ #2>#1 #2:morph=EXIST
;now convert copula cases
func=/nsubj|csubj/;lemma=/be/&morph!=/EXIST/&func=/(ccomp|parataxis|advcl|rcmod)/;func=/prep|pcomp/;func=/pobj/;text=/.*/ #2>#1;#2>#3>#4;#5>#2 #4>#1;#4>#2;#4>#3;#3:func=case;#4:func=$1;#2:func=cop;#5>#4
func=/nsubj|csubj/;lemma=/be/&morph!=/EXIST/&func=/root/;func=/prep|pcomp/;func=/pobj/ #2>#1;#2>#3>#4 #4>#1;#4>#2;#4>#3;#3:func=case;#2:func=cop;#4:func=root;#4:head=0
;restore existential case
morph=/EXIST/ none #1:morph=_

;TODO: handle copula without lemma (for corpora without gold lemmas)

;prepositional phrases
;take care of adnominal pcomp, should become acl (e.g. "mirrors for correcting your pose")
pos=/^N.*|FW|DT/;func=/prep/;func=/pcomp/ #1>#2>#3 #1>#3;#3>#2;#2:func=mark;#3:func=acl
;simple
func=/.*/;func=/^prep$/;func=/pobj/ #1>#2>#3 #3>#2;#2:func=case;#3:func=nmod;#1>#3
;pcomp without mark
func=/.*/;func=/^prep$/;func=/pcomp/ #1>#2>#3>#4 #3>#2;#2:func=_temp;#3:func=ncmod;#1>#3
;pcomp with mark
func=/.*/;func=/^prep$/;func=/pcomp/;func=/mark/ #1>#2>#3>#4 #3>#2;#2:func=case;#3:func=ncmod;#1>#3
func=/_temp/ none #1:func=mark
;coordinated pobj
func=/.*/;func=/conj/;func=/pobj/ #1>#2>#3 #2:func=case;#3>#2;#1>#3;#3:func=conj


;reverse conjunction
func=/.*/;func=/^cc$/;func=/^conj$/ #1>#2;#1>#3 #3>#2

;fix double coordination results in cc attaching to crossed third conjunct
func=/.*/;func=/^cc$/;func=/^conj$/;func=/^conj$/ #1.*#2.*#3.*#4;#1>#3;#1>#4;#4>#2 #3>#2


;prep for verbal predicate with pcomp
pos=/^V.*|^J.*/;func=/prep/;func=/pcomp/ #1>#2>#3 #1>#3;#3>#2;#2:func=mark;#3:func=advcl


;handle obj & xcomp (instead of nsubj of subclause in SD)
func=/.*/;func=/nsubj/;func=/xcomp/ #1>#3;#3>#2 #1>#2;#2:func=obj


;Promote remaining preps, which probably don't have a pobj, to nmod (which subsequently goes to obl if necessary)
func=/.*/;func=/^prep$/ #1>#2 #2:func=nmod


;handle nmod of predicates (should be obl)
pos=/^(V.*|J.*)$/;func=/nmod/ #1>#2 #2:func=obl

;complex numbers analyzed as compound
func=/^number/ none #1:func=compound

;universal POS tags
cpos=/JJ[RS]?/ none #1:pos=ADJ
cpos=/WRB/&func=/advmod|mark|obl/ none #1:pos=SCONJ
cpos=/RB[RS]?/&lemma!=/n.?t/ none #1:pos=ADV
cpos=/RB/&lemma=/n.?t/ none #1:pos=PART
cpos=/UH/ none #1:pos=INTJ
cpos=/CC/ none #1:pos=CCONJ
cpos=/CD/ none #1:pos=NUM
cpos=/NNS?/ none #1:pos=NOUN
text=/%/ none #1:pos=SYM
cpos=/NNPS?/ none #1:pos=PROPN
cpos=/V.*/ none #1:pos=VERB
func=/.*aux.*|cop/ none #1:pos=AUX
cpos=/IN|RP|TO/&func!=/mark/ none #1:pos=ADP
func=/mark/ none #1:pos=SCONJ
func=/(pre)?det/ none #1:pos=DET
cpos=/POS|TO/&func!=/case/ none #1:pos=PART
cpos=/PRP\$?|WP\$?|EX|W?DT/&func!=/det/ none #1:pos=PRON
func=/punct/ none #1:pos=PUNCT
cpos=/FW|LS/ none #1:pos=X

;clear morph
morph=/.*/ none #1:morph=_

;2word mwes
;"apart_from", "as_from", "aside_from", "away_from", "close_by", "close_to", "contrary_to", "far_from", "next_to", "near_to", "out_of", "outside_of", "pursuant_to", "regardless_of", "together_with"
text=/^([Aa]part|[Aa]s|[Aa]side|[Aa]way|[Ff]ar)$/;text=/^from$/ #1.#2 #1>#2;#2:func=mwe
text=/^[Cc]lose$/;text=/^(by|to)$/ #1.#2 #1>#2;#2:func=mwe
text=/^[Cc]ontrary$/;text=/^(to)$/ #1.#2 #1>#2;#2:func=mwe
text=/^([Nn]ext|[Nn]ear|[Pp]ersuant)$/;text=/^(to)$/ #1.#2 #1>#2;#2:func=mwe
text=/^([Oo]ut|[Oo]utside|[Rr]egardless)$/;text=/^(of)$/ #1.#2 #1>#2;#2:func=mwe
text=/^[Tt]ogether$/;text=/^with$/ #1.#2 #1>#2;#2:func=mwe

;3word mwes
;"by_means_of", "in_accordance_with", "in_addition_to", "in_case_of", "in_front_of", "in_lieu_of", "in_place_of", "in_spite_of", "on_account_of", "on_behalf_of", "on_top_of", "with_regard_to", "with_respect_to"
text=/^[Bb]y$/;text=/^means$/;text=/^of$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe
text=/^[Ii]n$/;text=/^accordance$/;text=/^with$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe
text=/^[Ii]n$/;text=/^addition$/;text=/^to$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe
text=/^[Ii]n$/;text=/^(case|front|lieu|place|spite)$/;text=/^of$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe
text=/^[Oo]n$/;text=/^(account|behalf|top)$/;text=/^of$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe
text=/^[Ww]ith$/;text=/^(regard|respect)$/;text=/^to$/ #1.#2.#3 #1>#2>#3;#2:func=mwe;#3:func=mwe

0 comments on commit 96ad44c

Please sign in to comment.