diff --git a/delphin/codecs/dmrsjson.py b/delphin/codecs/dmrsjson.py index 2da7f8e7..257b705c 100644 --- a/delphin/codecs/dmrsjson.py +++ b/delphin/codecs/dmrsjson.py @@ -2,98 +2,6 @@ """ DMRS-JSON serialization and deserialization. - -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -:: - - { - "top": 10008, - "index": 10009, - "nodes": [ - { - "nodeid": 10000, - "predicate": "_the_q", - "lnk": {"from": 0, "to": 3} - }, - { - "nodeid": 10001, - "predicate": "_new_a_1", - "sortinfo": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "bool", "PERF": "-", "cvarsort": "e"}, - "lnk": {"from": 4, "to": 7} - }, - { - "nodeid": 10002, - "predicate": "_chef_n_1", - "sortinfo": {"PERS": "3", "NUM": "sg", "IND": "+", "cvarsort": "x"}, - "lnk": {"from": 8, "to": 12} - }, - { - "nodeid": 10003, - "predicate": "def_explicit_q", - "lnk": {"from": 13, "to": 18} - }, - { - "nodeid": 10004, - "predicate": "poss", - "sortinfo": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"}, - "lnk": {"from": 13, "to": 18} - }, - { - "nodeid": 10005, - "predicate": "_soup_n_1", - "sortinfo": {"PERS": "3", "NUM": "sg", "cvarsort": "x"}, - "lnk": {"from": 19, "to": 23} - }, - { - "nodeid": 10006, - "predicate": "_accidental_a_1", - "sortinfo": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"}, - "lnk": {"from": 24, "to": 36} - }, - { - "nodeid": 10007, - "predicate": "_spill_v_1", - "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"}, - "lnk": {"from": 37, "to": 44} - }, - { - "nodeid": 10008, - "predicate": "_quit_v_1", - "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"}, - "lnk": {"from": 45, "to": 49} - }, - { - "nodeid": 10009, - "predicate": "_and_c", - "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", 
"PERF": "-", "cvarsort": "e"}, - "lnk": {"from": 50, "to": 53} - }, - { - "nodeid": 10010, - "predicate": "_leave_v_1", - "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"}, - "lnk": {"from": 54, "to": 59} - } - ], - "links": [ - {"from": 10000, "to": 10002, "rargname": "RSTR", "post": "H"}, - {"from": 10001, "to": 10002, "rargname": "ARG1", "post": "EQ"}, - {"from": 10003, "to": 10005, "rargname": "RSTR", "post": "H"}, - {"from": 10004, "to": 10005, "rargname": "ARG1", "post": "EQ"}, - {"from": 10004, "to": 10002, "rargname": "ARG2", "post": "NEQ"}, - {"from": 10006, "to": 10007, "rargname": "ARG1", "post": "EQ"}, - {"from": 10007, "to": 10005, "rargname": "ARG1", "post": "NEQ"}, - {"from": 10008, "to": 10002, "rargname": "ARG1", "post": "NEQ"}, - {"from": 10009, "to": 10008, "rargname": "ARG1", "post": "EQ"}, - {"from": 10009, "to": 10010, "rargname": "ARG2", "post": "EQ"}, - {"from": 10010, "to": 10002, "rargname": "ARG1", "post": "NEQ"}, - {"from": 10007, "to": 10002, "rargname": "MOD", "post": "EQ"}, - {"from": 10010, "to": 10008, "rargname": "MOD", "post": "EQ"} - ] - } """ from pathlib import Path diff --git a/delphin/codecs/dmrspenman.py b/delphin/codecs/dmrspenman.py index f3db127e..c98c76fe 100644 --- a/delphin/codecs/dmrspenman.py +++ b/delphin/codecs/dmrspenman.py @@ -2,87 +2,6 @@ """ DMRS-PENMAN serialization and deserialization. 
- -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -:: - - (e9 / _quit_v_1 - :lnk "<45:49>" - :cvarsort e - :sf prop - :tense past - :mood indicative - :prog - - :perf - - :ARG1-NEQ (x3 / _chef_n_1 - :lnk "<8:12>" - :cvarsort x - :pers 3 - :num sg - :ind + - :RSTR-H-of (q1 / _the_q - :lnk "<0:3>") - :ARG1-EQ-of (e2 / _new_a_1 - :lnk "<4:7>" - :cvarsort e - :sf prop - :tense untensed - :mood indicative - :prog bool - :perf -) - :ARG2-NEQ-of (e5 / poss - :lnk "<13:18>" - :cvarsort e - :sf prop - :tense untensed - :mood indicative - :prog - - :perf - - :ARG1-EQ (x6 / _soup_n_1 - :lnk "<19:23>" - :cvarsort x - :pers 3 - :num sg - :RSTR-H-of (q4 / def_explicit_q - :lnk "<13:18>"))) - :MOD-EQ-of (e8 / _spill_v_1 - :lnk "<37:44>" - :cvarsort e - :sf prop - :tense past - :mood indicative - :prog - - :perf - - :ARG1-NEQ x6 - :ARG1-EQ-of (e7 / _accidental_a_1 - :lnk "<24:36>" - :cvarsort e - :sf prop - :tense untensed - :mood indicative - :prog - - :perf -))) - :ARG1-EQ-of (e10 / _and_c - :lnk "<50:53>" - :cvarsort e - :sf prop - :tense past - :mood indicative - :prog - - :perf - - :ARG2-EQ (e11 / _leave_v_1 - :lnk "<54:59>" - :cvarsort e - :sf prop - :tense past - :mood indicative - :prog - - :perf - - :ARG1-NEQ x3 - :MOD-EQ e9))) """ from pathlib import Path diff --git a/delphin/codecs/dmrx.py b/delphin/codecs/dmrx.py index b484d718..27139739 100644 --- a/delphin/codecs/dmrx.py +++ b/delphin/codecs/dmrx.py @@ -2,111 +2,6 @@ """ DMRX (XML for DMRS) serialization and deserialization. 
- -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -:: - - - - - - - - - - - - - - - - def_explicit_q - - - - poss - - - - - - - - - - - - - - - - - - - - - - - - - - - - RSTR - H - - - ARG1 - EQ - - - RSTR - H - - - ARG1 - EQ - - - ARG2 - NEQ - - - ARG1 - EQ - - - ARG1 - NEQ - - - ARG1 - NEQ - - - ARG1 - EQ - - - ARG2 - EQ - - - ARG1 - NEQ - - - MOD - EQ - - - MOD - EQ - - """ from pathlib import Path diff --git a/delphin/codecs/eds.py b/delphin/codecs/eds.py index a034479e..576bd677 100644 --- a/delphin/codecs/eds.py +++ b/delphin/codecs/eds.py @@ -2,26 +2,6 @@ """ Serialization functions for the "native" EDS format. - -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -:: - - {e18: - _1:_the_q<0:3>[BV x3] - e8:_new_a_1<4:7>{e SF prop, TENSE untensed, MOOD indicative, PROG bool, PERF -}[ARG1 x3] - x3:_chef_n_1<8:12>{x PERS 3, NUM sg, IND +}[] - _2:def_explicit_q<13:18>[BV x10] - e14:poss<13:18>{e SF prop, TENSE untensed, MOOD indicative, PROG -, PERF -}[ARG1 x10, ARG2 x3] - x10:_soup_n_1<19:23>{x PERS 3, NUM sg}[] - e15:_accidental_a_1<24:36>{e SF prop, TENSE untensed, MOOD indicative, PROG -, PERF -}[ARG1 e16] - e16:_spill_v_1<37:44>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x10] - e18:_quit_v_1<45:49>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x3] - e2:_and_c<50:53>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 e18, ARG2 e20] - e20:_leave_v_1<54:59>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x3] - } """ from pathlib import Path diff --git a/delphin/codecs/edsjson.py b/delphin/codecs/edsjson.py index 0cb1228d..9e47a367 100644 --- a/delphin/codecs/edsjson.py +++ b/delphin/codecs/edsjson.py @@ -2,92 +2,6 @@ """ EDS-JSON serialization and deserialization. 
- -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -:: - - { - "top": "e18", - "nodes": { - "_1": { - "label": "_the_q", - "edges": {"BV": "x3"}, - "lnk": {"from": 0, "to": 3} - }, - "e8": { - "label": "_new_a_1", - "edges": {"ARG1": "x3"}, - "lnk": {"from": 4, "to": 7}, - "type": "e", - "properties": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "bool", "PERF": "-"} - }, - "x3": { - "label": "_chef_n_1", - "edges": {}, - "lnk": {"from": 8, "to": 12}, - "type": "x", - "properties": {"PERS": "3", "NUM": "sg", "IND": "+"} - }, - "_2": { - "label": "def_explicit_q", - "edges": {"BV": "x10"}, - "lnk": {"from": 13, "to": 18} - }, - "e14": { - "label": "poss", - "edges": {"ARG1": "x10", "ARG2": "x3"}, - "lnk": {"from": 13, "to": 18}, - "type": "e", - "properties": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-"} - }, - "x10": { - "label": "_soup_n_1", - "edges": {}, - "lnk": {"from": 19, "to": 23}, - "type": "x", - "properties": {"PERS": "3", "NUM": "sg"} - }, - "e15": { - "label": "_accidental_a_1", - "edges": {"ARG1": "e16"}, - "lnk": {"from": 24, "to": 36}, - "type": "e", - "properties": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-"} - }, - "e16": { - "label": "_spill_v_1", - "edges": {"ARG1": "x10"}, - "lnk": {"from": 37, "to": 44}, - "type": "e", - "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"} - }, - "e18": { - "label": "_quit_v_1", - "edges": {"ARG1": "x3"}, - "lnk": {"from": 45, "to": 49}, - "type": "e", - "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"} - }, - "e2": { - "label": "_and_c", - "edges": {"ARG1": "e18", "ARG2": "e20"}, - "lnk": {"from": 50, "to": 53}, - "type": "e", - "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"} - }, - "e20": { - "label": "_leave_v_1", - "edges": {"ARG1": "x3"}, - "lnk": 
{"from": 54, "to": 59}, - "type": "e", - "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"} - } - } - } - """ from pathlib import Path diff --git a/delphin/codecs/edspenman.py b/delphin/codecs/edspenman.py index 314b4f28..fdb9774d 100644 --- a/delphin/codecs/edspenman.py +++ b/delphin/codecs/edspenman.py @@ -2,85 +2,6 @@ """ EDS-PENMAN serialization and deserialization. - -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -:: - - (e18 / _quit_v_1 - :lnk "<45:49>" - :type e - :sf prop - :tense past - :mood indicative - :prog - - :perf - - :ARG1 (x3 / _chef_n_1 - :lnk "<8:12>" - :type x - :pers 3 - :num sg - :ind + - :BV-of (_1 / _the_q - :lnk "<0:3>") - :ARG1-of (e8 / _new_a_1 - :lnk "<4:7>" - :type e - :sf prop - :tense untensed - :mood indicative - :prog bool - :perf -) - :ARG2-of (e14 / poss - :lnk "<13:18>" - :type e - :sf prop - :tense untensed - :mood indicative - :prog - - :perf - - :ARG1 (x10 / _soup_n_1 - :lnk "<19:23>" - :type x - :pers 3 - :num sg - :BV-of (_2 / def_explicit_q - :lnk "<13:18>") - :ARG1-of (e16 / _spill_v_1 - :lnk "<37:44>" - :type e - :sf prop - :tense past - :mood indicative - :prog - - :perf - - :ARG1-of (e15 / _accidental_a_1 - :lnk "<24:36>" - :type e - :sf prop - :tense untensed - :mood indicative - :prog - - :perf -))))) - :ARG1-of (e2 / _and_c - :lnk "<50:53>" - :type e - :sf prop - :tense past - :mood indicative - :prog - - :perf - - :ARG2 (e20 / _leave_v_1 - :lnk "<54:59>" - :type e - :sf prop - :tense past - :mood indicative - :prog - - :perf - - :ARG1 x3))) """ from pathlib import Path diff --git a/delphin/codecs/indexedmrs.py b/delphin/codecs/indexedmrs.py index 674dad35..c6b9c7ef 100644 --- a/delphin/codecs/indexedmrs.py +++ b/delphin/codecs/indexedmrs.py @@ -1,34 +1,7 @@ """ Serialization for the Indexed MRS format. 
- -The Indexed MRS format does not include role names such as `ARG1`, -`ARG2`, etc., so the order of the arguments in a predication is -important. For this reason, serialization with the Indexed MRS format -requires the use of a SEM-I (see the :mod:`delphin.semi` module). - -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -:: - - < h0, e2:PROP:PAST:INDICATIVE:-:-, - { h4:_the_q<0:3>(x3:3:SG:GENDER:+:PT, h5, h6), - h7:_new_a_1<4:7>(e8:PROP:UNTENSED:INDICATIVE:BOOL:-, x3), - h7:_chef_n_1<8:12>(x3), - h9:def_explicit_q<13:18>(x10:3:SG:GENDER:BOOL:PT, h11, h12), - h13:poss<13:18>(e14:PROP:UNTENSED:INDICATIVE:-:-, x10, x3), - h13:_soup_n_1<19:23>(x10), - h7:_accidental_a_1<24:36>(e15:PROP:UNTENSED:INDICATIVE:-:-, e16:PROP:PAST:INDICATIVE:-:-), - h7:_spill_v_1<37:44>(e16, x10, i17), - h1:_quit_v_1<45:49>(e18:PROP:PAST:INDICATIVE:-:-, x3, i19), - h1:_and_c<50:53>(e2, e18, e20:PROP:PAST:INDICATIVE:-:-), - h1:_leave_v_1<54:59>(e20, x3, i21) }, - { h0 qeq h1, - h5 qeq h7, - h11 qeq h13 } > -""" # noqa: E501 +""" from pathlib import Path diff --git a/delphin/codecs/mrsjson.py b/delphin/codecs/mrsjson.py index e4c994ce..e758f668 100644 --- a/delphin/codecs/mrsjson.py +++ b/delphin/codecs/mrsjson.py @@ -2,114 +2,6 @@ """ MRS-JSON serialization and deserialization. 
- -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -:: - - { - "top": "h0", - "index": "e2", - "relations": [ - { - "label": "h4", - "predicate": "_the_q", - "lnk": {"from": 0, "to": 3}, - "arguments": {"BODY": "h6", "RSTR": "h5", "ARG0": "x3"} - }, - { - "label": "h7", - "predicate": "_new_a_1", - "lnk": {"from": 4, "to": 7}, - "arguments": {"ARG1": "x3", "ARG0": "e8"} - }, - { - "label": "h7", - "predicate": "_chef_n_1", - "lnk": {"from": 8, "to": 12}, - "arguments": {"ARG0": "x3"} - }, - { - "label": "h9", - "predicate": "def_explicit_q", - "lnk": {"from": 13, "to": 18}, - "arguments": {"BODY": "h12", "RSTR": "h11", "ARG0": "x10"} - }, - { - "label": "h13", - "predicate": "poss", - "lnk": {"from": 13, "to": 18}, - "arguments": {"ARG1": "x10", "ARG2": "x3", "ARG0": "e14"} - }, - { - "label": "h13", - "predicate": "_soup_n_1", - "lnk": {"from": 19, "to": 23}, - "arguments": {"ARG0": "x10"} - }, - { - "label": "h7", - "predicate": "_accidental_a_1", - "lnk": {"from": 24, "to": 36}, - "arguments": {"ARG1": "e16", "ARG0": "e15"} - }, - { - "label": "h7", - "predicate": "_spill_v_1", - "lnk": {"from": 37, "to": 44}, - "arguments": {"ARG1": "x10", "ARG2": "i17", "ARG0": "e16"} - }, - { - "label": "h1", - "predicate": "_quit_v_1", - "lnk": {"from": 45, "to": 49}, - "arguments": {"ARG1": "x3", "ARG2": "i19", "ARG0": "e18"} - }, - { - "label": "h1", - "predicate": "_and_c", - "lnk": {"from": 50, "to": 53}, - "arguments": {"ARG1": "e18", "ARG2": "e20", "ARG0": "e2"} - }, - { - "label": "h1", - "predicate": "_leave_v_1", - "lnk": {"from": 54, "to": 59}, - "arguments": {"ARG1": "x3", "ARG2": "i21", "ARG0": "e20"} - } - ], - "constraints": [ - {"low": "h1", "high": "h0", "relation": "qeq"}, - {"low": "h7", "high": "h5", "relation": "qeq"}, - {"low": "h13", "high": "h11", "relation": "qeq"} - ], - "variables": { - "h0": {"type": "h"}, - "h1": {"type": "h"}, - "e2": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "-", "SF": "prop", 
"PERF": "-", "TENSE": "past"}}, - "x3": {"type": "x", "properties": {"NUM": "sg", "PERS": "3", "IND": "+"}}, - "h4": {"type": "h"}, - "h6": {"type": "h"}, - "h5": {"type": "h"}, - "h7": {"type": "h"}, - "e8": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "bool", "SF": "prop", "PERF": "-", "TENSE": "untensed"}}, - "h9": {"type": "h"}, - "x10": {"type": "x", "properties": {"NUM": "sg", "PERS": "3"}}, - "h11": {"type": "h"}, - "h12": {"type": "h"}, - "h13": {"type": "h"}, - "e14": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "-", "SF": "prop", "PERF": "-", "TENSE": "untensed"}}, - "e15": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "-", "SF": "prop", "PERF": "-", "TENSE": "untensed"}}, - "e16": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "-", "SF": "prop", "PERF": "-", "TENSE": "past"}}, - "i17": {"type": "i"}, - "e18": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "-", "SF": "prop", "PERF": "-", "TENSE": "past"}}, - "i19": {"type": "i"}, - "e20": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "-", "SF": "prop", "PERF": "-", "TENSE": "past"}}, - "i21": {"type": "i"} - } - } """ from pathlib import Path diff --git a/delphin/codecs/mrsprolog.py b/delphin/codecs/mrsprolog.py index be500e4e..a4ae4ae8 100644 --- a/delphin/codecs/mrsprolog.py +++ b/delphin/codecs/mrsprolog.py @@ -2,53 +2,6 @@ """ Serialization functions for the MRS-Prolog format. 
- -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -:: - - psoa(h0,e2, - [rel('_the_q',h4, - [attrval('ARG0',x3), - attrval('RSTR',h5), - attrval('BODY',h6)]), - rel('_new_a_1',h7, - [attrval('ARG0',e8), - attrval('ARG1',x3)]), - rel('_chef_n_1',h7, - [attrval('ARG0',x3)]), - rel('def_explicit_q',h9, - [attrval('ARG0',x10), - attrval('RSTR',h11), - attrval('BODY',h12)]), - rel('poss',h13, - [attrval('ARG0',e14), - attrval('ARG1',x10), - attrval('ARG2',x3)]), - rel('_soup_n_1',h13, - [attrval('ARG0',x10)]), - rel('_accidental_a_1',h7, - [attrval('ARG0',e15), - attrval('ARG1',e16)]), - rel('_spill_v_1',h7, - [attrval('ARG0',e16), - attrval('ARG1',x10), - attrval('ARG2',i17)]), - rel('_quit_v_1',h1, - [attrval('ARG0',e18), - attrval('ARG1',x3), - attrval('ARG2',i19)]), - rel('_and_c',h1, - [attrval('ARG0',e2), - attrval('ARG1',e18), - attrval('ARG2',e20)]), - rel('_leave_v_1',h1, - [attrval('ARG0',e20), - attrval('ARG1',x3), - attrval('ARG2',i21)])], - hcons([qeq(h0,h1),qeq(h5,h7),qeq(h11,h13)])) """ from pathlib import Path diff --git a/delphin/codecs/mrx.py b/delphin/codecs/mrx.py index fdf4f3c0..ba4c3fe6 100644 --- a/delphin/codecs/mrx.py +++ b/delphin/codecs/mrx.py @@ -2,96 +2,6 @@ """ MRX (XML for MRS) serialization and deserialization. - -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -:: - - """ import io diff --git a/delphin/codecs/simpledmrs.py b/delphin/codecs/simpledmrs.py index 21ae039a..3202498c 100644 --- a/delphin/codecs/simpledmrs.py +++ b/delphin/codecs/simpledmrs.py @@ -2,40 +2,6 @@ """ Serialization for the SimpleDMRS format. - -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -:: - - dmrs { - ["The new chef whose soup accidentally spilled quit and left." 
top=10008 index=10009] - 10000 [_the_q<0:3>]; - 10001 [_new_a_1<4:7> e SF=prop TENSE=untensed MOOD=indicative PROG=bool PERF=-]; - 10002 [_chef_n_1<8:12> x PERS=3 NUM=sg IND=+]; - 10003 [def_explicit_q<13:18>]; - 10004 [poss<13:18> e SF=prop TENSE=untensed MOOD=indicative PROG=- PERF=-]; - 10005 [_soup_n_1<19:23> x PERS=3 NUM=sg]; - 10006 [_accidental_a_1<24:36> e SF=prop TENSE=untensed MOOD=indicative PROG=- PERF=-]; - 10007 [_spill_v_1<37:44> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-]; - 10008 [_quit_v_1<45:49> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-]; - 10009 [_and_c<50:53> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-]; - 10010 [_leave_v_1<54:59> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-]; - 10000:RSTR/H -> 10002; - 10001:ARG1/EQ -> 10002; - 10003:RSTR/H -> 10005; - 10004:ARG1/EQ -> 10005; - 10004:ARG2/NEQ -> 10002; - 10006:ARG1/EQ -> 10007; - 10007:ARG1/NEQ -> 10005; - 10008:ARG1/NEQ -> 10002; - 10009:ARG1/EQ -> 10008; - 10009:ARG2/EQ -> 10010; - 10010:ARG1/NEQ -> 10002; - 10007:MOD/EQ -> 10002; - 10010:MOD/EQ -> 10008; - } """ from pathlib import Path diff --git a/delphin/codecs/simplemrs.py b/delphin/codecs/simplemrs.py index e9f345f2..907e19ad 100644 --- a/delphin/codecs/simplemrs.py +++ b/delphin/codecs/simplemrs.py @@ -2,31 +2,7 @@ """ Serialization functions for the SimpleMRS format. - -SimpleMRS is a format for Minimal Recursion Semantics that aims to be -readable equally by humans and machines. - -Example: - -* *The new chef whose soup accidentally spilled quit and left.* - -.. 
code:: simplemrs - - [ TOP: h0 - INDEX: e2 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] - RELS: < [ _the_q<0:3> LBL: h4 ARG0: x3 [ x PERS: 3 NUM: sg IND: + ] RSTR: h5 BODY: h6 ] - [ _new_a_1<4:7> LBL: h7 ARG0: e8 [ e SF: prop TENSE: untensed MOOD: indicative PROG: bool PERF: - ] ARG1: x3 ] - [ _chef_n_1<8:12> LBL: h7 ARG0: x3 ] - [ def_explicit_q<13:18> LBL: h9 ARG0: x10 [ x PERS: 3 NUM: sg ] RSTR: h11 BODY: h12 ] - [ poss<13:18> LBL: h13 ARG0: e14 [ e SF: prop TENSE: untensed MOOD: indicative PROG: - PERF: - ] ARG1: x10 ARG2: x3 ] - [ _soup_n_1<19:23> LBL: h13 ARG0: x10 ] - [ _accidental_a_1<24:36> LBL: h7 ARG0: e15 [ e SF: prop TENSE: untensed MOOD: indicative PROG: - PERF: - ] ARG1: e16 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] ] - [ _spill_v_1<37:44> LBL: h7 ARG0: e16 ARG1: x10 ARG2: i17 ] - [ _quit_v_1<45:49> LBL: h1 ARG0: e18 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] ARG1: x3 ARG2: i19 ] - [ _and_c<50:53> LBL: h1 ARG0: e2 ARG1: e18 ARG2: e20 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] ] - [ _leave_v_1<54:59> LBL: h1 ARG0: e20 ARG1: x3 ARG2: i21 ] > - HCONS: < h0 qeq h1 h5 qeq h7 h11 qeq h13 > ] -""" # noqa: E501 +""" from pathlib import Path diff --git a/delphin/commands.py b/delphin/commands.py index 875e7436..8690c643 100644 --- a/delphin/commands.py +++ b/delphin/commands.py @@ -1,11 +1,6 @@ """ PyDelphin API counterparts to the `delphin` commands. - -The public functions in this module largely mirror the front-end -subcommands provided by the `delphin` command, with some small changes -to argument names or values to be better-suited to being called from -within Python. """ import sys diff --git a/delphin/derivation.py b/delphin/derivation.py index 537ab9eb..84d7be94 100644 --- a/delphin/derivation.py +++ b/delphin/derivation.py @@ -2,85 +2,6 @@ """ Classes and functions related to derivation trees. - -Derivation trees represent a unique analysis of an input using an -implemented grammar. 
They are a kind of syntax tree, but as they use -the actual grammar entities (e.g., rules or lexical entries) as node -labels, they are more specific than trees using general category labels -(e.g., "N" or "VP"). As such, they are more likely to change across -grammar versions. - -.. seealso:: - More information about derivation trees is found at - http://moin.delph-in.net/ItsdbDerivations - -For the following Japanese example... - -:: - - 遠く に 銃声 が 聞こえ た 。 - tooku ni juusei ga kikoe-ta - distant LOC gunshot NOM can.hear-PFV - "Shots were heard in the distance." - -... here is the derivation tree of a parse from -`Jacy `_ in the Unified Derivation -Format (UDF):: - - (utterance-root - (564 utterance_rule-decl-finite 1.02132 0 6 - (563 hf-adj-i-rule 1.04014 0 6 - (557 hf-complement-rule -0.27164 0 2 - (556 quantify-n-rule 0.311511 0 1 - (23 tooku_1 0.152496 0 1 - ("遠く" 0 1))) - (42 ni-narg 0.478407 1 2 - ("に" 1 2))) - (562 head_subj_rule 1.512 2 6 - (559 hf-complement-rule -0.378462 2 4 - (558 quantify-n-rule 0.159015 2 3 - (55 juusei_1 0 2 3 - ("銃声" 2 3))) - (56 ga 0.462257 3 4 - ("が" 3 4))) - (561 vstem-vend-rule 1.34202 4 6 - (560 i-lexeme-v-stem-infl-rule 0.365568 4 5 - (65 kikoeru-stem 0 4 5 - ("聞こえ" 4 5))) - (81 ta-end 0.0227589 5 6 - ("た" 5 6))))))) - -In addition to the UDF format, there is also the UDF export format -"UDX", which adds lexical type information and indicates which daughter -node is the head, and a dictionary representation, which is useful for -JSON serialization. All three are supported by PyDelphin. 
- -Derivation trees have 3 types of nodes: - - * root nodes, with only an entity name and a single child - - * normal nodes, with 5 fields (below) and a list of children - - - *id* -- an integer id given by the producer of the derivation - - *entity* -- rule or type name - - *score* -- a (MaxEnt) score for the current node's subtree - - *start* -- the character index of the left-most side of the tree - - *end* -- the character index of the right-most side of the tree - - * terminal/left/lexical nodes, which contain the input tokens - processed by that subtree - -This module uses the :class:`UDFNode` class for capturing root and -normal nodes. Root nodes are expressed as a :class:`UDFNode` whose -`id` is `None`. For root nodes, all fields except `entity` and -the list of daughters are expected to be `None`. Leaf nodes are -simply an iterable of token information. - -The :class:`Derivation` class---itself a :class:`UDFNode`---, has some -tree-level operations defined, in particular the -:meth:`Derivation.from_string` method, which is used to read the -serialized derivation into a Python object. - """ import re @@ -410,10 +331,11 @@ class Derivation(UDFNode): """ A [incr tsdb()] derivation. - This class exists to facilitate the reading of UDF string - serializations and dictionary representations (e.g., decoded from - JSON). The resulting structure is otherwise equivalent to a - :class:`UDFNode`, and inherits all its methods. + A Derivation object is simply a :class:`UDFNode` but as it is + intended to represent an entire derivation tree it performs + additional checks on instantiation if the top node is a root node, + namely that the top node only has the *entity* attribute set, and + that it has only one node on its *daughters* list. 
""" def __init__(self, id, entity, diff --git a/delphin/exceptions.py b/delphin/exceptions.py index e82e6cd0..cab12935 100644 --- a/delphin/exceptions.py +++ b/delphin/exceptions.py @@ -1,4 +1,8 @@ +""" +Basic exception and warning classes for PyDelphin. +""" + # Default modules need to import the PyDelphin version from delphin.__about__ import __version__ # noqa: F401 diff --git a/delphin/hierarchy.py b/delphin/hierarchy.py index 9f312375..63c5e101 100644 --- a/delphin/hierarchy.py +++ b/delphin/hierarchy.py @@ -1,21 +1,6 @@ """ Basic support for hierarchies. - -This module defines the :class:`MultiHierarchy` class for -multiply-inheriting hierarchies. This class manages the insertion of -new nodes into the hierarchy via the class constructor or the -:meth:`MultiHierarchy.update` method, normalizing node identifiers (if -a suitable normalization function is provided at instantiation), and -inserting nodes in the appropriate order. It checks for some kinds of -ill-formed hierarchies, such as cycles and redundant parentage and -provides methods for testing for node compatibility and -subsumption. For convenience, arbitrary data may be associated with -node identifiers. - -While the class may be used directly, it is mainly used to support the -:class:`~delphin.tfs.TypeHierarchy` class and the predicate, property, -and variable hierarchies of :class:`~delphin.semi.SemI` instances. """ from delphin.exceptions import PyDelphinException diff --git a/delphin/interface.py b/delphin/interface.py index 2ae7e57f..2b5fe8a6 100644 --- a/delphin/interface.py +++ b/delphin/interface.py @@ -2,17 +2,6 @@ """ Interfaces for external data providers. - -This module manages the communication between data providers, namely -processors like `ACE `_ or -remote services like the `DELPH-IN Web API -`_, and user code or storage -backends, namely [incr tsdb()] :doc:`test suites `. An -interface sends requests to a provider, then receives and interprets -the response. 
- -The interface may also detect and deserialize supported DELPH-IN -formats if the appropriate modules are available. """ from collections import Sequence diff --git a/delphin/itsdb.py b/delphin/itsdb.py index 80097d2d..f4d09c20 100644 --- a/delphin/itsdb.py +++ b/delphin/itsdb.py @@ -2,20 +2,6 @@ """ [incr tsdb()] Test Suites - -.. note:: - - This module implements high-level structures and operations on top - of TSDB test suites. For the basic, low-level functionality, see - :mod:`delphin.tsdb`. For complex queries of the databases, see - :mod:`delphin.tsql`. - -[incr tsdb()] is a tool built on top of TSDB databases for the purpose -of profiling and comparing grammar versions using test suites. This -module is named after that tool as it also builds higher-level -operations on top of TSDB test suites but it has a much narrower -scope. The aim of this module is to assist users with creating, -processing, or manipulating test suites. """ from typing import ( diff --git a/delphin/lnk.py b/delphin/lnk.py index dafc909a..54e5ca5e 100644 --- a/delphin/lnk.py +++ b/delphin/lnk.py @@ -1,60 +1,6 @@ """ Surface alignment for semantic entities. - -In DELPH-IN semantic representations, entities are aligned to the -input surface string is through the so-called "lnk" (pronounced -"link") values. There are four types of lnk values which align to the -surface in different ways: - -* Character spans (also called "characterization pointers"); e.g., - `<0:4>` - -* Token indices; e.g., `<0 1 3>` - -* Chart vertex spans; e.g., `<0#2>` - -* Edge identifier; e.g., `<@42>` - -The latter two are unlikely to be encountered by users. Chart vertices -were used by the `PET`_ parser but are now essentially deprecated and -edge identifiers are only used internally in the `LKB`_ for -generation. I will therefore focus on the first two kinds. - -.. _`PET`: http://moin.delph-in.net/PetTop -.. 
_`LKB`: http://moin.delph-in.net/LkbTop - -Character spans (sometimes called "characterization pointers") are by -far the most commonly used type---possibly even the only type most -users will encounter. These spans indicate the positions *between* -characters in the input string that correspond to a semantic entity, -similar to how Python and Perl do string indexing. For example, -`<0:4>` would capture the first through fourth characters---a span -that would correspond to the first word in a sentence like "Dogs -bark". These spans assume the input is a flat, or linear, string and -can only select contiguous chunks. Character spans are used by REPP -(the Regular Expression PreProcessor; see :mod:`delphin.repp`) to -track the surface alignment prior to string changes introduced by -tokenization. - -Token indices select input tokens rather than characters. This method, -though not widely used, is more suitable for input sources that are -not flat strings (e.g., a lattice of automatic speech recognition -(ASR) hypotheses), or where non-contiguous sequences are needed (e.g., -from input containing markup or other noise). - -.. note:: - - Much of this background is from comments in the `LKB`_ source code: - See: http://svn.emmtee.net/trunk/lingo/lkb/src/mrs/lnk.lisp - -Support for lnk values in PyDelphin is rather simple. The :class:`Lnk` -class is able to parse lnk strings and model the contents for -serialization of semantic representations. In addition, semantic -entities such as DMRS :class:`Nodes ` and MRS -:class:`EPs ` have `cfrom` and `cto` attributes which -are the start and end pointers for character spans (defaulting to `-1` -if a character span is not specified for the entity). """ from delphin.exceptions import PyDelphinException diff --git a/delphin/main.py b/delphin/main.py index 01199072..7e74007c 100644 --- a/delphin/main.py +++ b/delphin/main.py @@ -1,5 +1,9 @@ #!/usr/bin/env python3 +""" +Entry-point for the 'delphin' command. 
+""" + import sys import os import importlib diff --git a/delphin/predicate.py b/delphin/predicate.py index 4251a05d..26be412e 100644 --- a/delphin/predicate.py +++ b/delphin/predicate.py @@ -1,40 +1,6 @@ """ Semantic predicates. - -Semantic predicates are atomic symbols representing semantic entities -or constructions. For example, in the `English Resource Grammar -`_, `_mouse_n_1` is the predicate for -the word *mouse*, but it is underspecified for lexical semantics---it -could be an animal, a computer's pointing device, or something -else. Another example from the ERG is `compound`, which is used to -link two compounded nouns, such as for *mouse pad*. - -There are two main categories of predicates: **abstract** and -**surface**. In form, abstract predicates do not begin with an -underscore and in usage they often correspond to semantic -constructions that are not represented by a token in the input, such -as the `compound` example above. Surface predicates, in contrast, are -the semantic representation of surface (i.e., lexical) tokens, such as -the `_mouse_n_1` example above. In form, they must always begin with a -single underscore, and have two or three components: lemma, -part-of-speech, and (optionally) sense. - -.. seealso:: - - The DELPH-IN wiki about predicates: - http://moin.delph-in.net/PredicateRfc - -In DELPH-IN there is the concept of "real predicates" which are -surface predicates decomposed into their lemma, part-of-speech, and -sense, but in PyDelphin (as of `v1.0.0`_) predicates are always simple -strings. However, this module has functions for composing and -decomposing predicates from/to their components (the :func:`create` -and :func:`split` functions, respectively). In addition, there are -functions to normalize (:func:`normalize`) and validate -(:func:`is_valid`, :func:`is_surface`, :func:`is_abstract`) predicate -symbols. - -.. 
_v1.0.0: https://github.com/delph-in/pydelphin/releases/tag/v1.0.0 """ import re diff --git a/delphin/repp.py b/delphin/repp.py index 9b2c26f7..2377bd5a 100644 --- a/delphin/repp.py +++ b/delphin/repp.py @@ -2,19 +2,6 @@ """ Regular Expression Preprocessor (REPP) - -A Regular-Expression Preprocessor [REPP]_ is a method of applying a -system of regular expressions for transformation and tokenization while -retaining character indices from the original input string. - -.. [REPP] Rebecca Dridan and Stephan Oepen. Tokenization: Returning to - a long solved problem---a survey, contrastive experiment, - recommendations, and toolkit. In Proceedings of the 50th Annual - Meeting of the Association for Computational Linguistics (Volume 2: - Short Papers), pages 378–382, Jeju Island, Korea, July 2012. - Association for Computational Linguistics. - URL http://www.aclweb.org/anthology/P12-2074. - """ import re diff --git a/delphin/sembase.py b/delphin/sembase.py index f9a3e582..fb73e239 100644 --- a/delphin/sembase.py +++ b/delphin/sembase.py @@ -1,4 +1,8 @@ +""" +Basic classes and functions for semantic representations. +""" + from typing import Mapping, Tuple, List, Dict, Union, Iterable from delphin.lnk import Lnk, LnkMixin diff --git a/delphin/semi.py b/delphin/semi.py index 7598c0b6..ad2b7ffb 100644 --- a/delphin/semi.py +++ b/delphin/semi.py @@ -1,19 +1,6 @@ """ Semantic Interface (SEM-I) - -Semantic interfaces (SEM-Is) describe the inventory of semantic -components in a grammar, including variables, properties, roles, and -predicates. This information can be used for validating semantic -structures or for filling out missing information in incomplete -representations. - -.. 
seealso:: - The following DELPH-IN wikis contain more information: - - - Technical specifications: http://moin.delph-in.net/SemiRfc - - Overview and usage: http://moin.delph-in.net/RmrsSemi - """ import re diff --git a/delphin/tfs.py b/delphin/tfs.py index dd337b2b..be2c861a 100644 --- a/delphin/tfs.py +++ b/delphin/tfs.py @@ -1,16 +1,6 @@ """ Basic classes for modeling feature structures. - -This module defines the :class:`FeatureStructure` and -:class:`TypedFeatureStructure` classes, which model an attribute value -matrix (AVM), with the latter including an associated type. They allow -feature access through TDL-style dot notation regular dictionary keys. - -In addition, the :class:`TypeHierarchy` class implements a -multiple-inheritance hierarchy with checks for type subsumption and -compatibility. - """ from delphin.hierarchy import MultiHierarchy diff --git a/delphin/tokens.py b/delphin/tokens.py index db475ec6..cce899c5 100644 --- a/delphin/tokens.py +++ b/delphin/tokens.py @@ -1,4 +1,8 @@ +""" +YY tokens and token lattices. +""" + import re from collections import namedtuple diff --git a/delphin/tsdb.py b/delphin/tsdb.py index d1dfb17d..779a8f89 100644 --- a/delphin/tsdb.py +++ b/delphin/tsdb.py @@ -2,45 +2,6 @@ """ Test Suite Database (TSDB) Primitives - -.. note:: - - This module implements the basic, low-level functionality for - working with TSDB databases. For higher-level views and uses of - these databases, see :mod:`delphin.itsdb`. For complex queries of - the databases, see :mod:`delphin.tsql`. - -TSDB databases are plain-text file-based relational databases -minimally consisting of a directory with a file, called `relations`, -containing the database's schema (see `Schemas`_). Every relation, or -table, in the database has its own file, which may be `gzipped -`_ to save space. The relations -have a simple format with columns delimited by ``@`` and records -delimited by newlines. 
This makes them easy to inspect at the command -line with standard Unix tools such as ``cut`` and ``awk`` (but gzipped -relations need to be decompressed or piped from a tool such as -``zcat``). - -This module handles the technical details of reading and writing TSDB -databases, including: - -- parsing database schemas - -- transparently opening either the plain-text or gzipped relations on - disk, as appropriate - -- escaping and unescaping reserved characters in the data - -- pairing columns with their schema descriptions - -- casting types (such as ``:integer``, ``:date``, etc.) - -Additionally, this module provides very basic abstractions of -databases and relations as the :class:`Database` and :class:`Relation` -classes, respectively. These serve as base classes for the more -featureful :class:`delphin.itsdb.TestSuite` and -:class:`delphin.itsdb.Table` classes, but may be useful as they are -for simple needs. """ from typing import ( diff --git a/delphin/tsql.py b/delphin/tsql.py index badcef5c..cec5a8fd 100644 --- a/delphin/tsql.py +++ b/delphin/tsql.py @@ -1,99 +1,6 @@ """ TSQL -- Test Suite Query Language - -.. note:: - - This module deals with queries of TSDB databases. For basic, - low-level access to the databases, see :mod:`delphin.tsdb`. For - high-level operations and structures on top of the databases, see - :mod:`delphin.itsdb`. - -This module implements a subset of TSQL, namely the 'select' (or -'retrieve') queries for extracting data from test suites. The general -form of a select query is:: - - [select] [from ] [where ]* - -For example, the following selects item identifiers that took more -than half a second to parse:: - - select i-id from item where total > 500 - -The `select` string is necessary when querying with the generic -:func:`query` function, but is implied and thus disallowed when using -the :func:`select` function. 
- -The `` is a list of space-separated field names (e.g., -`i-id i-input mrs`), or the special string `*` which selects all -columns from the joined relations. - -The optional `from` clause provides a list of relation names (e.g., -`item parse result`) that are joined on shared keys. The `from` clause -is required when `*` is used for the projection, but it can also be -used to select columns from non-standard relations (e.g., `i-id from -output`). Alternatively, `delphin.itsdb`-style data specifiers (see -:func:`delphin.itsdb.get_data_specifier`) may be used to specify the -relation on the column name (e.g., `item.i-id`). - -The `where` clause provide conditions for filtering the list of -results. Conditions are binary operations that take a column or data -specifier on the left side and an integer (e.g., `10`), a date (e.g., -`2018-10-07`), or a string (e.g., `"sleep"`) on the right side of the -operator. The allowed conditions are: - - ================ ====================================== - Condition Form - ================ ====================================== - Regex match `` ~ "regex"`` - Regex fail `` !~ "regex"`` - Equality `` = (integer|date|"string")`` - Inequality `` != (integer|date|"string")`` - Less-than `` < (integer|date)`` - Less-or-equal `` <= (integer|date)`` - Greater-than `` > (integer|date)`` - Greater-or-equal `` >= (integer|date)`` - ================ ====================================== - -Boolean operators can be used to join multiple conditions or for -negation: - - =========== ===================================== - Operation Form - =========== ===================================== - Disjunction ``X | Y``, ``X || Y``, or ``X or Y`` - Conjunction ``X & Y``, ``X && Y``, or ``X and Y`` - Negation ``!X`` or ``not X`` - =========== ===================================== - -Normally, disjunction scopes over conjunction, but parentheses may be -used to group clauses, so the following are equivalent:: - - ... 
where i-id = 10 or i-id = 20 and i-input ~ "[Dd]og" - ... where i-id = 10 or (i-id = 20 and i-input ~ "[Dd]og") - -Multiple `where` clauses may also be used as a conjunction that scopes -over disjunction, so the following are equivalent:: - - ... where (i-id = 10 or i-id = 20) and i-input ~ "[Dd]og" - ... where i-id = 10 or i-id = 20 where i-input ~ "[Dd]og" - -This facilitates query construction, where a user may want to apply -additional global constraints by appending new conditions to the query -string. - -PyDelphin has several differences to standard TSQL: - -* `select *` requires a `from` clause -* `select * from item result` does not also include columns from the - intervening `parse` relation -* `select i-input from result` returns a matching `i-input` for every - row in `result`, rather than only the unique rows - -PyDelphin also adds some features to standard TSQL: - -* qualified column names (e.g., `item.i-id`) -* multiple `where` clauses (as described above) """ from typing import ( diff --git a/delphin/util.py b/delphin/util.py index be58c351..28602655 100644 --- a/delphin/util.py +++ b/delphin/util.py @@ -1,4 +1,7 @@ +""" +Utility functions. +""" from typing import Union from pathlib import Path diff --git a/delphin/variable.py b/delphin/variable.py index 1fee92cb..b7312862 100644 --- a/delphin/variable.py +++ b/delphin/variable.py @@ -1,77 +1,7 @@ # -*- coding: utf-8 -*- -r""" +""" Functions for working with MRS variables. - -This module contains functions to inspect the type and identifier of -variables (:func:`split`, :func:`type`, :func:`id`) and check if a -variable string is well-formed (:func:`is_valid`). It additionally has -constants for the standard variable types: :data:`UNSPECIFIC`, -:data:`INDIVIDUAL`, :data:`INSTANCE_OR_HANDLE`, :data:`EVENTUALITY`, -:data:`INSTANCE`, and :data:`HANDLE`. Finally, the -:class:`VariableFactory` class may be useful for tasks like DMRS to -MRS conversion for managing the creation of new variables. 
- -Variables in MRS ----------------- - -Variables are a concept in Minimal Recursion Semantics coming from -formal semantics. Consider this logical form for a sentence like -"the dog barks":: - - ∃x(dog(x) ^ bark(x)) - -Here *x* is a variable that represents an entity that has the -properties that it is a dog and it is barking. Davidsonian semantics -introduce variables for events as well:: - - ∃e∃x(dog(x) ^ bark(e, x)) - -MRS uses variables in a similar way to Davidsonian semantics, except -that events are not explicitly quantified. That might look like the -following (if we ignore quantifier scope underspecification):: - - the(x4) [dog(x4)] {bark(e2, x4)} - -"Variables" are also used for scope handles and labels, as in this -minor modification that indicates the scope handles:: - - h3:the(x4) [h6:dog(x4)] {h1:bark(e2, x4)} - -There is some confusion of terminology here. Sometimes "variable" is -contrasted with "handle" to mean an instance (`x`) or eventuality -(`e`) variable, but in this module "variable" means the identifiers -used for instances, eventualities, handles, and their supertypes. - -The form of MRS variables is the concatenation of a variable *type* -(also called a *sort*) with a variable *id*. For example, the variable -type `e` and id `2` form the variable `e2`. Generally in MRS the -variable ids, regardless of the type, are unique, so for instance one -would not see `x2` and `e2` in the same structure. - -The variable types are arranged in a hierarchy. While the most -accurate variable type hierarchy for a particular grammar is obtained -via its SEM-I (see :mod:`delphin.semi`), in practice the standard -hierarchy given below is used by all DELPH-IN grammars. The hierarchy -in TDL would look like this (with an ASCII rendering in comments on -the right): - -.. code-block:: tdl - - u := *top*. ; u - i := u. ; / \ - p := u. ; i p - e := i. ; / \ / \ - x := i & p. ; e x h - h := p. 
- -In PyDelphin the equivalent hierarchy could be created as follows: - ->>> from delphin import hierarchy ->>> h = hierarchy.MultiHierarchy( -... '*top*', -... {'u': '*top*', 'i': 'u', 'p': 'u', 'e': 'i', 'x': 'i p', 'h': 'p'} -... ) """ import re diff --git a/delphin/vpm.py b/delphin/vpm.py index 4058ddfc..ff53f88a 100644 --- a/delphin/vpm.py +++ b/delphin/vpm.py @@ -1,15 +1,6 @@ """ Variable property mapping (VPM). - -Variable property mappings (VPMs) convert grammar-internal variables -(e.g. `event5`) to the grammar-external form (e.g. `e5`), and also map -variable properties (e.g. `PNG: 1pl` might map to `PERS: 1` and -`NUM: pl`). - -.. seealso:: - - Wiki about VPM: http://moin.delph-in.net/RmrsVpm - """ import re diff --git a/delphin/web/server.py b/delphin/web/server.py index 3ceac866..ca30ba1d 100644 --- a/delphin/web/server.py +++ b/delphin/web/server.py @@ -3,7 +3,6 @@ DELPH-IN Web API Server """ - import pathlib import urllib.parse import datetime diff --git a/docs/api/delphin.codecs.dmrsjson.rst b/docs/api/delphin.codecs.dmrsjson.rst index 8c6185f5..af12fe71 100644 --- a/docs/api/delphin.codecs.dmrsjson.rst +++ b/docs/api/delphin.codecs.dmrsjson.rst @@ -4,6 +4,99 @@ delphin.codecs.dmrsjson .. 
automodule:: delphin.codecs.dmrsjson + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + :: + + { + "top": 10008, + "index": 10009, + "nodes": [ + { + "nodeid": 10000, + "predicate": "_the_q", + "lnk": {"from": 0, "to": 3} + }, + { + "nodeid": 10001, + "predicate": "_new_a_1", + "sortinfo": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "bool", "PERF": "-", "cvarsort": "e"}, + "lnk": {"from": 4, "to": 7} + }, + { + "nodeid": 10002, + "predicate": "_chef_n_1", + "sortinfo": {"PERS": "3", "NUM": "sg", "IND": "+", "cvarsort": "x"}, + "lnk": {"from": 8, "to": 12} + }, + { + "nodeid": 10003, + "predicate": "def_explicit_q", + "lnk": {"from": 13, "to": 18} + }, + { + "nodeid": 10004, + "predicate": "poss", + "sortinfo": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"}, + "lnk": {"from": 13, "to": 18} + }, + { + "nodeid": 10005, + "predicate": "_soup_n_1", + "sortinfo": {"PERS": "3", "NUM": "sg", "cvarsort": "x"}, + "lnk": {"from": 19, "to": 23} + }, + { + "nodeid": 10006, + "predicate": "_accidental_a_1", + "sortinfo": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"}, + "lnk": {"from": 24, "to": 36} + }, + { + "nodeid": 10007, + "predicate": "_spill_v_1", + "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"}, + "lnk": {"from": 37, "to": 44} + }, + { + "nodeid": 10008, + "predicate": "_quit_v_1", + "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"}, + "lnk": {"from": 45, "to": 49} + }, + { + "nodeid": 10009, + "predicate": "_and_c", + "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-", "cvarsort": "e"}, + "lnk": {"from": 50, "to": 53} + }, + { + "nodeid": 10010, + "predicate": "_leave_v_1", + "sortinfo": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": 
"-", "PERF": "-", "cvarsort": "e"}, + "lnk": {"from": 54, "to": 59} + } + ], + "links": [ + {"from": 10000, "to": 10002, "rargname": "RSTR", "post": "H"}, + {"from": 10001, "to": 10002, "rargname": "ARG1", "post": "EQ"}, + {"from": 10003, "to": 10005, "rargname": "RSTR", "post": "H"}, + {"from": 10004, "to": 10005, "rargname": "ARG1", "post": "EQ"}, + {"from": 10004, "to": 10002, "rargname": "ARG2", "post": "NEQ"}, + {"from": 10006, "to": 10007, "rargname": "ARG1", "post": "EQ"}, + {"from": 10007, "to": 10005, "rargname": "ARG1", "post": "NEQ"}, + {"from": 10008, "to": 10002, "rargname": "ARG1", "post": "NEQ"}, + {"from": 10009, "to": 10008, "rargname": "ARG1", "post": "EQ"}, + {"from": 10009, "to": 10010, "rargname": "ARG2", "post": "EQ"}, + {"from": 10010, "to": 10002, "rargname": "ARG1", "post": "NEQ"}, + {"from": 10007, "to": 10002, "rargname": "MOD", "post": "EQ"}, + {"from": 10010, "to": 10008, "rargname": "MOD", "post": "EQ"} + ] + } + + Module Constants ---------------- @@ -19,6 +112,7 @@ delphin.codecs.dmrsjson `']'` + Deserialization Functions ------------------------- @@ -34,6 +128,7 @@ delphin.codecs.dmrsjson See the :func:`decode` codec API documentation. + Serialization Functions ----------------------- @@ -49,6 +144,7 @@ delphin.codecs.dmrsjson See the :func:`encode` codec API documentation. + Complementary Functions ----------------------- diff --git a/docs/api/delphin.codecs.dmrspenman.rst b/docs/api/delphin.codecs.dmrspenman.rst index 9696284c..a6200c9a 100644 --- a/docs/api/delphin.codecs.dmrspenman.rst +++ b/docs/api/delphin.codecs.dmrspenman.rst @@ -4,6 +4,88 @@ delphin.codecs.dmrspenman .. 
automodule:: delphin.codecs.dmrspenman + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + :: + + (e9 / _quit_v_1 + :lnk "<45:49>" + :cvarsort e + :sf prop + :tense past + :mood indicative + :prog - + :perf - + :ARG1-NEQ (x3 / _chef_n_1 + :lnk "<8:12>" + :cvarsort x + :pers 3 + :num sg + :ind + + :RSTR-H-of (q1 / _the_q + :lnk "<0:3>") + :ARG1-EQ-of (e2 / _new_a_1 + :lnk "<4:7>" + :cvarsort e + :sf prop + :tense untensed + :mood indicative + :prog bool + :perf -) + :ARG2-NEQ-of (e5 / poss + :lnk "<13:18>" + :cvarsort e + :sf prop + :tense untensed + :mood indicative + :prog - + :perf - + :ARG1-EQ (x6 / _soup_n_1 + :lnk "<19:23>" + :cvarsort x + :pers 3 + :num sg + :RSTR-H-of (q4 / def_explicit_q + :lnk "<13:18>"))) + :MOD-EQ-of (e8 / _spill_v_1 + :lnk "<37:44>" + :cvarsort e + :sf prop + :tense past + :mood indicative + :prog - + :perf - + :ARG1-NEQ x6 + :ARG1-EQ-of (e7 / _accidental_a_1 + :lnk "<24:36>" + :cvarsort e + :sf prop + :tense untensed + :mood indicative + :prog - + :perf -))) + :ARG1-EQ-of (e10 / _and_c + :lnk "<50:53>" + :cvarsort e + :sf prop + :tense past + :mood indicative + :prog - + :perf - + :ARG2-EQ (e11 / _leave_v_1 + :lnk "<54:59>" + :cvarsort e + :sf prop + :tense past + :mood indicative + :prog - + :perf - + :ARG1-NEQ x3 + :MOD-EQ e9))) + + Deserialization Functions ------------------------- diff --git a/docs/api/delphin.codecs.dmrx.rst b/docs/api/delphin.codecs.dmrx.rst index d2fc1445..1418fc93 100644 --- a/docs/api/delphin.codecs.dmrx.rst +++ b/docs/api/delphin.codecs.dmrx.rst @@ -4,6 +4,112 @@ delphin.codecs.dmrx .. 
automodule:: delphin.codecs.dmrx + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + :: + + + + + + + + + + + + + + + + def_explicit_q + + + + poss + + + + + + + + + + + + + + + + + + + + + + + + + + + + RSTR + H + + + ARG1 + EQ + + + RSTR + H + + + ARG1 + EQ + + + ARG2 + NEQ + + + ARG1 + EQ + + + ARG1 + NEQ + + + ARG1 + NEQ + + + ARG1 + EQ + + + ARG2 + EQ + + + ARG1 + NEQ + + + MOD + EQ + + + MOD + EQ + + + + Module Constants ---------------- diff --git a/docs/api/delphin.codecs.eds.rst b/docs/api/delphin.codecs.eds.rst index 4fb80852..3d7722f5 100644 --- a/docs/api/delphin.codecs.eds.rst +++ b/docs/api/delphin.codecs.eds.rst @@ -4,6 +4,27 @@ delphin.codecs.eds .. automodule:: delphin.codecs.eds + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + :: + + {e18: + _1:_the_q<0:3>[BV x3] + e8:_new_a_1<4:7>{e SF prop, TENSE untensed, MOOD indicative, PROG bool, PERF -}[ARG1 x3] + x3:_chef_n_1<8:12>{x PERS 3, NUM sg, IND +}[] + _2:def_explicit_q<13:18>[BV x10] + e14:poss<13:18>{e SF prop, TENSE untensed, MOOD indicative, PROG -, PERF -}[ARG1 x10, ARG2 x3] + x10:_soup_n_1<19:23>{x PERS 3, NUM sg}[] + e15:_accidental_a_1<24:36>{e SF prop, TENSE untensed, MOOD indicative, PROG -, PERF -}[ARG1 e16] + e16:_spill_v_1<37:44>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x10] + e18:_quit_v_1<45:49>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x3] + e2:_and_c<50:53>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 e18, ARG2 e20] + e20:_leave_v_1<54:59>{e SF prop, TENSE past, MOOD indicative, PROG -, PERF -}[ARG1 x3] + } + + Deserialization Functions ------------------------- @@ -19,6 +40,7 @@ delphin.codecs.eds See the :func:`decode` codec API documentation. 
+ Serialization Functions ----------------------- diff --git a/docs/api/delphin.codecs.edsjson.rst b/docs/api/delphin.codecs.edsjson.rst index b04414d0..2adbb9c5 100644 --- a/docs/api/delphin.codecs.edsjson.rst +++ b/docs/api/delphin.codecs.edsjson.rst @@ -4,6 +4,92 @@ delphin.codecs.edsjson .. automodule:: delphin.codecs.edsjson + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + :: + + { + "top": "e18", + "nodes": { + "_1": { + "label": "_the_q", + "edges": {"BV": "x3"}, + "lnk": {"from": 0, "to": 3} + }, + "e8": { + "label": "_new_a_1", + "edges": {"ARG1": "x3"}, + "lnk": {"from": 4, "to": 7}, + "type": "e", + "properties": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "bool", "PERF": "-"} + }, + "x3": { + "label": "_chef_n_1", + "edges": {}, + "lnk": {"from": 8, "to": 12}, + "type": "x", + "properties": {"PERS": "3", "NUM": "sg", "IND": "+"} + }, + "_2": { + "label": "def_explicit_q", + "edges": {"BV": "x10"}, + "lnk": {"from": 13, "to": 18} + }, + "e14": { + "label": "poss", + "edges": {"ARG1": "x10", "ARG2": "x3"}, + "lnk": {"from": 13, "to": 18}, + "type": "e", + "properties": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-"} + }, + "x10": { + "label": "_soup_n_1", + "edges": {}, + "lnk": {"from": 19, "to": 23}, + "type": "x", + "properties": {"PERS": "3", "NUM": "sg"} + }, + "e15": { + "label": "_accidental_a_1", + "edges": {"ARG1": "e16"}, + "lnk": {"from": 24, "to": 36}, + "type": "e", + "properties": {"SF": "prop", "TENSE": "untensed", "MOOD": "indicative", "PROG": "-", "PERF": "-"} + }, + "e16": { + "label": "_spill_v_1", + "edges": {"ARG1": "x10"}, + "lnk": {"from": 37, "to": 44}, + "type": "e", + "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"} + }, + "e18": { + "label": "_quit_v_1", + "edges": {"ARG1": "x3"}, + "lnk": {"from": 45, "to": 49}, + "type": "e", + "properties": {"SF": "prop", "TENSE": "past", "MOOD": 
"indicative", "PROG": "-", "PERF": "-"} + }, + "e2": { + "label": "_and_c", + "edges": {"ARG1": "e18", "ARG2": "e20"}, + "lnk": {"from": 50, "to": 53}, + "type": "e", + "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"} + }, + "e20": { + "label": "_leave_v_1", + "edges": {"ARG1": "x3"}, + "lnk": {"from": 54, "to": 59}, + "type": "e", + "properties": {"SF": "prop", "TENSE": "past", "MOOD": "indicative", "PROG": "-", "PERF": "-"} + } + } + } + + Module Constants ---------------- diff --git a/docs/api/delphin.codecs.edspenman.rst b/docs/api/delphin.codecs.edspenman.rst index e2489202..00841464 100644 --- a/docs/api/delphin.codecs.edspenman.rst +++ b/docs/api/delphin.codecs.edspenman.rst @@ -4,6 +4,86 @@ delphin.codecs.edspenman .. automodule:: delphin.codecs.edspenman + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + :: + + (e18 / _quit_v_1 + :lnk "<45:49>" + :type e + :sf prop + :tense past + :mood indicative + :prog - + :perf - + :ARG1 (x3 / _chef_n_1 + :lnk "<8:12>" + :type x + :pers 3 + :num sg + :ind + + :BV-of (_1 / _the_q + :lnk "<0:3>") + :ARG1-of (e8 / _new_a_1 + :lnk "<4:7>" + :type e + :sf prop + :tense untensed + :mood indicative + :prog bool + :perf -) + :ARG2-of (e14 / poss + :lnk "<13:18>" + :type e + :sf prop + :tense untensed + :mood indicative + :prog - + :perf - + :ARG1 (x10 / _soup_n_1 + :lnk "<19:23>" + :type x + :pers 3 + :num sg + :BV-of (_2 / def_explicit_q + :lnk "<13:18>") + :ARG1-of (e16 / _spill_v_1 + :lnk "<37:44>" + :type e + :sf prop + :tense past + :mood indicative + :prog - + :perf - + :ARG1-of (e15 / _accidental_a_1 + :lnk "<24:36>" + :type e + :sf prop + :tense untensed + :mood indicative + :prog - + :perf -))))) + :ARG1-of (e2 / _and_c + :lnk "<50:53>" + :type e + :sf prop + :tense past + :mood indicative + :prog - + :perf - + :ARG2 (e20 / _leave_v_1 + :lnk "<54:59>" + :type e + :sf prop + :tense past + :mood indicative + :prog - + :perf - + :ARG1 x3))) + + 
Deserialization Functions ------------------------- @@ -19,6 +99,7 @@ delphin.codecs.edspenman See the :func:`decode` codec API documentation. + Serialization Functions ----------------------- diff --git a/docs/api/delphin.codecs.indexedmrs.rst b/docs/api/delphin.codecs.indexedmrs.rst index 2501139d..fd40ead1 100644 --- a/docs/api/delphin.codecs.indexedmrs.rst +++ b/docs/api/delphin.codecs.indexedmrs.rst @@ -4,6 +4,35 @@ delphin.codecs.indexedmrs .. automodule:: delphin.codecs.indexedmrs + The Indexed MRS format does not include role names such as `ARG1`, + `ARG2`, etc., so the order of the arguments in a predication is + important. For this reason, serialization with the Indexed MRS + format requires the use of a SEM-I (see the :mod:`delphin.semi` + module). + + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + :: + + < h0, e2:PROP:PAST:INDICATIVE:-:-, + { h4:_the_q<0:3>(x3:3:SG:GENDER:+:PT, h5, h6), + h7:_new_a_1<4:7>(e8:PROP:UNTENSED:INDICATIVE:BOOL:-, x3), + h7:_chef_n_1<8:12>(x3), + h9:def_explicit_q<13:18>(x10:3:SG:GENDER:BOOL:PT, h11, h12), + h13:poss<13:18>(e14:PROP:UNTENSED:INDICATIVE:-:-, x10, x3), + h13:_soup_n_1<19:23>(x10), + h7:_accidental_a_1<24:36>(e15:PROP:UNTENSED:INDICATIVE:-:-, e16:PROP:PAST:INDICATIVE:-:-), + h7:_spill_v_1<37:44>(e16, x10, i17), + h1:_quit_v_1<45:49>(e18:PROP:PAST:INDICATIVE:-:-, x3, i19), + h1:_and_c<50:53>(e2, e18, e20:PROP:PAST:INDICATIVE:-:-), + h1:_leave_v_1<54:59>(e20, x3, i21) }, + { h0 qeq h1, + h5 qeq h7, + h11 qeq h13 } > + + Deserialization Functions ------------------------- diff --git a/docs/api/delphin.codecs.mrsjson.rst b/docs/api/delphin.codecs.mrsjson.rst index 08fb7aea..1e106f1a 100644 --- a/docs/api/delphin.codecs.mrsjson.rst +++ b/docs/api/delphin.codecs.mrsjson.rst @@ -4,6 +4,115 @@ delphin.codecs.mrsjson .. 
automodule:: delphin.codecs.mrsjson + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + :: + + { + "top": "h0", + "index": "e2", + "relations": [ + { + "label": "h4", + "predicate": "_the_q", + "lnk": {"from": 0, "to": 3}, + "arguments": {"BODY": "h6", "RSTR": "h5", "ARG0": "x3"} + }, + { + "label": "h7", + "predicate": "_new_a_1", + "lnk": {"from": 4, "to": 7}, + "arguments": {"ARG1": "x3", "ARG0": "e8"} + }, + { + "label": "h7", + "predicate": "_chef_n_1", + "lnk": {"from": 8, "to": 12}, + "arguments": {"ARG0": "x3"} + }, + { + "label": "h9", + "predicate": "def_explicit_q", + "lnk": {"from": 13, "to": 18}, + "arguments": {"BODY": "h12", "RSTR": "h11", "ARG0": "x10"} + }, + { + "label": "h13", + "predicate": "poss", + "lnk": {"from": 13, "to": 18}, + "arguments": {"ARG1": "x10", "ARG2": "x3", "ARG0": "e14"} + }, + { + "label": "h13", + "predicate": "_soup_n_1", + "lnk": {"from": 19, "to": 23}, + "arguments": {"ARG0": "x10"} + }, + { + "label": "h7", + "predicate": "_accidental_a_1", + "lnk": {"from": 24, "to": 36}, + "arguments": {"ARG1": "e16", "ARG0": "e15"} + }, + { + "label": "h7", + "predicate": "_spill_v_1", + "lnk": {"from": 37, "to": 44}, + "arguments": {"ARG1": "x10", "ARG2": "i17", "ARG0": "e16"} + }, + { + "label": "h1", + "predicate": "_quit_v_1", + "lnk": {"from": 45, "to": 49}, + "arguments": {"ARG1": "x3", "ARG2": "i19", "ARG0": "e18"} + }, + { + "label": "h1", + "predicate": "_and_c", + "lnk": {"from": 50, "to": 53}, + "arguments": {"ARG1": "e18", "ARG2": "e20", "ARG0": "e2"} + }, + { + "label": "h1", + "predicate": "_leave_v_1", + "lnk": {"from": 54, "to": 59}, + "arguments": {"ARG1": "x3", "ARG2": "i21", "ARG0": "e20"} + } + ], + "constraints": [ + {"low": "h1", "high": "h0", "relation": "qeq"}, + {"low": "h7", "high": "h5", "relation": "qeq"}, + {"low": "h13", "high": "h11", "relation": "qeq"} + ], + "variables": { + "h0": {"type": "h"}, + "h1": {"type": "h"}, + "e2": {"type": "e", "properties": {"MOOD": 
"indicative", "PROG": "-", "SF": "prop", "PERF": "-", "TENSE": "past"}}, + "x3": {"type": "x", "properties": {"NUM": "sg", "PERS": "3", "IND": "+"}}, + "h4": {"type": "h"}, + "h6": {"type": "h"}, + "h5": {"type": "h"}, + "h7": {"type": "h"}, + "e8": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "bool", "SF": "prop", "PERF": "-", "TENSE": "untensed"}}, + "h9": {"type": "h"}, + "x10": {"type": "x", "properties": {"NUM": "sg", "PERS": "3"}}, + "h11": {"type": "h"}, + "h12": {"type": "h"}, + "h13": {"type": "h"}, + "e14": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "-", "SF": "prop", "PERF": "-", "TENSE": "untensed"}}, + "e15": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "-", "SF": "prop", "PERF": "-", "TENSE": "untensed"}}, + "e16": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "-", "SF": "prop", "PERF": "-", "TENSE": "past"}}, + "i17": {"type": "i"}, + "e18": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "-", "SF": "prop", "PERF": "-", "TENSE": "past"}}, + "i19": {"type": "i"}, + "e20": {"type": "e", "properties": {"MOOD": "indicative", "PROG": "-", "SF": "prop", "PERF": "-", "TENSE": "past"}}, + "i21": {"type": "i"} + } + } + + Module Constants ---------------- @@ -19,6 +128,7 @@ delphin.codecs.mrsjson `']'` + Deserialization Functions ------------------------- @@ -34,6 +144,7 @@ delphin.codecs.mrsjson See the :func:`decode` codec API documentation. + Serialization Functions ----------------------- @@ -49,6 +160,7 @@ delphin.codecs.mrsjson See the :func:`encode` codec API documentation. + Complementary Functions ----------------------- diff --git a/docs/api/delphin.codecs.mrsprolog.rst b/docs/api/delphin.codecs.mrsprolog.rst index 3ce1a071..cc34833d 100644 --- a/docs/api/delphin.codecs.mrsprolog.rst +++ b/docs/api/delphin.codecs.mrsprolog.rst @@ -4,6 +4,54 @@ delphin.codecs.mrsprolog .. 
automodule:: delphin.codecs.mrsprolog + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + :: + + psoa(h0,e2, + [rel('_the_q',h4, + [attrval('ARG0',x3), + attrval('RSTR',h5), + attrval('BODY',h6)]), + rel('_new_a_1',h7, + [attrval('ARG0',e8), + attrval('ARG1',x3)]), + rel('_chef_n_1',h7, + [attrval('ARG0',x3)]), + rel('def_explicit_q',h9, + [attrval('ARG0',x10), + attrval('RSTR',h11), + attrval('BODY',h12)]), + rel('poss',h13, + [attrval('ARG0',e14), + attrval('ARG1',x10), + attrval('ARG2',x3)]), + rel('_soup_n_1',h13, + [attrval('ARG0',x10)]), + rel('_accidental_a_1',h7, + [attrval('ARG0',e15), + attrval('ARG1',e16)]), + rel('_spill_v_1',h7, + [attrval('ARG0',e16), + attrval('ARG1',x10), + attrval('ARG2',i17)]), + rel('_quit_v_1',h1, + [attrval('ARG0',e18), + attrval('ARG1',x3), + attrval('ARG2',i19)]), + rel('_and_c',h1, + [attrval('ARG0',e2), + attrval('ARG1',e18), + attrval('ARG2',e20)]), + rel('_leave_v_1',h1, + [attrval('ARG0',e20), + attrval('ARG1',x3), + attrval('ARG2',i21)])], + hcons([qeq(h0,h1),qeq(h5,h7),qeq(h11,h13)])) + + Serialization Functions ----------------------- diff --git a/docs/api/delphin.codecs.mrx.rst b/docs/api/delphin.codecs.mrx.rst index 82985b0a..7ac26eb6 100644 --- a/docs/api/delphin.codecs.mrx.rst +++ b/docs/api/delphin.codecs.mrx.rst @@ -4,6 +4,97 @@ delphin.codecs.mrx .. automodule:: delphin.codecs.mrx + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + :: + + + + Module Constants ---------------- diff --git a/docs/api/delphin.codecs.simpledmrs.rst b/docs/api/delphin.codecs.simpledmrs.rst index 45989cd0..17ac4158 100644 --- a/docs/api/delphin.codecs.simpledmrs.rst +++ b/docs/api/delphin.codecs.simpledmrs.rst @@ -4,6 +4,41 @@ delphin.codecs.simpledmrs .. automodule:: delphin.codecs.simpledmrs + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + :: + + dmrs { + ["The new chef whose soup accidentally spilled quit and left." 
top=10008 index=10009] + 10000 [_the_q<0:3>]; + 10001 [_new_a_1<4:7> e SF=prop TENSE=untensed MOOD=indicative PROG=bool PERF=-]; + 10002 [_chef_n_1<8:12> x PERS=3 NUM=sg IND=+]; + 10003 [def_explicit_q<13:18>]; + 10004 [poss<13:18> e SF=prop TENSE=untensed MOOD=indicative PROG=- PERF=-]; + 10005 [_soup_n_1<19:23> x PERS=3 NUM=sg]; + 10006 [_accidental_a_1<24:36> e SF=prop TENSE=untensed MOOD=indicative PROG=- PERF=-]; + 10007 [_spill_v_1<37:44> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-]; + 10008 [_quit_v_1<45:49> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-]; + 10009 [_and_c<50:53> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-]; + 10010 [_leave_v_1<54:59> e SF=prop TENSE=past MOOD=indicative PROG=- PERF=-]; + 10000:RSTR/H -> 10002; + 10001:ARG1/EQ -> 10002; + 10003:RSTR/H -> 10005; + 10004:ARG1/EQ -> 10005; + 10004:ARG2/NEQ -> 10002; + 10006:ARG1/EQ -> 10007; + 10007:ARG1/NEQ -> 10005; + 10008:ARG1/NEQ -> 10002; + 10009:ARG1/EQ -> 10008; + 10009:ARG2/EQ -> 10010; + 10010:ARG1/NEQ -> 10002; + 10007:MOD/EQ -> 10002; + 10010:MOD/EQ -> 10008; + } + + Deserialization Functions ------------------------- @@ -19,6 +54,7 @@ delphin.codecs.simpledmrs See the :func:`decode` codec API documentation. + Serialization Functions ----------------------- diff --git a/docs/api/delphin.codecs.simplemrs.rst b/docs/api/delphin.codecs.simplemrs.rst index e1aa521e..2b02a883 100644 --- a/docs/api/delphin.codecs.simplemrs.rst +++ b/docs/api/delphin.codecs.simplemrs.rst @@ -4,6 +4,31 @@ delphin.codecs.simplemrs .. automodule:: delphin.codecs.simplemrs + SimpleMRS is a format for Minimal Recursion Semantics that aims to + be readable equally by humans and machines. + + Example: + + * *The new chef whose soup accidentally spilled quit and left.* + + .. 
code:: simplemrs + + [ TOP: h0 + INDEX: e2 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] + RELS: < [ _the_q<0:3> LBL: h4 ARG0: x3 [ x PERS: 3 NUM: sg IND: + ] RSTR: h5 BODY: h6 ] + [ _new_a_1<4:7> LBL: h7 ARG0: e8 [ e SF: prop TENSE: untensed MOOD: indicative PROG: bool PERF: - ] ARG1: x3 ] + [ _chef_n_1<8:12> LBL: h7 ARG0: x3 ] + [ def_explicit_q<13:18> LBL: h9 ARG0: x10 [ x PERS: 3 NUM: sg ] RSTR: h11 BODY: h12 ] + [ poss<13:18> LBL: h13 ARG0: e14 [ e SF: prop TENSE: untensed MOOD: indicative PROG: - PERF: - ] ARG1: x10 ARG2: x3 ] + [ _soup_n_1<19:23> LBL: h13 ARG0: x10 ] + [ _accidental_a_1<24:36> LBL: h7 ARG0: e15 [ e SF: prop TENSE: untensed MOOD: indicative PROG: - PERF: - ] ARG1: e16 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] ] + [ _spill_v_1<37:44> LBL: h7 ARG0: e16 ARG1: x10 ARG2: i17 ] + [ _quit_v_1<45:49> LBL: h1 ARG0: e18 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] ARG1: x3 ARG2: i19 ] + [ _and_c<50:53> LBL: h1 ARG0: e2 ARG1: e18 ARG2: e20 [ e SF: prop TENSE: past MOOD: indicative PROG: - PERF: - ] ] + [ _leave_v_1<54:59> LBL: h1 ARG0: e20 ARG1: x3 ARG2: i21 ] > + HCONS: < h0 qeq h1 h5 qeq h7 h11 qeq h13 > ] + + Deserialization Functions ------------------------- @@ -19,6 +44,7 @@ delphin.codecs.simplemrs See the :func:`decode` codec API documentation. + Serialization Functions ----------------------- diff --git a/docs/api/delphin.commands.rst b/docs/api/delphin.commands.rst index dc9525c9..8d4e6873 100644 --- a/docs/api/delphin.commands.rst +++ b/docs/api/delphin.commands.rst @@ -4,6 +4,11 @@ delphin.commands .. automodule:: delphin.commands + The public functions in this module largely mirror the front-end + subcommands provided by the `delphin` command, with some small + changes to argument names or values to be better-suited to being + called from within Python. 
+ convert ------- diff --git a/docs/api/delphin.derivation.rst b/docs/api/delphin.derivation.rst index dd6acae3..0d3a44c8 100644 --- a/docs/api/delphin.derivation.rst +++ b/docs/api/delphin.derivation.rst @@ -4,93 +4,172 @@ delphin.derivation .. automodule:: delphin.derivation - Loading Derivation Data - ----------------------- + Derivation trees represent a unique analysis of an input using an + implemented grammar. They are a kind of syntax tree, but as they + use the actual grammar entities (e.g., rules or lexical entries) as + node labels, they are more specific than trees using general + category labels (e.g., "N" or "VP"). As such, they are more likely + to change across grammar versions. + + .. seealso:: + More information about derivation trees is found at + http://moin.delph-in.net/ItsdbDerivations + + For the following Japanese example... + + :: + + 遠く に 銃声 が 聞こえ た 。 + tooku ni juusei ga kikoe-ta + distant LOC gunshot NOM can.hear-PFV + "Shots were heard in the distance." + + ... here is the derivation tree of a parse from `Jacy + `_ in the Unified Derivation + Format (UDF):: + + (utterance-root + (564 utterance_rule-decl-finite 1.02132 0 6 + (563 hf-adj-i-rule 1.04014 0 6 + (557 hf-complement-rule -0.27164 0 2 + (556 quantify-n-rule 0.311511 0 1 + (23 tooku_1 0.152496 0 1 + ("遠く" 0 1))) + (42 ni-narg 0.478407 1 2 + ("に" 1 2))) + (562 head_subj_rule 1.512 2 6 + (559 hf-complement-rule -0.378462 2 4 + (558 quantify-n-rule 0.159015 2 3 + (55 juusei_1 0 2 3 + ("銃声" 2 3))) + (56 ga 0.462257 3 4 + ("が" 3 4))) + (561 vstem-vend-rule 1.34202 4 6 + (560 i-lexeme-v-stem-infl-rule 0.365568 4 5 + (65 kikoeru-stem 0 4 5 + ("聞こえ" 4 5))) + (81 ta-end 0.0227589 5 6 + ("た" 5 6))))))) - For loading a full derivation structure from either the UDF/UDX - string representations or the dictionary representation, the - :class:`Derivation` class provides class methods to help with the - decoding. 
+ In addition to the UDF format, there is also the UDF export format + "UDX", which adds lexical type information and indicates which + daughter node is the head, and a dictionary representation, which + is useful for JSON serialization. All three are supported by + PyDelphin. - >>> from delphin import derivation - >>> d1 = derivation.Derivation.from_string( - ... '(1 entity-name 1 0 1 ("token"))') - ... - >>> d2 = derivation.Derivation.from_dict( - ... {'id': 1, 'entity': 'entity-name', 'score': 1, - ... 'start': 0, 'end': 1, 'form': 'token'}]}) - ... - >>> d1 == d2 - True + Derivation trees have 3 types of nodes: - .. autoclass:: Derivation - :show-inheritance: - :members: + * **root nodes**, with only an entity name and a single child - UDF/UDX Node Types - ------------------ + * **normal nodes**, with 5 fields (below) and a list of children - There are three different node Types + - *id* -- an integer id given by the producer of the derivation + - *entity* -- rule or type name + - *score* -- a (MaxEnt) score for the current node's subtree + - *start* -- the character index of the left-most side of the tree + - *end* -- the character index of the right-most side of the tree - .. autoclass:: UDFNode(id, entity, score=None, start=None, end=None, daughters=None, head=None, type=None, parent=None) - :members: + * **terminal/leaf/lexical nodes**, which contain the input tokens + processed by that subtree - .. py:attribute:: id + This module uses the :class:`UDFNode` class for capturing root and + normal nodes. Root nodes are expressed as a :class:`UDFNode` whose + `id` is `None`. For root nodes, all fields except `entity` and the + list of daughters are expected to be `None`. Leaf nodes are simply + an iterable of token information. - the unique node identifier + Loading Derivation Data + ----------------------- - ..
py:attribute:: entity + There are two functions for loading derivations from either the + UDF/UDX string representation or the dictionary representation: + :func:`from_string` and :func:`from_dict`. - the grammar entity represented by the node + >>> from delphin import derivation + >>> d1 = derivation.from_string( + ... '(1 entity-name 1 0 1 ("token"))') + ... + >>> d2 = derivation.from_dict( + ... {'id': 1, 'entity': 'entity-name', 'score': 1, + ... 'start': 0, 'end': 1, 'form': 'token'}) + ... + >>> d1 == d2 + True - .. py:attribute:: score + .. autofunction:: from_string + .. autofunction:: from_dict - the probability or weight of to the node; for many processors, - this will be the unnormalized MaxEnt score assigned to the whole - subtree rooted by this node + UDF/UDX Classes + --------------- - .. py:attribute:: start + There are four classes for representing derivation trees. The + :class:`Derivation` class is used to contain the entire tree, while + :class:`UDFNode`, :class:`UDFTerminal`, and :class:`UDFToken` + represent individual nodes. - the start position (in inter-word, or chart, indices) of the - substring encompassed by this node and its daughters + .. autoclass:: Derivation + :show-inheritance: + :members: - .. py:attribute:: end + .. autoclass:: UDFNode(id, entity, score=None, start=None, end=None, daughters=None, head=None, type=None, parent=None) + :members: - the end position (in inter-word, or chart, indices) of the - substring encompassed by this node and its daughters + .. py:attribute:: id - .. py:attribute:: type + The unique node identifier. - the lexical type (available on preterminal UDX nodes) + .. py:attribute:: entity - .. automethod:: is_root - .. automethod:: to_udf - .. automethod:: to_udx - .. automethod:: to_dict + The grammar entity represented by the node. - .. autoclass:: UDFTerminal(form, tokens=None, parent=None) - :members: + .. py:attribute:: score - ..
py:attribute:: form + The probability or weight of the node; for many + processors, this will be the unnormalized MaxEnt score + assigned to the whole subtree rooted by this node. - the surface form of the terminal + .. py:attribute:: start - .. py:attribute:: tokens + The start position (in inter-word, or chart, indices) of the + substring encompassed by this node and its daughters. - the list of tokens + .. py:attribute:: end - .. automethod:: is_root - .. automethod:: to_udf - .. automethod:: to_udx - .. automethod:: to_dict + The end position (in inter-word, or chart, indices) of the + substring encompassed by this node and its daughters. - .. autoclass:: UDFToken(id, tfs) - :members: + .. py:attribute:: type - .. py:attribute:: id + The lexical type (available on preterminal UDX nodes). - the token identifier + .. automethod:: is_root + .. automethod:: to_udf + .. automethod:: to_udx + .. automethod:: to_dict - .. py:attribute:: form + .. autoclass:: UDFTerminal(form, tokens=None, parent=None) + :members: - the feature structure for the token + .. py:attribute:: form + + The surface form of the terminal. + + .. py:attribute:: tokens + + The list of tokens. + + .. automethod:: is_root + .. automethod:: to_udf + .. automethod:: to_udx + .. automethod:: to_dict + + .. autoclass:: UDFToken(id, tfs) + :members: + + .. py:attribute:: id + + The token identifier. + + .. py:attribute:: tfs + + The feature structure for the token. diff --git a/docs/api/delphin.hierarchy.rst b/docs/api/delphin.hierarchy.rst index 005a946c..bfa7dac0 100644 --- a/docs/api/delphin.hierarchy.rst +++ b/docs/api/delphin.hierarchy.rst @@ -4,6 +4,22 @@ delphin.hierarchy .. automodule:: delphin.hierarchy + This module defines the :class:`MultiHierarchy` class for + multiply-inheriting hierarchies.
This class manages the insertion + of new nodes into the hierarchy via the class constructor or the + :meth:`MultiHierarchy.update` method, normalizing node identifiers + (if a suitable normalization function is provided at + instantiation), and inserting nodes in the appropriate order. It + checks for some kinds of ill-formed hierarchies, such as cycles and + redundant parentage, and provides methods for testing for node + compatibility and subsumption. For convenience, arbitrary data may + be associated with node identifiers. + + While the class may be used directly, it is mainly used to support + the :class:`~delphin.tfs.TypeHierarchy` class and the predicate, + property, and variable hierarchies of :class:`~delphin.semi.SemI` + instances. + Classes ------- diff --git a/docs/api/delphin.interface.rst b/docs/api/delphin.interface.rst index b53c5f27..d5b9d6a9 100644 --- a/docs/api/delphin.interface.rst +++ b/docs/api/delphin.interface.rst @@ -4,6 +4,17 @@ delphin.interface .. automodule:: delphin.interface + This module manages the communication between data providers, + namely processors like `ACE + `_ or remote services like + the `DELPH-IN Web API `_, and user + code or storage backends, namely [incr tsdb()] :doc:`test suites + `. An interface sends requests to a provider, then + receives and interprets the response. + + The interface may also detect and deserialize supported DELPH-IN + formats if the appropriate modules are available. + .. autoclass:: Processor :members: diff --git a/docs/api/delphin.itsdb.rst b/docs/api/delphin.itsdb.rst index 72ec8e1f..404ce887 100644 --- a/docs/api/delphin.itsdb.rst +++ b/docs/api/delphin.itsdb.rst @@ -9,9 +9,22 @@ delphin.itsdb .. automodule:: delphin.itsdb - The typical test suite contains these files: - :: + .. note:: + + This module implements high-level structures and operations on + top of TSDB test suites. For the basic, low-level functionality, + see :mod:`delphin.tsdb`.
For complex queries of the databases, + see :mod:`delphin.tsql`. + + [incr tsdb()] is a tool built on top of TSDB databases for the + purpose of profiling and comparing grammar versions using test + suites. This module is named after that tool as it also builds + higher-level operations on top of TSDB test suites but it has a + much narrower scope. The aim of this module is to assist users with + creating, processing, or manipulating test suites. + + The typical test suite contains these files:: testsuite/ analysis fold item-set parse relations run tree @@ -19,7 +32,6 @@ delphin.itsdb edge item-phenomenon parameter preference rule set - Test Suite Classes ------------------ diff --git a/docs/api/delphin.lnk.rst b/docs/api/delphin.lnk.rst index 6ac0dd1b..e00b721c 100644 --- a/docs/api/delphin.lnk.rst +++ b/docs/api/delphin.lnk.rst @@ -4,6 +4,61 @@ delphin.lnk .. automodule:: delphin.lnk + In DELPH-IN semantic representations, entities are aligned to the + input surface string through the so-called "lnk" (pronounced + "link") values. There are four types of lnk values which align to the + surface in different ways: + + * Character spans (also called "characterization pointers"); e.g., + `<0:4>` + + * Token indices; e.g., `<0 1 3>` + + * Chart vertex spans; e.g., `<0#2>` + + * Edge identifier; e.g., `<@42>` + + The latter two are unlikely to be encountered by users. Chart vertices + were used by the `PET`_ parser but are now essentially deprecated and + edge identifiers are only used internally in the `LKB`_ for + generation. I will therefore focus on the first two kinds. + + .. _`PET`: http://moin.delph-in.net/PetTop + .. _`LKB`: http://moin.delph-in.net/LkbTop + + Character spans (sometimes called "characterization pointers") are by + far the most commonly used type---possibly even the only type most + users will encounter.
These spans indicate the positions *between* + characters in the input string that correspond to a semantic entity, + similar to how Python and Perl do string indexing. For example, + `<0:4>` would capture the first through fourth characters---a span + that would correspond to the first word in a sentence like "Dogs + bark". These spans assume the input is a flat, or linear, string and + can only select contiguous chunks. Character spans are used by REPP + (the Regular Expression PreProcessor; see :mod:`delphin.repp`) to + track the surface alignment prior to string changes introduced by + tokenization. + + Token indices select input tokens rather than characters. This method, + though not widely used, is more suitable for input sources that are + not flat strings (e.g., a lattice of automatic speech recognition + (ASR) hypotheses), or where non-contiguous sequences are needed (e.g., + from input containing markup or other noise). + + .. note:: + + Much of this background is from comments in the `LKB`_ source code: + See: http://svn.emmtee.net/trunk/lingo/lkb/src/mrs/lnk.lisp + + Support for lnk values in PyDelphin is rather simple. The :class:`Lnk` + class is able to parse lnk strings and model the contents for + serialization of semantic representations. In addition, semantic + entities such as DMRS :class:`Nodes ` and MRS + :class:`EPs ` have `cfrom` and `cto` attributes which + are the start and end pointers for character spans (defaulting to `-1` + if a character span is not specified for the entity). + + Classes ------- @@ -12,6 +67,7 @@ delphin.lnk .. autoclass:: LnkMixin :members: + Exceptions ---------- diff --git a/docs/api/delphin.predicate.rst b/docs/api/delphin.predicate.rst index 95fc7bd6..2e7dcca0 100644 --- a/docs/api/delphin.predicate.rst +++ b/docs/api/delphin.predicate.rst @@ -4,6 +4,42 @@ delphin.predicate .. automodule:: delphin.predicate + Semantic predicates are atomic symbols representing semantic + entities or constructions. 
For example, in the `English Resource + Grammar `_, `_mouse_n_1` is the + predicate for the word *mouse*, but it is underspecified for + lexical semantics---it could be an animal, a computer's pointing + device, or something else. Another example from the ERG is + `compound`, which is used to link two compounded nouns, such as for + *mouse pad*. + + There are two main categories of predicates: **abstract** and + **surface**. In form, abstract predicates do not begin with an + underscore and in usage they often correspond to semantic + constructions that are not represented by a token in the input, + such as the `compound` example above. Surface predicates, in + contrast, are the semantic representation of surface (i.e., + lexical) tokens, such as the `_mouse_n_1` example above. In form, + they must always begin with a single underscore, and have two or + three components: lemma, part-of-speech, and (optionally) sense. + + .. seealso:: + - The DELPH-IN wiki about predicates: + http://moin.delph-in.net/PredicateRfc + + In DELPH-IN there is the concept of "real predicates" which are + surface predicates decomposed into their lemma, part-of-speech, and + sense, but in PyDelphin (as of `v1.0.0`_) predicates are always + simple strings. However, this module has functions for composing + and decomposing predicates from/to their components (the + :func:`create` and :func:`split` functions, respectively). In + addition, there are functions to normalize (:func:`normalize`) and + validate (:func:`is_valid`, :func:`is_surface`, + :func:`is_abstract`) predicate symbols. + + .. _v1.0.0: https://github.com/delph-in/pydelphin/releases/tag/v1.0.0 + + Module Functions ---------------- @@ -14,6 +50,7 @@ delphin.predicate .. autofunction:: is_surface .. 
autofunction:: is_abstract + Exceptions ---------- diff --git a/docs/api/delphin.repp.rst b/docs/api/delphin.repp.rst index 7212fd78..aaf5a57e 100644 --- a/docs/api/delphin.repp.rst +++ b/docs/api/delphin.repp.rst @@ -4,6 +4,20 @@ delphin.repp .. automodule:: delphin.repp + A Regular-Expression Preprocessor [REPP]_ is a method of applying a + system of regular expressions for transformation and tokenization + while retaining character indices from the original input string. + + .. [REPP] Rebecca Dridan and Stephan Oepen. Tokenization: Returning + to a long solved problem---a survey, contrastive + experiment, recommendations, and toolkit. In Proceedings + of the 50th Annual Meeting of the Association for + Computational Linguistics (Volume 2: Short Papers), pages + 378–382, Jeju Island, Korea, July 2012. Association for + Computational Linguistics. URL + http://www.aclweb.org/anthology/P12-2074. + + Classes ------- @@ -16,6 +30,7 @@ delphin.repp .. autoclass:: REPPStep(input, output, operation, applied, startmap, endmap) :members: + Exceptions ---------- diff --git a/docs/api/delphin.semi.rst b/docs/api/delphin.semi.rst index 6aec5fe3..f7025b16 100644 --- a/docs/api/delphin.semi.rst +++ b/docs/api/delphin.semi.rst @@ -4,71 +4,86 @@ delphin.semi .. automodule:: delphin.semi - Loading a SEM-I from a File - --------------------------- - - The :func:`load` module function is used to read the regular - file-based SEM-I definitions, but there is also a dictionary - representation which may be useful for JSON serialization, e.g., for - an HTTP API that makes use of SEM-Is. See :meth:`SemI.to_dict()` for - the later. - - .. autofunction:: load - - The SemI Class - -------------- - - The main class modeling a semantic interface is :class:`SemI`. 
The - predicate synopses have enough complexity that two more subclasses - are used to make inspection easier: :class:`Synopsis` contains the - role information for an individual predicate synopsis, and each role - is modeled with a :class:`SynopsisRole` class. - - .. autoclass:: SemI - - The data in the SEM-I can be directly inspected via the - :attr:`variables`, :attr:`properties`, :attr:`roles`, and - :attr:`predicates` attributes. - - >>> smi = semi.load('../grammars/erg/etc/erg.smi') - >>> smi.variables['e'] - - >>> smi.variables['e'].parents - ['i'] - >>> smi.variables['e'].data - [('SF', 'sf'), ('TENSE', 'tense'), ('MOOD', 'mood'), ('PROG', 'bool'), ('PERF', 'bool')] - >>> 'sf' in smi.properties - True - >>> smi.roles['ARG0'] - 'i' - >>> for synopsis in smi.predicates['can_able'].data: - ... print(', '.join('{0.name} {0.value}'.format(roledata) - ... for roledata in synopsis)) - ... - ARG0 e, ARG1 i, ARG2 p - >>> smi.predicates.descendants('some_q') - ['_another_q', '_many+a_q', '_an+additional_q', '_what+a_q', '_such+a_q', '_some_q_indiv', '_some_q', '_a_q'] - - Note that the variables, properties, and predicates are - :class:`~delphin.tfs.TypeHierarchy` objects. - - .. automethod:: find_synopsis - .. automethod:: from_dict - .. automethod:: to_dict - - .. autoclass:: Synopsis(roles) - :members: - - .. autoclass:: SynopsisRole(name, value, properties=None, optional=False) - - Exceptions and Warnings - ----------------------- - - .. autoexception:: SemIError - :show-inheritance: - - .. autoexception:: SemISyntaxError - :show-inheritance: - - .. autoexception:: SemIWarning - :show-inheritance: + Semantic interfaces (SEM-Is) describe the inventory of semantic + components in a grammar, including variables, properties, roles, + and predicates. This information can be used for validating + semantic structures or for filling out missing information in + incomplete representations. + + .. 
seealso:: + The following DELPH-IN wikis contain more information: + + - Technical specifications: http://moin.delph-in.net/SemiRfc + - Overview and usage: http://moin.delph-in.net/RmrsSemi + + + Loading a SEM-I from a File + --------------------------- + + The :func:`load` module function is used to read the regular + file-based SEM-I definitions, but there is also a dictionary + representation which may be useful for JSON serialization, e.g., + for an HTTP API that makes use of SEM-Is. See + :meth:`SemI.to_dict()` for the latter. + + .. autofunction:: load + + + The SemI Class + -------------- + + The main class modeling a semantic interface is :class:`SemI`. The + predicate synopses have enough complexity that two more subclasses + are used to make inspection easier: :class:`Synopsis` contains the + role information for an individual predicate synopsis, and each role + is modeled with a :class:`SynopsisRole` class. + + .. autoclass:: SemI + + The data in the SEM-I can be directly inspected via the + :attr:`variables`, :attr:`properties`, :attr:`roles`, and + :attr:`predicates` attributes. + + >>> smi = semi.load('../grammars/erg/etc/erg.smi') + >>> smi.variables['e'] + + >>> smi.variables['e'].parents + ['i'] + >>> smi.variables['e'].data + [('SF', 'sf'), ('TENSE', 'tense'), ('MOOD', 'mood'), ('PROG', 'bool'), ('PERF', 'bool')] + >>> 'sf' in smi.properties + True + >>> smi.roles['ARG0'] + 'i' + >>> for synopsis in smi.predicates['can_able'].data: + ... print(', '.join('{0.name} {0.value}'.format(roledata) + ... for roledata in synopsis)) + ... + ARG0 e, ARG1 i, ARG2 p + >>> smi.predicates.descendants('some_q') + ['_another_q', '_many+a_q', '_an+additional_q', '_what+a_q', '_such+a_q', '_some_q_indiv', '_some_q', '_a_q'] + + Note that the variables, properties, and predicates are + :class:`~delphin.tfs.TypeHierarchy` objects. + + .. automethod:: find_synopsis + .. automethod:: from_dict + .. automethod:: to_dict + + ..
autoclass:: Synopsis(roles) + :members: + + .. autoclass:: SynopsisRole(name, value, properties=None, optional=False) + + + Exceptions and Warnings + ----------------------- + + .. autoexception:: SemIError + :show-inheritance: + + .. autoexception:: SemISyntaxError + :show-inheritance: + + .. autoexception:: SemIWarning + :show-inheritance: diff --git a/docs/api/delphin.tfs.rst b/docs/api/delphin.tfs.rst index d3bb08bd..6daed287 100644 --- a/docs/api/delphin.tfs.rst +++ b/docs/api/delphin.tfs.rst @@ -4,6 +4,17 @@ delphin.tfs .. automodule:: delphin.tfs + This module defines the :class:`FeatureStructure` and + :class:`TypedFeatureStructure` classes, which model an attribute + value matrix (AVM), with the latter including an associated + type. They allow feature access through TDL-style dot notation + or regular dictionary keys. + + In addition, the :class:`TypeHierarchy` class implements a + multiple-inheritance hierarchy with checks for type subsumption and + compatibility. + + Classes ------- diff --git a/docs/api/delphin.tsdb.rst b/docs/api/delphin.tsdb.rst index 2d8537b0..22b12412 100644 --- a/docs/api/delphin.tsdb.rst +++ b/docs/api/delphin.tsdb.rst @@ -4,6 +4,47 @@ delphin.tsdb .. automodule:: delphin.tsdb + + .. note:: + + This module implements the basic, low-level functionality for + working with TSDB databases. For higher-level views and uses of + these databases, see :mod:`delphin.itsdb`. For complex queries + of the databases, see :mod:`delphin.tsql`. + + TSDB databases are plain-text file-based relational databases + minimally consisting of a directory with a file, called + `relations`, containing the database's schema (see + `Schemas`_). Every relation, or table, in the database has its own + file, which may be `gzipped `_ + to save space. The relations have a simple format with columns + delimited by ``@`` and records delimited by newlines.
This makes + them easy to inspect at the command line with standard Unix tools + such as ``cut`` and ``awk`` (but gzipped relations need to be + decompressed or piped from a tool such as ``zcat``). + + This module handles the technical details of reading and writing + TSDB databases, including: + + - parsing database schemas + + - transparently opening either the plain-text or gzipped relations + on disk, as appropriate + + - escaping and unescaping reserved characters in the data + + - pairing columns with their schema descriptions + + - casting types (such as ``:integer``, ``:date``, etc.) + + Additionally, this module provides very basic abstractions of + databases and relations as the :class:`Database` and + :class:`Relation` classes, respectively. These serve as base + classes for the more featureful :class:`delphin.itsdb.TestSuite` + and :class:`delphin.itsdb.Table` classes, but may be useful as they + are for simple needs. + + Module Constants ---------------- diff --git a/docs/api/delphin.tsql.rst b/docs/api/delphin.tsql.rst index 53c9cbec..24c9f330 100644 --- a/docs/api/delphin.tsql.rst +++ b/docs/api/delphin.tsql.rst @@ -9,6 +9,101 @@ delphin.tsql .. automodule:: delphin.tsql + .. note:: + + This module deals with queries of TSDB databases. For basic, + low-level access to the databases, see :mod:`delphin.tsdb`. For + high-level operations and structures on top of the databases, + see :mod:`delphin.itsdb`. + + This module implements a subset of TSQL, namely the 'select' (or + 'retrieve') queries for extracting data from test suites. The + general form of a select query is:: + + [select] <projection> [from <relations>] [where <condition>]* + + For example, the following selects item identifiers that took more + than half a second to parse:: + + select i-id from item where total > 500 + + The `select` string is necessary when querying with the generic + :func:`query` function, but is implied and thus disallowed when + using the :func:`select` function.
+ + The `<projection>` is a list of space-separated field names (e.g., + `i-id i-input mrs`), or the special string `*` which selects all + columns from the joined relations. + + The optional `from` clause provides a list of relation names (e.g., + `item parse result`) that are joined on shared keys. The `from` + clause is required when `*` is used for the projection, but it can + also be used to select columns from non-standard relations (e.g., + `i-id from output`). Alternatively, `delphin.itsdb`-style data + specifiers (see :func:`delphin.itsdb.get_data_specifier`) may be + used to specify the relation on the column name (e.g., + `item.i-id`). + + The `where` clause provides conditions for filtering the list of + results. Conditions are binary operations that take a column or + data specifier on the left side and an integer (e.g., `10`), a date + (e.g., `2018-10-07`), or a string (e.g., `"sleep"`) on the right + side of the operator. The allowed conditions are: + + ================ ====================================== + Condition Form + ================ ====================================== + Regex match ``<field> ~ "regex"`` + Regex fail ``<field> !~ "regex"`` + Equality ``<field> = (integer|date|"string")`` + Inequality ``<field> != (integer|date|"string")`` + Less-than ``<field> < (integer|date)`` + Less-or-equal ``<field> <= (integer|date)`` + Greater-than ``<field> > (integer|date)`` + Greater-or-equal ``<field> >= (integer|date)`` + ================ ====================================== + + Boolean operators can be used to join multiple conditions or for + negation: + + =========== ===================================== + Operation Form + =========== ===================================== + Disjunction ``X | Y``, ``X || Y``, or ``X or Y`` + Conjunction ``X & Y``, ``X && Y``, or ``X and Y`` + Negation ``!X`` or ``not X`` + =========== ===================================== + + Normally, disjunction scopes over conjunction, but parentheses may + be used to group clauses, so the following are equivalent:: + + ...
where i-id = 10 or i-id = 20 and i-input ~ "[Dd]og" + ... where i-id = 10 or (i-id = 20 and i-input ~ "[Dd]og") + + Multiple `where` clauses may also be used as a conjunction that + scopes over disjunction, so the following are equivalent:: + + ... where (i-id = 10 or i-id = 20) and i-input ~ "[Dd]og" + ... where i-id = 10 or i-id = 20 where i-input ~ "[Dd]og" + + This facilitates query construction, where a user may want to apply + additional global constraints by appending new conditions to the + query string. + + PyDelphin has several differences to standard TSQL: + + * `select *` requires a `from` clause + * `select * from item result` does not also include columns from + the intervening `parse` relation + * `select i-input from result` returns a matching `i-input` for + every row in `result`, rather than only the unique rows + + PyDelphin also adds some features to standard TSQL: + + * qualified column names (e.g., `item.i-id`) + * multiple `where` clauses (as described above) + + Module Functions ---------------- diff --git a/docs/api/delphin.variable.rst b/docs/api/delphin.variable.rst index 8bec5311..a8823b46 100644 --- a/docs/api/delphin.variable.rst +++ b/docs/api/delphin.variable.rst @@ -4,6 +4,79 @@ delphin.variable .. automodule:: delphin.variable + This module contains functions to inspect the type and identifier + of variables (:func:`split`, :func:`type`, :func:`id`) and check if + a variable string is well-formed (:func:`is_valid`). It + additionally has constants for the standard variable types: + :data:`UNSPECIFIC`, :data:`INDIVIDUAL`, :data:`INSTANCE_OR_HANDLE`, + :data:`EVENTUALITY`, :data:`INSTANCE`, and :data:`HANDLE`. Finally, + the :class:`VariableFactory` class may be useful for tasks like + DMRS to MRS conversion for managing the creation of new variables. + + Variables in MRS + ---------------- + + Variables are a concept in Minimal Recursion Semantics coming from + formal semantics. 
Consider this logical form for a sentence like + "the dog barks":: + + ∃x(dog(x) ^ bark(x)) + + Here *x* is a variable that represents an entity that has the + properties that it is a dog and it is barking. Davidsonian + semantics introduce variables for events as well:: + + ∃e∃x(dog(x) ^ bark(e, x)) + + MRS uses variables in a similar way to Davidsonian semantics, + except that events are not explicitly quantified. That might look + like the following (if we ignore quantifier scope + underspecification):: + + the(x4) [dog(x4)] {bark(e2, x4)} + + "Variables" are also used for scope handles and labels, as in this + minor modification that indicates the scope handles:: + + h3:the(x4) [h6:dog(x4)] {h1:bark(e2, x4)} + + There is some confusion of terminology here. Sometimes "variable" + is contrasted with "handle" to mean an instance (`x`) or + eventuality (`e`) variable, but in this module "variable" means the + identifiers used for instances, eventualities, handles, and their + supertypes. + + The form of MRS variables is the concatenation of a variable *type* + (also called a *sort*) with a variable *id*. For example, the + variable type `e` and id `2` form the variable `e2`. Generally in + MRS the variable ids, regardless of the type, are unique, so for + instance one would not see `x2` and `e2` in the same structure. + + The variable types are arranged in a hierarchy. While the most + accurate variable type hierarchy for a particular grammar is + obtained via its SEM-I (see :mod:`delphin.semi`), in practice the + standard hierarchy given below is used by all DELPH-IN + grammars. The hierarchy in TDL would look like this (with an ASCII + rendering in comments on the right): + + .. code-block:: tdl + + u := *top*. ; u + i := u. ; / \ + p := u. ; i p + e := i. ; / \ / \ + x := i & p. ; e x h + h := p. + + In PyDelphin the equivalent hierarchy could be created as follows: + + >>> from delphin import hierarchy + >>> h = hierarchy.MultiHierarchy( + ... '*top*', + ... 
{'u': '*top*', 'i': 'u', 'p': 'u', 'e': 'i', 'x': 'i p', 'h': 'p'} + ... ) + + Module Constants ---------------- diff --git a/docs/api/delphin.vpm.rst b/docs/api/delphin.vpm.rst index 4e997878..14537cf8 100644 --- a/docs/api/delphin.vpm.rst +++ b/docs/api/delphin.vpm.rst @@ -4,6 +4,15 @@ delphin.vpm .. automodule:: delphin.vpm + Variable property mappings (VPMs) convert grammar-internal + variables (e.g. `event5`) to the grammar-external form (e.g. `e5`), + and also map variable properties (e.g. `PNG: 1pl` might map to + `PERS: 1` and `NUM: pl`). + + .. seealso:: + - Wiki about VPM: http://moin.delph-in.net/RmrsVpm + + Module functions ---------------- diff --git a/docs/guides/developer.rst b/docs/guides/developer.rst index dbc21cab..c65d64d9 100644 --- a/docs/guides/developer.rst +++ b/docs/guides/developer.rst @@ -71,6 +71,7 @@ The ``delphin`` package of PyDelphin is, as of version 1.0.0, a which means that it is possible to create plugins under the `delphin` namespace. + Plugin Names '''''''''''' @@ -81,6 +82,7 @@ more than one module or the plugin name doesn't strictly coincide with the project name, use ``delphin-{{name}}`` (e.g., `delphin-latex `_). + Project Structure ''''''''''''''''' @@ -105,6 +107,7 @@ a package rather than a module, it could be a subdirectory of modules under ``delphin/`` should not conflict with existing names in PyDelphin. + Plugin Versions ''''''''''''''' @@ -157,6 +160,7 @@ please file an `issue `_ to request the merge. + Module Dependencies ------------------- @@ -203,7 +207,7 @@ least test the codecs to changes they make. - :mod:`delphin.tfs` [`hierarchy`] - :mod:`delphin.tokens` [`lnk`] - :mod:`delphin.vpm` [`variable`] - - :mod:`delphin.web` [`interface`] + - :mod:`delphin.web.client` [`interface`] * Tier 3 @@ -221,6 +225,7 @@ least test the codecs to changes they make. * Tier 5 - `delphin.codecs` [`dmrs`, `eds`, `mrs`, ...] 
(see :doc:`../api/delphin.codecs`) + - :mod:`delphin.web.server` [`ace`, `codecs`, `derivation`, `dmrs`, `eds`, `itsdb`, `tokens`] * Tier 6