updated models and documentation

c-amr · Jul 24, 2016 · 3a6706b · 3a6706b
1 parent cfd0525
commit 3a6706b
Show file tree

Hide file tree

Showing 5 changed files with 52 additions and 5 deletions.
diff --git a/.gitignore b/.gitignore
@@ -64,4 +64,5 @@ smatch*/
 *~
 scripts/*~
 *.gz
-
+*.zip
+log/
diff --git a/README.md b/README.md
@@ -57,15 +57,23 @@ To preprocess the data, run:
       python amr_parsing.py -m preprocess [input_sentence_file]
 
 This will give you the tokenized sentences (.tok), POS tags and named entities (.prp), and the dependency structure (.charniak.parse.dep) (generated by the Charniak parser and the Stanford Dependency converter).
-Download the model trained on training set of LDC2013E117 newswire section [here](http://www.cs.brandeis.edu/~cwang24/LDC2013E117.train.basic-abt-charniak.m). Then use the following command to parse the sentence:
+**Note:** The parser will try to read the preprocessed files with the above suffixes. If the preprocessing did not complete, remove all the cached files and then re-run this step.
+
+## Parsing
+Download one of the following models:
+- [LDC2014T12](http://www.cs.brandeis.edu/~cwang24/files/amr-anno-1.0.train.m.tar.gz): trained on the training set of LDC2014T12.
+- [SemEval2016](http://www.cs.brandeis.edu/~cwang24/files/amr-semeval-all.train.m.tar.gz): trained on the training set of SemEval 2016 Task 8.
+
+Uncompress the model file, then use the following command to parse the sentence:
 
       python amr_parsing.py -m parse --model [model_file] [input_sentence_file] 2>log/error.log
 
 This will give you the parsed AMR file (.parsed) in the same directory as your input sentence file.
+
 ## Alignment
 If you have an annotated AMR file, you can first run the preprocessing step:
 
-	python amr_parsing.py -m preprocess --amrfmt [input_amr_file]
+	python amr_parsing.py -m preprocess --amrfmt amr [input_amr_file]
 
 This will generate a tokenized AMR file (.amr.tok) (which has a :tok tag in the comments). Then you can run the following command to get the aligned AMR file (.aligned):
 

diff --git a/amr_parsing.py b/amr_parsing.py
@@ -146,7 +146,7 @@ def main():
     arg_parser.add_argument('--feat',help='feature template file')
     arg_parser.add_argument('-iter','--iterations',default=1,type=int,help='training iterations')
     arg_parser.add_argument('amr_file',nargs='?',help='amr annotation file/input sentence file for parsing')
-    arg_parser.add_argument('--amrfmt',choices=['sent','amr','amreval'],default='amr',help='specifying the input file format')
+    arg_parser.add_argument('--amrfmt',choices=['sent','amr','amreval'],default='sent',help='specifying the input file format')
     arg_parser.add_argument('--smatcheval',action='store_true',help='give evaluation score using smatch')
     arg_parser.add_argument('-e','--eval',nargs=2,help='Error Analysis: give parsed AMR file and gold AMR file')
     arg_parser.add_argument('--section',choices=['proxy','all'],default='all',help='choose section of the corpus. Only works for LDC2014T12 dataset.')

diff --git a/importlib/__init__.py b/importlib/__init__.py
@@ -0,0 +1,38 @@
+"""Backport of importlib.import_module from 3.x."""
+# While not critical (and in no way guaranteed!), it would be nice to keep this
+# code compatible with Python 2.3.
+import sys
+
+def _resolve_name(name, package, level):
+    """Return the absolute name of the module to be imported."""
+    if not hasattr(package, 'rindex'):  # duck-typed check that package is string-like
+        raise ValueError("'package' not set to a string")
+    dot = len(package)  # with level == 1 the whole package name is kept as the prefix
+    for x in xrange(level, 1, -1):  # level - 1 iterations; each one drops a trailing component
+        try:
+            dot = package.rindex('.', 0, dot)  # position of the last '.' before the current cut
+        except ValueError:  # no '.' left: more leading dots than package components
+            raise ValueError("attempted relative import beyond top-level "
+                              "package")
+    return "%s.%s" % (package[:dot], name)  # remaining package prefix + '.' + relative name
+
+
+def import_module(name, package=None):
+    """Import a module.
+
+    The 'package' argument is required when performing a relative import. It
+    specifies the package to use as the anchor point from which to resolve the
+    relative import to an absolute import.
+
+    """
+    if name.startswith('.'):  # a leading dot marks a relative import
+        if not package:  # also rejects an empty string, not only None
+            raise TypeError("relative imports require the 'package' argument")
+        level = 0
+        for character in name:  # count the run of leading dots
+            if character != '.':
+                break
+            level += 1
+        name = _resolve_name(name[level:], package, level)  # dots stripped, anchored at package
+    __import__(name)  # return value is discarded; the call is made for its import side effect
+    return sys.modules[name]  # fetch the module object by its full dotted name
diff --git a/scripts/cmd.train.brown-verb.sh b/scripts/cmd.train.brown-verb.sh
@@ -1,4 +1,4 @@
 #!/bin/sh
 
 echo "Training Model ..."
-/usr/bin/python amr_parsing.py -m train --amrfmt amr --verblist --smatcheval --model ./models/semeval/amr-semeval-all.train.basic-abt-brown-verb -iter 5 --feat ./feature/basic_abt_brown_feats.templates ./data/semeval/training.txt -d ./data/semeval/dev.txt > ./log/amr-semeval-all.train.basic-abt-brown-verb.log 2>&1 &
+/usr/bin/python2.7 amr_parsing.py -m train --amrfmt amr --verblist --smatcheval --model ./models/semeval/amr-semeval-all.train.basic-abt-brown-verb -iter 20 --feat ./feature/basic_abt_brown_feats.templates ./data/semeval/training.txt -d ./data/semeval/dev.txt > ./log/amr-semeval-all.train.basic-abt-brown-verb.log 2>&1 &