Browse files

initial analyzed chunked corpus tests

  • Loading branch information...
1 parent dbde962 · commit a77a4684e9ca3ff51f6723555806edf36cef4e68 · @japerk committed Jul 19, 2011
Showing with 41 additions and 1 deletion.
  1. +1 −1 analyze_chunked_corpus.py
  2. +40 −0 tests/analyze_chunked_corpus.sh
View
2 analyze_chunked_corpus.py
@@ -46,7 +46,7 @@
raise ValueError('%s is an unknown corpus')
if args.trace:
- print 'loading nltk.corpus.%s' % args.corpus
+ print 'loading %s' % args.corpus
##############
## counting ##
View
40 tests/analyze_chunked_corpus.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env roundup
+
+describe "analyze_chunked_corpus.py"
+
+it_displays_usage_when_no_arguments() {
+ ./analyze_chunked_corpus.py 2>&1 | grep -q "usage: analyze_chunked_corpus.py"
+}
+
+it_needs_a_chunked_corpus() {
+ last_line=$(./analyze_chunked_corpus.py brown 2>&1 | tail -n 1)
+ test "$last_line" "=" "AttributeError: 'CategorizedTaggedCorpusReader' object has no attribute 'chunked_words'"
+}
+
+it_anayzes_treebank_chunk() {
+ first_lines=$(./analyze_chunked_corpus.py treebank_chunk 2>&1 | head -n 5)
+ test "$first_lines" "=" "loading treebank_chunk
+94200 total words
+11993 unique words
+46 tags
+1 IOBs"
+}
+
+it_needs_corpus_reader() {
+ last_line=$(./analyze_chunked_corpus.py corpora/treebank/tagged 2>&1 | tail -n 1)
+ test "$last_line" "=" "ValueError: you must specify a corpus reader"
+}
+
+it_needs_chunked_words() {
+ last_line=$(./analyze_chunked_corpus.py corpora/treebank/tagged --reader nltk.corpus.reader.PlaintextCorpusReader 2>&1 | tail -n 1)
+ test "$last_line" "=" "AttributeError: 'PlaintextCorpusReader' object has no attribute 'chunked_words'"
+}
+
+it_anayzes_treebank_tagged() {
+ first_lines=$(./analyze_chunked_corpus.py corpora/treebank/tagged --reader nltk.corpus.reader.ChunkedCorpusReader 2>&1 | head -n 5)
+ test "$first_lines" "=" "loading corpora/treebank/tagged
+95172 total words
+11994 unique words
+47 tags
+1 IOBs"
+}

0 comments on commit a77a468

Please sign in to comment.