From f2931e64c84c7f191b2196d1cf75a3236a24538f Mon Sep 17 00:00:00 2001 From: Dave Pacheco Date: Mon, 30 Jul 2012 14:00:02 -0700 Subject: [PATCH] move survey results into tests --- .gitignore | 5 ++++ Makefile | 26 +++++++++++++++-- README.md | 40 ++++++++++++++++++++++++++ survey/Makefile | 19 ------------ survey/README.md | 39 ------------------------- {survey => tests}/StringSplitTest.java | 0 {survey => tests}/strsplit.js | 0 {survey => tests}/strsplit.pl | 0 {survey => tests}/strsplit.py | 0 {survey => tests}/testcases.csv | 0 tests/tst.strsplit.sh | 14 ++++----- 11 files changed, 75 insertions(+), 68 deletions(-) create mode 100644 .gitignore delete mode 100644 survey/Makefile delete mode 100644 survey/README.md rename {survey => tests}/StringSplitTest.java (100%) rename {survey => tests}/strsplit.js (100%) rename {survey => tests}/strsplit.pl (100%) rename {survey => tests}/strsplit.py (100%) rename {survey => tests}/testcases.csv (100%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3a1db7a --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +tests/StringSplitTest.class +tests/java.csv +tests/perl.csv +tests/python.csv +tests/js-strsplit.csv diff --git a/Makefile b/Makefile index 782a90a..2c1edf1 100644 --- a/Makefile +++ b/Makefile @@ -22,23 +22,43 @@ NPM = npm # # Files # -JS_FILES := $(shell find lib tests survey -name '*.js') +JS_FILES := $(shell find lib tests -name '*.js') JSL_CONF_NODE = tools/jsl.node.conf JSL_FILES_NODE = $(JS_FILES) JSSTYLE_FILES = $(JS_FILES) +TEST_FILES = java.csv perl.csv python.csv js-strsplit.csv +TEST_OUTPUTS = $(TEST_FILES:%=tests/%) +CLEAN_FILES += $(TEST_OUTPUTS) tests/StringSplitTest.class + # # Repo-specific targets # .PHONY: all -all: +all: $(TEST_OUTPUTS) $(NPM) install -test: +.PHONY: test +test: $(TEST_OUTPUTS) tests/tst.strsplit.sh tests/tst.strpatterns.js @echo All tests passed. 
+tests/java.csv: tests/testcases.csv tests/StringSplitTest.class + java -cp tests StringSplitTest < $< > $@ + +tests/StringSplitTest.class: tests/StringSplitTest.java + javac $^ + +tests/js-strsplit.csv: tests/testcases.csv tests/strsplit.js + tests/strsplit.js < $< > $@ + +tests/perl.csv: tests/testcases.csv tests/strsplit.pl + tests/strsplit.pl < $< > $@ + +tests/python.csv: tests/testcases.csv tests/strsplit.py + tests/strsplit.py < $< > $@ + DISTCLEAN_FILES += node_modules include ./Makefile.targ diff --git a/README.md b/README.md index f8e4a5d..2a945b5 100644 --- a/README.md +++ b/README.md @@ -71,3 +71,43 @@ and here's strsplit: [ 'alpha', 'bravo', 'charlie delta' ] This is the behavior implemented by `split` in Perl, Java, and Python. + +## Background: survey of "split" in Java, Perl, and Python + +The tests directory contains test cases and test programs in Java, Perl, and +Python for figuring out what these languages' string split functions do. +Specifically, these are: + +* Java: String.split. +* Perl: split. +* Python: re.split. While the "split" method on strings may be more common, it + does not handle regular expressions, while the Java and Perl counterparts do. + +For comparison, there's also a test case for this implementation of "strsplit" +in JavaScript. + +The test cases here test both a simple string as a splitter (a space) and a +simple regular expression (`\s+`, indicating some non-zero number of whitespace +characters), as well as various values of the optional "limit" parameter. + +In summary, in all of the cases tried, the Java and Perl implementations are +identical. The Python implementation differs in a few ways: + +* The "limit" argument is off-by-one relative to the Java and Perl APIs. It + represents the maximum number of splits to be made, rather than the maximum + number of returned fields. +* -1 for "limit" is not special, and seems to mean that at most -1 splits will + be made, meaning the string is not split at all.
In Java and Perl, -1 means + there is no limit to the number of returned fields. +* Java and Perl strip trailing empty fields when "limit" is 0. Python never + strips trailing empty fields. + +JavaScript has a "split" method, but it behaves substantially different than all +of these implementations when "limit" is specified. This implementation of +"strsplit" for JavaScript mirrors the Java and Perl implementations, as the +differences in Python do not seem substantial or better. + +The remaining use case that would be nice to address is splitting fields the way +awk(1) and bash(1) do, which is to strip leading whitespace. Python's *string* +split also does this, but only if you specify None as the pattern. strsplit +doesn't support this; just trim the string first if you want that behavior. diff --git a/survey/Makefile b/survey/Makefile deleted file mode 100644 index aff6e8e..0000000 --- a/survey/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -all: java.csv perl.csv python.csv js-strsplit.csv - -java.csv: testcases.csv | StringSplitTest.class - java StringSplitTest < $< > $@ - -StringSplitTest.class: StringSplitTest.java - javac $^ - -perl.csv: testcases.csv strsplit.pl - ./strsplit.pl < $< > $@ - -python.csv: testcases.csv strsplit.py - ./strsplit.py < $< > $@ - -js-strsplit.csv: testcases.csv strsplit.js - ./strsplit.js < $< > $@ - -clean: - rm -f java.csv perl.csv python.csv js-strsplit.csv StringSplitTest.class diff --git a/survey/README.md b/survey/README.md deleted file mode 100644 index 5cde910..0000000 --- a/survey/README.md +++ /dev/null @@ -1,39 +0,0 @@ -# Survey of "split" in Java, Perl, and Python - -This directory contains some test cases and test programs in Java, Perl, and -Python for figuring out what these language's string split function does. -Specifically, this is: - -* Java: String.split. -* Perl: split. -* Python: re.split. 
While the "split" method on strings may be more common, it - does not handle regular expressions, while the Java and Perl counterparts do. - -For comparison, there's also a test case for this implementation of "strsplit". -in JavaScript. - -The test cases here test both a simple string as a splitter (a space) and a -simple regular expression (`\s+`, indicating some non-zero number of whitespace -characters), as well as various values of the optional "limit" parameter. - -In summary, in all of the cases tried, the Java and Perl implementations are -identical. The Python implementation differs in a few ways: - -* The "limit" argument is off-by-one relative to the Java and Perl APIs. It - represents the maximum number of splits to be made, rather than the maximum - number of returned fields. -* -1 for "limit" is not special, and seems to mean that at most -1 splits will - be made, meaning the string is not split at all. In Java and Perl, -1 means - there is no limit to the number of returned fields. -* Java and Perl strip trailing empty fields when "limit" is 0. Python never - strips trailing empty fields. - -JavaScript has a "split" method, but it behaves substantially different than all -of these implementations when "limit" is specified. This implementation of -"strsplit" for JavaScript mirrors the Java and Perl implementations, as the -differences in Python do not seem substantial or better. - -The remaining use case that would be nice to address is splitting fields the way -awk(1) and bash(1) do, which is to strip leading whitespace. Python's *string* -split also does this, but only if you specify None as the pattern. strsplit -doesn't support this; just trim the string first if you want that behavior. 
diff --git a/survey/StringSplitTest.java b/tests/StringSplitTest.java similarity index 100% rename from survey/StringSplitTest.java rename to tests/StringSplitTest.java diff --git a/survey/strsplit.js b/tests/strsplit.js similarity index 100% rename from survey/strsplit.js rename to tests/strsplit.js diff --git a/survey/strsplit.pl b/tests/strsplit.pl similarity index 100% rename from survey/strsplit.pl rename to tests/strsplit.pl diff --git a/survey/strsplit.py b/tests/strsplit.py similarity index 100% rename from survey/strsplit.py rename to tests/strsplit.py diff --git a/survey/testcases.csv b/tests/testcases.csv similarity index 100% rename from survey/testcases.csv rename to tests/testcases.csv diff --git a/tests/tst.strsplit.sh b/tests/tst.strsplit.sh index ea17185..cf81e61 100755 --- a/tests/tst.strsplit.sh +++ b/tests/tst.strsplit.sh @@ -2,18 +2,18 @@ # # The main test suite for strsplit is to run the body of test cases in -# ../survey/ and compare the output to that of Java and Perl, whose -# implementations we intend to mirror exactly. errexit will cause this script -# to exit with failure if any of these operations fail. +# testcases.csv and compare the output to that of Java and Perl, whose +# implementations we intend to mirror exactly. All of these outputs have been +# generated automatically by "make test". errexit will cause this script to +# exit with failure if any of these operations fail. # set -o errexit -surveydir=$(dirname $0)/../survey +cd "$(dirname "$0")" set -o xtrace -make -C $surveydir perl.csv java.csv js-strsplit.csv -diff $surveydir/js-strsplit.csv $surveydir/perl.csv > /dev/null -diff $surveydir/js-strsplit.csv $surveydir/java.csv > /dev/null +diff js-strsplit.csv perl.csv > /dev/null +diff js-strsplit.csv java.csv > /dev/null set +o xtrace echo "Test PASSED"