From 759a20da421e4a049c7a22e4e9e791e5b82e8b94 Mon Sep 17 00:00:00 2001 From: Andy C Date: Wed, 20 Jul 2016 09:31:04 -0700 Subject: [PATCH] Big build and test cleanup. - Inline tests/Compare.* into tests/test.sh, and fix various issues, like assuming '.' is in the $PATH - Add targets instrumented with LLVM sanitizers to the Makefile, and support running tests with them TODO: Describe these changes in more detail in README, and add instructions. --- Makefile | 190 ++++++++++++++++++++++++-------- run.sh | 20 +++- test-results/2016-06-12.txt | 50 +++++++++ tests/Compare.T | 10 -- tests/Compare.drek | 2 +- tests/Compare.p | 17 --- tests/Compare.t | 17 --- tests/Compare.tt | 49 --------- tests/REGRESS | 21 ---- tests/T.lilly | 28 ----- tests/ctimes | 1 + tests/test.sh | 211 ++++++++++++++++++++++++++++++++++++ 12 files changed, 426 insertions(+), 190 deletions(-) create mode 100644 test-results/2016-06-12.txt delete mode 100755 tests/Compare.T delete mode 100755 tests/Compare.p delete mode 100755 tests/Compare.t delete mode 100755 tests/Compare.tt delete mode 100755 tests/REGRESS delete mode 100755 tests/T.lilly create mode 100755 tests/test.sh diff --git a/Makefile b/Makefile index 8d52acb..c664ba1 100644 --- a/Makefile +++ b/Makefile @@ -22,68 +22,168 @@ # THIS SOFTWARE. 
# ****************************************************************/ -CFLAGS = -g -CFLAGS = -O2 -CFLAGS = - -CC = gcc -Wall -g -Wwrite-strings -CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov -CC = gcc -g -Wall -pedantic -CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing - -YACC = bison -d -y -#YACC = yacc -d -S -#YFLAGS = -d -S - # -S uses sprintf in yacc parser instead of sprint - -OFILES = b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o - -SOURCE = awk.h ytab.c ytab.h proto.h awkgram.y lex.c b.c main.c \ - maketab.c parse.c lib.c run.c tran.c proctab.c - -LISTING = awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \ - lib.c run.c tran.c +# CLANG_DIR should be set to build and run tests under sanitizers. +ifdef CLANG_DIR + san_cc := $(CLANG_DIR)/bin/clang +else + san_cc := clang +endif + +# -d: produce a header file +# -y: emulate POSIX yacc +YFLAGS := -d -y + +obj_files := \ + b.o \ + main.o \ + parse.o \ + proctab.o \ + tran.o \ + lib.o \ + run.o \ + lex.o \ + ytab.o + +default: bwk-dbg + +all: bwk bwk-dbg bwk-cov bwk-asan bwk-msan bwk-ubsan bwk-sancov-func test_bin + +test_bin: tests/echo tests/time + +HEADER_DEPS := awk.h ytab.h proto.h + +compile_c = $(CC) $(CFLAGS) -c -o $@ $< + +# TODO: Remove all this duplication. Options: +# - eval ? +# - shell script to gen fragments, and then include? Like .d fragments. + +# Compile objects +_obj/bwk/%.o: %.c $(HEADER_DEPS) + mkdir -p _obj/bwk + $(compile_c) + +_obj/bwk-dbg/%.o: %.c $(HEADER_DEPS) + mkdir -p _obj/bwk-dbg + $(compile_c) + +_obj/bwk-cov/%.o: %.c $(HEADER_DEPS) + mkdir -p _obj/bwk-cov + $(compile_c) + +_obj/bwk-asan/%.o: %.c $(HEADER_DEPS) + mkdir -p _obj/bwk-asan + $(compile_c) + +_obj/bwk-msan/%.o: %.c $(HEADER_DEPS) + mkdir -p _obj/bwk-msan + $(compile_c) + +_obj/bwk-ubsan/%.o: %.c $(HEADER_DEPS) + mkdir -p _obj/bwk-ubsan + $(compile_c) + +_obj/bwk-sancov-func/%.o: %.c $(HEADER_DEPS) + mkdir -p _obj/bwk-sancov-func + $(compile_c) + +# NOTE: What is ALLOC used for? 
Specify a different allocator, I guess. +link_bwk = $(CC) -o $@ $(CFLAGS) $^ $(ALLOC) -lm + +# "release" target, stripped +bwk: CFLAGS += -O4 -Wall -pedantic -fno-strict-aliasing +bwk: $(addprefix _obj/bwk/,$(obj_files)) + $(link_bwk) + strip bwk + +# "debug" target compiles faster +bwk-dbg: CFLAGS += -Wall -pedantic +bwk-dbg: $(addprefix _obj/bwk-dbg/,$(obj_files)) + $(link_bwk) + +# Build instrumented for coverage. +# gcov f1.c; cat f1.c.gcov +bwk-cov: CFLAGS += -fprofile-arcs -ftest-coverage +bwk-cov: $(addprefix _obj/bwk-cov/,$(obj_files)) + $(link_bwk) + +# Binaries built with Clang sanitizers. All of these should be unstripped +# because they show stack traces at runtime. +bwk-asan: CC := $(san_cc) +bwk-asan: CFLAGS += -fsanitize=address -g +bwk-asan: $(addprefix _obj/bwk-asan/,$(obj_files)) + $(link_bwk) + +bwk-msan: CC := $(san_cc) +bwk-msan: CFLAGS += -fsanitize=memory -g +bwk-msan: $(addprefix _obj/bwk-msan/,$(obj_files)) + $(link_bwk) + +bwk-ubsan: CC := $(san_cc) +bwk-ubsan: CFLAGS += -fsanitize=undefined -fno-omit-frame-pointer -g +bwk-ubsan: $(addprefix _obj/bwk-ubsan/,$(obj_files)) + $(link_bwk) + +# NOTES: +# - Coverage works on top of sanitizers. Just pick one I guess. +# - There are different types of coverage, e.g. -fsanitize-coverage = func, bb +# or edge. Picking one for now. 
+bwk-sancov-func: CC := $(san_cc) +bwk-sancov-func: CFLAGS += -fsanitize=memory -g -fsanitize-coverage=func +bwk-sancov-func: $(addprefix _obj/bwk-sancov-func/,$(obj_files)) + $(link_bwk) + +# +# Code Generation +# + +ytab.c ytab.h: awk.h proto.h awkgram.y + $(YACC) -o ytab.c $(YFLAGS) awkgram.y + +proctab.c: maketab + ./maketab >proctab.c -SHIP = README FIXES $(SOURCE) ytab[ch].bak makefile \ - awk.1 +maketab: ytab.h maketab.c + $(CC) $(CFLAGS) maketab.c -o maketab -a.out: ytab.o $(OFILES) - $(CC) $(CFLAGS) ytab.o $(OFILES) $(ALLOC) -lm +# +# Test utils +# -$(OFILES): awk.h ytab.h proto.h +tests/echo: tests/echo.c +tests/time: tests/time.c -ytab.o: awk.h proto.h awkgram.y - $(YACC) $(YFLAGS) awkgram.y - mv y.tab.c ytab.c - mv y.tab.h ytab.h - $(CC) $(CFLAGS) -c ytab.c +# +# Release +# -proctab.c: maketab - ./maketab >proctab.c +source := awk.h ytab.c ytab.h proto.h awkgram.y lex.c b.c main.c \ + maketab.c parse.c lib.c run.c tran.c proctab.c -maketab: ytab.h maketab.c - $(CC) $(CFLAGS) maketab.c -o maketab +ship := README FIXES $(source) ytab[ch].bak makefile awk.1 bundle: @cp ytab.h ytabh.bak @cp ytab.c ytabc.bak - @bundle $(SHIP) + @bundle $(ship) tar: @cp ytab.h ytabh.bak @cp ytab.c ytabc.bak - @bundle $(SHIP) >awk.shar - @tar cf awk.tar $(SHIP) + @bundle $(ship) >awk.shar + @tar cf awk.tar $(ship) gzip awk.tar ls -l awk.tar.gz - @zip awk.zip $(SHIP) + @zip awk.zip $(ship) ls -l awk.zip -names: - @echo $(LISTING) - clean: + rm -r -f _obj rm -f \ - a.out *.o *.obj maketab \ - *.bb *.bbg *.da *.gcov *.gcno *.gcda \ - proctab.c ytab.c ytab.h + a.out bwk bwk-* maketab \ + *.bb *.bbg *.da \ + proctab.c ytab.c ytab.h \ + tests/echo tests/time + +.PHONY: default all test_bin clean +.PHONY: bundle tar diff --git a/run.sh b/run.sh index 1b0f000..5e1949b 100755 --- a/run.sh +++ b/run.sh @@ -9,8 +9,24 @@ set -o errexit download() { mkdir -p _tmp - wget --directory _tmp 'https://www.cs.princeton.edu/~bwk/btl.mirror/awk.tar.gz' - wget --directory _tmp 
'https://www.cs.princeton.edu/~bwk/btl.mirror/awktest.a' + wget --directory _tmp \ + 'https://www.cs.princeton.edu/~bwk/btl.mirror/awk.tar.gz' + wget --directory _tmp \ + 'https://www.cs.princeton.edu/~bwk/btl.mirror/awktest.a' +} + +count() { + ls *.[chy] | grep -v -E 'ytab|proctab' | xargs wc -l | sort -n +} + +coverage() { + mkdir -p _gcov + rm -f --verbose _gcov/* + + # After running tests with bwk-cov, .gcno and .gcda files are in + # _obj/bwk-cov, next to the objects. + gcov --object-directory _obj/bwk-cov/ *.c + mv *.gcov _gcov } "$@" diff --git a/test-results/2016-06-12.txt b/test-results/2016-06-12.txt new file mode 100644 index 0000000..c90f498 --- /dev/null +++ b/test-results/2016-06-12.txt @@ -0,0 +1,50 @@ +On Ubuntu 14.04, comparing against mawk. + +CPU: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz + + new old new/old + + 0.08 0.02 4.000 tt.02: + 0.08 0.03 2.667 tt.02a: + 0.05 0.01 5.000 tt.03: + 0.05 0.02 2.500 tt.03a: + 0.25 0.11 2.273 tt.04: + 0.27 0.10 2.700 tt.05: + 0.06 0.02 3.000 tt.06: + 0.05 0.02 2.500 tt.07: + 0.09 0.04 2.250 tt.12: + 0.87 0.12 7.250 tt.13: + 1.10 0.22 5.000 tt.13a: + 0.01 0.01 1.000 tt.14: + 0.52 0.21 2.476 tt.15: + 0.28 0.12 2.333 tt.16: + 2.72 0.67 4.060 tt.big: + + 6.69 1.72 + +avg new/old = 2.334 +total new/old = 3.890 +21 tests + + new old new/old + + 0.08 0.02 4.000 tt.02: + 0.08 0.03 2.667 tt.02a: + 0.05 0.02 2.500 tt.03: + 0.05 0.02 2.500 tt.03a: + 0.25 0.11 2.273 tt.04: + 0.27 0.10 2.700 tt.05: + 0.05 0.01 5.000 tt.06: + 0.05 0.02 2.500 tt.07: + 0.09 0.04 2.250 tt.12: + 0.88 0.12 7.333 tt.13: + 1.08 0.22 4.909 tt.13a: + 0.52 0.20 2.600 tt.15: + 0.29 0.13 2.231 tt.16: + 2.70 0.67 4.030 tt.big: + + 6.64 1.71 + +avg new/old = 2.262 +total new/old = 3.883 +21 tests diff --git a/tests/Compare.T b/tests/Compare.T deleted file mode 100755 index 5c24867..0000000 --- a/tests/Compare.T +++ /dev/null @@ -1,10 +0,0 @@ - -oldawk=${oldawk-awk} -awk=${awk-../a.out} - -echo oldawk=$oldawk, awk=$awk - -for i in T.* -do - $i -done diff --git 
a/tests/Compare.drek b/tests/Compare.drek index 9a3ff2d..1275332 100755 --- a/tests/Compare.drek +++ b/tests/Compare.drek @@ -32,4 +32,4 @@ do cat foo1t foo2t >>footot done -ctimes footot +./ctimes footot diff --git a/tests/Compare.p b/tests/Compare.p deleted file mode 100755 index 487358d..0000000 --- a/tests/Compare.p +++ /dev/null @@ -1,17 +0,0 @@ - -oldawk=${oldawk-awk} -awk=${awk-../a.out} - -echo oldawk=$oldawk, awk=$awk - -for i -do - echo "$i:" - $oldawk -f $i test.countries test.countries >foo1 - $awk -f $i test.countries test.countries >foo2 - if cmp -s foo1 foo2 - then true - else echo -n "$i: BAD ..." - fi - diff -b foo1 foo2 | sed -e 's/^/ /' -e 10q -done diff --git a/tests/Compare.t b/tests/Compare.t deleted file mode 100755 index 6ba3704..0000000 --- a/tests/Compare.t +++ /dev/null @@ -1,17 +0,0 @@ - -oldawk=${oldawk-myawk} -awk=${awk-../a.out} - -echo oldawk=$oldawk, awk=$awk - -for i -do - echo "$i:" - $oldawk -f $i test.data >foo1 - $awk -f $i test.data >foo2 - if cmp -s foo1 foo2 - then true - else echo -n "$i: BAD ..." 
- fi - diff -b foo1 foo2 | sed -e 's/^/ /' -e 10q -done diff --git a/tests/Compare.tt b/tests/Compare.tt deleted file mode 100755 index ca828d2..0000000 --- a/tests/Compare.tt +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/sh - -oldawk=${oldawk-awk} -awk=${awk-../a.out} - -echo compiling time.c -gcc time.c -o time -time=./time - -echo time command = $time - -#case `uname` in -#SunOS) -# time=/usr/bin/time ;; -#Linux) -# time=/usr/bin/time ;; -#*) -# time=time ;; -#esac - -echo oldawk = $oldawk, awk = $awk, time command = $time - - -# an arbitrary collection of input data - -cat td.1 td.1 >foo.td -sed 's/^........................//' td.1 >>foo.td -pr -m td.1 td.1 td.1 >>foo.td -pr -2 td.1 >>foo.td -cat bib >>foo.td -wc foo.td - -td=foo.td ->footot - -for i in $* -do - echo $i "($oldawk vs $awk)": - # ind <$i - $time $oldawk -f $i $td >foo2 2>foo2t - cat foo2t - $time $awk -f $i $td >foo1 2>foo1t - cat foo1t - cmp foo1 foo2 - echo $i: >>footot - cat foo1t foo2t >>footot -done - -ctimes footot diff --git a/tests/REGRESS b/tests/REGRESS deleted file mode 100755 index 923f1cc..0000000 --- a/tests/REGRESS +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh - -uname -a -gcc echo.c -o echo && echo echo compiled - -oldawk=${oldawk-awk} -awk=${awk-../a.out} - -echo oldawk=$oldawk, awk=$awk - -oldawk=$oldawk awk=$awk Compare.t t.* - echo `ls t.* | wc -l` tests; echo - -oldawk=$oldawk awk=$awk Compare.p p.? 
p.??* - echo `ls p.* | wc -l` tests; echo - -oldawk=$oldawk awk=$awk Compare.T - echo `grep '\$awk' T.* | wc -l` tests; echo - -oldawk=$oldawk awk=$awk Compare.tt tt.* - echo `ls tt.* | wc -l` tests; echo diff --git a/tests/T.lilly b/tests/T.lilly deleted file mode 100755 index 1a6b8b8..0000000 --- a/tests/T.lilly +++ /dev/null @@ -1,28 +0,0 @@ -echo T.lilly: miscellaneous RE tests from Bruce Lilly - -awk=${awk-../a.out} - -rm -f foo -awk ' -/./ { - print $0 >"foo" - print "###", NR, $0 - system("awk -f foo <\"lilly.ifile\" ") - close "foo" -}' foo1 2>&1 - -rm -f foo -$awk ' -/./ { - print $0 >"foo" - print "###", NR, $0 - system("../a.out -f foo <\"lilly.ifile\" ") - close "foo" -}' foo2 2>&1 - -echo `cat lilly.progs | wc -l` tests - -sed -e 's/awk://' -e 's/Syntax/syntax/' foo1 >glop1 -sed 's/..\/a.out://' foo2 >glop2 -diff glop1 glop2 >lilly.diff || echo 'bad: T.lilly is different' -echo diff --git a/tests/ctimes b/tests/ctimes index 96ac465..294b9b5 100755 --- a/tests/ctimes +++ b/tests/ctimes @@ -1,3 +1,4 @@ +#!/bin/sh awk ' BEGIN { OFS = "\t" diff --git a/tests/test.sh b/tests/test.sh new file mode 100755 index 0000000..f847d3a --- /dev/null +++ b/tests/test.sh @@ -0,0 +1,211 @@ +#!/bin/sh +# +# Test driver. +# +# TODO: Import busybox tests? Need the "testing" framework. + +set -o nounset + +# TODO: share with run.sh/ +CLANG_DIR=~/install/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04 +# dash weirdness; ~ is not expanded unless we do it separately? +# http://stackoverflow.com/questions/8441473/tilde-expansion-doesnt-work-when-i-logged-into-gui +readonly CLANG_DIR +readonly sym=$CLANG_DIR/bin/llvm-symbolizer + +# These are needed to show line numbers in stack traces. +export ASAN_SYMBOLIZER_PATH=$sym +export MSAN_SYMBOLIZER_PATH=$sym +export UBSAN_SYMBOLIZER_PATH=$sym + +gcc echo.c -o echo && echo echo compiled + +# A constant that is used by all the tests. Not changing this. 
+readonly AWK=../a.out + +_print_header() { + local oldawk=$1 + local newawk=$2 + + uname -a + echo "Comparing $newawk vs old $oldawk" +} + +# ../a.out is hard-coded, so we have to use it +_prepare_bin() { + local awk=$1 + + ( cd .. && make $awk test_bin ) # NOTE: pushd is not in /bin/sh + + ln -s -f --verbose $awk ../a.out +} + +setup() { + mkdir -p _tmp + ln -s -f --verbose /bin/busybox _tmp/awk +} + +# We have the expected data +golden() { + local awk=${1:-bwk} + + _prepare_bin $awk + + for i in T.*; do + awk=$AWK ./$i + done + # TODO: Do the tests all use $awk, or a.out too? + echo "Ran $(grep '$awk' T.* | wc -l) test cases" + echo +} + +# Wow, this exposes tons of divergences and bugs. +# NOTE: Tests are not consistent because "BAD" is not always echoed to stderr! +# Sometimes it goes to stdout. + +golden_all() { + #golden /usr/bin/mawk > _tmp/golden-mawk.log + golden /usr/bin/gawk > _tmp/golden-gawk.log + #golden _tmp/awk > _tmp/golden-busybox.log + + grep FAIL _tmp/golden-*.log +} + +# Compare vs another version -- book examples +compare_book() { + local oldawk=${1:-awk} + local newawk=${2:-bwk} + + _print_header $oldawk $newawk + _prepare_bin $newawk + + for i in p.*; do + echo "$i:" + $oldawk -f $i test.countries test.countries >foo1 + $AWK -f $i test.countries test.countries >foo2 + if ! 
cmp -s foo1 foo2; then + echo -n "$i: FAILED" + fi + diff -b foo1 foo2 | sed -e 's/^/ /' -e 10q + done + + echo "Ran $(ls p.* | wc -l) test files" + echo +} + +# A list of one line programs in lilly.progs, read from lilly.ifile +compare_lilly() { + local oldawk=${1:-awk} + local newawk=${2:-bwk} + + _print_header $oldawk $newawk + _prepare_bin $newawk + + mkdir -p _tmp + local test_runner=' +/./ { + tmp = "_tmp/lilly_line" + print $0 > tmp + print "###", NR, $0 + cmd = awk " -f " tmp " < lilly.ifile " + system(cmd) + close(tmp) +}' + + # NOTE: Somehow './time' doesn't work here + awk -v awk=$oldawk "$test_runner" <lilly.progs >foo1 2>&1 + awk -v awk=$AWK "$test_runner" <lilly.progs >foo2 2>&1 + + echo "Ran $(wc -l <lilly.progs) lilly tests" + + if ! diff foo1 foo2 >lilly.diff; then + echo 'FAILED: T.lilly, see lilly.diff' + cat lilly.diff + fi + echo +} + +# Compare vs another version +compare_t() { + local oldawk=${1:-awk} + local newawk=${2:-bwk} + + _print_header $oldawk $newawk + _prepare_bin $newawk + + for i in t.*; do + echo "$i:" + $oldawk -f $i test.data >foo1 + $AWK -f $i test.data >foo2 + if ! cmp -s foo1 foo2; then + echo -n "$i: FAILED" + fi + # indent the diff + diff -b foo1 foo2 | sed -e 's/^/ /' -e 10q + done + + echo "Ran $(ls t.* | wc -l) test files" + echo +} + +# Creates a file called foo.td +_create_perf_data() { + cat td.1 td.1 >foo.td + sed 's/^........................//' td.1 >>foo.td + pr -m td.1 td.1 td.1 >>foo.td + pr -2 td.1 >>foo.td + cat bib >>foo.td + wc foo.td +} + +# Perf tests on big data. +# +# TODO: Assert that a.out points to bwk, for speed? Though when you run +# coverage and ASAN, you don't want this. +compare_perf() { + local oldawk=${1:-awk} + local newawk=${2:-bwk} + + _print_header $oldawk $newawk + _prepare_bin $newawk + + time=./time + + _create_perf_data + + # an arbitrary collection of input data + local td=foo.td + # Create the timing log, or empty it if it already exists (unlike touch). 
+ mkdir -p _tmp && : >_tmp/perf_timing.log + + for i in tt.*; do + echo $i "($oldawk vs $newawk)": + # ind <$i + $time $oldawk -f $i $td >foo2 2>foo2t + cat foo2t + $time $AWK -f $i $td >foo1 2>foo1t + cat foo1t + cmp foo1 foo2 + + # Accumulate timing + echo $i: >>_tmp/perf_timing.log + cat foo1t foo2t >>_tmp/perf_timing.log + done + + ./ctimes _tmp/perf_timing.log + + echo "Ran $(ls tt.* | wc -l) test files" + echo +} + +# mawk < gawk < bwk < busybox awk in terms of running time. +compare_perf_all() { + # TODO: Write reports to different files. + #compare_perf /usr/bin/mawk + #compare_perf /usr/bin/gawk + + # busybox awk + compare_perf _tmp/awk +} + +"$@"