Skip to content

Commit

Permalink
Move all converters to starch-based implementations (#97)
Browse files Browse the repository at this point in the history
* Switch all conversion routines to use starch.

main user-visible changes:

 * ensure you check out submodules ('git clone --recurse-submodules')
 * --version shows the CPU features and DSP implementations in use
 * --wisdom allows overriding of the built-in architecture wisdom
 * --dcfilter no longer supported
 * "starch-benchmark" binary will benchmark all options on the
   current machine and can produce a wisdom file to feed to
   the --wisdom option

If you have a use case for --dcfilter, please get in touch and
let me know - it's an edge case and for now there's no starch/DSP
support for it, but support can be written if needed.

In almost all cases the new conversion routines are slightly or
substantially faster than the old conversion routines. The only case
that is slower is SC16/SC16Q11 on a Pi 0, which is around 10% slower
due to changing from heavily approximated lookup tables to higher
quality results (but SC16 is probably already out of reach of a Pi 0)

* No need to build with SC16Q11_TABLE_BITS any more

* Add oneoff/uc8_capture_stats

(reads a UC8 capture; measures min/max/mean I and Q)

* Switch UC8 conversion to 127.4 center, 128 range.

Looking at actual UC8 captures from a RTL2832, the mean I and Q
are actually at 127.4, so use that as the zero point.

This means that the resulting I/Q maximum values could be as large as
127.6. Switch to 128 for simplicity.

* Switch to the new UC8 zero offset in benchmarks, fix some bugs

* Fix some bugs in SC16/SC16Q11 validation, tighten the max error requirements

* Ditch UC8 approximation path, add a NEON VRSQRTE path.

* Tweak the SC16 exact path, add a new impl that uses a mix of
u32 & floats.

* SC16Q11 impl tweaks:

 * add a u32->float exact path
 * ditch the approximation path
 * add a NEON VRSQRTE path
 * add a 12-bit table path (using the full signed I/Q value, not absolute value)

* Ditch SC16 approximation path, add NEON vrsqrte path

* Add oneoff/dsp_error_measurement

This runs sample input through the DSP functions that are
allowed to be inexact and dumps the results as a TSV suitable for
feeding to gnuplot to look at the actual errors.

* Update make clean, make wisdom targets

* Update wisdom based on benchmarking

* Preserve the raw wisdom benchmark data

* Update to latest starch

* Update .gitignore for new wisdom files

* Update starch generated code

* Build starch-benchmark as part of the 'all' target

* Use wisdom from /etc/dump1090-fa/wisdom.local if present

* Package starch-benchmark and a helper script to generate local wisdom data

* Remove submodules in preparation for importing them directly

* Import cpu_features v0.6.0 from https://github.com/google/cpu_features/releases/tag/v0.6.0

* Import starch at commit a725c8491dc33a321565d451b385131e589d8490
from https://github.com/flightaware/starch
  • Loading branch information
mutability committed Jan 21, 2021
1 parent c7675b3 commit bff71dc
Show file tree
Hide file tree
Showing 134 changed files with 17,436 additions and 480 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Expand Up @@ -12,4 +12,8 @@ view1090
faup1090
package-wheezy
oneoff/convert_benchmark
oneoff/uc8_capture_stats
oneoff/dsp_error_measurement
oneoff/decode_comm_b
starch-benchmark
wisdom.local
78 changes: 65 additions & 13 deletions Makefile
Expand Up @@ -2,12 +2,12 @@ PROGNAME=dump1090

DUMP1090_VERSION ?= unknown

CPPFLAGS += -DMODES_DUMP1090_VERSION=\"$(DUMP1090_VERSION)\" -DMODES_DUMP1090_VARIANT=\"dump1090-fa\"
CPPFLAGS += -I. -DMODES_DUMP1090_VERSION=\"$(DUMP1090_VERSION)\" -DMODES_DUMP1090_VARIANT=\"dump1090-fa\"

DIALECT = -std=c11
CFLAGS += $(DIALECT) -O3 -g -Wall -Wmissing-declarations -Werror -W -D_DEFAULT_SOURCE -fno-common
LIBS = -lpthread -lm
SDR_OBJ = sdr.o fifo.o sdr_ifile.o
SDR_OBJ = cpu.o sdr.o fifo.o sdr_ifile.o dsp/helpers/tables.o

# Try to autodetect available libraries via pkg-config if no explicit setting was used
PKGCONFIG=$(shell pkg-config --version >/dev/null 2>&1 && echo "yes" || echo "no")
Expand Down Expand Up @@ -42,33 +42,43 @@ endif
UNAME := $(shell uname)

ifeq ($(UNAME), Linux)
CFLAGS += -D_DEFAULT_SOURCE
include Makefile.cpufeatures
CPPFLAGS += -D_DEFAULT_SOURCE
LIBS += -lrt
LIBS_USB += -lusb-1.0
CPUFEATURES ?= yes
endif

ifeq ($(UNAME), Darwin)
ifneq ($(shell sw_vers -productVersion | egrep '^10\.([0-9]|1[01])\.'),) # Mac OS X ver <= 10.11
CFLAGS += -DMISSING_GETTIME
CPPFLAGS += -DMISSING_GETTIME
COMPAT += compat/clock_gettime/clock_gettime.o
endif
CFLAGS += -DMISSING_NANOSLEEP
CPPFLAGS += -DMISSING_NANOSLEEP
COMPAT += compat/clock_nanosleep/clock_nanosleep.o
LIBS_USB += -lusb-1.0
CPUFEATURES ?= yes
endif

ifeq ($(UNAME), OpenBSD)
CFLAGS += -DMISSING_NANOSLEEP
CPPFLAGS += -DMISSING_NANOSLEEP
COMPAT += compat/clock_nanosleep/clock_nanosleep.o
LIBS_USB += -lusb-1.0
endif

ifeq ($(UNAME), FreeBSD)
CFLAGS += -D_DEFAULT_SOURCE
CPPFLAGS += -D_DEFAULT_SOURCE
LIBS += -lrt
LIBS_USB += -lusb
endif

CPUFEATURES ?= no

ifeq ($(CPUFEATURES),yes)
include Makefile.cpufeatures
CPPFLAGS += -DENABLE_CPUFEATURES -Icpu_features/include
endif

RTLSDR ?= yes
BLADERF ?= yes

Expand Down Expand Up @@ -122,22 +132,47 @@ ifeq ($(LIMESDR), yes)
LIBS_SDR += $(shell pkg-config --libs LimeSuite)
endif

all: showconfig dump1090 view1090

##
## starch (runtime DSP code selection) mix, architecture-specific
##

# Machine type used to pick the DSP mix. Defaults to the build host's
# `uname -m`; because of ?= it can be overridden from the environment or
# the command line (e.g. `make ARCH=armv7l`).
ARCH ?= $(shell uname -m)
# Select STARCH_MIX (which names the dsp/generated/makefile.* fragment that is
# included further down) and define the matching -DSTARCH_MIX_* macro.
# The first matching branch wins, so CPUFEATURES is checked before ARCH.
ifneq ($(CPUFEATURES),yes)
# need to be able to detect CPU features at runtime to enable any non-standard compiler flags
STARCH_MIX := generic
CPPFLAGS += -DSTARCH_MIX_GENERIC
else ifeq ($(ARCH),x86_64)
# AVX, AVX2
STARCH_MIX := x86
CPPFLAGS += -DSTARCH_MIX_X86
else ifneq (,$(findstring arm,$(ARCH)))
# ARMv7 NEON
# NOTE: findstring matches any ARCH value containing "arm" (armv6l, armv7l,
# arm64, ...). "aarch64" does not contain "arm" and falls through to generic.
STARCH_MIX := arm
CPPFLAGS += -DSTARCH_MIX_ARM
else
# Any other architecture: fall back to the generic mix.
STARCH_MIX := generic
CPPFLAGS += -DSTARCH_MIX_GENERIC
endif

all: showconfig dump1090 view1090 starch-benchmark

STARCH_COMPILE := $(CC) $(CPPFLAGS) $(CFLAGS) -c
include dsp/generated/makefile.$(STARCH_MIX)

showconfig:
@echo "Building with:" >&2
@echo " Version string: $(DUMP1090_VERSION)" >&2
@echo " DSP mix: $(STARCH_MIX)" >&2
@echo " RTLSDR support: $(RTLSDR)" >&2
@echo " BladeRF support: $(BLADERF)" >&2
@echo " HackRF support: $(HACKRF)" >&2
@echo " LimeSDR support: $(LIMESDR)" >&2

all: dump1090 view1090

%.o: %.c *.h
$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@

dump1090: dump1090.o anet.o interactive.o mode_ac.o mode_s.o comm_b.o net_io.o crc.o demod_2400.o stats.o cpr.o icao_filter.o track.o util.o convert.o ais_charset.o $(SDR_OBJ) $(COMPAT)
dump1090: dump1090.o anet.o interactive.o mode_ac.o mode_s.o comm_b.o net_io.o crc.o demod_2400.o stats.o cpr.o icao_filter.o track.o util.o convert.o ais_charset.o $(SDR_OBJ) $(COMPAT) $(CPUFEATURES_OBJS) $(STARCH_OBJS)
$(CC) -g -o $@ $^ $(LDFLAGS) $(LIBS) $(LIBS_SDR) -lncurses

view1090: view1090.o anet.o interactive.o mode_ac.o mode_s.o comm_b.o net_io.o crc.o stats.o cpr.o icao_filter.o track.o util.o ais_charset.o $(COMPAT)
Expand All @@ -146,8 +181,11 @@ view1090: view1090.o anet.o interactive.o mode_ac.o mode_s.o comm_b.o net_io.o c
faup1090: faup1090.o anet.o mode_ac.o mode_s.o comm_b.o net_io.o crc.o stats.o cpr.o icao_filter.o track.o util.o ais_charset.o $(COMPAT)
$(CC) -g -o $@ $^ $(LDFLAGS) $(LIBS)

starch-benchmark: cpu.o dsp/helpers/tables.o $(CPUFEATURES_OBJS) $(STARCH_OBJS) $(STARCH_BENCHMARK_OBJ)
$(CC) -g -o $@ $^ $(LDFLAGS) $(LIBS)

clean:
rm -f *.o oneoff/*.o compat/clock_gettime/*.o compat/clock_nanosleep/*.o dump1090 view1090 faup1090 cprtests crctests convert_benchmark
rm -f *.o oneoff/*.o compat/clock_gettime/*.o compat/clock_nanosleep/*.o cpu_features/src/*.o dsp/generated/*.o dsp/helpers/*.o $(CPUFEATURES_OBJS) dump1090 view1090 faup1090 cprtests crctests oneoff/convert_benchmark oneoff/decode_comm_b oneoff/dsp_error_measurement oneoff/uc8_capture_stats starch-benchmark

test: cprtests
./cprtests
Expand All @@ -161,8 +199,22 @@ crctests: crc.c crc.h
benchmarks: oneoff/convert_benchmark
oneoff/convert_benchmark

oneoff/convert_benchmark: oneoff/convert_benchmark.o convert.o util.o
oneoff/convert_benchmark: oneoff/convert_benchmark.o convert.o util.o dsp/helpers/tables.o cpu.o $(CPUFEATURES_OBJS) $(STARCH_OBJS)
$(CC) $(CPPFLAGS) $(CFLAGS) -g -o $@ $^ -lm -lpthread

oneoff/decode_comm_b: oneoff/decode_comm_b.o comm_b.o ais_charset.o
$(CC) $(CPPFLAGS) $(CFLAGS) -g -o $@ $^ -lm

oneoff/dsp_error_measurement: oneoff/dsp_error_measurement.o dsp/helpers/tables.o cpu.o $(CPUFEATURES_OBJS) $(STARCH_OBJS)
$(CC) $(CPPFLAGS) $(CFLAGS) -g -o $@ $^ -lm

oneoff/uc8_capture_stats: oneoff/uc8_capture_stats.o
$(CC) $(CPPFLAGS) $(CFLAGS) -g -o $@ $^ -lm

starchgen:
dsp/starchgen.py .

# Produce a local wisdom file by benchmarking on the current machine with
# starch-benchmark (built first, as a prerequisite).
# wisdom.local is declared phony, so the benchmarks are rerun every time the
# target is requested, even when a wisdom.local file already exists.
# Two passes are made: the first is restricted to the four named functions and
# writes wisdom.local; the second is given that file via -r and writes
# wisdom.local again via -o.
# NOTE(review): presumably -i is an iteration count and -r reads existing
# wisdom as a starting point -- confirm against starch-benchmark's usage text.
.PHONY: wisdom.local
wisdom.local: starch-benchmark
./starch-benchmark -i 15 -o wisdom.local mean_power_u16 mean_power_u16_aligned magnitude_uc8 magnitude_uc8_aligned
./starch-benchmark -i 15 -r wisdom.local -o wisdom.local
29 changes: 29 additions & 0 deletions Makefile.cpufeatures
@@ -0,0 +1,29 @@
# -*- makefile -*-

# cmake integration is a little tricky, so let's do this by hand for now

# Host OS name and machine type, each sampled once at parse time.
CPUFEATURES_UNAME := $(shell uname)
CPUFEATURES_ARCH := $(shell uname -m)

# Objects needed on every platform.
CPUFEATURES_OBJS := \
    cpu_features/src/filesystem.o \
    cpu_features/src/stack_line_reader.o \
    cpu_features/src/string_view.o

# Compiler flags used only for the cpu_features objects.
CPUFEATURES_CFLAGS := \
    -std=c99 -O -g \
    -DSTACK_LINE_READER_BUFFER_SIZE=1024 \
    -DNDEBUG

# OS-specific additions. Linux also needs the hwcaps object; Darwin only
# needs an extra feature macro.
ifeq ($(CPUFEATURES_UNAME),Linux)
CPUFEATURES_OBJS += cpu_features/src/hwcaps.o
CPUFEATURES_CFLAGS += -DHAVE_STRONG_GETAUXVAL
else ifeq ($(CPUFEATURES_UNAME),Darwin)
CPUFEATURES_CFLAGS += -DHAVE_SYSCTLBYNAME
endif

# Architecture-specific cpuinfo implementation. Any machine name containing
# "arm" selects the ARM one; other architectures add nothing.
ifeq ($(CPUFEATURES_ARCH),x86_64)
CPUFEATURES_OBJS += cpu_features/src/cpuinfo_x86.o
else ifneq (,$(findstring arm,$(CPUFEATURES_ARCH)))
CPUFEATURES_OBJS += cpu_features/src/cpuinfo_arm.o
endif

# Build the cpu_features objects with their own flags, replacing (not
# extending) whatever CFLAGS/CPPFLAGS the including Makefile or the command
# line supplied.
$(CPUFEATURES_OBJS): override CFLAGS := $(CPUFEATURES_CFLAGS)
$(CPUFEATURES_OBJS): override CPPFLAGS := -Icpu_features/include

0 comments on commit bff71dc

Please sign in to comment.