Skip to content

Commit

Permalink
Support and optimize PROGMEM accesses in libc
Browse files Browse the repository at this point in the history
Move all the pgmspace.(cpp/h) macros and functions from Arduino IDE
into newlib as a first-class citizen.  All the mem*_P, str*_P, and
*printf_P functions are included in this move, as well as the
PROGMEM macro and pgm_read_*.

Allow for use of PROGMEM based format and parameter strings in all
*printf functions.  No need for copying PSTR()s into RAM before printing
them out.

Add "%S" (capital-S) format that I've been told, but cannot verify,
is used in Arduino to specify a PROGMEM string parameter in printfs,
as an alias for "%s" since plain "%s" can now handle PROGMEM.

Align PSTR() strings to 4 bytes.  This results in an average wasted space of
1.5bytes/string (25% @0, 25%@1, 25%@2, 25%@3 == 1.5) but allows for
aligned memcpy_P and str(n)cpy_P performance to go up by 4x to 8x by
using 32-bit progmem reads instead of 4 single-byte pgm_read_byte
macros (which are many instructions in length, too).

Optimized the memcpy_P, strnlen_P, and strncpy_P functions to use 32-bit
direct reads whenever possible (source and dest alignment mediated), but
there is still room for improvement in others in newlib/lib/sys/xtensa/*.c.

str(n)cpy now also transparently supports PROGMEM and RAM, only using
the slower PROGMEM version when the source is in PROGMEM.  This was
due to a GCC optimization:  When GCC sees a printf("xxxx") or a
printf("%s", "string") it silently optimizes out the printf and replaces
it with an appropriate strcpy.  So the changes to printf to support
PROGMEM wouldn't ever be invoked, and instead GCC silently calls strcpy
with both RAM and PSTR strings.

Finally, move several constant arrays from RODATA into PROGMEM and
update their accessors.  Among these are the ctype array, ~260 bytes,
mprec* arrays, ~300 bytes, and strings/daycounts in the time
formatting functions, ~200 bytes.
  • Loading branch information
earlephilhower authored and igrr committed Aug 20, 2018
1 parent e674369 commit 61f08d0
Show file tree
Hide file tree
Showing 38 changed files with 796 additions and 72 deletions.
2 changes: 1 addition & 1 deletion newlib/libc/ctype/ctype_.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ _CONST char _ctype_[1 + 256] = {

#else /* !defined(ALLOW_NEGATIVE_CTYPE_INDEX) */

_CONST char _ctype_[1 + 256] = {
_CONST char _ctype_[1 + 256] PROGMEM = {
0,
_CTYPE_DATA_0_127,
_CTYPE_DATA_128_255
Expand Down
2 changes: 1 addition & 1 deletion newlib/libc/ctype/isalnum.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,6 @@ No OS subroutines are required.
int
_DEFUN(isalnum,(c),int c)
{
return(__ctype_ptr__[c+1] & (_U|_L|_N));
return(pgm_read_byte(&__ctype_ptr__[c+1]) & (_U|_L|_N));
}

2 changes: 1 addition & 1 deletion newlib/libc/ctype/isalpha.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ No supporting OS subroutines are required.
int
_DEFUN(isalpha,(c),int c)
{
return(__ctype_ptr__[c+1] & (_U|_L));
return(pgm_read_byte(&__ctype_ptr__[c+1]) & (_U|_L));
}

2 changes: 1 addition & 1 deletion newlib/libc/ctype/isblank.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,5 @@ No supporting OS subroutines are required.
int
_DEFUN(isblank,(c),int c)
{
return ((__ctype_ptr__[c+1] & _B) || (c == '\t'));
return ((pgm_read_byte(&__ctype_ptr__[c+1]) & _B) || (c == '\t'));
}
2 changes: 1 addition & 1 deletion newlib/libc/ctype/iscntrl.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ No supporting OS subroutines are required.
int
_DEFUN(iscntrl,(c),int c)
{
return(__ctype_ptr__[c+1] & _C);
return(pgm_read_byte(&__ctype_ptr__[c+1]) & _C);
}


2 changes: 1 addition & 1 deletion newlib/libc/ctype/isdigit.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,5 @@ No supporting OS subroutines are required.
int
_DEFUN(isdigit,(c),int c)
{
return(__ctype_ptr__[c+1] & _N);
return(pgm_read_byte(&__ctype_ptr__[c+1]) & _N);
}
2 changes: 1 addition & 1 deletion newlib/libc/ctype/islower.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ No supporting OS subroutines are required.
int
_DEFUN(islower,(c),int c)
{
return ((__ctype_ptr__[c+1] & (_U|_L)) == _L);
return ((pgm_read_byte(&__ctype_ptr__[c+1]) & (_U|_L)) == _L);
}

4 changes: 2 additions & 2 deletions newlib/libc/ctype/isprint.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,14 @@ No supporting OS subroutines are required.
int
_DEFUN(isgraph,(c),int c)
{
return(__ctype_ptr__[c+1] & (_P|_U|_L|_N));
return(pgm_read_byte(&__ctype_ptr__[c+1]) & (_P|_U|_L|_N));
}


#undef isprint
int
_DEFUN(isprint,(c),int c)
{
return(__ctype_ptr__[c+1] & (_P|_U|_L|_N|_B));
return(pgm_read_byte(&__ctype_ptr__[c+1]) & (_P|_U|_L|_N|_B));
}

2 changes: 1 addition & 1 deletion newlib/libc/ctype/ispunct.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,6 @@ No supporting OS subroutines are required.
int
_DEFUN(ispunct,(c),int c)
{
return(__ctype_ptr__[c+1] & _P);
return(pgm_read_byte(&__ctype_ptr__[c+1]) & _P);
}

2 changes: 1 addition & 1 deletion newlib/libc/ctype/isspace.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ No supporting OS subroutines are required.
int
_DEFUN(isspace,(c),int c)
{
return(__ctype_ptr__[c+1] & _S);
return(pgm_read_byte(&__ctype_ptr__[c+1]) & _S);
}

2 changes: 1 addition & 1 deletion newlib/libc/ctype/isupper.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,6 @@ No supporting OS subroutines are required.
int
_DEFUN(isupper,(c),int c)
{
return ((__ctype_ptr__[c+1] & (_U|_L)) == _U);
return ((pgm_read_byte(&__ctype_ptr__[c+1]) & (_U|_L)) == _U);
}

2 changes: 1 addition & 1 deletion newlib/libc/ctype/isxdigit.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,6 @@ No supporting OS subroutines are required.
int
_DEFUN(isxdigit,(c),int c)
{
return(__ctype_ptr__[c+1] & ((_X)|(_N)));
return(pgm_read_byte(&__ctype_ptr__[c+1]) & ((_X)|(_N)));
}

5 changes: 3 additions & 2 deletions newlib/libc/include/assert.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ extern "C" {
#endif

#include "_ansi.h"
#include <sys/pgmspace.h>

#undef assert

#ifdef NDEBUG /* required by ANSI standard */
# define assert(__e) ((void)0)
#else
# define assert(__e) ((__e) ? (void)0 : __assert_func (__FILE__, __LINE__, \
__ASSERT_FUNC, #__e))
# define assert(__e) ((__e) ? (void)0 : __assert_func (PSTR(__FILE__), __LINE__, \
__ASSERT_FUNC, PSTR(#__e)))

# ifndef __ASSERT_FUNC
/* Use g++'s demangled names in C++. */
Expand Down
7 changes: 6 additions & 1 deletion newlib/libc/include/ctype.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define _CTYPE_H_

#include "_ansi.h"
#include <sys/ctype.h>

_BEGIN_STD_C

Expand Down Expand Up @@ -54,7 +55,11 @@ extern __IMPORT char *__ctype_ptr__;
Meanwhile, the real index to __ctype_ptr__+1 must be cast to int,
since isalpha(0x100000001LL) must equal isalpha(1), rather than being
an out-of-bounds reference on a 64-bit machine. */
#define __ctype_lookup(__c) ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])
#ifdef pgm_read_byte
#define __ctype_lookup(__c) pgm_read_byte(&(__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])
#else
#define __ctype_lookup(__c) ((__ctype_ptr__+sizeof(""[__c]))[(int)(__c)])
#endif

#define isalpha(__c) (__ctype_lookup(__c)&(_U|_L))
#define isupper(__c) ((__ctype_lookup(__c)&(_U|_L))==_U)
Expand Down
4 changes: 4 additions & 0 deletions newlib/libc/include/sys/ctype.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/* This is a dummy <sys/ctype.h> used as a placeholder for
systems that need to have a special header file. */

#include <sys/pgmspace.h>
54 changes: 54 additions & 0 deletions newlib/libc/include/sys/pgmspace.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/* PGMSPACE.H - Accessor utilities/types for accessing PROGMEM data */

#ifndef _PGMSPACE_H_
#define _PGMSPACE_H_

// Fallback definitions for every target except the ESP8266, which supplies
// its own sys/pgmspace.h.  Each macro is only defined when nothing earlier
// has already provided it.

// Attribute markers: expand to nothing in this fallback, so annotated
// declarations are compiled as ordinary data.
#ifndef ICACHE_RODATA_ATTR
#define ICACHE_RODATA_ATTR
#endif

#ifndef PROGMEM
#define PROGMEM
#endif

// Pointer types for PROGMEM strings / raw data.  These must expand to real
// types: an empty definition would turn "PGM_P s = ...;" into "s = ...;",
// which does not compile.
#ifndef PGM_P
#define PGM_P const char *
#endif

#ifndef PGM_VOID_P
#define PGM_VOID_P const void *
#endif

// Object-like and empty on purpose: PSTR("text") becomes ("text"), i.e. the
// plain string literal.
#ifndef PSTR
#define PSTR
#endif

// Generic PROGMEM accessors: in this fallback header each one is a plain
// dereference of `addr` reinterpreted as a pointer to the requested type.
// The fixed-width types come from <stdint.h>, which must be visible wherever
// these macros are expanded (this header does not include it).
#ifdef __cplusplus
#define pgm_read_byte(addr) (*reinterpret_cast<const uint8_t *>(addr))
#define pgm_read_word(addr) (*reinterpret_cast<const uint16_t *>(addr))
#define pgm_read_dword(addr) (*reinterpret_cast<const uint32_t *>(addr))
#define pgm_read_float(addr) (*reinterpret_cast<const float *>(addr))
// addr points at a stored pointer; yield that pointer as a const void *.
#define pgm_read_ptr(addr) (*reinterpret_cast<const void * const *>(addr))
#else
#define pgm_read_byte(addr) (*(const uint8_t *)(addr))
#define pgm_read_word(addr) (*(const uint16_t *)(addr))
#define pgm_read_dword(addr) (*(const uint32_t *)(addr))
#define pgm_read_float(addr) (*(const float *)(addr))
// addr points at a stored pointer; yield that pointer as a const void *.
#define pgm_read_ptr(addr) (*(const void * const *)(addr))
#endif

// The _near and _far spellings are straight aliases of the accessors above.
#define pgm_read_byte_near(addr) pgm_read_byte(addr)
#define pgm_read_word_near(addr) pgm_read_word(addr)
#define pgm_read_dword_near(addr) pgm_read_dword(addr)
#define pgm_read_float_near(addr) pgm_read_float(addr)
#define pgm_read_ptr_near(addr) pgm_read_ptr(addr)
#define pgm_read_byte_far(addr) pgm_read_byte(addr)
#define pgm_read_word_far(addr) pgm_read_word(addr)
#define pgm_read_dword_far(addr) pgm_read_dword(addr)
#define pgm_read_float_far(addr) pgm_read_float(addr)
#define pgm_read_ptr_far(addr) pgm_read_ptr(addr)

#endif
2 changes: 1 addition & 1 deletion newlib/libc/machine/xtensa/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ AM_CCASFLAGS = $(INCLUDES)

noinst_LIBRARIES = lib.a

lib_a_SOURCES = setjmp.S memcpy.S memset.S strcmp.S strcpy.S strncpy.S strlen.S
lib_a_SOURCES = setjmp.S memcpy.S memset.S strcmp.S strcpy.c fast_strcpy.S fast_strncpy.S strlen.S
lib_a_CCASFLAGS=$(AM_CCASFLAGS)
lib_a_CFLAGS=$(AM_CFLAGS)
TARGETDOC = ../../tmp.texi
Expand Down
34 changes: 23 additions & 11 deletions newlib/libc/machine/xtensa/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ lib_a_AR = $(AR) $(ARFLAGS)
lib_a_LIBADD =
am_lib_a_OBJECTS = lib_a-setjmp.$(OBJEXT) lib_a-memcpy.$(OBJEXT) \
lib_a-memset.$(OBJEXT) lib_a-strcmp.$(OBJEXT) \
lib_a-strcpy.$(OBJEXT) lib_a-strncpy.$(OBJEXT) \
lib_a-strlen.$(OBJEXT)
lib_a-fast_strcpy.$(OBJEXT) lib_a-fast_strncpy.$(OBJEXT) \
lib_a-strlen.$(OBJEXT) lib_a-strcpy.$(OBJEXT)
lib_a_OBJECTS = $(am_lib_a_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp =
Expand Down Expand Up @@ -175,7 +175,7 @@ AUTOMAKE_OPTIONS = cygnus
INCLUDES = $(NEWLIB_CFLAGS) $(CROSS_CFLAGS) $(TARGET_CFLAGS)
AM_CCASFLAGS = $(INCLUDES)
noinst_LIBRARIES = lib.a
lib_a_SOURCES = setjmp.S memcpy.S memset.S strcmp.S strcpy.S strncpy.S strlen.S
lib_a_SOURCES = setjmp.S memcpy.S memset.S strcmp.S fast_strcpy.S fast_strncpy.S strcpy.c strlen.S
lib_a_CCASFLAGS = $(AM_CCASFLAGS)
lib_a_CFLAGS = $(AM_CFLAGS)
TARGETDOC = ../../tmp.texi
Expand Down Expand Up @@ -233,6 +233,12 @@ mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c

.c.o:
$(COMPILE) -c $<

.c.obj:
$(COMPILE) -c `$(CYGPATH_W) '$<'`

.S.o:
$(CPPASCOMPILE) -c -o $@ $<

Expand Down Expand Up @@ -263,24 +269,30 @@ lib_a-strcmp.o: strcmp.S
lib_a-strcmp.obj: strcmp.S
$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strcmp.obj `if test -f 'strcmp.S'; then $(CYGPATH_W) 'strcmp.S'; else $(CYGPATH_W) '$(srcdir)/strcmp.S'; fi`

lib_a-strcpy.o: strcpy.S
$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strcpy.o `test -f 'strcpy.S' || echo '$(srcdir)/'`strcpy.S
lib_a-fast_strcpy.o: fast_strcpy.S
$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-fast_strcpy.o `test -f 'fast_strcpy.S' || echo '$(srcdir)/'`fast_strcpy.S

lib_a-strcpy.obj: strcpy.S
$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strcpy.obj `if test -f 'strcpy.S'; then $(CYGPATH_W) 'strcpy.S'; else $(CYGPATH_W) '$(srcdir)/strcpy.S'; fi`
lib_a-fast_strcpy.obj: fast_strcpy.S
$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-fast_strcpy.obj `if test -f 'fast_strcpy.S'; then $(CYGPATH_W) 'fast_strcpy.S'; else $(CYGPATH_W) '$(srcdir)/fast_strcpy.S'; fi`

lib_a-strncpy.o: strncpy.S
$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strncpy.o `test -f 'strncpy.S' || echo '$(srcdir)/'`strncpy.S
lib_a-fast_strncpy.o: fast_strncpy.S
$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-fast_strncpy.o `test -f 'fast_strncpy.S' || echo '$(srcdir)/'`fast_strncpy.S

lib_a-strncpy.obj: strncpy.S
$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strncpy.obj `if test -f 'strncpy.S'; then $(CYGPATH_W) 'strncpy.S'; else $(CYGPATH_W) '$(srcdir)/strncpy.S'; fi`
lib_a-fast_strncpy.obj: fast_strncpy.S
$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-fast_strncpy.obj `if test -f 'fast_strncpy.S'; then $(CYGPATH_W) 'fast_strncpy.S'; else $(CYGPATH_W) '$(srcdir)/fast_strncpy.S'; fi`

lib_a-strlen.o: strlen.S
$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strlen.o `test -f 'strlen.S' || echo '$(srcdir)/'`strlen.S

lib_a-strlen.obj: strlen.S
$(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strlen.obj `if test -f 'strlen.S'; then $(CYGPATH_W) 'strlen.S'; else $(CYGPATH_W) '$(srcdir)/strlen.S'; fi`

lib_a-strcpy.o : strcpy.c
$(COMPILE) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strcpy.o `test -f 'strcpy.c' || echo '$(srcdir)/'`strcpy.c

lib_a-strcpy.obj: strcpy.c
$(COMPILE) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CCASFLAGS) $(CCASFLAGS) -c -o lib_a-strcpy.obj `if test -f 'strcpy.c'; then $(CYGPATH_W) 'strcpy.c'; else $(CYGPATH_W) '$(srcdir)/strcpy.c'; fi`

ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
.begin schedule
.align 4
.literal_position
.global strcpy
.type strcpy, @function
strcpy:
.global __fast_strcpy
.type __fast_strcpy, @function
__fast_strcpy:
leaf_entry sp, 16
/* a2 = dst, a3 = src */

Expand Down Expand Up @@ -232,4 +232,4 @@ strcpy:
#endif /* 0 */
.end schedule

.size strcpy, . - strcpy
.size __fast_strcpy, . - __fast_strcpy
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ __strncpy_aux:


.align 4
.global strncpy
.type strncpy, @function
strncpy:
.global __fast_strncpy
.type __fast_strncpy, @function
__fast_strncpy:
leaf_entry sp, 16
/* a2 = dst, a3 = src */

Expand Down Expand Up @@ -255,4 +255,4 @@ strncpy:
3: leaf_return
.end schedule

.size strncpy, . - strncpy
.size __fast_strncpy, . - __fast_strncpy
34 changes: 34 additions & 0 deletions newlib/libc/machine/xtensa/strcpy.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* strcpy.c - Xtensa code to determine if source is PROGMEM or RAM and call appropriate strcpy routine.
GCC is a very smart compiler, and it will, in fact, replace printf(), and its
related functions with strcpy() calls in order to optimize speed. This fails
horribly when printf(PSTR("xxx")) is called since the original strcpy can't
handle the PROGMEM source.
See http://www.ciselant.de/projects/gcc_printf/gcc_printf.html for more info.
There are two ways around this:
- Adding -fno-builtin-*printf, which slows down every printf() call that was
being optimized before. GCC won't replace the printf() call with a strcpy()
call, and everything will work since we now support PROGMEM strings in
printf().
- Make strcpy smarter and fall back on the appropriate routine depending on
the source parameter. Since on the ESP8266 PROGMEM starts at 0x40000000
this is a simple comparison. In this case speed will be maintained.
*/

#include <string.h>

/* RAM-only string copy, reached through strcpy() when src is below the
   PROGMEM boundary. */
extern char *__fast_strcpy(char *dest, const char *src);

/* strcpy() front end: picks the copy routine from the address of src.
   Sources at or above 0x40000000 (where PROGMEM starts on the ESP8266) are
   handed to strcpy_P(); everything else goes to __fast_strcpy().
   Returns whatever the selected routine returns. */
char *strcpy(char *dest, const char *src)
{
  const char *const progmem_start = (const char *)0x40000000;

  return (src >= progmem_start) ? strcpy_P(dest, src)
                                : __fast_strcpy(dest, src);
}

/* RAM-only bounded string copy, reached through strncpy() when src is below
   the PROGMEM boundary. */
extern char *__fast_strncpy(char *dest, const char *src, size_t n);

/* strncpy() front end: picks the copy routine from the address of src.
   Sources at or above 0x40000000 (where PROGMEM starts on the ESP8266) are
   handed to strncpy_P(); everything else goes to __fast_strncpy().
   dest, src and n are passed through unchanged, and the selected routine's
   return value is returned. */
char *strncpy(char *dest, const char *src, size_t n)
{
  const char *const progmem_start = (const char *)0x40000000;

  return (src >= progmem_start) ? strncpy_P(dest, src, n)
                                : __fast_strncpy(dest, src, n);
}
Loading

0 comments on commit 61f08d0

Please sign in to comment.