Skip to content
Permalink
Browse files
riscv: optimized memset
The generic memset is defined as a byte-at-a-time write. This is always
safe, but it's slower than a 4-byte or even 8-byte write.

Write a generic memset which fills the data one byte at a time until the
destination is aligned, then fills using the largest size allowed,
and finally fills the remaining data one byte at a time.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
  • Loading branch information
teknoraver authored and esmil committed Jan 9, 2022
1 parent 3eca7e3 commit 55f013c0bc3e224cf34271c3cc11f7443b11553f
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 135 deletions.
@@ -6,14 +6,10 @@
#ifndef _ASM_RISCV_STRING_H
#define _ASM_RISCV_STRING_H

#include <linux/types.h>
#include <linux/linkage.h>

#define __HAVE_ARCH_MEMSET
extern asmlinkage void *memset(void *, int, size_t);
extern asmlinkage void *__memset(void *, int, size_t);

#ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
#define __HAVE_ARCH_MEMSET
extern void *memset(void *s, int c, size_t count);
extern void *__memset(void *s, int c, size_t count);
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *dest, const void *src, size_t count);
extern void *__memcpy(void *dest, const void *src, size_t count);
@@ -31,7 +31,6 @@ obj-y += syscall_table.o
obj-y += sys_riscv.o
obj-y += time.o
obj-y += traps.o
obj-y += riscv_ksyms.o
obj-y += stacktrace.o
obj-y += cacheinfo.o
obj-y += patch.o

This file was deleted.

@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
lib-y += delay.o
lib-y += memset.o
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE) += string.o

This file was deleted.

@@ -111,3 +111,44 @@ EXPORT_SYMBOL(__memmove);

/*
 * memmove is declared as a weak alias of __memmove, so both names refer to
 * the same implementation unless another (strong) definition of memmove is
 * linked in.
 */
void *memmove(void *dest, const void *src, size_t count) __weak __alias(__memmove);
EXPORT_SYMBOL(memmove);

/**
 * __memset - fill a region of memory with a byte value
 * @s: start of the region to fill
 * @c: fill value; only its low 8 bits are used
 * @count: number of bytes to fill
 *
 * For regions of at least MIN_THRESHOLD bytes the bulk of the fill is done
 * with unsigned long stores; everything else is filled byte by byte.
 *
 * Return: @s
 */
void *__memset(void *s, int c, size_t count)
{
	/* 'dest' is advanced through its byte, word and address views. */
	union types dest = { .as_u8 = s };

	if (count >= MIN_THRESHOLD) {
		/*
		 * Reduce 'c' to a single byte first. Without this, a value
		 * outside 0..255 (e.g. -1 or 0x101) would leak its upper bits
		 * into the replicated word, and the word stores would write
		 * different bytes than the byte stores below.
		 */
		unsigned long cu = (unsigned char)c;

		/* Compose an unsigned long with the byte repeated 4/8 times */
#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
		/*
		 * ~0UL / 0xff is 0x01010101 or 0x0101010101010101 depending
		 * on the width of unsigned long, so no oversized literal is
		 * needed on 32 bit machines.
		 */
		cu *= ~0UL / 0xff;
#else
		cu |= cu << 8;
		cu |= cu << 16;
		/* Suppress warning on 32 bit machines */
		cu |= (cu << 16) << 16;
#endif
		if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
			/*
			 * Fill the buffer one byte at a time until
			 * the destination is word aligned.
			 */
			for (; count && dest.as_uptr & WORD_MASK; count--)
				*dest.as_u8++ = c;
		}

		/* Fill using the largest size allowed */
		for (; count >= BYTES_LONG; count -= BYTES_LONG)
			*dest.as_ulong++ = cu;
	}

	/* Fill the remainder (or the whole of a short region) bytewise */
	while (count--)
		*dest.as_u8++ = c;

	return s;
}
EXPORT_SYMBOL(__memset);

/*
 * memset is declared as a weak alias of __memset, so both names refer to
 * the same implementation unless another (strong) definition of memset is
 * linked in.
 */
void *memset(void *s, int c, size_t count) __weak __alias(__memset);
EXPORT_SYMBOL(memset);

0 comments on commit 55f013c

Please sign in to comment.