Skip to content
Permalink
Browse files
riscv: optimized memset
The generic memset is defined as a byte-at-a-time write. This is always
safe, but it's slower than a 4-byte or even 8-byte write.

Write a generic memset which fills the data one byte at a time until the
destination is aligned, then fills using the largest size allowed,
and finally fills the remaining data one byte at a time.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
  • Loading branch information
teknoraver authored and esmil committed Jan 18, 2022
1 parent 5b8a118 commit 6348ed9f5269d38acf552123e96766aa2eb0fd7e
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 135 deletions.
@@ -6,14 +6,10 @@
#ifndef _ASM_RISCV_STRING_H
#define _ASM_RISCV_STRING_H

#include <linux/types.h>
#include <linux/linkage.h>

#define __HAVE_ARCH_MEMSET
extern asmlinkage void *memset(void *, int, size_t);
extern asmlinkage void *__memset(void *, int, size_t);

#ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
#define __HAVE_ARCH_MEMSET
extern void *memset(void *s, int c, size_t count);
extern void *__memset(void *s, int c, size_t count);
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *dest, const void *src, size_t count);
extern void *__memcpy(void *dest, const void *src, size_t count);
@@ -31,7 +31,6 @@ obj-y += syscall_table.o
obj-y += sys_riscv.o
obj-y += time.o
obj-y += traps.o
obj-y += riscv_ksyms.o
obj-y += stacktrace.o
obj-y += cacheinfo.o
obj-y += patch.o

This file was deleted.

@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
lib-y += delay.o
lib-y += memset.o
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE) += string.o

This file was deleted.

@@ -111,3 +111,44 @@ EXPORT_SYMBOL(__memmove);

/*
 * memmove is declared as a weak alias of __memmove and exported under its
 * own name. NOTE(review): presumably weak so that another definition of
 * memmove can take precedence at link time -- confirm against the build.
 */
void *memmove(void *dest, const void *src, size_t count) __weak __alias(__memmove);
EXPORT_SYMBOL(memmove);

/**
 * __memset - fill a memory region with a byte value
 * @s: start of the region to fill
 * @c: fill value; only its low 8 bits are stored
 * @count: number of bytes to fill
 *
 * Regions of at least MIN_THRESHOLD bytes are filled one unsigned long at a
 * time. Unless CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is enabled, the
 * destination is first advanced one byte at a time until its address has no
 * bits set in WORD_MASK. Whatever is left over, and every region shorter
 * than MIN_THRESHOLD, is filled one byte at a time.
 *
 * Return: @s
 */
void *__memset(void *s, int c, size_t count)
{
	union types dest = { .as_u8 = s };

	if (count >= MIN_THRESHOLD) {
		/*
		 * Reduce 'c' to a single byte before replicating it. A value
		 * outside 0..255 (e.g. a negative int) would otherwise leak
		 * its upper bits into the word pattern, making the word
		 * stores disagree with the byte stores below.
		 */
		unsigned long cu = (unsigned char)c;

		/* Compose an ulong with 'c' repeated 4/8 times */
#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
		cu *= 0x0101010101010101UL;
#else
		cu |= cu << 8;
		cu |= cu << 16;
		/* Suppress warning on 32 bit machines */
		cu |= (cu << 16) << 16;
#endif
		if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
			/*
			 * Fill the buffer one byte at a time until
			 * the destination is word aligned.
			 */
			for (; count && dest.as_uptr & WORD_MASK; count--)
				*dest.as_u8++ = c;
		}

		/* Fill using the largest size allowed */
		for (; count >= BYTES_LONG; count -= BYTES_LONG)
			*dest.as_ulong++ = cu;
	}

	/* Fill the remainder one byte at a time */
	while (count--)
		*dest.as_u8++ = c;

	return s;
}
EXPORT_SYMBOL(__memset);

/*
 * memset is declared as a weak alias of __memset and exported under its own
 * name. NOTE(review): presumably weak so that another definition of memset
 * can take precedence at link time -- confirm against the build.
 */
void *memset(void *s, int c, size_t count) __weak __alias(__memset);
EXPORT_SYMBOL(memset);

0 comments on commit 6348ed9

Please sign in to comment.