Skip to content

Commit

Permalink
std/jpeg: call slice.bulk_{load,save}_host_endian
Browse files Browse the repository at this point in the history
Updates https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=59540

name                                             old speed     new speed     delta

wuffs_jpeg_decode_30k_24bpp_progressive/clang11  131MB/s ± 0%  168MB/s ± 0%  +28.44%  (p=0.008 n=5+5)
wuffs_jpeg_decode_30k_24bpp_progressive/gcc10    167MB/s ± 0%  175MB/s ± 0%   +4.80%  (p=0.008 n=5+5)
  • Loading branch information
nigeltao committed Jun 5, 2023
1 parent 4a164cd commit 2865c5b
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 152 deletions.
26 changes: 26 additions & 0 deletions internal/cgen/base/fundamental-private.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,19 @@ wuffs_base__slice_u8__copy_from_slice(wuffs_base__slice_u8 dst,
return len;
}

// wuffs_base__bulk_load_host_endian copies bytes from src into the memory that
// ptr points to. The number of bytes copied is the smaller of len and src.len,
// so a short src yields a partial copy instead of reading past its end. The
// bytes are copied verbatim (no byte swapping) and nothing is copied when that
// count is zero.
static inline wuffs_base__empty_struct  //
wuffs_base__bulk_load_host_endian(void* ptr,
                                  size_t len,
                                  wuffs_base__slice_u8 src) {
  // Clamp the requested length to what src can actually provide.
  const size_t n = (src.len < len) ? src.len : len;
  if (n != 0) {
    memmove(ptr, src.ptr, n);
  }
  return wuffs_base__make_empty_struct();
}

static inline wuffs_base__empty_struct //
wuffs_base__bulk_memset(void* ptr, size_t len, uint8_t byte_value) {
if (len) {
Expand All @@ -222,6 +235,19 @@ wuffs_base__bulk_memset(void* ptr, size_t len, uint8_t byte_value) {
return wuffs_base__make_empty_struct();
}

// wuffs_base__bulk_save_host_endian copies bytes from the memory that ptr
// points to into dst. The number of bytes copied is the smaller of len and
// dst.len, so a short dst yields a partial copy instead of writing past its
// end. The bytes are copied verbatim (no byte swapping) and nothing is copied
// when that count is zero.
static inline wuffs_base__empty_struct  //
wuffs_base__bulk_save_host_endian(void* ptr,
                                  size_t len,
                                  wuffs_base__slice_u8 dst) {
  // Clamp the requested length to what dst has room for.
  const size_t n = (dst.len < len) ? dst.len : len;
  if (n != 0) {
    memmove(dst.ptr, ptr, n);
  }
  return wuffs_base__make_empty_struct();
}

// --------

static inline wuffs_base__slice_u8 //
Expand Down
134 changes: 75 additions & 59 deletions internal/cgen/builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -872,68 +872,26 @@ func (g *gen) writeBuiltinNumType(b *buffer, recv *a.Expr, method t.ID, args []*

func (g *gen) writeBuiltinSlice(b *buffer, recv *a.Expr, method t.ID, args []*a.Node, sideEffectsOnly bool, depth uint32) error {
switch method {
case t.IDBulkMemset:
if arrayOrSlice, lo, hi, ok := recv.IsSlice(); !ok {
return fmt.Errorf("TODO: bulk_memset for general expressions")

} else if hi == nil {
return fmt.Errorf("TODO: bulk_memset for general expressions")

} else if arrayOrSlice.MType().IsBulkNumType() {
b.writes("wuffs_base__bulk_memset(&")
if err := g.writeExpr(b, arrayOrSlice, false, depth); err != nil {
return err
}
if lo == nil {
b.writes("[0], ")
if err := g.writeExpr(b, hi, false, depth); err != nil {
return err
}
} else {
b.writes("[")
if err := g.writeExpr(b, lo, false, depth); err != nil {
return err
}
b.writes("], (")
if err := g.writeExpr(b, hi, false, depth); err != nil {
return err
}
b.writes(" - ")
if err := g.writeExpr(b, lo, false, depth); err != nil {
return err
}
b.writes(")")
}
if bs := arrayOrSlice.MType().Inner().BulkSize(); (bs != nil) && (bs.Cmp(one) != 0) {
b.writes(" * (size_t)")
b.writes(bs.String())
b.writes("u")
}
b.writes(", ")

return g.writeArgs(b, args, depth)
case t.IDBulkLoadHostEndian:
b.writes("wuffs_base__bulk_load_host_endian(")
if err := g.writeBuiltinSliceBulkMethodRecv(b, recv, depth); err != nil {
return err
}
return g.writeArgs(b, args, depth)

} else if arrayOrSlice.MType().IsContainerOfSpecificNumType(t.IDSlice, t.IDU8) {
b.writes("wuffs_base__bulk_memset(")
if err := g.writeExpr(b, recv, false, depth); err != nil {
return err
}
b.writes(".ptr, ")
if err := g.writeExpr(b, recv, false, depth); err != nil {
return err
}
b.writes(".len")
if bs := arrayOrSlice.MType().Inner().BulkSize(); (bs != nil) && (bs.Cmp(one) != 0) {
b.writes(" * (size_t)")
b.writes(bs.String())
b.writes("u")
}
b.writes(", ")
return g.writeArgs(b, args, depth)
case t.IDBulkMemset:
b.writes("wuffs_base__bulk_memset(")
if err := g.writeBuiltinSliceBulkMethodRecv(b, recv, depth); err != nil {
return err
}
return g.writeArgs(b, args, depth)

} else {
return fmt.Errorf("TODO: bulk_memset for general expressions")
case t.IDBulkSaveHostEndian:
b.writes("wuffs_base__bulk_save_host_endian(")
if err := g.writeBuiltinSliceBulkMethodRecv(b, recv, depth); err != nil {
return err
}
return g.writeArgs(b, args, depth)

case t.IDCopyFromSlice:
if err := g.writeBuiltinSliceCopyFromSlice8(b, recv, method, args, depth); err != errOptimizationNotApplicable {
Expand Down Expand Up @@ -1013,6 +971,64 @@ func (g *gen) writeBuiltinSlice(b *buffer, recv *a.Expr, method t.ID, args []*a.
return errNoSuchBuiltin
}

// writeBuiltinSliceBulkMethodRecv writes the two leading C arguments (a
// pointer and a byte count, each followed by ", ") that are derived from the
// receiver of a bulk_etc slice method. It is shared by bulk_load_host_endian,
// bulk_memset and bulk_save_host_endian: the caller writes the C function name
// and opening parenthesis beforehand and the remaining arguments afterwards.
//
// recv must be a slice expression x[lo .. hi] with an explicit hi. Two forms
// are handled:
//
//   - x's type reports IsBulkNumType: this writes "&x[lo], (hi - lo)", or
//     "&x[0], hi" when lo is omitted.
//   - x's type reports IsContainerOfSpecificNumType(t.IDSlice, t.IDU8): this
//     writes "recv.ptr, recv.len".
//
// In both forms the count is multiplied by the BulkSize of x's inner type when
// that size is non-nil and differs from one. Any other receiver produces a
// TODO error, as does any error returned by writeExpr.
func (g *gen) writeBuiltinSliceBulkMethodRecv(b *buffer, recv *a.Expr, depth uint32) error {
	arrayOrSlice, lo, hi, ok := recv.IsSlice()
	if !ok || (hi == nil) {
		return fmt.Errorf("TODO: bulk_etc methods for general expressions")
	}

	// writeBulkSizeFactor scales the element count just written by the inner
	// type's bulk size, turning it into a byte count.
	writeBulkSizeFactor := func() {
		bs := arrayOrSlice.MType().Inner().BulkSize()
		if (bs == nil) || (bs.Cmp(one) == 0) {
			return
		}
		b.writes(" * (size_t)")
		b.writes(bs.String())
		b.writes("u")
	}

	if arrayOrSlice.MType().IsBulkNumType() {
		b.writes("&")
		if err := g.writeExpr(b, arrayOrSlice, false, depth); err != nil {
			return err
		}
		if lo == nil {
			// An omitted lo means the slice starts at element zero, so the
			// count is simply hi.
			b.writes("[0], ")
			if err := g.writeExpr(b, hi, false, depth); err != nil {
				return err
			}
		} else {
			b.writes("[")
			if err := g.writeExpr(b, lo, false, depth); err != nil {
				return err
			}
			b.writes("], (")
			if err := g.writeExpr(b, hi, false, depth); err != nil {
				return err
			}
			b.writes(" - ")
			if err := g.writeExpr(b, lo, false, depth); err != nil {
				return err
			}
			b.writes(")")
		}
		writeBulkSizeFactor()
		b.writes(", ")
		return nil
	}

	if arrayOrSlice.MType().IsContainerOfSpecificNumType(t.IDSlice, t.IDU8) {
		// Here the whole receiver expression (not just x) supplies both the
		// pointer and the length.
		if err := g.writeExpr(b, recv, false, depth); err != nil {
			return err
		}
		b.writes(".ptr, ")
		if err := g.writeExpr(b, recv, false, depth); err != nil {
			return err
		}
		b.writes(".len")
		writeBulkSizeFactor()
		b.writes(", ")
		return nil
	}

	return fmt.Errorf("TODO: bulk_etc methods for general expressions")
}

// writeBuiltinSliceCopyFromSlice8 writes an optimized version of:
//
// foo[fIndex .. fIndex + 8].copy_from_slice!(s:bar[bIndex .. bIndex + 8])
Expand Down
4 changes: 3 additions & 1 deletion lang/builtin/builtin.go
Original file line number Diff line number Diff line change
Expand Up @@ -965,10 +965,12 @@ var SliceFuncs = []string{
"GENERIC T1.uintptr_low_12_bits() u32[..= 4095]",
"GENERIC T1.suffix(up_to: u64) T1",

// The bulk_etc methods are further restricted to those slices whose
// The bulk_etc methods are further restricted to those T1 slices whose
// elements are bulk-numeric types: numerics (e.g. base.u32), arrays of
// numerics, arrays of arrays of numerics, etc.
"GENERIC T1.bulk_load_host_endian!(src: slice u8)",
"GENERIC T1.bulk_memset!(byte_value: u8)",
"GENERIC T1.bulk_save_host_endian!(dst: slice u8)",
}

var SliceU8Funcs = []string{
Expand Down
2 changes: 2 additions & 0 deletions lang/check/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,9 @@ func (q *checker) tcheckDot(n *a.Expr, depth uint32) error {
if q.c.isBuiltInSliceFunc(qqid, lTyp) {
bulky := false
// Go switch cases do not fall through implicitly: an empty "case X:" clause
// does nothing. All of the bulk_etc method IDs therefore share one case
// clause so that each of them marks the call as bulky.
switch qqid[2] {
case t.IDBulkLoadHostEndian, t.IDBulkMemset, t.IDBulkSaveHostEndian:
	bulky = true
}

Expand Down
34 changes: 20 additions & 14 deletions lang/token/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -668,19 +668,22 @@ const (
IDIsOK = ID(0x231)
IDIsSuspension = ID(0x232)

IDBulkMemset = ID(0x240)
IDData = ID(0x241)
IDHeight = ID(0x242)
IDIO = ID(0x243)
IDLimit = ID(0x244)
IDPrefix = ID(0x245)
IDRowU32 = ID(0x246)
IDStride = ID(0x247)
IDSubtable = ID(0x248)
IDSuffix = ID(0x249)
IDUintptrLow12Bits = ID(0x24A)
IDValidUTF8Length = ID(0x24B)
IDWidth = ID(0x24C)
IDBulkLoadHostEndian = ID(0x238)
IDBulkMemset = ID(0x239)
IDBulkSaveHostEndian = ID(0x23A)

IDData = ID(0x240)
IDHeight = ID(0x241)
IDIO = ID(0x242)
IDLimit = ID(0x243)
IDPrefix = ID(0x244)
IDRowU32 = ID(0x245)
IDStride = ID(0x246)
IDSubtable = ID(0x247)
IDSuffix = ID(0x248)
IDUintptrLow12Bits = ID(0x249)
IDValidUTF8Length = ID(0x24A)
IDWidth = ID(0x24B)

IDLimitedSwizzleU32InterleavedFromReader = ID(0x280)
IDSwizzleInterleavedFromReader = ID(0x281)
Expand Down Expand Up @@ -1100,7 +1103,10 @@ var builtInsByID = [nBuiltInIDs]string{
IDIsOK: "is_ok",
IDIsSuspension: "is_suspension",

IDBulkMemset: "bulk_memset",
IDBulkLoadHostEndian: "bulk_load_host_endian",
IDBulkMemset: "bulk_memset",
IDBulkSaveHostEndian: "bulk_save_host_endian",

IDData: "data",
IDHeight: "height",
IDIO: "io",
Expand Down
62 changes: 29 additions & 33 deletions release/c/wuffs-unsupported-snapshot.c
Original file line number Diff line number Diff line change
Expand Up @@ -12277,6 +12277,19 @@ wuffs_base__slice_u8__copy_from_slice(wuffs_base__slice_u8 dst,
return len;
}

// wuffs_base__bulk_load_host_endian copies bytes from src into the memory that
// ptr points to. The number of bytes copied is the smaller of len and src.len,
// so a short src yields a partial copy instead of reading past its end. The
// bytes are copied verbatim (no byte swapping) and nothing is copied when that
// count is zero.
static inline wuffs_base__empty_struct  //
wuffs_base__bulk_load_host_endian(void* ptr,
                                  size_t len,
                                  wuffs_base__slice_u8 src) {
  // Clamp the requested length to what src can actually provide.
  const size_t n = (src.len < len) ? src.len : len;
  if (n != 0) {
    memmove(ptr, src.ptr, n);
  }
  return wuffs_base__make_empty_struct();
}

static inline wuffs_base__empty_struct //
wuffs_base__bulk_memset(void* ptr, size_t len, uint8_t byte_value) {
if (len) {
Expand All @@ -12285,6 +12298,19 @@ wuffs_base__bulk_memset(void* ptr, size_t len, uint8_t byte_value) {
return wuffs_base__make_empty_struct();
}

// wuffs_base__bulk_save_host_endian copies bytes from the memory that ptr
// points to into dst. The number of bytes copied is the smaller of len and
// dst.len, so a short dst yields a partial copy instead of writing past its
// end. The bytes are copied verbatim (no byte swapping) and nothing is copied
// when that count is zero.
static inline wuffs_base__empty_struct  //
wuffs_base__bulk_save_host_endian(void* ptr,
                                  size_t len,
                                  wuffs_base__slice_u8 dst) {
  // Clamp the requested length to what dst has room for.
  const size_t n = (dst.len < len) ? dst.len : len;
  if (n != 0) {
    memmove(dst.ptr, ptr, n);
  }
  return wuffs_base__make_empty_struct();
}

// --------

static inline wuffs_base__slice_u8 //
Expand Down Expand Up @@ -40016,21 +40042,12 @@ wuffs_jpeg__decoder__load_mcu_blocks_for_single_component(
uint32_t a_csel) {
uint64_t v_stride16 = 0;
uint64_t v_offset = 0;
wuffs_base__slice_u8 v_s = {0};
uint32_t v_i = 0;

while (true) {
v_stride16 = ((uint64_t)((self->private_impl.f_components_workbuf_widths[a_csel] * 16u)));
v_offset = (self->private_impl.f_components_workbuf_offsets[(a_csel | 4u)] + (((uint64_t)(a_mx)) * 128u) + (((uint64_t)(a_my)) * v_stride16));
if (v_offset <= ((uint64_t)(a_workbuf.len))) {
v_s = wuffs_base__slice_u8__subslice_i(a_workbuf, v_offset);
if (((uint64_t)(v_s.len)) >= 128u) {
v_i = 0u;
while (v_i < 64u) {
self->private_data.f_mcu_blocks[0u][v_i] = ((uint16_t)(((((uint16_t)(v_s.ptr[((2u * v_i) + 0u)])) << 0u) | (((uint16_t)(v_s.ptr[((2u * v_i) + 1u)])) << 8u))));
v_i += 1u;
}
}
wuffs_base__bulk_load_host_endian(&self->private_data.f_mcu_blocks[0], 1u * (size_t)128u, wuffs_base__slice_u8__subslice_i(a_workbuf, v_offset));
}
goto label__0__break;
}
Expand All @@ -40052,8 +40069,6 @@ wuffs_jpeg__decoder__load_mcu_blocks(
uint64_t v_v = 0;
uint64_t v_stride16 = 0;
uint64_t v_offset = 0;
wuffs_base__slice_u8 v_s = {0};
uint32_t v_i = 0;

v_h = 1u;
v_v = 1u;
Expand All @@ -40067,14 +40082,7 @@ wuffs_jpeg__decoder__load_mcu_blocks(
v_stride16 = ((uint64_t)((self->private_impl.f_components_workbuf_widths[v_csel] * 16u)));
v_offset = (self->private_impl.f_components_workbuf_offsets[(v_csel | 4u)] + (((v_h * ((uint64_t)(a_mx))) + ((uint64_t)(self->private_impl.f_scan_comps_bx_offset[v_b]))) * 128u) + (((v_v * ((uint64_t)(a_my))) + ((uint64_t)(self->private_impl.f_scan_comps_by_offset[v_b]))) * v_stride16));
if (v_offset <= ((uint64_t)(a_workbuf.len))) {
v_s = wuffs_base__slice_u8__subslice_i(a_workbuf, v_offset);
if (((uint64_t)(v_s.len)) >= 128u) {
v_i = 0u;
while (v_i < 64u) {
self->private_data.f_mcu_blocks[v_b][v_i] = ((uint16_t)(((((uint16_t)(v_s.ptr[((2u * v_i) + 0u)])) << 0u) | (((uint16_t)(v_s.ptr[((2u * v_i) + 1u)])) << 8u))));
v_i += 1u;
}
}
wuffs_base__bulk_load_host_endian(&self->private_data.f_mcu_blocks[v_b], ((v_b + 1u) - v_b) * (size_t)128u, wuffs_base__slice_u8__subslice_i(a_workbuf, v_offset));
}
v_b += 1u;
}
Expand All @@ -40095,9 +40103,6 @@ wuffs_jpeg__decoder__save_mcu_blocks(
uint64_t v_v = 0;
uint64_t v_stride16 = 0;
uint64_t v_offset = 0;
wuffs_base__slice_u8 v_s = {0};
uint32_t v_i = 0;
uint16_t v_m = 0;

v_h = 1u;
v_v = 1u;
Expand All @@ -40111,16 +40116,7 @@ wuffs_jpeg__decoder__save_mcu_blocks(
v_stride16 = ((uint64_t)((self->private_impl.f_components_workbuf_widths[v_csel] * 16u)));
v_offset = (self->private_impl.f_components_workbuf_offsets[(v_csel | 4u)] + (((v_h * ((uint64_t)(a_mx))) + ((uint64_t)(self->private_impl.f_scan_comps_bx_offset[v_b]))) * 128u) + (((v_v * ((uint64_t)(a_my))) + ((uint64_t)(self->private_impl.f_scan_comps_by_offset[v_b]))) * v_stride16));
if (v_offset <= ((uint64_t)(a_workbuf.len))) {
v_s = wuffs_base__slice_u8__subslice_i(a_workbuf, v_offset);
if (((uint64_t)(v_s.len)) >= 128u) {
v_i = 0u;
while (v_i < 64u) {
v_m = self->private_data.f_mcu_blocks[v_b][v_i];
v_s.ptr[((2u * v_i) + 0u)] = ((uint8_t)((v_m >> 0u)));
v_s.ptr[((2u * v_i) + 1u)] = ((uint8_t)((v_m >> 8u)));
v_i += 1u;
}
}
wuffs_base__bulk_save_host_endian(&self->private_data.f_mcu_blocks[v_b], ((v_b + 1u) - v_b) * (size_t)128u, wuffs_base__slice_u8__subslice_i(a_workbuf, v_offset));
}
v_b += 1u;
}
Expand Down
Loading

0 comments on commit 2865c5b

Please sign in to comment.