| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,136 @@ | ||
|
|
||
| /* arm_init.c - NEON optimised filter functions | ||
| * | ||
| * Copyright (c) 2018 Cosmin Truta | ||
| * Copyright (c) 2014,2016 Glenn Randers-Pehrson | ||
| * Written by Mans Rullgard, 2011. | ||
| * | ||
| * This code is released under the libpng license. | ||
| * For conditions of distribution and use, see the disclaimer | ||
| * and license in png.h | ||
| */ | ||
|
|
||
| /* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are | ||
| * called. | ||
| */ | ||
| #define _POSIX_SOURCE 1 | ||
|
|
||
| #include "../pngpriv.h" | ||
|
|
||
| #ifdef PNG_READ_SUPPORTED | ||
|
|
||
| #if PNG_ARM_NEON_OPT > 0 | ||
| #ifdef PNG_ARM_NEON_CHECK_SUPPORTED /* Do run-time checks */ | ||
| /* WARNING: it is strongly recommended that you do not build libpng with | ||
| * run-time checks for CPU features if at all possible. In the case of the ARM | ||
| * NEON instructions there is no processor-specific way of detecting the | ||
| * presence of the required support, therefore run-time detection is extremely | ||
| * OS specific. | ||
| * | ||
| * You may set the macro PNG_ARM_NEON_FILE to the name of a file containing | ||
| * a fragment of C source code which defines the png_have_neon function. There | ||
| * are a number of implementations in contrib/arm-neon, but the only one that | ||
| * has partial support is contrib/arm-neon/linux.c - a generic Linux | ||
| * implementation which reads /proc/cpuinfo. | ||
| */ | ||
| #ifndef PNG_ARM_NEON_FILE | ||
| # ifdef __linux__ | ||
| # define PNG_ARM_NEON_FILE "contrib/arm-neon/linux.c" | ||
| # endif | ||
| #endif | ||
|
|
||
| #ifdef PNG_ARM_NEON_FILE | ||
|
|
||
| #include <signal.h> /* for sig_atomic_t */ | ||
| static int png_have_neon(png_structp png_ptr); | ||
| #include PNG_ARM_NEON_FILE | ||
|
|
||
| #else /* PNG_ARM_NEON_FILE */ | ||
| # error "PNG_ARM_NEON_FILE undefined: no support for run-time ARM NEON checks" | ||
| #endif /* PNG_ARM_NEON_FILE */ | ||
| #endif /* PNG_ARM_NEON_CHECK_SUPPORTED */ | ||
|
|
||
| #ifndef PNG_ALIGNED_MEMORY_SUPPORTED | ||
| # error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED" | ||
| #endif | ||
|
|
||
| void | ||
| png_init_filter_functions_neon(png_structp pp, unsigned int bpp) | ||
| { | ||
| /* Install the NEON read-filter routines into pp->read_filter: the Up | ||
| * routine for every bpp, and the Sub/Avg/Paeth routines when bpp is 3 or 4. | ||
| * The function returns early, leaving pp->read_filter untouched, when the | ||
| * NEON option is OFF/invalid or when the run-time check reports no NEON. | ||
| * | ||
| * The switch statement is compiled in for ARM_NEON_API, the call to | ||
| * png_have_neon is compiled in for ARM_NEON_CHECK. If both are defined | ||
| * the check is only performed if the API has not set the NEON option on | ||
| * or off explicitly. In this case the check controls what happens. | ||
| * | ||
| * If the CHECK is not compiled in and the option is UNSET the behavior prior | ||
| * to 1.6.7 was to use the NEON code - this was a bug caused by having the | ||
| * wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF, | ||
| * as documented in png.h | ||
| */ | ||
| png_debug(1, "in png_init_filter_functions_neon"); | ||
| #ifdef PNG_ARM_NEON_API_SUPPORTED | ||
| switch ((pp->options >> PNG_ARM_NEON) & 3) /* two option bits select UNSET / ON / other */ | ||
| { | ||
| case PNG_OPTION_UNSET: | ||
| /* Allow the run-time check to execute if it has been enabled - | ||
| * thus both API and CHECK can be turned on. If it isn't supported | ||
| * this case will fall through to the 'default' below, which just | ||
| * returns. | ||
| */ | ||
| #endif /* PNG_ARM_NEON_API_SUPPORTED */ | ||
| #ifdef PNG_ARM_NEON_CHECK_SUPPORTED | ||
| { | ||
| static volatile sig_atomic_t no_neon = -1; /* not checked; afterwards 0 = NEON present, 1 = absent, kept across calls */ | ||
|
|
||
| if (no_neon < 0) /* probe only while the cached answer is still unknown */ | ||
| no_neon = !png_have_neon(pp); | ||
|
|
||
| if (no_neon) /* no NEON: keep whatever filters are already installed */ | ||
| return; | ||
| } | ||
| #ifdef PNG_ARM_NEON_API_SUPPORTED | ||
| break; /* check passed: leave the switch and install the NEON filters */ | ||
| #endif | ||
| #endif /* PNG_ARM_NEON_CHECK_SUPPORTED */ | ||
|
|
||
| #ifdef PNG_ARM_NEON_API_SUPPORTED | ||
| default: /* OFF or INVALID */ | ||
| return; | ||
|
|
||
| case PNG_OPTION_ON: | ||
| /* Option turned on */ | ||
| break; | ||
| } | ||
| #endif | ||
|
|
||
| /* IMPORTANT: any new external functions used here must be declared using | ||
| * PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the | ||
| * 'prefix' option to configure works: | ||
| * | ||
| * ./configure --with-libpng-prefix=foobar_ | ||
| * | ||
| * Verify you have got this right by running the above command, doing a build | ||
| * and examining pngprefix.h; it must contain a #define for every external | ||
| * function you add. (Notice that this happens automatically for the | ||
| * initialization function.) | ||
| */ | ||
| pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon; /* installed regardless of bpp */ | ||
|
|
||
| if (bpp == 3) /* select the *3 variants */ | ||
| { | ||
| pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon; | ||
| pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon; | ||
| pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = | ||
| png_read_filter_row_paeth3_neon; | ||
| } | ||
|
|
||
| else if (bpp == 4) /* select the *4 variants; any other bpp keeps the existing Sub/Avg/Paeth entries */ | ||
| { | ||
| pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon; | ||
| pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon; | ||
| pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = | ||
| png_read_filter_row_paeth4_neon; | ||
| } | ||
| } | ||
| #endif /* PNG_ARM_NEON_OPT > 0 */ | ||
| #endif /* READ */ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,253 @@ | ||
|
|
||
| /* filter_neon.S - NEON optimised filter functions | ||
| * | ||
| * Copyright (c) 2018 Cosmin Truta | ||
| * Copyright (c) 2014,2017 Glenn Randers-Pehrson | ||
| * Written by Mans Rullgard, 2011. | ||
| * | ||
| * This code is released under the libpng license. | ||
| * For conditions of distribution and use, see the disclaimer | ||
| * and license in png.h | ||
| */ | ||
|
|
||
| /* This is required to get the symbol renames, which are #defines, and the | ||
| * definitions (or not) of PNG_ARM_NEON_OPT and PNG_ARM_NEON_IMPLEMENTATION. | ||
| */ | ||
| #define PNG_VERSION_INFO_ONLY | ||
| #include "../pngpriv.h" | ||
|
|
||
| #if (defined(__linux__) || defined(__FreeBSD__)) && defined(__ELF__) | ||
| .section .note.GNU-stack,"",%progbits /* mark stack as non-executable */ | ||
| #endif | ||
|
|
||
| #ifdef PNG_READ_SUPPORTED | ||
|
|
||
| /* Assembler NEON support - only works for 32-bit ARM (i.e. it does not work for | ||
| * ARM64). The code in arm/filter_neon_intrinsics.c supports ARM64, however it | ||
| * only works if -mfpu=neon is specified on the GCC command line. See pngpriv.h | ||
| * for the logic which sets PNG_USE_ARM_NEON_ASM: | ||
| */ | ||
| #if PNG_ARM_NEON_IMPLEMENTATION == 2 /* hand-coded assembler */ | ||
|
|
||
| #if PNG_ARM_NEON_OPT > 0 | ||
|
|
||
| #ifdef __ELF__ | ||
| # define ELF | ||
| #else | ||
| # define ELF @ | ||
| #endif | ||
|
|
||
| .arch armv7-a | ||
| .fpu neon | ||
|
|
||
| .macro func name, export=0 /* open a function; export=1 makes its symbol global */ | ||
| .macro endfunc /* nested macro that closes the function opened by this func */ | ||
| ELF .size \name, . - \name /* symbol size; the ELF prefix turns this into a comment on non-ELF targets */ | ||
| .endfunc | ||
| .purgem endfunc /* delete endfunc so the next func invocation can define it again */ | ||
| .endm | ||
| .text | ||
|
|
||
| /* Explicitly specifying alignment here because some versions of | ||
| * GAS don't align code correctly. This is harmless in correctly | ||
| * written versions of GAS. | ||
| */ | ||
| .align 2 | ||
|
|
||
| .if \export | ||
| .global \name | ||
| .endif | ||
| ELF .type \name, STT_FUNC | ||
| .func \name | ||
| \name: | ||
| .endm | ||
|
|
||
| func png_read_filter_row_sub4_neon, export=1 /* undo the Sub filter in place on the row at r1: 4-byte pixels, 16 bytes per pass */ | ||
| ldr r3, [r0, #4] @ rowbytes | ||
| vmov.i8 d3, #0 /* d3 carries the pixel to the left; zero before the first pixel */ | ||
| 1: | ||
| vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128] /* four 32-bit pixels, one per register, replicated to both lanes */ | ||
| vadd.u8 d0, d3, d4 /* pixel 0 += carried left neighbour */ | ||
| vadd.u8 d1, d0, d5 /* pixel 1 += rebuilt pixel 0 */ | ||
| vadd.u8 d2, d1, d6 /* pixel 2 += rebuilt pixel 1 */ | ||
| vadd.u8 d3, d2, d7 /* pixel 3 += rebuilt pixel 2; also the carry for the next pass */ | ||
| vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]! /* write the four results back over the input, r1 += 16 */ | ||
| subs r3, r3, #16 /* 16 bytes handled */ | ||
| bgt 1b /* loop while the remaining byte count is positive */ | ||
|
|
||
| bx lr | ||
| endfunc | ||
|
|
||
| func png_read_filter_row_sub3_neon, export=1 /* undo the Sub filter in place: 3-byte pixels, 12 bytes (4 pixels) per pass */ | ||
| ldr r3, [r0, #4] @ rowbytes | ||
| vmov.i8 d3, #0 /* left-neighbour carry, zero before the first pixel */ | ||
| mov r0, r1 /* r0 becomes the read pointer; r1 remains the write pointer */ | ||
| mov r2, #3 /* store stride: one pixel */ | ||
| mov r12, #12 /* load stride: four pixels */ | ||
| vld1.8 {q11}, [r0], r12 /* preload 16 bytes but advance only 12 */ | ||
| 1: | ||
| vext.8 d5, d22, d23, #3 /* bytes from offset 3: pixel 1 */ | ||
| vadd.u8 d0, d3, d22 /* pixel 0 (offset 0) += carry */ | ||
| vext.8 d6, d22, d23, #6 /* bytes from offset 6: pixel 2 */ | ||
| vadd.u8 d1, d0, d5 /* pixel 1 += rebuilt pixel 0 */ | ||
| vext.8 d7, d23, d23, #1 /* bytes from offset 9: pixel 3 */ | ||
| vld1.8 {q11}, [r0], r12 /* fetch the next 16 bytes before the stores below; note this also reads 16 bytes on the final pass */ | ||
| vst1.32 {d0[0]}, [r1,:32], r2 /* every store writes 4 bytes but advances 3, so the following store rewrites the extra byte */ | ||
| vadd.u8 d2, d1, d6 /* pixel 2 += rebuilt pixel 1 */ | ||
| vst1.32 {d1[0]}, [r1], r2 | ||
| vadd.u8 d3, d2, d7 /* pixel 3 += rebuilt pixel 2; carry for the next pass */ | ||
| vst1.32 {d2[0]}, [r1], r2 | ||
| vst1.32 {d3[0]}, [r1], r2 | ||
| subs r3, r3, #12 /* 12 bytes handled */ | ||
| bgt 1b | ||
|
|
||
| bx lr | ||
| endfunc | ||
|
|
||
| func png_read_filter_row_up_neon, export=1 /* undo the Up filter: add the bytes at r2 to the bytes at r1 in place, 16 per pass */ | ||
| ldr r3, [r0, #4] @ rowbytes | ||
| 1: | ||
| vld1.8 {q0}, [r1,:128] /* 16 bytes of the row being rebuilt */ | ||
| vld1.8 {q1}, [r2,:128]! /* 16 bytes from the buffer at r2 (presumably the previous row - confirm against the C prototype), r2 += 16 */ | ||
| vadd.u8 q0, q0, q1 /* bytewise add, wrapping modulo 256 */ | ||
| vst1.8 {q0}, [r1,:128]! /* store over the input, r1 += 16 */ | ||
| subs r3, r3, #16 /* 16 bytes handled */ | ||
| bgt 1b | ||
|
|
||
| bx lr | ||
| endfunc | ||
|
|
||
| func png_read_filter_row_avg4_neon, export=1 /* undo the Avg filter in place at r1 using the buffer at r2: 4-byte pixels, 16 bytes per pass */ | ||
| ldr r12, [r0, #4] @ rowbytes | ||
| vmov.i8 d3, #0 /* left-neighbour carry, zero before the first pixel */ | ||
| 1: | ||
| vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128] /* four filtered pixels from the row */ | ||
| vld4.32 {d16[],d17[],d18[],d19[]},[r2,:128]! /* the four pixels at the same position in the r2 buffer, r2 += 16 */ | ||
| vhadd.u8 d0, d3, d16 /* halving add: truncated average of left and r2-buffer pixel */ | ||
| vadd.u8 d0, d0, d4 /* pixel 0 = filtered value + average */ | ||
| vhadd.u8 d1, d0, d17 /* rebuilt pixel 0 is the left neighbour of pixel 1 */ | ||
| vadd.u8 d1, d1, d5 | ||
| vhadd.u8 d2, d1, d18 | ||
| vadd.u8 d2, d2, d6 | ||
| vhadd.u8 d3, d2, d19 | ||
| vadd.u8 d3, d3, d7 /* rebuilt pixel 3 is the carry for the next pass */ | ||
| vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]! /* write the four results back, r1 += 16 */ | ||
| subs r12, r12, #16 /* 16 bytes handled */ | ||
| bgt 1b | ||
|
|
||
| bx lr | ||
| endfunc | ||
|
|
||
| func png_read_filter_row_avg3_neon, export=1 /* undo the Avg filter in place at r1 using the buffer at r2: 3-byte pixels, 12 bytes per pass */ | ||
| push {r4,lr} /* r4 and lr are reused as stride registers below */ | ||
| ldr r12, [r0, #4] @ rowbytes | ||
| vmov.i8 d3, #0 /* left-neighbour carry, zero before the first pixel */ | ||
| mov r0, r1 /* r0 = read pointer; r1 stays the write pointer */ | ||
| mov r4, #3 /* store stride: one pixel */ | ||
| mov lr, #12 /* load stride: four pixels */ | ||
| vld1.8 {q11}, [r0], lr /* preload 16 row bytes, advance 12 */ | ||
| 1: | ||
| vld1.8 {q10}, [r2], lr /* 16 bytes of the r2 buffer, advance 12 */ | ||
| vext.8 d5, d22, d23, #3 /* row pixel 1 (offset 3) */ | ||
| vhadd.u8 d0, d3, d20 /* truncated average of left carry and r2 pixel 0 */ | ||
| vext.8 d17, d20, d21, #3 /* r2 pixel 1 */ | ||
| vadd.u8 d0, d0, d22 /* rebuilt pixel 0 */ | ||
| vext.8 d6, d22, d23, #6 /* row pixel 2 (offset 6) */ | ||
| vhadd.u8 d1, d0, d17 | ||
| vext.8 d18, d20, d21, #6 /* r2 pixel 2 */ | ||
| vadd.u8 d1, d1, d5 /* rebuilt pixel 1 */ | ||
| vext.8 d7, d23, d23, #1 /* row pixel 3 (offset 9) */ | ||
| vld1.8 {q11}, [r0], lr /* next 16 row bytes, fetched before the stores below */ | ||
| vst1.32 {d0[0]}, [r1,:32], r4 /* each store writes 4 bytes but advances 3 */ | ||
| vhadd.u8 d2, d1, d18 | ||
| vst1.32 {d1[0]}, [r1], r4 | ||
| vext.8 d19, d21, d21, #1 /* r2 pixel 3 (offset 9) */ | ||
| vadd.u8 d2, d2, d6 /* rebuilt pixel 2 */ | ||
| vhadd.u8 d3, d2, d19 | ||
| vst1.32 {d2[0]}, [r1], r4 | ||
| vadd.u8 d3, d3, d7 /* rebuilt pixel 3; carry for the next pass */ | ||
| vst1.32 {d3[0]}, [r1], r4 | ||
| subs r12, r12, #12 /* 12 bytes handled */ | ||
| bgt 1b | ||
|
|
||
| pop {r4,pc} /* restore r4 and return */ | ||
| endfunc | ||
|
|
||
| .macro paeth rx, ra, rb, rc /* rx = whichever of ra, rb, rc the tests below select; overwrites q12-q15 */ | ||
| vaddl.u8 q12, \ra, \rb @ a + b | ||
| vaddl.u8 q15, \rc, \rc @ 2*c | ||
| vabdl.u8 q13, \rb, \rc @ pa | ||
| vabdl.u8 q14, \ra, \rc @ pb | ||
| vabd.u16 q15, q12, q15 @ pc | ||
| vcle.u16 q12, q13, q14 @ pa <= pb | ||
| vcle.u16 q13, q13, q15 @ pa <= pc | ||
| vcle.u16 q14, q14, q15 @ pb <= pc | ||
| vand q12, q12, q13 @ pa <= pb && pa <= pc | ||
| vmovn.u16 d28, q14 /* narrow the (pb <= pc) mask to one byte per lane */ | ||
| vmovn.u16 \rx, q12 /* narrow the (pa <= pb && pa <= pc) mask into the result register */ | ||
| vbsl d28, \rb, \rc /* d28 = (pb <= pc) ? rb : rc */ | ||
| vbsl \rx, \ra, d28 /* rx = (pa smallest) ? ra : d28 */ | ||
| .endm | ||
|
|
||
| func png_read_filter_row_paeth4_neon, export=1 /* undo the Paeth filter in place at r1 using the buffer at r2: 4-byte pixels, 16 bytes per pass */ | ||
| ldr r12, [r0, #4] @ rowbytes | ||
| vmov.i8 d3, #0 /* left pixel (a), zero before the first pixel */ | ||
| vmov.i8 d20, #0 /* r2-buffer pixel one position back (c), zero before the first pixel */ | ||
| 1: | ||
| vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128] /* four filtered pixels from the row */ | ||
| vld4.32 {d16[],d17[],d18[],d19[]},[r2,:128]! /* four pixels from the r2 buffer (b values), r2 += 16 */ | ||
| paeth d0, d3, d16, d20 /* predictor for pixel 0 */ | ||
| vadd.u8 d0, d0, d4 /* rebuilt pixel 0 = filtered + predictor */ | ||
| paeth d1, d0, d17, d16 /* a = rebuilt pixel 0, c = previous b */ | ||
| vadd.u8 d1, d1, d5 | ||
| paeth d2, d1, d18, d17 | ||
| vadd.u8 d2, d2, d6 | ||
| paeth d3, d2, d19, d18 | ||
| vmov d20, d19 /* last b of this pass is c for the next pass */ | ||
| vadd.u8 d3, d3, d7 /* rebuilt pixel 3; a for the next pass */ | ||
| vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]! /* write the four results back, r1 += 16 */ | ||
| subs r12, r12, #16 /* 16 bytes handled */ | ||
| bgt 1b | ||
|
|
||
| bx lr | ||
| endfunc | ||
|
|
||
| func png_read_filter_row_paeth3_neon, export=1 /* undo the Paeth filter in place at r1 using the buffer at r2: 3-byte pixels, 12 bytes per pass */ | ||
| push {r4,lr} /* r4 and lr are reused as stride registers below */ | ||
| ldr r12, [r0, #4] @ rowbytes | ||
| vmov.i8 d3, #0 /* left pixel (a), zero before the first pixel */ | ||
| vmov.i8 d4, #0 /* r2-buffer pixel one position back (c), zero before the first pixel */ | ||
| mov r0, r1 /* r0 = read pointer; r1 stays the write pointer */ | ||
| mov r4, #3 /* store stride: one pixel */ | ||
| mov lr, #12 /* load stride: four pixels */ | ||
| vld1.8 {q11}, [r0], lr /* preload 16 row bytes, advance 12 */ | ||
| 1: | ||
| vld1.8 {q10}, [r2], lr /* 16 bytes of the r2 buffer, advance 12 */ | ||
| paeth d0, d3, d20, d4 /* predictor for pixel 0 */ | ||
| vext.8 d5, d22, d23, #3 /* row pixel 1 (offset 3) */ | ||
| vadd.u8 d0, d0, d22 /* rebuilt pixel 0 */ | ||
| vext.8 d17, d20, d21, #3 /* r2 pixel 1 */ | ||
| paeth d1, d0, d17, d20 | ||
| vst1.32 {d0[0]}, [r1,:32], r4 /* each store writes 4 bytes but advances 3 */ | ||
| vext.8 d6, d22, d23, #6 /* row pixel 2 (offset 6) */ | ||
| vadd.u8 d1, d1, d5 /* rebuilt pixel 1 */ | ||
| vext.8 d18, d20, d21, #6 /* r2 pixel 2 */ | ||
| paeth d2, d1, d18, d17 | ||
| vext.8 d7, d23, d23, #1 /* row pixel 3 (offset 9) */ | ||
| vld1.8 {q11}, [r0], lr /* next 16 row bytes; all four raw pixels of this pass are already extracted */ | ||
| vst1.32 {d1[0]}, [r1], r4 | ||
| vadd.u8 d2, d2, d6 /* rebuilt pixel 2 */ | ||
| vext.8 d19, d21, d21, #1 /* r2 pixel 3 (offset 9) */ | ||
| paeth d3, d2, d19, d18 | ||
| vst1.32 {d2[0]}, [r1], r4 | ||
| vmov d4, d19 /* last b of this pass is c for the next pass */ | ||
| vadd.u8 d3, d3, d7 /* rebuilt pixel 3; a for the next pass */ | ||
| vst1.32 {d3[0]}, [r1], r4 | ||
| subs r12, r12, #12 /* 12 bytes handled */ | ||
| bgt 1b | ||
|
|
||
| pop {r4,pc} /* restore r4 and return */ | ||
| endfunc | ||
| #endif /* PNG_ARM_NEON_OPT > 0 */ | ||
| #endif /* PNG_ARM_NEON_IMPLEMENTATION == 2 (assembler) */ | ||
| #endif /* READ */ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
|
|
||
| /* palette_neon_intrinsics.c - NEON optimised palette expansion functions | ||
| * | ||
| * Copyright (c) 2018-2019 Cosmin Truta | ||
| * Copyright (c) 2017-2018 Arm Holdings. All rights reserved. | ||
| * Written by Richard Townsend <Richard.Townsend@arm.com>, February 2017. | ||
| * | ||
| * This code is released under the libpng license. | ||
| * For conditions of distribution and use, see the disclaimer | ||
| * and license in png.h | ||
| */ | ||
|
|
||
| #include "../pngpriv.h" | ||
|
|
||
| #if PNG_ARM_NEON_IMPLEMENTATION == 1 | ||
|
|
||
| #if defined(_MSC_VER) && defined(_M_ARM64) | ||
| # include <arm64_neon.h> | ||
| #else | ||
| # include <arm_neon.h> | ||
| #endif | ||
|
|
||
| /* Build an RGBA8 palette from the separate RGB and alpha palettes. */ | ||
| void | ||
| png_riffle_palette_neon(png_structrp png_ptr) | ||
| { | ||
| png_const_colorp palette = png_ptr->palette; | ||
| png_bytep riffled_palette = png_ptr->riffled_palette; | ||
| png_const_bytep trans_alpha = png_ptr->trans_alpha; | ||
| int num_trans = png_ptr->num_trans; | ||
| int i; | ||
|
|
||
| png_debug(1, "in png_riffle_palette_neon"); | ||
|
|
||
| /* Initially black, opaque. */ | ||
| uint8x16x4_t w = {{ | ||
| vdupq_n_u8(0x00), | ||
| vdupq_n_u8(0x00), | ||
| vdupq_n_u8(0x00), | ||
| vdupq_n_u8(0xff), | ||
| }}; | ||
|
|
||
| /* First, riffle the RGB colours into an RGBA8 palette. | ||
| * The alpha component is set to opaque for now. | ||
| */ | ||
| for (i = 0; i < 256; i += 16) | ||
| { | ||
| uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i)); | ||
| w.val[0] = v.val[0]; | ||
| w.val[1] = v.val[1]; | ||
| w.val[2] = v.val[2]; | ||
| vst4q_u8(riffled_palette + (i << 2), w); | ||
| } | ||
|
|
||
| /* Fix up the missing transparency values. */ | ||
| for (i = 0; i < num_trans; i++) | ||
| riffled_palette[(i << 2) + 3] = trans_alpha[i]; | ||
| } | ||
|
|
||
| /* Expands a palettized row into RGBA8. */ | ||
| int | ||
| png_do_expand_palette_rgba8_neon(png_structrp png_ptr, png_row_infop row_info, | ||
| png_const_bytep row, png_bytepp ssp, png_bytepp ddp) | ||
| { | ||
| png_uint_32 row_width = row_info->width; | ||
| const png_uint_32 *riffled_palette = | ||
| (const png_uint_32 *)png_ptr->riffled_palette; | ||
| const png_int_32 pixels_per_chunk = 4; | ||
| int i; | ||
|
|
||
| png_debug(1, "in png_do_expand_palette_rgba8_neon"); | ||
|
|
||
| if (row_width < pixels_per_chunk) | ||
| return 0; | ||
|
|
||
| /* This function originally gets the last byte of the output row. | ||
| * The NEON part writes forward from a given position, so we have | ||
| * to seek this back by 4 pixels x 4 bytes. | ||
| */ | ||
| *ddp = *ddp - ((pixels_per_chunk * sizeof(png_uint_32)) - 1); | ||
|
|
||
| for (i = 0; i < row_width; i += pixels_per_chunk) | ||
| { | ||
| uint32x4_t cur; | ||
| png_bytep sp = *ssp - i, dp = *ddp - (i << 2); | ||
| cur = vld1q_dup_u32 (riffled_palette + *(sp - 3)); | ||
| cur = vld1q_lane_u32(riffled_palette + *(sp - 2), cur, 1); | ||
| cur = vld1q_lane_u32(riffled_palette + *(sp - 1), cur, 2); | ||
| cur = vld1q_lane_u32(riffled_palette + *(sp - 0), cur, 3); | ||
| vst1q_u32((void *)dp, cur); | ||
| } | ||
| if (i != row_width) | ||
| { | ||
| /* Remove the amount that wasn't processed. */ | ||
| i -= pixels_per_chunk; | ||
| } | ||
|
|
||
| /* Decrement output pointers. */ | ||
| *ssp = *ssp - i; | ||
| *ddp = *ddp - (i << 2); | ||
| return i; | ||
| } | ||
|
|
||
| /* Expands a palettized row into RGB8. */ | ||
| int | ||
| png_do_expand_palette_rgb8_neon(png_structrp png_ptr, png_row_infop row_info, | ||
| png_const_bytep row, png_bytepp ssp, png_bytepp ddp) | ||
| { | ||
| png_uint_32 row_width = row_info->width; | ||
| png_const_bytep palette = (png_const_bytep)png_ptr->palette; | ||
| const png_uint_32 pixels_per_chunk = 8; | ||
| int i; | ||
|
|
||
| png_debug(1, "in png_do_expand_palette_rgb8_neon"); | ||
|
|
||
| if (row_width <= pixels_per_chunk) | ||
| return 0; | ||
|
|
||
| /* Seeking this back by 8 pixels x 3 bytes. */ | ||
| *ddp = *ddp - ((pixels_per_chunk * sizeof(png_color)) - 1); | ||
|
|
||
| for (i = 0; i < row_width; i += pixels_per_chunk) | ||
| { | ||
| uint8x8x3_t cur; | ||
| png_bytep sp = *ssp - i, dp = *ddp - ((i << 1) + i); | ||
| cur = vld3_dup_u8(palette + sizeof(png_color) * (*(sp - 7))); | ||
| cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 6)), cur, 1); | ||
| cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 5)), cur, 2); | ||
| cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 4)), cur, 3); | ||
| cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 3)), cur, 4); | ||
| cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 2)), cur, 5); | ||
| cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 1)), cur, 6); | ||
| cur = vld3_lane_u8(palette + sizeof(png_color) * (*(sp - 0)), cur, 7); | ||
| vst3_u8((void *)dp, cur); | ||
| } | ||
|
|
||
| if (i != row_width) | ||
| { | ||
| /* Remove the amount that wasn't processed. */ | ||
| i -= pixels_per_chunk; | ||
| } | ||
|
|
||
| /* Decrement output pointers. */ | ||
| *ssp = *ssp - i; | ||
| *ddp = *ddp - ((i << 1) + i); | ||
| return i; | ||
| } | ||
|
|
||
| #endif /* PNG_ARM_NEON_IMPLEMENTATION */ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,391 @@ | ||
|
|
||
| /* filter_sse2_intrinsics.c - SSE2 optimized filter functions | ||
| * | ||
| * Copyright (c) 2018 Cosmin Truta | ||
| * Copyright (c) 2016-2017 Glenn Randers-Pehrson | ||
| * Written by Mike Klein and Matt Sarett | ||
| * Derived from arm/filter_neon_intrinsics.c | ||
| * | ||
| * This code is released under the libpng license. | ||
| * For conditions of distribution and use, see the disclaimer | ||
| * and license in png.h | ||
| */ | ||
|
|
||
| #include "../pngpriv.h" | ||
|
|
||
| #ifdef PNG_READ_SUPPORTED | ||
|
|
||
| #if PNG_INTEL_SSE_IMPLEMENTATION > 0 | ||
|
|
||
| #include <immintrin.h> | ||
|
|
||
| /* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d). | ||
| * They're positioned like this: | ||
| * prev: c b | ||
| * row: a d | ||
| * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be | ||
| * whichever of a, b, or c is closest to p=a+b-c. | ||
| */ | ||
|
|
||
/* Read 4 bytes from p (any alignment) into the low 32 bits of a vector;
 * the remaining 96 bits are zero.
 */
static __m128i load4(const void* p) {
   int word;

   memcpy(&word, p, sizeof word);
   return _mm_cvtsi32_si128(word);
}
|
|
||
/* Write the low 32 bits of v to the 4 bytes at p (any alignment). */
static void store4(void* p, __m128i v) {
   const int low = _mm_cvtsi128_si32(v);

   memcpy(p, &low, sizeof low);
}
|
|
||
/* Read exactly 3 bytes from p (any alignment) into the first three bytes of
 * a vector; every other byte of the result is zero.  Used for the last
 * pixel of a row, where reading a fourth byte could run past the buffer.
 *
 * The temporary is an int, like load4/store3/store4, so the value handed to
 * _mm_cvtsi32_si128 needs no unsigned-to-signed conversion.
 */
static __m128i load3(const void* p) {
   int tmp = 0;   /* zero first: memcpy fills only 3 of the 4 bytes */

   memcpy(&tmp, p, 3);
   return _mm_cvtsi32_si128(tmp);
}
|
|
||
/* Write the first 3 bytes of v to p (any alignment); the byte at p[3] is
 * left untouched.
 */
static void store3(void* p, __m128i v) {
   const int low = _mm_cvtsi128_si32(v);

   memcpy(p, &low, 3);
}
|
|
||
| void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row, | ||
| png_const_bytep prev) | ||
| { | ||
| /* The Sub filter predicts each pixel as the previous pixel, a. | ||
| * There is no pixel to the left of the first pixel. It's encoded directly. | ||
| * That works with our main loop if we just say that left pixel was zero. | ||
| */ | ||
| size_t rb; | ||
|
|
||
| __m128i a, d = _mm_setzero_si128(); | ||
|
|
||
| png_debug(1, "in png_read_filter_row_sub3_sse2"); | ||
|
|
||
| rb = row_info->rowbytes; | ||
| while (rb >= 4) { | ||
| a = d; d = load4(row); | ||
| d = _mm_add_epi8(d, a); | ||
| store3(row, d); | ||
|
|
||
| row += 3; | ||
| rb -= 3; | ||
| } | ||
| if (rb > 0) { | ||
| a = d; d = load3(row); | ||
| d = _mm_add_epi8(d, a); | ||
| store3(row, d); | ||
|
|
||
| row += 3; | ||
| rb -= 3; | ||
| } | ||
| PNG_UNUSED(prev) | ||
| } | ||
|
|
||
| void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row, | ||
| png_const_bytep prev) | ||
| { | ||
| /* The Sub filter predicts each pixel as the previous pixel, a. | ||
| * There is no pixel to the left of the first pixel. It's encoded directly. | ||
| * That works with our main loop if we just say that left pixel was zero. | ||
| */ | ||
| size_t rb; | ||
|
|
||
| __m128i a, d = _mm_setzero_si128(); | ||
|
|
||
| png_debug(1, "in png_read_filter_row_sub4_sse2"); | ||
|
|
||
| rb = row_info->rowbytes+4; | ||
| while (rb > 4) { | ||
| a = d; d = load4(row); | ||
| d = _mm_add_epi8(d, a); | ||
| store4(row, d); | ||
|
|
||
| row += 4; | ||
| rb -= 4; | ||
| } | ||
| PNG_UNUSED(prev) | ||
| } | ||
|
|
||
| void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row, | ||
| png_const_bytep prev) | ||
| { | ||
| /* The Avg filter predicts each pixel as the (truncated) average of a and b. | ||
| * There's no pixel to the left of the first pixel. Luckily, it's | ||
| * predicted to be half of the pixel above it. So again, this works | ||
| * perfectly with our loop if we make sure a starts at zero. | ||
| */ | ||
|
|
||
| size_t rb; | ||
|
|
||
| const __m128i zero = _mm_setzero_si128(); | ||
|
|
||
| __m128i b; | ||
| __m128i a, d = zero; | ||
|
|
||
| png_debug(1, "in png_read_filter_row_avg3_sse2"); | ||
| rb = row_info->rowbytes; | ||
| while (rb >= 4) { | ||
| __m128i avg; | ||
| b = load4(prev); | ||
| a = d; d = load4(row ); | ||
|
|
||
| /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */ | ||
| avg = _mm_avg_epu8(a,b); | ||
| /* ...but we can fix it up by subtracting off 1 if it rounded up. */ | ||
| avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b), | ||
| _mm_set1_epi8(1))); | ||
| d = _mm_add_epi8(d, avg); | ||
| store3(row, d); | ||
|
|
||
| prev += 3; | ||
| row += 3; | ||
| rb -= 3; | ||
| } | ||
| if (rb > 0) { | ||
| __m128i avg; | ||
| b = load3(prev); | ||
| a = d; d = load3(row ); | ||
|
|
||
| /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */ | ||
| avg = _mm_avg_epu8(a,b); | ||
| /* ...but we can fix it up by subtracting off 1 if it rounded up. */ | ||
| avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b), | ||
| _mm_set1_epi8(1))); | ||
|
|
||
| d = _mm_add_epi8(d, avg); | ||
| store3(row, d); | ||
|
|
||
| prev += 3; | ||
| row += 3; | ||
| rb -= 3; | ||
| } | ||
| } | ||
|
|
||
| void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row, | ||
| png_const_bytep prev) | ||
| { | ||
| /* The Avg filter predicts each pixel as the (truncated) average of a and b. | ||
| * There's no pixel to the left of the first pixel. Luckily, it's | ||
| * predicted to be half of the pixel above it. So again, this works | ||
| * perfectly with our loop if we make sure a starts at zero. | ||
| */ | ||
| size_t rb; | ||
| const __m128i zero = _mm_setzero_si128(); | ||
| __m128i b; | ||
| __m128i a, d = zero; | ||
|
|
||
| png_debug(1, "in png_read_filter_row_avg4_sse2"); | ||
|
|
||
| rb = row_info->rowbytes+4; | ||
| while (rb > 4) { | ||
| __m128i avg; | ||
| b = load4(prev); | ||
| a = d; d = load4(row ); | ||
|
|
||
| /* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */ | ||
| avg = _mm_avg_epu8(a,b); | ||
| /* ...but we can fix it up by subtracting off 1 if it rounded up. */ | ||
| avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b), | ||
| _mm_set1_epi8(1))); | ||
|
|
||
| d = _mm_add_epi8(d, avg); | ||
| store4(row, d); | ||
|
|
||
| prev += 4; | ||
| row += 4; | ||
| rb -= 4; | ||
| } | ||
| } | ||
|
|
||
| /* Returns |x| for 16-bit lanes. */ | ||
/* Lane-wise absolute value of eight signed 16-bit integers. */
static __m128i abs_i16(__m128i x) {
#if PNG_INTEL_SSE_IMPLEMENTATION >= 2
   return _mm_abs_epi16(x);
#else
   /* Two's-complement negation is "invert, then add one".  An arithmetic
    * shift by 15 turns each lane into 0xffff when negative and 0 otherwise;
    * xor with that mask inverts the negative lanes, and subtracting the mask
    * (-1 or 0) adds the one.
    */
   const __m128i sign = _mm_srai_epi16(x, 15);

   return _mm_sub_epi16(_mm_xor_si128(x, sign), sign);
#endif
}
|
|
||
| /* Bytewise c ? t : e. */ | ||
/* Select between t and e under the mask c: where c is set the result comes
 * from t, elsewhere from e.  Callers pass masks whose bytes are all-ones or
 * all-zeros.
 */
static __m128i if_then_else(__m128i c, __m128i t, __m128i e) {
#if PNG_INTEL_SSE_IMPLEMENTATION >= 3
   return _mm_blendv_epi8(e,t,c);
#else
   /* e ^ ((t ^ e) & c): bits under the mask flip from e to t. */
   const __m128i differing = _mm_xor_si128(t, e);

   return _mm_xor_si128(e, _mm_and_si128(c, differing));
#endif
}
|
|
||
| void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row, | ||
| png_const_bytep prev) | ||
| { | ||
| /* Paeth tries to predict pixel d using the pixel to the left of it, a, | ||
| * and two pixels from the previous row, b and c: | ||
| * prev: c b | ||
| * row: a d | ||
| * The Paeth function predicts d to be whichever of a, b, or c is nearest to | ||
| * p=a+b-c. | ||
| * | ||
| * The first pixel has no left context, and so uses an Up filter, p = b. | ||
| * This works naturally with our main loop's p = a+b-c if we force a and c | ||
| * to zero. | ||
| * Here we zero b and d, which become c and a respectively at the start of | ||
| * the loop. | ||
| */ | ||
| size_t rb; | ||
| const __m128i zero = _mm_setzero_si128(); | ||
| __m128i c, b = zero, | ||
| a, d = zero; | ||
|
|
||
| png_debug(1, "in png_read_filter_row_paeth3_sse2"); | ||
|
|
||
| rb = row_info->rowbytes; | ||
| while (rb >= 4) { | ||
| /* It's easiest to do this math (particularly, deal with pc) with 16-bit | ||
| * intermediates. | ||
| */ | ||
| __m128i pa,pb,pc,smallest,nearest; | ||
| c = b; b = _mm_unpacklo_epi8(load4(prev), zero); | ||
| a = d; d = _mm_unpacklo_epi8(load4(row ), zero); | ||
|
|
||
| /* (p-a) == (a+b-c - a) == (b-c) */ | ||
|
|
||
| pa = _mm_sub_epi16(b,c); | ||
|
|
||
| /* (p-b) == (a+b-c - b) == (a-c) */ | ||
| pb = _mm_sub_epi16(a,c); | ||
|
|
||
| /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */ | ||
| pc = _mm_add_epi16(pa,pb); | ||
|
|
||
| pa = abs_i16(pa); /* |p-a| */ | ||
| pb = abs_i16(pb); /* |p-b| */ | ||
| pc = abs_i16(pc); /* |p-c| */ | ||
|
|
||
| smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb)); | ||
|
|
||
| /* Paeth breaks ties favoring a over b over c. */ | ||
| nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a, | ||
| if_then_else(_mm_cmpeq_epi16(smallest, pb), b, | ||
| c)); | ||
|
|
||
| /* Note `_epi8`: we need addition to wrap modulo 255. */ | ||
| d = _mm_add_epi8(d, nearest); | ||
| store3(row, _mm_packus_epi16(d,d)); | ||
|
|
||
| prev += 3; | ||
| row += 3; | ||
| rb -= 3; | ||
| } | ||
| if (rb > 0) { | ||
| /* It's easiest to do this math (particularly, deal with pc) with 16-bit | ||
| * intermediates. | ||
| */ | ||
| __m128i pa,pb,pc,smallest,nearest; | ||
| c = b; b = _mm_unpacklo_epi8(load3(prev), zero); | ||
| a = d; d = _mm_unpacklo_epi8(load3(row ), zero); | ||
|
|
||
| /* (p-a) == (a+b-c - a) == (b-c) */ | ||
| pa = _mm_sub_epi16(b,c); | ||
|
|
||
| /* (p-b) == (a+b-c - b) == (a-c) */ | ||
| pb = _mm_sub_epi16(a,c); | ||
|
|
||
| /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */ | ||
| pc = _mm_add_epi16(pa,pb); | ||
|
|
||
| pa = abs_i16(pa); /* |p-a| */ | ||
| pb = abs_i16(pb); /* |p-b| */ | ||
| pc = abs_i16(pc); /* |p-c| */ | ||
|
|
||
| smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb)); | ||
|
|
||
| /* Paeth breaks ties favoring a over b over c. */ | ||
| nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a, | ||
| if_then_else(_mm_cmpeq_epi16(smallest, pb), b, | ||
| c)); | ||
|
|
||
| /* Note `_epi8`: we need addition to wrap modulo 255. */ | ||
| d = _mm_add_epi8(d, nearest); | ||
| store3(row, _mm_packus_epi16(d,d)); | ||
|
|
||
| prev += 3; | ||
| row += 3; | ||
| rb -= 3; | ||
| } | ||
| } | ||
|
|
||
| void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row, | ||
| png_const_bytep prev) | ||
| { | ||
| /* Paeth tries to predict pixel d using the pixel to the left of it, a, | ||
| * and two pixels from the previous row, b and c: | ||
| * prev: c b | ||
| * row: a d | ||
| * The Paeth function predicts d to be whichever of a, b, or c is nearest to | ||
| * p=a+b-c. | ||
| * | ||
| * The first pixel has no left context, and so uses an Up filter, p = b. | ||
| * This works naturally with our main loop's p = a+b-c if we force a and c | ||
| * to zero. | ||
| * Here we zero b and d, which become c and a respectively at the start of | ||
| * the loop. | ||
| */ | ||
| size_t rb; | ||
| const __m128i zero = _mm_setzero_si128(); | ||
| __m128i pa,pb,pc,smallest,nearest; | ||
| __m128i c, b = zero, | ||
| a, d = zero; | ||
|
|
||
| png_debug(1, "in png_read_filter_row_paeth4_sse2"); | ||
|
|
||
| rb = row_info->rowbytes+4; | ||
| while (rb > 4) { | ||
| /* It's easiest to do this math (particularly, deal with pc) with 16-bit | ||
| * intermediates. | ||
| */ | ||
| c = b; b = _mm_unpacklo_epi8(load4(prev), zero); | ||
| a = d; d = _mm_unpacklo_epi8(load4(row ), zero); | ||
|
|
||
| /* (p-a) == (a+b-c - a) == (b-c) */ | ||
| pa = _mm_sub_epi16(b,c); | ||
|
|
||
| /* (p-b) == (a+b-c - b) == (a-c) */ | ||
| pb = _mm_sub_epi16(a,c); | ||
|
|
||
| /* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */ | ||
| pc = _mm_add_epi16(pa,pb); | ||
|
|
||
| pa = abs_i16(pa); /* |p-a| */ | ||
| pb = abs_i16(pb); /* |p-b| */ | ||
| pc = abs_i16(pc); /* |p-c| */ | ||
|
|
||
| smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb)); | ||
|
|
||
| /* Paeth breaks ties favoring a over b over c. */ | ||
| nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a, | ||
| if_then_else(_mm_cmpeq_epi16(smallest, pb), b, | ||
| c)); | ||
|
|
||
| /* Note `_epi8`: we need addition to wrap modulo 255. */ | ||
| d = _mm_add_epi8(d, nearest); | ||
| store4(row, _mm_packus_epi16(d,d)); | ||
|
|
||
| prev += 4; | ||
| row += 4; | ||
| rb -= 4; | ||
| } | ||
| } | ||
|
|
||
| #endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */ | ||
| #endif /* READ */ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
|
|
||
| /* intel_init.c - SSE2 optimized filter functions | ||
| * | ||
| * Copyright (c) 2018 Cosmin Truta | ||
| * Copyright (c) 2016-2017 Glenn Randers-Pehrson | ||
| * Written by Mike Klein and Matt Sarett, Google, Inc. | ||
| * Derived from arm/arm_init.c | ||
| * | ||
| * This code is released under the libpng license. | ||
| * For conditions of distribution and use, see the disclaimer | ||
| * and license in png.h | ||
| */ | ||
|
|
||
| #include "../pngpriv.h" | ||
|
|
||
| #ifdef PNG_READ_SUPPORTED | ||
| #if PNG_INTEL_SSE_IMPLEMENTATION > 0 | ||
|
|
||
| void | ||
| png_init_filter_functions_sse2(png_structp pp, unsigned int bpp) | ||
| { | ||
| /* Install the SSE2 row filter routines that match the given pixel size. | ||
| * | ||
| * pp - read structure whose read_filter[] table receives the routines | ||
| * bpp - pixel size as supplied by the caller; only the values 3 and 4 | ||
| * select routines, any other value leaves the table untouched | ||
| * | ||
| * The SSE implementations process a single pixel per step, which gives | ||
| * roughly a 3x gain for 3bpp images and a 4x gain for 4bpp images. | ||
| * The same approach could be stretched up to 6 and 8 bpp and down to | ||
| * 2 bpp, whereas 1bpp images would be unlikely to benefit. MMX with | ||
| * 64-bit registers would suffice for most of these filters, but that | ||
| * turns out a bit slower than the equally-ubiquitous SSE2. | ||
| */ | ||
| png_debug(1, "in png_init_filter_functions_sse2"); | ||
|
|
||
||
| switch (bpp) | ||
| { | ||
| case 3: | ||
| pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2; | ||
| pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2; | ||
| pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = | ||
| png_read_filter_row_paeth3_sse2; | ||
| break; | ||
|
|
||
||
| case 4: | ||
| pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2; | ||
| pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2; | ||
| pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = | ||
| png_read_filter_row_paeth4_sse2; | ||
| break; | ||
|
|
||
||
| default: | ||
| /* No SSE2 routine is installed for any other pixel size. */ | ||
| break; | ||
| } | ||
|
|
||
||
| /* PNG_FILTER_VALUE_UP is deliberately left alone: the compiler is | ||
| * expected to autovectorize it. | ||
| */ | ||
| } | ||
|
|
||
| #endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */ | ||
| #endif /* PNG_READ_SUPPORTED */ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,130 @@ | ||
|
|
||
| /* mips_init.c - MSA optimised filter functions | ||
| * | ||
| * Copyright (c) 2018 Cosmin Truta | ||
| * Copyright (c) 2016 Glenn Randers-Pehrson | ||
| * Written by Mandar Sahastrabuddhe, 2016. | ||
| * | ||
| * This code is released under the libpng license. | ||
| * For conditions of distribution and use, see the disclaimer | ||
| * and license in png.h | ||
| */ | ||
|
|
||
| /* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are | ||
| * called. | ||
| */ | ||
| #define _POSIX_SOURCE 1 | ||
|
|
||
| #include <stdio.h> | ||
| #include "../pngpriv.h" | ||
|
|
||
| #ifdef PNG_READ_SUPPORTED | ||
|
|
||
| #if PNG_MIPS_MSA_OPT > 0 | ||
| #ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do run-time checks */ | ||
| /* WARNING: it is strongly recommended that you do not build libpng with | ||
| * run-time checks for CPU features if at all possible. In the case of the MIPS | ||
| * MSA instructions there is no processor-specific way of detecting the | ||
| * presence of the required support, therefore run-time detection is extremely | ||
| * OS specific. | ||
| * | ||
| * You may set the macro PNG_MIPS_MSA_FILE to the name of a file containing | ||
| * a fragment of C source code which defines the png_have_msa function. There | ||
| * are a number of implementations in contrib/mips-msa, but the only one that | ||
| * has partial support is contrib/mips-msa/linux.c - a generic Linux | ||
| * implementation which reads /proc/cpuinfo. | ||
| */ | ||
| #ifndef PNG_MIPS_MSA_FILE | ||
| # ifdef __linux__ | ||
| # define PNG_MIPS_MSA_FILE "contrib/mips-msa/linux.c" | ||
| # endif | ||
| #endif | ||
|
|
||
| #ifdef PNG_MIPS_MSA_FILE | ||
|
|
||
| #include <signal.h> /* for sig_atomic_t */ | ||
| static int png_have_msa(png_structp png_ptr); | ||
| #include PNG_MIPS_MSA_FILE | ||
|
|
||
| #else /* PNG_MIPS_MSA_FILE */ | ||
| # error "PNG_MIPS_MSA_FILE undefined: no support for run-time MIPS MSA checks" | ||
| #endif /* PNG_MIPS_MSA_FILE */ | ||
| #endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */ | ||
|
|
||
| #ifndef PNG_ALIGNED_MEMORY_SUPPORTED | ||
| # error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED" | ||
| #endif | ||
|
|
||
| void | ||
| png_init_filter_functions_msa(png_structp pp, unsigned int bpp) | ||
| { | ||
| /* Install the MSA row filter routines into pp->read_filter, unless MSA | ||
| * use is switched off through the option bits or the run-time check | ||
| * reports that MSA is unavailable; in both of those cases the function | ||
| * returns without touching the table. The 'up' routine is installed | ||
| * for every bpp, the sub/avg/paeth routines only when bpp is 3 or 4. | ||
| * | ||
| * The switch statement is compiled in for MIPS_MSA_API, the call to | ||
| * png_have_msa is compiled in for MIPS_MSA_CHECK. If both are defined | ||
| * the check is only performed if the API has not set the MSA option on | ||
| * or off explicitly. In this case the check controls what happens. | ||
| */ | ||
|
|
||
||
| #ifdef PNG_MIPS_MSA_API_SUPPORTED | ||
| switch ((pp->options >> PNG_MIPS_MSA) & 3) /* two-bit MSA option field */ | ||
| { | ||
| case PNG_OPTION_UNSET: | ||
| /* Allow the run-time check to execute if it has been enabled - | ||
| * thus both API and CHECK can be turned on. If it isn't supported | ||
| * this case will fall through to the 'default' below, which just | ||
| * returns. | ||
| */ | ||
| #endif /* PNG_MIPS_MSA_API_SUPPORTED */ | ||
| #ifdef PNG_MIPS_MSA_CHECK_SUPPORTED | ||
| { | ||
| static volatile sig_atomic_t no_msa = -1; /* -1: not checked yet; 0 or 1 once checked */ | ||
|
|
||
||
| if (no_msa < 0) /* probe only while the cached result is still unset */ | ||
| no_msa = !png_have_msa(pp); | ||
|
|
||
||
| if (no_msa) /* check reported no MSA: leave the filter table unchanged */ | ||
| return; | ||
| } | ||
| #ifdef PNG_MIPS_MSA_API_SUPPORTED | ||
| break; /* check passed: leave the switch and install the filters below */ | ||
| #endif | ||
| #endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */ | ||
|
|
||
||
| #ifdef PNG_MIPS_MSA_API_SUPPORTED | ||
| default: /* OFF or INVALID */ | ||
| return; | ||
|
|
||
||
| case PNG_OPTION_ON: | ||
| /* Option turned on: skip the run-time check and install the filters */ | ||
| break; | ||
| } | ||
| #endif | ||
|
|
||
||
| /* IMPORTANT: any new external functions used here must be declared using | ||
| * PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the | ||
| * 'prefix' option to configure works: | ||
| * | ||
| * ./configure --with-libpng-prefix=foobar_ | ||
| * | ||
| * Verify you have got this right by running the above command, doing a build | ||
| * and examining pngprefix.h; it must contain a #define for every external | ||
| * function you add. (Notice that this happens automatically for the | ||
| * initialization function.) | ||
| * | ||
| * The 'up' routine below is installed regardless of the value of bpp. | ||
| */ | ||
| pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_msa; | ||
|
|
||
||
| if (bpp == 3) /* routines specific to a pixel size of 3 */ | ||
| { | ||
| pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_msa; | ||
| pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_msa; | ||
| pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_msa; | ||
| } | ||
|
|
||
||
| else if (bpp == 4) /* routines specific to a pixel size of 4 */ | ||
| { | ||
| pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_msa; | ||
| pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_msa; | ||
| pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_msa; | ||
| } | ||
| /* Any other bpp keeps whatever sub/avg/paeth entries were already set. */ | ||
| } | ||
| #endif /* PNG_MIPS_MSA_OPT > 0 */ | ||
| #endif /* READ */ |