From 67951d4c88a03916384c8ad03f5f6cd3317e9366 Mon Sep 17 00:00:00 2001 From: Pedro Pontes Date: Wed, 10 Mar 2021 17:53:40 +0100 Subject: [PATCH] cherry-pick 157601b9b9 from pdfium --- patches/config.json | 4 +- patches/pdfium/.patches | 1 + .../m89_upgrade_openjpeg_to_2_4_0.patch | 8883 +++++++++++++++++ 3 files changed, 8887 insertions(+), 1 deletion(-) create mode 100644 patches/pdfium/.patches create mode 100644 patches/pdfium/m89_upgrade_openjpeg_to_2_4_0.patch diff --git a/patches/config.json b/patches/config.json index 1991caedf3158..af04ffbd84043 100644 --- a/patches/config.json +++ b/patches/config.json @@ -19,5 +19,7 @@ "src/electron/patches/skia": "src/third_party/skia", - "src/electron/patches/usrsctp": "src/third_party/usrsctp/usrsctplib" + "src/electron/patches/usrsctp": "src/third_party/usrsctp/usrsctplib", + + "src/electron/patches/pdfium": "src/third_party/pdfium" } diff --git a/patches/pdfium/.patches b/patches/pdfium/.patches new file mode 100644 index 0000000000000..22f5086badca5 --- /dev/null +++ b/patches/pdfium/.patches @@ -0,0 +1 @@ +m89_upgrade_openjpeg_to_2_4_0.patch diff --git a/patches/pdfium/m89_upgrade_openjpeg_to_2_4_0.patch b/patches/pdfium/m89_upgrade_openjpeg_to_2_4_0.patch new file mode 100644 index 0000000000000..94777df6f7646 --- /dev/null +++ b/patches/pdfium/m89_upgrade_openjpeg_to_2_4_0.patch @@ -0,0 +1,8883 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Daniel Hosseinian +Date: Mon, 22 Feb 2021 19:30:49 +0000 +Subject: M89: Upgrade OpenJPEG to 2.4.0 + +Upgrade OpenJPEG by copying the files from 2.4.0 and then applying +patches. Patch files that are no longer relevant are deleted. + +Some parts of patch 3 are no longer applicable. + +The bug from patch 36 was fixed by upstream commit +024b8407392cb0b82b04b58ed256094ed5799e04. + +Add a new patch 39 to remove the unused opj_mqc_renorme() function. + +Fixed: pdfium:1634 +Change-Id: Iaf5e208ea1f32a84aedb09744e0df084621f73dd +Bug: pdfium:1634, chromium:1177875 +Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/78050 +Reviewed-by: Tom Sepez +Commit-Queue: Daniel Hosseinian +(cherry picked from commit a81ff7286463b41d1055353a1e5ed6a2501a8b63) +Reviewed-on: https://pdfium-review.googlesource.com/c/pdfium/+/78170 +Auto-Submit: Daniel Hosseinian + +diff --git a/third_party/libopenjpeg20/0003-dwt-decode.patch b/third_party/libopenjpeg20/0003-dwt-decode.patch +index 94d4b41a75fc439ca76d1a43f612388008324a96..99c7a8cda7d374f33eb2d897a11083e67469c14a 100644 +--- a/third_party/libopenjpeg20/0003-dwt-decode.patch ++++ b/third_party/libopenjpeg20/0003-dwt-decode.patch +@@ -1,5 +1,5 @@ + diff --git a/third_party/libopenjpeg20/dwt.c b/third_party/libopenjpeg20/dwt.c +-index 5930d1c71..6512b1e4c 100644 ++index 4164ba090..a36b7ed10 100644 + --- a/third_party/libopenjpeg20/dwt.c + +++ b/third_party/libopenjpeg20/dwt.c + @@ -63,9 +63,6 @@ +@@ -20,25 +20,7 @@ index 5930d1c71..6512b1e4c 100644 + OPJ_INT32 dn; /* number of elements in high pass band */ + OPJ_INT32 sn; /* number of elements in low pass band */ + OPJ_INT32 cas; /* 0 = start on even coord, 1 = start on odd coord */ +-@@ -133,13 +131,13 @@ static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, +- /** +- Forward 5-3 wavelet transform in 1-D +- */ +--static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, +-- OPJ_INT32 cas); +-+static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, +-+ OPJ_INT32 sn, OPJ_INT32 cas); +- /** +- Forward 9-7 wavelet transform in 1-D +- */ +--static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, +-- OPJ_INT32 cas); +-+static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count, +-+ OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); +- /** +- Explicit calculation of the Quantization Stepsizes +- */ +-@@ -149,14 +147,14 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, ++@@ -140,7 +138,7 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, + Inverse wavelet transform in 2-D. + */ + static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, +@@ -47,16 +29,7 @@ index 5930d1c71..6512b1e4c 100644 + + static OPJ_BOOL opj_dwt_decode_partial_tile( + opj_tcd_tilecomp_t* tilec, +- OPJ_UINT32 numres); +- +--static OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, +-- void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32)); +-+static OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec, +-+ void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32)); +- +- static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r, +- OPJ_UINT32 i); +-@@ -205,13 +203,20 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, ++@@ -181,13 +179,20 @@ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r, + + /*@}*/ + +@@ -83,18 +56,7 @@ index 5930d1c71..6512b1e4c 100644 + + /* */ + /* This table contains the norms of the 5-3 wavelets for different bands. */ +-@@ -344,8 +349,8 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) +- /* */ +- /* Forward 5-3 wavelet transform in 1-D. */ +- /* */ +--static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, +-- OPJ_INT32 cas) +-+static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, +-+ OPJ_INT32 sn, OPJ_INT32 cas) +- { +- OPJ_INT32 i; +- +-@@ -376,8 +381,8 @@ static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, ++@@ -296,8 +301,8 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) + /* */ + /* Inverse 5-3 wavelet transform in 1-D. */ + /* */ +@@ -105,7 +67,7 @@ index 5930d1c71..6512b1e4c 100644 + { + OPJ_INT32 i; + +-@@ -406,7 +411,7 @@ static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, ++@@ -326,7 +331,7 @@ static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, + + static void opj_dwt_decode_1(const opj_dwt_t *v) + { +@@ -114,72 +76,7 @@ index 5930d1c71..6512b1e4c 100644 + } + + #endif /* STANDARD_SLOW_VERSION */ +-@@ -1037,8 +1042,8 @@ static void opj_idwt53_v(const opj_dwt_t *dwt, +- /* */ +- /* Forward 9-7 wavelet transform in 1-D. */ +- /* */ +--static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn, +-- OPJ_INT32 cas) +-+static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count, +-+ OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) +- { +- OPJ_INT32 i; +- if (!cas) { +-@@ -1106,8 +1111,8 @@ static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, +- /* */ +- /* Forward 5-3 wavelet transform in 2-D. */ +- /* */ +--static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, +-- void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32)) +-+static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec, +-+ void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32)) +- { +- OPJ_INT32 i, j, k; +- OPJ_INT32 *a = 00; +-@@ -1117,6 +1122,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, +- +- OPJ_INT32 rw; /* width of the resolution level computed */ +- OPJ_INT32 rh; /* height of the resolution level computed */ +-+ OPJ_SIZE_T l_data_count; +- OPJ_SIZE_T l_data_size; +- +- opj_tcd_resolution_t * l_cur_res = 0; +-@@ -1129,13 +1135,13 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, +- l_cur_res = tilec->resolutions + l; +- l_last_res = l_cur_res - 1; +- +-- l_data_size = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions); +-+ l_data_count = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions); +- /* overflow check */ +-- if (l_data_size > (SIZE_MAX / sizeof(OPJ_INT32))) { +-+ if (l_data_count > (SIZE_MAX / sizeof(OPJ_INT32))) { +- /* FIXME event manager error callback */ +- return OPJ_FALSE; +- } +-- l_data_size *= sizeof(OPJ_INT32); +-+ l_data_size = l_data_count * sizeof(OPJ_INT32); +- bj = (OPJ_INT32*)opj_malloc(l_data_size); +- /* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */ +- /* in that case, so do not error out */ +-@@ -1167,7 +1173,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, +- bj[k] = aj[k * w]; +- } +- +-- (*p_function)(bj, dn, sn, cas_col); +-+ (*p_function) (bj, l_data_count, dn, sn, cas_col); +- +- opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col); +- } +-@@ -1180,7 +1186,7 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_tcd_tilecomp_t * tilec, +- for (k = 0; k < rw; k++) { +- bj[k] = aj[k]; +- } +-- (*p_function)(bj, dn, sn, cas_row); +-+ (*p_function) (bj, l_data_count, dn, sn, cas_row); +- opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row); +- } +- +-@@ -1379,7 +1385,7 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) ++@@ -2062,7 +2067,7 @@ static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) + /* Inverse wavelet transform in 2-D. */ + /* */ + static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, +@@ -188,7 +85,7 @@ index 5930d1c71..6512b1e4c 100644 + { + opj_dwt_t h; + opj_dwt_t v; +-@@ -1401,22 +1407,23 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, ++@@ -2084,22 +2089,23 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, + return OPJ_TRUE; + } + num_threads = opj_thread_pool_get_thread_count(tp); +@@ -215,7 +112,7 @@ index 5930d1c71..6512b1e4c 100644 + v.mem = h.mem; + + while (--numres) { +-@@ -1594,7 +1601,8 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest, ++@@ -2277,7 +2283,8 @@ static void opj_dwt_interleave_partial_v(OPJ_INT32 *dest, + OPJ_UNUSED(ret); + } + +@@ -225,7 +122,7 @@ index 5930d1c71..6512b1e4c 100644 + OPJ_INT32 cas, + OPJ_INT32 win_l_x0, + OPJ_INT32 win_l_x1, +-@@ -1974,16 +1982,16 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( ++@@ -2657,16 +2664,16 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( + opj_sparse_array_int32_free(sa); + return OPJ_TRUE; + } +@@ -245,7 +142,7 @@ index 5930d1c71..6512b1e4c 100644 + h.mem = (OPJ_INT32*)opj_aligned_32_malloc(h_mem_size); + if (! h.mem) { + /* FIXME event manager error callback */ +-@@ -1991,6 +1999,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( ++@@ -2674,6 +2681,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( + return OPJ_FALSE; + } + +@@ -253,7 +150,7 @@ index 5930d1c71..6512b1e4c 100644 + v.mem = h.mem; + + for (resno = 1; resno < numres; resno ++) { +-@@ -2101,7 +2110,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( ++@@ -2784,7 +2792,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( + win_ll_x1, + win_hl_x0, + win_hl_x1); +diff --git a/third_party/libopenjpeg20/0005-jp2_apply_pclr.patch b/third_party/libopenjpeg20/0005-jp2_apply_pclr.patch +index 2d450172a5887b8a5197d7a2a5f2c911bf9049a7..cbc5a8f86a4b845809b7bcb26e70cdfbec9f25b4 100644 +--- a/third_party/libopenjpeg20/0005-jp2_apply_pclr.patch ++++ b/third_party/libopenjpeg20/0005-jp2_apply_pclr.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c +-index 8dc1ecbe6..61b3f5821 100644 ++index 7c065ba74..a5790b267 100644 + --- a/third_party/libopenjpeg20/jp2.c + +++ b/third_party/libopenjpeg20/jp2.c +-@@ -1073,8 +1073,8 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, ++@@ -1079,8 +1079,8 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, + assert(pcol == 0); + new_comps[i] = old_comps[cmp]; + } else { +@@ -13,7 +13,7 @@ index 8dc1ecbe6..61b3f5821 100644 + } + + /* Palette mapping: */ +-@@ -1102,7 +1102,7 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, ++@@ -1108,7 +1108,7 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, + pcol = cmap[i].pcol; + src = old_comps[cmp].data; + assert(src); /* verified above */ +@@ -22,7 +22,7 @@ index 8dc1ecbe6..61b3f5821 100644 + + /* Direct use: */ + if (cmap[i].mtyp == 0) { +-@@ -1112,8 +1112,8 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, ++@@ -1118,8 +1118,8 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, + dst[j] = src[j]; + } + } else { +diff --git a/third_party/libopenjpeg20/0006-tcd_init_tile.patch b/third_party/libopenjpeg20/0006-tcd_init_tile.patch +index 8c37fc2733d685c99b60acf7544fa580d6a0c774..409fe7b927fccc67cb53bc8d7d96a0ac28044ef0 100644 +--- a/third_party/libopenjpeg20/0006-tcd_init_tile.patch ++++ b/third_party/libopenjpeg20/0006-tcd_init_tile.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/tcd.c b/third_party/libopenjpeg20/tcd.c +-index 1dd15405d..acc28dd55 100644 ++index 6442669d6..4c728d4c6 100644 + --- a/third_party/libopenjpeg20/tcd.c + +++ b/third_party/libopenjpeg20/tcd.c +-@@ -818,6 +818,11 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, ++@@ -819,6 +819,11 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, + if (isEncoder) { + OPJ_SIZE_T l_tile_data_size; + +diff --git a/third_party/libopenjpeg20/0007-jp2_read_cmap.patch b/third_party/libopenjpeg20/0007-jp2_read_cmap.patch +index 4cc434099ff0f14414a8000ee8fcb28d68948215..781f2c6d0f37217c15e6a71772aade4c0b1ad919 100644 +--- a/third_party/libopenjpeg20/0007-jp2_read_cmap.patch ++++ b/third_party/libopenjpeg20/0007-jp2_read_cmap.patch +@@ -1,13 +1,13 @@ + diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c +-index 3ace09654..7ef7c9139 100644 ++index a5790b267..a0f639d8e 100644 + --- a/third_party/libopenjpeg20/jp2.c + +++ b/third_party/libopenjpeg20/jp2.c +-@@ -1296,7 +1296,7 @@ static OPJ_BOOL opj_jp2_read_cmap(opj_jp2_t * jp2, ++@@ -1308,7 +1308,7 @@ static OPJ_BOOL opj_jp2_read_cmap(opj_jp2_t * jp2, + + + for (i = 0; i < nr_channels; ++i) { + - opj_read_bytes(p_cmap_header_data, &l_value, 2); /* CMP^i */ +-+ opj_read_bytes_BE(p_cmap_header_data, &l_value, 2); /* CMP^i */ +++ opj_read_bytes_BE(p_cmap_header_data, &l_value, 2); /* CMP^i */ + p_cmap_header_data += 2; + cmap[i].cmp = (OPJ_UINT16) l_value; + +diff --git a/third_party/libopenjpeg20/0009-opj_pi_next.patch b/third_party/libopenjpeg20/0009-opj_pi_next.patch +index 99f17d313836b18d52d6bb49bea4c2061fa29766..ed0332fe0701348fe4a27088f6f60482df14e72f 100644 +--- a/third_party/libopenjpeg20/0009-opj_pi_next.patch ++++ b/third_party/libopenjpeg20/0009-opj_pi_next.patch +@@ -1,31 +1,31 @@ + diff --git a/third_party/libopenjpeg20/pi.c b/third_party/libopenjpeg20/pi.c +-index 91642ee4e..256fe37a1 100644 ++index 4f7dd50f1..1430d12a9 100644 + --- a/third_party/libopenjpeg20/pi.c + +++ b/third_party/libopenjpeg20/pi.c +-@@ -445,6 +445,9 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) +- (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) +- - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); +- pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); ++@@ -464,6 +464,9 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) ++ (comp->dy << levelno)), res->pdy) ++ - opj_uint_floordivpow2(try0, res->pdy); ++ pi->precno = prci + prcj * res->pw; + + if (pi->precno >= res->pw * res->ph) { + + return OPJ_FALSE; + + } + for (pi->layno = pi->poc.layno0; pi->layno < pi->poc.layno1; pi->layno++) { + index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * + pi->step_c + pi->precno * pi->step_p; +-@@ -576,6 +579,9 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) +- (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) +- - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); +- pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); ++@@ -602,6 +605,9 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) ++ (comp->dy << levelno)), res->pdy) ++ - opj_uint_floordivpow2(try0, res->pdy); ++ pi->precno = prci + prcj * res->pw; + + if (pi->precno >= res->pw * res->ph) { + + return OPJ_FALSE; + + } + for (pi->layno = pi->poc.layno0; pi->layno < pi->poc.layno1; pi->layno++) { + index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * + pi->step_c + pi->precno * pi->step_p; +-@@ -704,6 +710,9 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) +- (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) +- - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); +- pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); ++@@ -737,6 +743,9 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) ++ (comp->dy << levelno)), res->pdy) ++ - opj_uint_floordivpow2(try0, res->pdy); ++ pi->precno = (OPJ_UINT32)(prci + prcj * res->pw); + + if (pi->precno >= res->pw * res->ph) { + + return OPJ_FALSE; + + } +diff --git a/third_party/libopenjpeg20/0011-j2k_update_image_data.patch b/third_party/libopenjpeg20/0011-j2k_update_image_data.patch +index b61324a6b4d967ff369442824331b6e6c96ecb61..1402129b7fca35b2e9f90d68a68c81b2692aaad7 100644 +--- a/third_party/libopenjpeg20/0011-j2k_update_image_data.patch ++++ b/third_party/libopenjpeg20/0011-j2k_update_image_data.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c +-index ad6e1b86f..bf1cb4f36 100644 ++index 8e343ab2e..5e1494394 100644 + --- a/third_party/libopenjpeg20/j2k.c + +++ b/third_party/libopenjpeg20/j2k.c +-@@ -9086,6 +9086,12 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, ++@@ -9882,6 +9882,12 @@ static OPJ_BOOL opj_j2k_update_image_data(opj_tcd_t * p_tcd, + * */ + assert(res_x0 >= 0); + assert(res_x1 >= 0); +diff --git a/third_party/libopenjpeg20/0012-mct_sse.patch b/third_party/libopenjpeg20/0012-mct_sse.patch +index 9bc2e6f0dc6d551580e0235e23d2c6b02ffd50cb..812ff01df5fe3d528fffeb75f35326b94babcc6e 100644 +--- a/third_party/libopenjpeg20/0012-mct_sse.patch ++++ b/third_party/libopenjpeg20/0012-mct_sse.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/mct.c b/third_party/libopenjpeg20/mct.c +-index b79d4b87c..81ec223d8 100644 ++index 88c8f4092..c4c2e732e 100644 + --- a/third_party/libopenjpeg20/mct.c + +++ b/third_party/libopenjpeg20/mct.c +-@@ -37,13 +37,16 @@ ++@@ -37,13 +37,15 @@ + * POSSIBILITY OF SUCH DAMAGE. + */ + +@@ -18,11 +18,10 @@ index b79d4b87c..81ec223d8 100644 + #endif + -#ifdef __SSE4_1__ + +#if defined(__SSE4_1__) && !defined(_M_IX86) && !defined(__i386) +-+#define USE_SSE4 + #include + #endif + +-@@ -72,7 +75,7 @@ const OPJ_FLOAT64 * opj_mct_get_mct_norms_real() ++@@ -72,7 +74,7 @@ const OPJ_FLOAT64 * opj_mct_get_mct_norms_real() + /* */ + /* Forward reversible MCT. */ + /* */ +@@ -31,7 +30,7 @@ index b79d4b87c..81ec223d8 100644 + void opj_mct_encode( + OPJ_INT32* OPJ_RESTRICT c0, + OPJ_INT32* OPJ_RESTRICT c1, +-@@ -141,7 +144,7 @@ void opj_mct_encode( ++@@ -141,7 +143,7 @@ void opj_mct_encode( + /* */ + /* Inverse reversible MCT. */ + /* */ +@@ -40,19 +39,19 @@ index b79d4b87c..81ec223d8 100644 + void opj_mct_decode( + OPJ_INT32* OPJ_RESTRICT c0, + OPJ_INT32* OPJ_RESTRICT c1, +-@@ -209,7 +212,7 @@ OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) +- /* */ +- /* Forward irreversible MCT. */ +- /* */ +--#ifdef __SSE4_1__ +-+#ifdef USE_SSE4 +- void opj_mct_encode_real( +- OPJ_INT32* OPJ_RESTRICT c0, +- OPJ_INT32* OPJ_RESTRICT c1, +-@@ -389,7 +392,7 @@ void opj_mct_decode_real( ++@@ -216,7 +218,7 @@ void opj_mct_encode_real( ++ OPJ_SIZE_T n) ++ { ++ OPJ_SIZE_T i; ++-#ifdef __SSE__ +++#ifdef USE_SSE ++ const __m128 YR = _mm_set1_ps(0.299f); ++ const __m128 YG = _mm_set1_ps(0.587f); ++ const __m128 YB = _mm_set1_ps(0.114f); ++@@ -286,7 +288,7 @@ void opj_mct_decode_real( + OPJ_SIZE_T n) + { +- OPJ_UINT32 i; ++ OPJ_SIZE_T i; + -#ifdef __SSE__ + +#ifdef USE_SSE + __m128 vrv, vgu, vgv, vbu; +diff --git a/third_party/libopenjpeg20/0014-opj_jp2_read_ihdr_leak.patch b/third_party/libopenjpeg20/0014-opj_jp2_read_ihdr_leak.patch +index 0ae0cfcb1b80560a9d21cbc57233f4f45bdc3927..ab0da52677f50c4e24a3d9280b54f047fb429bca 100644 +--- a/third_party/libopenjpeg20/0014-opj_jp2_read_ihdr_leak.patch ++++ b/third_party/libopenjpeg20/0014-opj_jp2_read_ihdr_leak.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c +-index 7ef7c9139..1fa607d66 100644 ++index a0f639d8e..38715b80f 100644 + --- a/third_party/libopenjpeg20/jp2.c + +++ b/third_party/libopenjpeg20/jp2.c +-@@ -593,6 +593,7 @@ static OPJ_BOOL opj_jp2_read_ihdr(opj_jp2_t *jp2, ++@@ -599,6 +599,7 @@ static OPJ_BOOL opj_jp2_read_ihdr(opj_jp2_t *jp2, + } + + /* allocate memory for components */ +@@ -10,7 +10,7 @@ index 7ef7c9139..1fa607d66 100644 + jp2->comps = (opj_jp2_comps_t*) opj_calloc(jp2->numcomps, + sizeof(opj_jp2_comps_t)); + if (jp2->comps == 0) { +-@@ -1882,6 +1883,7 @@ void opj_jp2_setup_decoder(opj_jp2_t *jp2, opj_dparameters_t *parameters) ++@@ -1897,6 +1898,7 @@ void opj_jp2_setup_decoder(opj_jp2_t *jp2, opj_dparameters_t *parameters) + + /* further JP2 initializations go here */ + jp2->color.jp2_has_colr = 0; +diff --git a/third_party/libopenjpeg20/0015-read_SPCod_SPCoc_overflow.patch b/third_party/libopenjpeg20/0015-read_SPCod_SPCoc_overflow.patch +index 760ed7462bdda26055cb370b7e8a8a73ab75d9fc..11e22fbd1a9153d93dd13a691bb07032ebd9e655 100644 +--- a/third_party/libopenjpeg20/0015-read_SPCod_SPCoc_overflow.patch ++++ b/third_party/libopenjpeg20/0015-read_SPCod_SPCoc_overflow.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c +-index 8e35b33ee..d95963a5c 100644 ++index 5e1494394..413dbdd9f 100644 + --- a/third_party/libopenjpeg20/j2k.c + +++ b/third_party/libopenjpeg20/j2k.c +-@@ -9527,6 +9527,10 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, ++@@ -10537,6 +10537,10 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, + p_j2k->m_specific_param.m_decoder.m_default_tcp; + + /* precondition again */ +diff --git a/third_party/libopenjpeg20/0016-read_SQcd_SQcc_overflow.patch b/third_party/libopenjpeg20/0016-read_SQcd_SQcc_overflow.patch +index d7e06ead8fce73e6f71cb9f6b0b5542e6b00589c..7a63b9ba47e88d35c4e604aeb2de96ce88335a63 100644 +--- a/third_party/libopenjpeg20/0016-read_SQcd_SQcc_overflow.patch ++++ b/third_party/libopenjpeg20/0016-read_SQcd_SQcc_overflow.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c +-index d95963a5c..ed449684f 100644 ++index 413dbdd9f..1932fe20c 100644 + --- a/third_party/libopenjpeg20/j2k.c + +++ b/third_party/libopenjpeg20/j2k.c +-@@ -9864,7 +9864,9 @@ static OPJ_BOOL opj_j2k_read_SQcd_SQcc(opj_j2k_t *p_j2k, ++@@ -10885,7 +10885,9 @@ static OPJ_BOOL opj_j2k_read_SQcd_SQcc(opj_j2k_t *p_j2k, + p_j2k->m_specific_param.m_decoder.m_default_tcp; + + /* precondition again*/ +diff --git a/third_party/libopenjpeg20/0019-tcd_init_tile.patch b/third_party/libopenjpeg20/0019-tcd_init_tile.patch +index 8746eace207e94cbd16dafb167282387dd7589d3..bf16596f0242d6291e19aefcc9a11fcaaa0588a4 100644 +--- a/third_party/libopenjpeg20/0019-tcd_init_tile.patch ++++ b/third_party/libopenjpeg20/0019-tcd_init_tile.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/tcd.c b/third_party/libopenjpeg20/tcd.c +-index be3b84363..5757fd401 100644 ++index 4c728d4c6..b9f571410 100644 + --- a/third_party/libopenjpeg20/tcd.c + +++ b/third_party/libopenjpeg20/tcd.c +-@@ -1065,6 +1065,9 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, ++@@ -1094,6 +1094,9 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, + l_current_precinct->ch = (OPJ_UINT32)((brcblkyend - tlcblkystart) >> + cblkheightexpn); + +diff --git a/third_party/libopenjpeg20/0022-jp2_apply_pclr_overflow.patch b/third_party/libopenjpeg20/0022-jp2_apply_pclr_overflow.patch +index c1773d48fe176ffb77164ec3e944ba790879de87..1546b956944c16e7562155d12b6df5e35063b9f3 100644 +--- a/third_party/libopenjpeg20/0022-jp2_apply_pclr_overflow.patch ++++ b/third_party/libopenjpeg20/0022-jp2_apply_pclr_overflow.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c +-index 8dc2b977f..3e23bc363 100644 ++index 38715b80f..dcaf3872c 100644 + --- a/third_party/libopenjpeg20/jp2.c + +++ b/third_party/libopenjpeg20/jp2.c +-@@ -1058,6 +1058,14 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, ++@@ -1064,6 +1064,14 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, + } + + old_comps = image->comps; +@@ -17,7 +17,7 @@ index 8dc2b977f..3e23bc363 100644 + new_comps = (opj_image_comp_t*) + opj_malloc(nr_channels * sizeof(opj_image_comp_t)); + if (!new_comps) { +-@@ -1102,20 +1110,26 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, ++@@ -1108,20 +1116,26 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, + cmp = cmap[i].cmp; + pcol = cmap[i].pcol; + src = old_comps[cmp].data; +diff --git a/third_party/libopenjpeg20/0023-opj_j2k_read_mct_records.patch b/third_party/libopenjpeg20/0023-opj_j2k_read_mct_records.patch +index c8415ae4eee6f82d5c61865121ab1fb96c42099c..607d9f117f0b8e6c3537f47f82cd7b1ac201bbe5 100644 +--- a/third_party/libopenjpeg20/0023-opj_j2k_read_mct_records.patch ++++ b/third_party/libopenjpeg20/0023-opj_j2k_read_mct_records.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c +-index ed449684f..c5f9dd53e 100644 ++index 1932fe20c..d24564cc2 100644 + --- a/third_party/libopenjpeg20/j2k.c + +++ b/third_party/libopenjpeg20/j2k.c +-@@ -5553,6 +5553,7 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k, ++@@ -5749,6 +5749,7 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k, + OPJ_UINT32 l_tmp; + OPJ_UINT32 l_indix; + opj_mct_data_t * l_mct_data; +@@ -10,7 +10,7 @@ index ed449684f..c5f9dd53e 100644 + + /* preconditions */ + assert(p_header_data != 00); +-@@ -5640,7 +5641,7 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k, ++@@ -5836,7 +5837,7 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k, + } + + l_mct_data = l_tcp->m_mct_records + l_tcp->m_nb_mct_records; +@@ -19,7 +19,7 @@ index ed449684f..c5f9dd53e 100644 + } + + if (l_mct_data->m_data) { +-@@ -5672,6 +5673,9 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k, ++@@ -5868,6 +5869,9 @@ static OPJ_BOOL opj_j2k_read_mct(opj_j2k_t *p_j2k, + + l_mct_data->m_data_size = p_header_size; + +diff --git a/third_party/libopenjpeg20/0025-opj_j2k_add_mct_null_data.patch b/third_party/libopenjpeg20/0025-opj_j2k_add_mct_null_data.patch +index b2e7cdc75aa4ba8b0d3541ccceb37f4a0b90c34d..50679061c037b2f1e8dc3e0cef383a2e32e7ce96 100644 +--- a/third_party/libopenjpeg20/0025-opj_j2k_add_mct_null_data.patch ++++ b/third_party/libopenjpeg20/0025-opj_j2k_add_mct_null_data.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c +-index c5f9dd53e..d31eb29a7 100644 ++index d24564cc2..889c2cfc8 100644 + --- a/third_party/libopenjpeg20/j2k.c + +++ b/third_party/libopenjpeg20/j2k.c +-@@ -6201,7 +6201,7 @@ static OPJ_BOOL opj_j2k_add_mct(opj_tcp_t * p_tcp, opj_image_t * p_image, ++@@ -6397,7 +6397,7 @@ static OPJ_BOOL opj_j2k_add_mct(opj_tcp_t * p_tcp, opj_image_t * p_image, + if (l_deco_array) { + l_data_size = MCT_ELEMENT_SIZE[l_deco_array->m_element_type] * p_image->numcomps + * p_image->numcomps; +@@ -11,7 +11,7 @@ index c5f9dd53e..d31eb29a7 100644 + return OPJ_FALSE; + } + +-@@ -6222,7 +6222,7 @@ static OPJ_BOOL opj_j2k_add_mct(opj_tcp_t * p_tcp, opj_image_t * p_image, ++@@ -6418,7 +6418,7 @@ static OPJ_BOOL opj_j2k_add_mct(opj_tcp_t * p_tcp, opj_image_t * p_image, + if (l_offset_array) { + l_data_size = MCT_ELEMENT_SIZE[l_offset_array->m_element_type] * + p_image->numcomps; +diff --git a/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch b/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch +index 038fb90ff4611dedf005410d485edd628fb4694a..0eb365453b3862c83628f7ebaa9d6866956932c9 100644 +--- a/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch ++++ b/third_party/libopenjpeg20/0026-use_opj_uint_ceildiv.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c +-index c5f9dd53e..1869833f7 100644 ++index 889c2cfc8..711dd73e8 100644 + --- a/third_party/libopenjpeg20/j2k.c + +++ b/third_party/libopenjpeg20/j2k.c +-@@ -2311,10 +2311,8 @@ static OPJ_BOOL opj_j2k_read_siz(opj_j2k_t *p_j2k, ++@@ -2299,10 +2299,8 @@ static OPJ_BOOL opj_j2k_read_siz(opj_j2k_t *p_j2k, + } + + /* Compute the number of tiles */ +@@ -15,7 +15,7 @@ index c5f9dd53e..1869833f7 100644 + + /* Check that the number of tiles is valid */ + if (l_cp->tw == 0 || l_cp->th == 0 || l_cp->tw > 65535 / l_cp->th) { +-@@ -2331,12 +2329,10 @@ static OPJ_BOOL opj_j2k_read_siz(opj_j2k_t *p_j2k, ++@@ -2319,12 +2317,10 @@ static OPJ_BOOL opj_j2k_read_siz(opj_j2k_t *p_j2k, + (p_j2k->m_specific_param.m_decoder.m_start_tile_x - l_cp->tx0) / l_cp->tdx; + p_j2k->m_specific_param.m_decoder.m_start_tile_y = + (p_j2k->m_specific_param.m_decoder.m_start_tile_y - l_cp->ty0) / l_cp->tdy; +@@ -32,10 +32,10 @@ index c5f9dd53e..1869833f7 100644 + } else { + p_j2k->m_specific_param.m_decoder.m_start_tile_x = 0; + p_j2k->m_specific_param.m_decoder.m_start_tile_y = 0; +-@@ -6922,10 +6918,8 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, +- */ +- +- if (parameters->tile_size_on) { ++@@ -7839,10 +7835,8 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, ++ opj_event_msg(p_manager, EVT_ERROR, "Invalid tile height\n"); ++ return OPJ_FALSE; ++ } + - cp->tw = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)(image->x1 - cp->tx0), + - (OPJ_INT32)cp->tdx); + - cp->th = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)(image->y1 - cp->ty0), +@@ -45,9 +45,9 @@ index c5f9dd53e..1869833f7 100644 + } else { + cp->tdx = image->x1 - cp->tx0; + cp->tdy = image->y1 - cp->ty0; +-@@ -9237,10 +9231,8 @@ OPJ_BOOL opj_j2k_set_decode_area(opj_j2k_t *p_j2k, +- for (it_comp = 0; it_comp < p_image->numcomps; ++it_comp) { +- OPJ_INT32 l_h, l_w; ++@@ -10035,10 +10029,8 @@ static OPJ_BOOL opj_j2k_update_image_dimensions(opj_image_t* p_image, ++ return OPJ_FALSE; ++ } + + - l_img_comp->x0 = (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)p_image->x0, + - (OPJ_INT32)l_img_comp->dx); +@@ -58,7 +58,7 @@ index c5f9dd53e..1869833f7 100644 + l_comp_x1 = opj_int_ceildiv((OPJ_INT32)p_image->x1, (OPJ_INT32)l_img_comp->dx); + l_comp_y1 = opj_int_ceildiv((OPJ_INT32)p_image->y1, (OPJ_INT32)l_img_comp->dy); + +-@@ -10848,10 +10840,8 @@ OPJ_BOOL opj_j2k_get_tile(opj_j2k_t *p_j2k, ++@@ -11950,10 +11942,8 @@ OPJ_BOOL opj_j2k_get_tile(opj_j2k_t *p_j2k, + + l_img_comp->factor = p_j2k->m_private_image->comps[compno].factor; + +@@ -71,7 +71,7 @@ index c5f9dd53e..1869833f7 100644 + l_comp_x1 = opj_int_ceildiv((OPJ_INT32)p_image->x1, (OPJ_INT32)l_img_comp->dx); + l_comp_y1 = opj_int_ceildiv((OPJ_INT32)p_image->y1, (OPJ_INT32)l_img_comp->dy); + +-@@ -11161,10 +11151,8 @@ static void opj_get_tile_dimensions(opj_image_t * l_image, ++@@ -12304,10 +12294,8 @@ static void opj_get_tile_dimensions(opj_image_t * l_image, + + *l_width = (OPJ_UINT32)(l_tilec->x1 - l_tilec->x0); + *l_height = (OPJ_UINT32)(l_tilec->y1 - l_tilec->y0); +diff --git a/third_party/libopenjpeg20/0035-opj_image_data_free.patch b/third_party/libopenjpeg20/0035-opj_image_data_free.patch +index bc674da378f2277784223255f21ab975d590eb35..f0cdd8a3b806c808e51f6fc9fa5198c9c3db4319 100644 +--- a/third_party/libopenjpeg20/0035-opj_image_data_free.patch ++++ b/third_party/libopenjpeg20/0035-opj_image_data_free.patch +@@ -1,8 +1,8 @@ + diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c +-index 298648a77..2374d459f 100644 ++index dcaf3872c..02f3d04c7 100644 + --- a/third_party/libopenjpeg20/jp2.c + +++ b/third_party/libopenjpeg20/jp2.c +-@@ -1116,7 +1116,7 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, ++@@ -1122,7 +1122,7 @@ static OPJ_BOOL opj_jp2_apply_pclr(opj_image_t *image, + /* Prevent null pointer access */ + if (!src || !dst) { + for (j = 0; j < nr_channels; ++j) { +diff --git a/third_party/libopenjpeg20/0036-opj_j2k_update_image_dimensions.patch b/third_party/libopenjpeg20/0036-opj_j2k_update_image_dimensions.patch +deleted file mode 100644 +index b918c0586e0f22ae080bcd041f984f3526419983..0000000000000000000000000000000000000000 +--- a/third_party/libopenjpeg20/0036-opj_j2k_update_image_dimensions.patch ++++ /dev/null +@@ -1,49 +0,0 @@ +-diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c +-index 784a0620a..cea614709 100644 +---- a/third_party/libopenjpeg20/j2k.c +-+++ b/third_party/libopenjpeg20/j2k.c +-@@ -9223,32 +9223,30 @@ static OPJ_BOOL opj_j2k_update_image_dimensions(opj_image_t* p_image, +- +- l_img_comp = p_image->comps; +- for (it_comp = 0; it_comp < p_image->numcomps; ++it_comp) { +-- OPJ_INT32 l_h, l_w; +-- +- l_img_comp->x0 = opj_uint_ceildiv(p_image->x0, l_img_comp->dx); +- l_img_comp->y0 = opj_uint_ceildiv(p_image->y0, l_img_comp->dy); +- l_comp_x1 = opj_int_ceildiv((OPJ_INT32)p_image->x1, (OPJ_INT32)l_img_comp->dx); +- l_comp_y1 = opj_int_ceildiv((OPJ_INT32)p_image->y1, (OPJ_INT32)l_img_comp->dy); +- +-- l_w = opj_int_ceildivpow2(l_comp_x1, (OPJ_INT32)l_img_comp->factor) +-- - opj_int_ceildivpow2((OPJ_INT32)l_img_comp->x0, (OPJ_INT32)l_img_comp->factor); +-- if (l_w < 0) { +-+ OPJ_INT32 l_1 = opj_int_ceildivpow2(l_comp_x1, (OPJ_INT32)l_img_comp->factor); +-+ OPJ_INT32 l_2 = opj_int_ceildivpow2((OPJ_INT32)l_img_comp->x0, (OPJ_INT32)l_img_comp->factor); +-+ if (l_1 < l_2) { +- opj_event_msg(p_manager, EVT_ERROR, +-- "Size x of the decoded component image is incorrect (comp[%d].w=%d).\n", +-- it_comp, l_w); +-+ "Size x of the decoded component image is incorrect (comp[%d].w<0).\n", +-+ it_comp); +- return OPJ_FALSE; +- } +-- l_img_comp->w = (OPJ_UINT32)l_w; +-+ l_img_comp->w = (OPJ_UINT32)(l_1-l_2); +- +-- l_h = opj_int_ceildivpow2(l_comp_y1, (OPJ_INT32)l_img_comp->factor) +-- - opj_int_ceildivpow2((OPJ_INT32)l_img_comp->y0, (OPJ_INT32)l_img_comp->factor); +-- if (l_h < 0) { +-+ l_1 = opj_int_ceildivpow2(l_comp_y1, (OPJ_INT32)l_img_comp->factor); +-+ l_2 = opj_int_ceildivpow2((OPJ_INT32)l_img_comp->y0, (OPJ_INT32)l_img_comp->factor); +-+ if (l_1 < l_2) { +- opj_event_msg(p_manager, EVT_ERROR, +-- "Size y of the decoded component image is incorrect (comp[%d].h=%d).\n", +-- it_comp, l_h); +-+ "Size y of the decoded component image is incorrect (comp[%d].h<0).\n", +-+ it_comp); +- return OPJ_FALSE; +- } +-- l_img_comp->h = (OPJ_UINT32)l_h; +-+ l_img_comp->h = (OPJ_UINT32)(l_1-l_2); +- +- l_img_comp++; +- } +diff --git a/third_party/libopenjpeg20/0037-tcd_init_tile.patch b/third_party/libopenjpeg20/0037-tcd_init_tile.patch +deleted file mode 100644 +index e38a7ec87122697f2dba052df8551207e3355750..0000000000000000000000000000000000000000 +--- a/third_party/libopenjpeg20/0037-tcd_init_tile.patch ++++ /dev/null +@@ -1,31 +0,0 @@ +-diff --git a/third_party/libopenjpeg20/tcd.c b/third_party/libopenjpeg20/tcd.c +-index 2ae211ef4..9e98f04ab 100644 +---- a/third_party/libopenjpeg20/tcd.c +-+++ b/third_party/libopenjpeg20/tcd.c +-@@ -910,8 +910,24 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, +- /* p. 64, B.6, ISO/IEC FDIS15444-1 : 2000 (18 august 2000) */ +- l_tl_prc_x_start = opj_int_floordivpow2(l_res->x0, (OPJ_INT32)l_pdx) << l_pdx; +- l_tl_prc_y_start = opj_int_floordivpow2(l_res->y0, (OPJ_INT32)l_pdy) << l_pdy; +-- l_br_prc_x_end = opj_int_ceildivpow2(l_res->x1, (OPJ_INT32)l_pdx) << l_pdx; +-- l_br_prc_y_end = opj_int_ceildivpow2(l_res->y1, (OPJ_INT32)l_pdy) << l_pdy; +-+ { +-+ OPJ_UINT32 tmp = ((OPJ_UINT32)opj_int_ceildivpow2(l_res->x1, +-+ (OPJ_INT32)l_pdx)) << l_pdx; +-+ if (tmp > (OPJ_UINT32)INT_MAX) { +-+ opj_event_msg(manager, EVT_ERROR, "Integer overflow\n"); +-+ return OPJ_FALSE; +-+ } +-+ l_br_prc_x_end = (OPJ_INT32)tmp; +-+ } +-+ { +-+ OPJ_UINT32 tmp = ((OPJ_UINT32)opj_int_ceildivpow2(l_res->y1, +-+ (OPJ_INT32)l_pdy)) << l_pdy; +-+ if (tmp > (OPJ_UINT32)INT_MAX) { +-+ opj_event_msg(manager, EVT_ERROR, "Integer overflow\n"); +-+ return OPJ_FALSE; +-+ } +-+ l_br_prc_y_end = (OPJ_INT32)tmp; +-+ } +- /*fprintf(stderr, "\t\t\tprc_x_start=%d, prc_y_start=%d, br_prc_x_end=%d, br_prc_y_end=%d \n", l_tl_prc_x_start, l_tl_prc_y_start, l_br_prc_x_end ,l_br_prc_y_end );*/ +- +- l_res->pw = (l_res->x0 == l_res->x1) ? 0U : (OPJ_UINT32)(( +diff --git a/third_party/libopenjpeg20/0038-opj_j2k_validate_param.patch b/third_party/libopenjpeg20/0038-opj_j2k_validate_param.patch +deleted file mode 100644 +index 9431d821883a780559da1ec8686a0e108c12f4bd..0000000000000000000000000000000000000000 +--- a/third_party/libopenjpeg20/0038-opj_j2k_validate_param.patch ++++ /dev/null +@@ -1,30 +0,0 @@ +-diff --git a/src/lib/openjp2/j2k.c b/src/lib/openjp2/j2k.c +-index 59b2bbb7..43be7677 100644 +---- a/src/lib/openjp2/j2k.c +-+++ b/src/lib/openjp2/j2k.c +-@@ -2698,6 +2698,12 @@ static OPJ_BOOL opj_j2k_read_cod(opj_j2k_t *p_j2k, +- opj_read_bytes(p_header_data, &l_tcp->mct, 1); /* SGcod (C) */ +- ++p_header_data; +- +-+ if (l_tcp->mct > 1) { +-+ opj_event_msg(p_manager, EVT_ERROR, +-+ "Invalid multiple component transformation\n"); +-+ return OPJ_FALSE; +-+ } +-+ +- p_header_size -= 5; +- for (i = 0; i < l_image->numcomps; ++i) { +- l_tcp->tccps[i].csty = l_tcp->csty & J2K_CCP_CSTY_PRT; +-@@ -9792,6 +9798,12 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, +- opj_read_bytes(l_current_ptr, &l_tccp->qmfbid, 1); +- ++l_current_ptr; +- +-+ if (l_tccp->qmfbid > 1) { +-+ opj_event_msg(p_manager, EVT_ERROR, +-+ "Error reading SPCod SPCoc element, Invalid transformation found\n"); +-+ return OPJ_FALSE; +-+ } +-+ +- *p_header_size = *p_header_size - 5; +- +- /* use custom precinct size ? */ +diff --git a/third_party/libopenjpeg20/0039-opj_mqc_renorme.patch b/third_party/libopenjpeg20/0039-opj_mqc_renorme.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..07e8a801bf0f5e2919a43a9710918bf83c4dfe73 +--- /dev/null ++++ b/third_party/libopenjpeg20/0039-opj_mqc_renorme.patch +@@ -0,0 +1,16 @@ ++diff --git a/third_party/libopenjpeg20/mqc.c b/third_party/libopenjpeg20/mqc.c ++index 4cbfabd03..3caab9e7c 100644 ++--- a/third_party/libopenjpeg20/mqc.c +++++ b/third_party/libopenjpeg20/mqc.c ++@@ -370,11 +370,6 @@ void opj_mqc_erterm_enc(opj_mqc_t *mqc) ++ } ++ } ++ ++-static INLINE void opj_mqc_renorme(opj_mqc_t *mqc) ++-{ ++- opj_mqc_renorme_macro(mqc, mqc->a, mqc->c, mqc->ct); ++-} ++- ++ /** ++ Encode the most probable symbol ++ @param mqc MQC handle +diff --git a/third_party/libopenjpeg20/README.pdfium b/third_party/libopenjpeg20/README.pdfium +index 08f60079f5951cf120f77c7933125c939bd0392b..c62780c3d6c275a7db7e10d91f38fc3bf55f9291 100644 +--- a/third_party/libopenjpeg20/README.pdfium ++++ b/third_party/libopenjpeg20/README.pdfium +@@ -1,8 +1,9 @@ + Name: OpenJPEG + URL: http://www.openjpeg.org/ +-Version: 2.3.1 (also update in opj_config*) ++Version: 2.4.0 (also update in opj_config*) + Security Critical: yes + License: 2-clause BSD ++CPEPrefix: cpe:/a:uclouvain:openjpeg:2.4.0 + + Description: + JPEG 2000 library. +@@ -24,9 +25,6 @@ Local Modifications: + 0023-opj_j2k_read_mct_records.patch: Fix opj_j2k_read to prevent heap-use-after-free. + 0025-opj_j2k_add_mct_null_data.patch: Check m_data != null before trying to read from it. + 0026-use_opj_uint_ceildiv.patch: Remove (OPJ_UINT32)opj_int_ceildiv((OPJ_INT32)a, (OPJ_INT32) b). +-0033-undefined-shift-opj_t1_dec_clnpass.patch: fix undefined shifts originated from opj_t1_decode_cblk. + 0034-opj_malloc.patch: PDFium changes in opj_malloc. + 0035-opj_image_data_free.patch: Use the right free function in opj_jp2_apply_pclr. +-0036-opj_j2k_update_image_dimensions.patch: fix integer overflow. +-0037-tcd_init_tile.patch: Avoid integer overflow in opj_tcd_init_tile(). +-0038-opj_j2k_validate_param.patch: Validate all SGcod/SPcod/SPcoc parameter values. ++0039-opj_mqc_renorme.patch: Remove unused opj_mqc_renorme(). +diff --git a/third_party/libopenjpeg20/dwt.c b/third_party/libopenjpeg20/dwt.c +index 6512b1e4cee6bef02d623cab8efb469ef10b7f70..a36b7ed10b8738fb0bc47bb9585502b7f7da5f65 100644 +--- a/third_party/libopenjpeg20/dwt.c ++++ b/third_party/libopenjpeg20/dwt.c +@@ -85,12 +85,14 @@ typedef struct dwt_local { + OPJ_INT32 cas; /* 0 = start on even coord, 1 = start on odd coord */ + } opj_dwt_t; + ++#define NB_ELTS_V8 8 ++ + typedef union { +- OPJ_FLOAT32 f[4]; +-} opj_v4_t; ++ OPJ_FLOAT32 f[NB_ELTS_V8]; ++} opj_v8_t; + +-typedef struct v4dwt_local { +- opj_v4_t* wavelet ; ++typedef struct v8dwt_local { ++ opj_v8_t* wavelet ; + OPJ_INT32 dn ; /* number of elements in high pass band */ + OPJ_INT32 sn ; /* number of elements in low pass band */ + OPJ_INT32 cas ; /* 0 = start on even coord, 1 = start on odd coord */ +@@ -98,46 +100,35 @@ typedef struct v4dwt_local { + OPJ_UINT32 win_l_x1; /* end coord in low pass band */ + OPJ_UINT32 win_h_x0; /* start coord in high pass band */ + OPJ_UINT32 win_h_x1; /* end coord in high pass band */ +-} opj_v4dwt_t ; ++} opj_v8dwt_t ; + +-static const OPJ_FLOAT32 opj_dwt_alpha = 1.586134342f; /* 12994 */ +-static const OPJ_FLOAT32 opj_dwt_beta = 0.052980118f; /* 434 */ +-static const OPJ_FLOAT32 opj_dwt_gamma = -0.882911075f; /* -7233 */ +-static const OPJ_FLOAT32 opj_dwt_delta = -0.443506852f; /* -3633 */ ++/* From table F.4 from the standard */ ++static const OPJ_FLOAT32 opj_dwt_alpha = -1.586134342f; ++static const OPJ_FLOAT32 opj_dwt_beta = -0.052980118f; ++static const OPJ_FLOAT32 opj_dwt_gamma = 0.882911075f; ++static const OPJ_FLOAT32 opj_dwt_delta = 0.443506852f; + +-static const OPJ_FLOAT32 opj_K = 1.230174105f; /* 10078 */ +-static const OPJ_FLOAT32 opj_c13318 = 1.625732422f; ++static const OPJ_FLOAT32 opj_K = 1.230174105f; ++static const OPJ_FLOAT32 opj_invK = (OPJ_FLOAT32)(1.0 / 1.230174105); + + /*@}*/ + +-/** +-Virtual function type for wavelet transform in 1-D +-*/ +-typedef void (*DWT1DFN)(const opj_dwt_t* v); +- + /** @name Local static functions */ + /*@{*/ + + /** + Forward lazy transform (horizontal) + */ +-static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, ++static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a, ++ OPJ_INT32 * OPJ_RESTRICT b, ++ OPJ_INT32 dn, + OPJ_INT32 sn, OPJ_INT32 cas); +-/** +-Forward lazy transform (vertical) +-*/ +-static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, +- OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas); +-/** +-Forward 5-3 wavelet transform in 1-D +-*/ +-static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, +- OPJ_INT32 sn, OPJ_INT32 cas); ++ + /** + Forward 9-7 wavelet transform in 1-D + */ +-static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count, +- OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); ++static void opj_dwt_encode_1_real(void *a, OPJ_INT32 dn, OPJ_INT32 sn, ++ OPJ_INT32 cas); + /** + Explicit calculation of the Quantization Stepsizes + */ +@@ -153,8 +144,29 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( + opj_tcd_tilecomp_t* tilec, + OPJ_UINT32 numres); + +-static OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec, +- void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32)); ++/* Forward transform, for the vertical pass, processing cols columns */ ++/* where cols <= NB_ELTS_V8 */ ++/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */ ++typedef void (*opj_encode_and_deinterleave_v_fnptr_type)( ++ void *array, ++ void *tmp, ++ OPJ_UINT32 height, ++ OPJ_BOOL even, ++ OPJ_UINT32 stride_width, ++ OPJ_UINT32 cols); ++ ++/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */ ++typedef void (*opj_encode_and_deinterleave_h_one_row_fnptr_type)( ++ void *row, ++ void *tmp, ++ OPJ_UINT32 width, ++ OPJ_BOOL even); ++ ++static OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp, ++ opj_tcd_tilecomp_t * tilec, ++ opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v, ++ opj_encode_and_deinterleave_h_one_row_fnptr_type ++ p_encode_and_deinterleave_h_one_row); + + static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r, + OPJ_UINT32 i); +@@ -162,42 +174,6 @@ static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r, + /* */ + /* Inverse 9-7 wavelet transform in 1-D. */ + /* */ +-static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt); +- +-static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, +- OPJ_FLOAT32* OPJ_RESTRICT a, +- OPJ_UINT32 width, +- OPJ_UINT32 remaining_height); +- +-static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, +- OPJ_FLOAT32* OPJ_RESTRICT a, +- OPJ_UINT32 width, +- OPJ_UINT32 nb_elts_read); +- +-#ifdef __SSE__ +-static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, +- OPJ_UINT32 start, +- OPJ_UINT32 end, +- const __m128 c); +- +-static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, +- OPJ_UINT32 start, +- OPJ_UINT32 end, +- OPJ_UINT32 m, __m128 c); +- +-#else +-static void opj_v4dwt_decode_step1(opj_v4_t* w, +- OPJ_UINT32 start, +- OPJ_UINT32 end, +- const OPJ_FLOAT32 c); +- +-static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, +- OPJ_UINT32 start, +- OPJ_UINT32 end, +- OPJ_UINT32 m, +- OPJ_FLOAT32 c); +- +-#endif + + /*@}*/ + +@@ -251,12 +227,14 @@ static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = { + /* */ + /* Forward lazy transform (horizontal). */ + /* */ +-static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, ++static void opj_dwt_deinterleave_h(const OPJ_INT32 * OPJ_RESTRICT a, ++ OPJ_INT32 * OPJ_RESTRICT b, ++ OPJ_INT32 dn, + OPJ_INT32 sn, OPJ_INT32 cas) + { + OPJ_INT32 i; +- OPJ_INT32 * l_dest = b; +- OPJ_INT32 * l_src = a + cas; ++ OPJ_INT32 * OPJ_RESTRICT l_dest = b; ++ const OPJ_INT32 * OPJ_RESTRICT l_src = a + cas; + + for (i = 0; i < sn; ++i) { + *l_dest++ = *l_src; +@@ -272,40 +250,13 @@ static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, + } + } + +-/* */ +-/* Forward lazy transform (vertical). */ +-/* */ +-static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, +- OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas) +-{ +- OPJ_INT32 i = sn; +- OPJ_INT32 * l_dest = b; +- OPJ_INT32 * l_src = a + cas; +- +- while (i--) { +- *l_dest = *l_src; +- l_dest += x; +- l_src += 2; +- } /* b[i*x]=a[2*i+cas]; */ +- +- l_dest = b + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)x; +- l_src = a + 1 - cas; +- +- i = dn; +- while (i--) { +- *l_dest = *l_src; +- l_dest += x; +- l_src += 2; +- } /*b[(sn+i)*x]=a[(2*i+1-cas)];*/ +-} +- + #ifdef STANDARD_SLOW_VERSION + /* */ + /* Inverse lazy transform (horizontal). */ + /* */ + static void opj_dwt_interleave_h(const opj_dwt_t* h, OPJ_INT32 *a) + { +- OPJ_INT32 *ai = a; ++ const OPJ_INT32 *ai = a; + OPJ_INT32 *bi = h->mem + h->cas; + OPJ_INT32 i = h->sn; + while (i--) { +@@ -326,7 +277,7 @@ static void opj_dwt_interleave_h(const opj_dwt_t* h, OPJ_INT32 *a) + /* */ + static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) + { +- OPJ_INT32 *ai = a; ++ const OPJ_INT32 *ai = a; + OPJ_INT32 *bi = v->mem + v->cas; + OPJ_INT32 i = v->sn; + while (i--) { +@@ -346,37 +297,6 @@ static void opj_dwt_interleave_v(const opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) + + #endif /* STANDARD_SLOW_VERSION */ + +-/* */ +-/* Forward 5-3 wavelet transform in 1-D. */ +-/* */ +-static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, +- OPJ_INT32 sn, OPJ_INT32 cas) +-{ +- OPJ_INT32 i; +- +- if (!cas) { +- if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ +- for (i = 0; i < dn; i++) { +- OPJ_D(i) -= (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; +- } +- for (i = 0; i < sn; i++) { +- OPJ_S(i) += (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; +- } +- } +- } else { +- if (!sn && dn == 1) { /* NEW : CASE ONE ELEMENT */ +- OPJ_S(0) *= 2; +- } else { +- for (i = 0; i < dn; i++) { +- OPJ_S(i) -= (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1; +- } +- for (i = 0; i < sn; i++) { +- OPJ_D(i) += (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2; +- } +- } +- } +-} +- + #ifdef STANDARD_SLOW_VERSION + /* */ + /* Inverse 5-3 wavelet transform in 1-D. */ +@@ -1038,111 +958,799 @@ static void opj_idwt53_v(const opj_dwt_t *dwt, + #endif + } + ++#if 0 ++static void opj_dwt_encode_step1(OPJ_FLOAT32* fw, ++ OPJ_UINT32 end, ++ const OPJ_FLOAT32 c) ++{ ++ OPJ_UINT32 i = 0; ++ for (; i < end; ++i) { ++ fw[0] *= c; ++ fw += 2; ++ } ++} ++#else ++static void opj_dwt_encode_step1_combined(OPJ_FLOAT32* fw, ++ OPJ_UINT32 iters_c1, ++ OPJ_UINT32 iters_c2, ++ const OPJ_FLOAT32 c1, ++ const OPJ_FLOAT32 c2) ++{ ++ OPJ_UINT32 i = 0; ++ const OPJ_UINT32 iters_common = opj_uint_min(iters_c1, iters_c2); ++ assert((((OPJ_SIZE_T)fw) & 0xf) == 0); ++ assert(opj_int_abs((OPJ_INT32)iters_c1 - (OPJ_INT32)iters_c2) <= 1); ++ for (; i + 3 < iters_common; i += 4) { ++#ifdef __SSE__ ++ const __m128 vcst = _mm_set_ps(c2, c1, c2, c1); ++ *(__m128*)fw = _mm_mul_ps(*(__m128*)fw, vcst); ++ *(__m128*)(fw + 4) = _mm_mul_ps(*(__m128*)(fw + 4), vcst); ++#else ++ fw[0] *= c1; ++ fw[1] *= c2; ++ fw[2] *= c1; ++ fw[3] *= c2; ++ fw[4] *= c1; ++ fw[5] *= c2; ++ fw[6] *= c1; ++ fw[7] *= c2; ++#endif ++ fw += 8; ++ } ++ for (; i < iters_common; i++) { ++ fw[0] *= c1; ++ fw[1] *= c2; ++ fw += 2; ++ } ++ if (i < iters_c1) { ++ fw[0] *= c1; ++ } else if (i < iters_c2) { ++ fw[1] *= c2; ++ } ++} + +-/* */ +-/* Forward 9-7 wavelet transform in 1-D. */ +-/* */ +-static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count, +- OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) ++#endif ++ ++static void opj_dwt_encode_step2(OPJ_FLOAT32* fl, OPJ_FLOAT32* fw, ++ OPJ_UINT32 end, ++ OPJ_UINT32 m, ++ OPJ_FLOAT32 c) + { +- OPJ_INT32 i; +- if (!cas) { +- if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ +- for (i = 0; i < dn; i++) { +- OPJ_D(i) -= opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 12993); ++ OPJ_UINT32 i; ++ OPJ_UINT32 imax = opj_uint_min(end, m); ++ if (imax > 0) { ++ fw[-1] += (fl[0] + fw[0]) * c; ++ fw += 2; ++ i = 1; ++ for (; i + 3 < imax; i += 4) { ++ fw[-1] += (fw[-2] + fw[0]) * c; ++ fw[1] += (fw[0] + fw[2]) * c; ++ fw[3] += (fw[2] + fw[4]) * c; ++ fw[5] += (fw[4] + fw[6]) * c; ++ fw += 8; ++ } ++ for (; i < imax; ++i) { ++ fw[-1] += (fw[-2] + fw[0]) * c; ++ fw += 2; ++ } ++ } ++ if (m < end) { ++ assert(m + 1 == end); ++ fw[-1] += (2 * fw[-2]) * c; ++ } ++} ++ ++static void opj_dwt_encode_1_real(void *aIn, OPJ_INT32 dn, OPJ_INT32 sn, ++ OPJ_INT32 cas) ++{ ++ OPJ_FLOAT32* w = (OPJ_FLOAT32*)aIn; ++ OPJ_INT32 a, b; ++ assert(dn + sn > 1); ++ if (cas == 0) { ++ a = 0; ++ b = 1; ++ } else { ++ a = 1; ++ b = 0; ++ } ++ opj_dwt_encode_step2(w + a, w + b + 1, ++ (OPJ_UINT32)dn, ++ (OPJ_UINT32)opj_int_min(dn, sn - b), ++ opj_dwt_alpha); ++ opj_dwt_encode_step2(w + b, w + a + 1, ++ (OPJ_UINT32)sn, ++ (OPJ_UINT32)opj_int_min(sn, dn - a), ++ opj_dwt_beta); ++ opj_dwt_encode_step2(w + a, w + b + 1, ++ (OPJ_UINT32)dn, ++ (OPJ_UINT32)opj_int_min(dn, sn - b), ++ opj_dwt_gamma); ++ opj_dwt_encode_step2(w + b, w + a + 1, ++ (OPJ_UINT32)sn, ++ (OPJ_UINT32)opj_int_min(sn, dn - a), ++ opj_dwt_delta); ++#if 0 ++ opj_dwt_encode_step1(w + b, (OPJ_UINT32)dn, ++ opj_K); ++ opj_dwt_encode_step1(w + a, (OPJ_UINT32)sn, ++ opj_invK); ++#else ++ if (a == 0) { ++ opj_dwt_encode_step1_combined(w, ++ (OPJ_UINT32)sn, ++ (OPJ_UINT32)dn, ++ opj_invK, ++ opj_K); ++ } else { ++ opj_dwt_encode_step1_combined(w, ++ (OPJ_UINT32)dn, ++ (OPJ_UINT32)sn, ++ opj_K, ++ opj_invK); ++ } ++#endif ++} ++ ++static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, ++ opj_stepsize_t *bandno_stepsize) ++{ ++ OPJ_INT32 p, n; ++ p = opj_int_floorlog2(stepsize) - 13; ++ n = 11 - opj_int_floorlog2(stepsize); ++ bandno_stepsize->mant = (n < 0 ? stepsize >> -n : stepsize << n) & 0x7ff; ++ bandno_stepsize->expn = numbps - p; ++} ++ ++/* ++========================================================== ++ DWT interface ++========================================================== ++*/ ++ ++/** Process one line for the horizontal pass of the 5x3 forward transform */ ++static ++void opj_dwt_encode_and_deinterleave_h_one_row(void* rowIn, ++ void* tmpIn, ++ OPJ_UINT32 width, ++ OPJ_BOOL even) ++{ ++ OPJ_INT32* OPJ_RESTRICT row = (OPJ_INT32*)rowIn; ++ OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32*)tmpIn; ++ const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1); ++ const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn); ++ ++ if (even) { ++ if (width > 1) { ++ OPJ_INT32 i; ++ for (i = 0; i < sn - 1; i++) { ++ tmp[sn + i] = row[2 * i + 1] - ((row[(i) * 2] + row[(i + 1) * 2]) >> 1); + } +- for (i = 0; i < sn; i++) { +- OPJ_S(i) -= opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 434); ++ if ((width % 2) == 0) { ++ tmp[sn + i] = row[2 * i + 1] - row[(i) * 2]; + } +- for (i = 0; i < dn; i++) { +- OPJ_D(i) += opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 7233); ++ row[0] += (tmp[sn] + tmp[sn] + 2) >> 2; ++ for (i = 1; i < dn; i++) { ++ row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + i] + 2) >> 2); + } +- for (i = 0; i < sn; i++) { +- OPJ_S(i) += opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 3633); ++ if ((width % 2) == 1) { ++ row[i] = row[2 * i] + ((tmp[sn + (i - 1)] + tmp[sn + (i - 1)] + 2) >> 2); + } +- for (i = 0; i < dn; i++) { +- OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 5038); /*5038 */ ++ memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32)); ++ } ++ } else { ++ if (width == 1) { ++ row[0] *= 2; ++ } else { ++ OPJ_INT32 i; ++ tmp[sn + 0] = row[0] - row[1]; ++ for (i = 1; i < sn; i++) { ++ tmp[sn + i] = row[2 * i] - ((row[2 * i + 1] + row[2 * (i - 1) + 1]) >> 1); + } +- for (i = 0; i < sn; i++) { +- OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 6659); /*6660 */ ++ if ((width % 2) == 1) { ++ tmp[sn + i] = row[2 * i] - row[2 * (i - 1) + 1]; ++ } ++ ++ for (i = 0; i < dn - 1; i++) { ++ row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i + 1] + 2) >> 2); ++ } ++ if ((width % 2) == 0) { ++ row[i] = row[2 * i + 1] + ((tmp[sn + i] + tmp[sn + i] + 2) >> 2); ++ } ++ memcpy(row + sn, tmp + sn, (OPJ_SIZE_T)dn * sizeof(OPJ_INT32)); ++ } ++ } ++} ++ ++/** Process one line for the horizontal pass of the 9x7 forward transform */ ++static ++void opj_dwt_encode_and_deinterleave_h_one_row_real(void* rowIn, ++ void* tmpIn, ++ OPJ_UINT32 width, ++ OPJ_BOOL even) ++{ ++ OPJ_FLOAT32* OPJ_RESTRICT row = (OPJ_FLOAT32*)rowIn; ++ OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32*)tmpIn; ++ const OPJ_INT32 sn = (OPJ_INT32)((width + (even ? 1 : 0)) >> 1); ++ const OPJ_INT32 dn = (OPJ_INT32)(width - (OPJ_UINT32)sn); ++ if (width == 1) { ++ return; ++ } ++ memcpy(tmp, row, width * sizeof(OPJ_FLOAT32)); ++ opj_dwt_encode_1_real(tmp, dn, sn, even ? 0 : 1); ++ opj_dwt_deinterleave_h((OPJ_INT32 * OPJ_RESTRICT)tmp, ++ (OPJ_INT32 * OPJ_RESTRICT)row, ++ dn, sn, even ? 0 : 1); ++} ++ ++typedef struct { ++ opj_dwt_t h; ++ OPJ_UINT32 rw; /* Width of the resolution to process */ ++ OPJ_UINT32 w; /* Width of tiledp */ ++ OPJ_INT32 * OPJ_RESTRICT tiledp; ++ OPJ_UINT32 min_j; ++ OPJ_UINT32 max_j; ++ opj_encode_and_deinterleave_h_one_row_fnptr_type p_function; ++} opj_dwt_encode_h_job_t; ++ ++static void opj_dwt_encode_h_func(void* user_data, opj_tls_t* tls) ++{ ++ OPJ_UINT32 j; ++ opj_dwt_encode_h_job_t* job; ++ (void)tls; ++ ++ job = (opj_dwt_encode_h_job_t*)user_data; ++ for (j = job->min_j; j < job->max_j; j++) { ++ OPJ_INT32* OPJ_RESTRICT aj = job->tiledp + j * job->w; ++ (*job->p_function)(aj, job->h.mem, job->rw, ++ job->h.cas == 0 ? OPJ_TRUE : OPJ_FALSE); ++ } ++ ++ opj_aligned_free(job->h.mem); ++ opj_free(job); ++} ++ ++typedef struct { ++ opj_dwt_t v; ++ OPJ_UINT32 rh; ++ OPJ_UINT32 w; ++ OPJ_INT32 * OPJ_RESTRICT tiledp; ++ OPJ_UINT32 min_j; ++ OPJ_UINT32 max_j; ++ opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v; ++} opj_dwt_encode_v_job_t; ++ ++static void opj_dwt_encode_v_func(void* user_data, opj_tls_t* tls) ++{ ++ OPJ_UINT32 j; ++ opj_dwt_encode_v_job_t* job; ++ (void)tls; ++ ++ job = (opj_dwt_encode_v_job_t*)user_data; ++ for (j = job->min_j; j + NB_ELTS_V8 - 1 < job->max_j; j += NB_ELTS_V8) { ++ (*job->p_encode_and_deinterleave_v)(job->tiledp + j, ++ job->v.mem, ++ job->rh, ++ job->v.cas == 0, ++ job->w, ++ NB_ELTS_V8); ++ } ++ if (j < job->max_j) { ++ (*job->p_encode_and_deinterleave_v)(job->tiledp + j, ++ job->v.mem, ++ job->rh, ++ job->v.cas == 0, ++ job->w, ++ job->max_j - j); ++ } ++ ++ opj_aligned_free(job->v.mem); ++ opj_free(job); ++} ++ ++/** Fetch up to cols <= NB_ELTS_V8 for each line, and put them in tmpOut */ ++/* that has a NB_ELTS_V8 interleave factor. */ ++static void opj_dwt_fetch_cols_vertical_pass(const void *arrayIn, ++ void *tmpOut, ++ OPJ_UINT32 height, ++ OPJ_UINT32 stride_width, ++ OPJ_UINT32 cols) ++{ ++ const OPJ_INT32* OPJ_RESTRICT array = (const OPJ_INT32 * OPJ_RESTRICT)arrayIn; ++ OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpOut; ++ if (cols == NB_ELTS_V8) { ++ OPJ_UINT32 k; ++ for (k = 0; k < height; ++k) { ++ memcpy(tmp + NB_ELTS_V8 * k, ++ array + k * stride_width, ++ NB_ELTS_V8 * sizeof(OPJ_INT32)); ++ } ++ } else { ++ OPJ_UINT32 k; ++ for (k = 0; k < height; ++k) { ++ OPJ_UINT32 c; ++ for (c = 0; c < cols; c++) { ++ tmp[NB_ELTS_V8 * k + c] = array[c + k * stride_width]; ++ } ++ for (; c < NB_ELTS_V8; c++) { ++ tmp[NB_ELTS_V8 * k + c] = 0; ++ } ++ } ++ } ++} ++ ++/* Deinterleave result of forward transform, where cols <= NB_ELTS_V8 */ ++/* and src contains NB_ELTS_V8 consecutive values for up to NB_ELTS_V8 */ ++/* columns. */ ++static INLINE void opj_dwt_deinterleave_v_cols( ++ const OPJ_INT32 * OPJ_RESTRICT src, ++ OPJ_INT32 * OPJ_RESTRICT dst, ++ OPJ_INT32 dn, ++ OPJ_INT32 sn, ++ OPJ_UINT32 stride_width, ++ OPJ_INT32 cas, ++ OPJ_UINT32 cols) ++{ ++ OPJ_INT32 k; ++ OPJ_INT32 i = sn; ++ OPJ_INT32 * OPJ_RESTRICT l_dest = dst; ++ const OPJ_INT32 * OPJ_RESTRICT l_src = src + cas * NB_ELTS_V8; ++ OPJ_UINT32 c; ++ ++ for (k = 0; k < 2; k++) { ++ while (i--) { ++ if (cols == NB_ELTS_V8) { ++ memcpy(l_dest, l_src, NB_ELTS_V8 * sizeof(OPJ_INT32)); ++ } else { ++ c = 0; ++ switch (cols) { ++ case 7: ++ l_dest[c] = l_src[c]; ++ c++; /* fallthru */ ++ case 6: ++ l_dest[c] = l_src[c]; ++ c++; /* fallthru */ ++ case 5: ++ l_dest[c] = l_src[c]; ++ c++; /* fallthru */ ++ case 4: ++ l_dest[c] = l_src[c]; ++ c++; /* fallthru */ ++ case 3: ++ l_dest[c] = l_src[c]; ++ c++; /* fallthru */ ++ case 2: ++ l_dest[c] = l_src[c]; ++ c++; /* fallthru */ ++ default: ++ l_dest[c] = l_src[c]; ++ break; ++ } ++ } ++ l_dest += stride_width; ++ l_src += 2 * NB_ELTS_V8; ++ } ++ ++ l_dest = dst + (OPJ_SIZE_T)sn * (OPJ_SIZE_T)stride_width; ++ l_src = src + (1 - cas) * NB_ELTS_V8; ++ i = dn; ++ } ++} ++ ++ ++/* Forward 5-3 transform, for the vertical pass, processing cols columns */ ++/* where cols <= NB_ELTS_V8 */ ++static void opj_dwt_encode_and_deinterleave_v( ++ void *arrayIn, ++ void *tmpIn, ++ OPJ_UINT32 height, ++ OPJ_BOOL even, ++ OPJ_UINT32 stride_width, ++ OPJ_UINT32 cols) ++{ ++ OPJ_INT32* OPJ_RESTRICT array = (OPJ_INT32 * OPJ_RESTRICT)arrayIn; ++ OPJ_INT32* OPJ_RESTRICT tmp = (OPJ_INT32 * OPJ_RESTRICT)tmpIn; ++ const OPJ_UINT32 sn = (height + (even ? 1 : 0)) >> 1; ++ const OPJ_UINT32 dn = height - sn; ++ ++ opj_dwt_fetch_cols_vertical_pass(arrayIn, tmpIn, height, stride_width, cols); ++ ++#define OPJ_Sc(i) tmp[(i)*2* NB_ELTS_V8 + c] ++#define OPJ_Dc(i) tmp[((1+(i)*2))* NB_ELTS_V8 + c] ++ ++#ifdef __SSE2__ ++ if (height == 1) { ++ if (!even) { ++ OPJ_UINT32 c; ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ tmp[c] *= 2; ++ } ++ } ++ } else if (even) { ++ OPJ_UINT32 c; ++ OPJ_UINT32 i; ++ i = 0; ++ if (i + 1 < sn) { ++ __m128i xmm_Si_0 = *(const __m128i*)(tmp + 4 * 0); ++ __m128i xmm_Si_1 = *(const __m128i*)(tmp + 4 * 1); ++ for (; i + 1 < sn; i++) { ++ __m128i xmm_Sip1_0 = *(const __m128i*)(tmp + ++ (i + 1) * 2 * NB_ELTS_V8 + 4 * 0); ++ __m128i xmm_Sip1_1 = *(const __m128i*)(tmp + ++ (i + 1) * 2 * NB_ELTS_V8 + 4 * 1); ++ __m128i xmm_Di_0 = *(const __m128i*)(tmp + ++ (1 + i * 2) * NB_ELTS_V8 + 4 * 0); ++ __m128i xmm_Di_1 = *(const __m128i*)(tmp + ++ (1 + i * 2) * NB_ELTS_V8 + 4 * 1); ++ xmm_Di_0 = _mm_sub_epi32(xmm_Di_0, ++ _mm_srai_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), 1)); ++ xmm_Di_1 = _mm_sub_epi32(xmm_Di_1, ++ _mm_srai_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), 1)); ++ *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Di_0; ++ *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Di_1; ++ xmm_Si_0 = xmm_Sip1_0; ++ xmm_Si_1 = xmm_Sip1_1; + } + } +- } else { +- if ((sn > 0) || (dn > 1)) { /* NEW : CASE ONE ELEMENT */ +- for (i = 0; i < dn; i++) { +- OPJ_S(i) -= opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 12993); +- } +- for (i = 0; i < sn; i++) { +- OPJ_D(i) -= opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 434); +- } +- for (i = 0; i < dn; i++) { +- OPJ_S(i) += opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 7233); +- } +- for (i = 0; i < sn; i++) { +- OPJ_D(i) += opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 3633); +- } +- for (i = 0; i < dn; i++) { +- OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 5038); /*5038 */ +- } +- for (i = 0; i < sn; i++) { +- OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 6659); /*6660 */ ++ if (((height) % 2) == 0) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Dc(i) -= OPJ_Sc(i); ++ } ++ } ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2; ++ } ++ i = 1; ++ if (i < dn) { ++ __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 + ++ (i - 1) * 2) * NB_ELTS_V8 + 4 * 0); ++ __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 + ++ (i - 1) * 2) * NB_ELTS_V8 + 4 * 1); ++ const __m128i xmm_two = _mm_set1_epi32(2); ++ for (; i < dn; i++) { ++ __m128i xmm_Di_0 = *(const __m128i*)(tmp + ++ (1 + i * 2) * NB_ELTS_V8 + 4 * 0); ++ __m128i xmm_Di_1 = *(const __m128i*)(tmp + ++ (1 + i * 2) * NB_ELTS_V8 + 4 * 1); ++ __m128i xmm_Si_0 = *(const __m128i*)(tmp + ++ (i * 2) * NB_ELTS_V8 + 4 * 0); ++ __m128i xmm_Si_1 = *(const __m128i*)(tmp + ++ (i * 2) * NB_ELTS_V8 + 4 * 1); ++ xmm_Si_0 = _mm_add_epi32(xmm_Si_0, ++ _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_0, xmm_Di_0), xmm_two), 2)); ++ xmm_Si_1 = _mm_add_epi32(xmm_Si_1, ++ _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Dim1_1, xmm_Di_1), xmm_two), 2)); ++ *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0; ++ *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1; ++ xmm_Dim1_0 = xmm_Di_0; ++ xmm_Dim1_1 = xmm_Di_1; ++ } ++ } ++ if (((height) % 2) == 1) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2; ++ } ++ } ++ } else { ++ OPJ_UINT32 c; ++ OPJ_UINT32 i; ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Sc(0) -= OPJ_Dc(0); ++ } ++ i = 1; ++ if (i < sn) { ++ __m128i xmm_Dim1_0 = *(const __m128i*)(tmp + (1 + ++ (i - 1) * 2) * NB_ELTS_V8 + 4 * 0); ++ __m128i xmm_Dim1_1 = *(const __m128i*)(tmp + (1 + ++ (i - 1) * 2) * NB_ELTS_V8 + 4 * 1); ++ for (; i < sn; i++) { ++ __m128i xmm_Di_0 = *(const __m128i*)(tmp + ++ (1 + i * 2) * NB_ELTS_V8 + 4 * 0); ++ __m128i xmm_Di_1 = *(const __m128i*)(tmp + ++ (1 + i * 2) * NB_ELTS_V8 + 4 * 1); ++ __m128i xmm_Si_0 = *(const __m128i*)(tmp + ++ (i * 2) * NB_ELTS_V8 + 4 * 0); ++ __m128i xmm_Si_1 = *(const __m128i*)(tmp + ++ (i * 2) * NB_ELTS_V8 + 4 * 1); ++ xmm_Si_0 = _mm_sub_epi32(xmm_Si_0, ++ _mm_srai_epi32(_mm_add_epi32(xmm_Di_0, xmm_Dim1_0), 1)); ++ xmm_Si_1 = _mm_sub_epi32(xmm_Si_1, ++ _mm_srai_epi32(_mm_add_epi32(xmm_Di_1, xmm_Dim1_1), 1)); ++ *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Si_0; ++ *(__m128i*)(tmp + (i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Si_1; ++ xmm_Dim1_0 = xmm_Di_0; ++ xmm_Dim1_1 = xmm_Di_1; ++ } ++ } ++ if (((height) % 2) == 1) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Sc(i) -= OPJ_Dc(i - 1); ++ } ++ } ++ i = 0; ++ if (i + 1 < dn) { ++ __m128i xmm_Si_0 = *((const __m128i*)(tmp + 4 * 0)); ++ __m128i xmm_Si_1 = *((const __m128i*)(tmp + 4 * 1)); ++ const __m128i xmm_two = _mm_set1_epi32(2); ++ for (; i + 1 < dn; i++) { ++ __m128i xmm_Sip1_0 = *(const __m128i*)(tmp + ++ (i + 1) * 2 * NB_ELTS_V8 + 4 * 0); ++ __m128i xmm_Sip1_1 = *(const __m128i*)(tmp + ++ (i + 1) * 2 * NB_ELTS_V8 + 4 * 1); ++ __m128i xmm_Di_0 = *(const __m128i*)(tmp + ++ (1 + i * 2) * NB_ELTS_V8 + 4 * 0); ++ __m128i xmm_Di_1 = *(const __m128i*)(tmp + ++ (1 + i * 2) * NB_ELTS_V8 + 4 * 1); ++ xmm_Di_0 = _mm_add_epi32(xmm_Di_0, ++ _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_0, xmm_Sip1_0), xmm_two), 2)); ++ xmm_Di_1 = _mm_add_epi32(xmm_Di_1, ++ _mm_srai_epi32(_mm_add_epi32(_mm_add_epi32(xmm_Si_1, xmm_Sip1_1), xmm_two), 2)); ++ *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 0) = xmm_Di_0; ++ *(__m128i*)(tmp + (1 + i * 2) * NB_ELTS_V8 + 4 * 1) = xmm_Di_1; ++ xmm_Si_0 = xmm_Sip1_0; ++ xmm_Si_1 = xmm_Sip1_1; ++ } ++ } ++ if (((height) % 2) == 0) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2; ++ } ++ } ++ } ++#else ++ if (even) { ++ OPJ_UINT32 c; ++ if (height > 1) { ++ OPJ_UINT32 i; ++ for (i = 0; i + 1 < sn; i++) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Dc(i) -= (OPJ_Sc(i) + OPJ_Sc(i + 1)) >> 1; ++ } ++ } ++ if (((height) % 2) == 0) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Dc(i) -= OPJ_Sc(i); ++ } ++ } ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Sc(0) += (OPJ_Dc(0) + OPJ_Dc(0) + 2) >> 2; ++ } ++ for (i = 1; i < dn; i++) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i) + 2) >> 2; ++ } ++ } ++ if (((height) % 2) == 1) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Sc(i) += (OPJ_Dc(i - 1) + OPJ_Dc(i - 1) + 2) >> 2; ++ } ++ } ++ } ++ } else { ++ OPJ_UINT32 c; ++ if (height == 1) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Sc(0) *= 2; ++ } ++ } else { ++ OPJ_UINT32 i; ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Sc(0) -= OPJ_Dc(0); ++ } ++ for (i = 1; i < sn; i++) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Sc(i) -= (OPJ_Dc(i) + OPJ_Dc(i - 1)) >> 1; ++ } ++ } ++ if (((height) % 2) == 1) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Sc(i) -= OPJ_Dc(i - 1); ++ } ++ } ++ for (i = 0; i + 1 < dn; i++) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i + 1) + 2) >> 2; ++ } ++ } ++ if (((height) % 2) == 0) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ OPJ_Dc(i) += (OPJ_Sc(i) + OPJ_Sc(i) + 2) >> 2; ++ } ++ } ++ } ++ } ++#endif ++ ++ if (cols == NB_ELTS_V8) { ++ opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn, ++ stride_width, even ? 0 : 1, NB_ELTS_V8); ++ } else { ++ opj_dwt_deinterleave_v_cols(tmp, array, (OPJ_INT32)dn, (OPJ_INT32)sn, ++ stride_width, even ? 0 : 1, cols); ++ } ++} ++ ++static void opj_v8dwt_encode_step1(OPJ_FLOAT32* fw, ++ OPJ_UINT32 end, ++ const OPJ_FLOAT32 cst) ++{ ++ OPJ_UINT32 i; ++#ifdef __SSE__ ++ __m128* vw = (__m128*) fw; ++ const __m128 vcst = _mm_set1_ps(cst); ++ for (i = 0; i < end; ++i) { ++ vw[0] = _mm_mul_ps(vw[0], vcst); ++ vw[1] = _mm_mul_ps(vw[1], vcst); ++ vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)); ++ } ++#else ++ OPJ_UINT32 c; ++ for (i = 0; i < end; ++i) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ fw[i * 2 * NB_ELTS_V8 + c] *= cst; ++ } ++ } ++#endif ++} ++ ++static void opj_v8dwt_encode_step2(OPJ_FLOAT32* fl, OPJ_FLOAT32* fw, ++ OPJ_UINT32 end, ++ OPJ_UINT32 m, ++ OPJ_FLOAT32 cst) ++{ ++ OPJ_UINT32 i; ++ OPJ_UINT32 imax = opj_uint_min(end, m); ++#ifdef __SSE__ ++ __m128* vw = (__m128*) fw; ++ __m128 vcst = _mm_set1_ps(cst); ++ if (imax > 0) { ++ __m128* vl = (__m128*) fl; ++ vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vl[0], vw[0]), vcst)); ++ vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vl[1], vw[1]), vcst)); ++ vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)); ++ i = 1; ++ ++ for (; i < imax; ++i) { ++ vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vw[-4], vw[0]), vcst)); ++ vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vw[-3], vw[1]), vcst)); ++ vw += 2 * (NB_ELTS_V8 * sizeof(OPJ_FLOAT32) / sizeof(__m128)); ++ } ++ } ++ if (m < end) { ++ assert(m + 1 == end); ++ vcst = _mm_add_ps(vcst, vcst); ++ vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(vw[-4], vcst)); ++ vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(vw[-3], vcst)); ++ } ++#else ++ OPJ_INT32 c; ++ if (imax > 0) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ fw[-1 * NB_ELTS_V8 + c] += (fl[0 * NB_ELTS_V8 + c] + fw[0 * NB_ELTS_V8 + c]) * ++ cst; ++ } ++ fw += 2 * NB_ELTS_V8; ++ i = 1; ++ for (; i < imax; ++i) { ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ fw[-1 * NB_ELTS_V8 + c] += (fw[-2 * NB_ELTS_V8 + c] + fw[0 * NB_ELTS_V8 + c]) * ++ cst; + } ++ fw += 2 * NB_ELTS_V8; ++ } ++ } ++ if (m < end) { ++ assert(m + 1 == end); ++ for (c = 0; c < NB_ELTS_V8; c++) { ++ fw[-1 * NB_ELTS_V8 + c] += (2 * fw[-2 * NB_ELTS_V8 + c]) * cst; + } + } ++#endif + } + +-static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, +- opj_stepsize_t *bandno_stepsize) ++/* Forward 9-7 transform, for the vertical pass, processing cols columns */ ++/* where cols <= NB_ELTS_V8 */ ++static void opj_dwt_encode_and_deinterleave_v_real( ++ void *arrayIn, ++ void *tmpIn, ++ OPJ_UINT32 height, ++ OPJ_BOOL even, ++ OPJ_UINT32 stride_width, ++ OPJ_UINT32 cols) + { +- OPJ_INT32 p, n; +- p = opj_int_floorlog2(stepsize) - 13; +- n = 11 - opj_int_floorlog2(stepsize); +- bandno_stepsize->mant = (n < 0 ? stepsize >> -n : stepsize << n) & 0x7ff; +- bandno_stepsize->expn = numbps - p; +-} ++ OPJ_FLOAT32* OPJ_RESTRICT array = (OPJ_FLOAT32 * OPJ_RESTRICT)arrayIn; ++ OPJ_FLOAT32* OPJ_RESTRICT tmp = (OPJ_FLOAT32 * OPJ_RESTRICT)tmpIn; ++ const OPJ_INT32 sn = (OPJ_INT32)((height + (even ? 1 : 0)) >> 1); ++ const OPJ_INT32 dn = (OPJ_INT32)(height - (OPJ_UINT32)sn); ++ OPJ_INT32 a, b; ++ ++ if (height == 1) { ++ return; ++ } ++ ++ opj_dwt_fetch_cols_vertical_pass(arrayIn, tmpIn, height, stride_width, cols); ++ ++ if (even) { ++ a = 0; ++ b = 1; ++ } else { ++ a = 1; ++ b = 0; ++ } ++ opj_v8dwt_encode_step2(tmp + a * NB_ELTS_V8, ++ tmp + (b + 1) * NB_ELTS_V8, ++ (OPJ_UINT32)dn, ++ (OPJ_UINT32)opj_int_min(dn, sn - b), ++ opj_dwt_alpha); ++ opj_v8dwt_encode_step2(tmp + b * NB_ELTS_V8, ++ tmp + (a + 1) * NB_ELTS_V8, ++ (OPJ_UINT32)sn, ++ (OPJ_UINT32)opj_int_min(sn, dn - a), ++ opj_dwt_beta); ++ opj_v8dwt_encode_step2(tmp + a * NB_ELTS_V8, ++ tmp + (b + 1) * NB_ELTS_V8, ++ (OPJ_UINT32)dn, ++ (OPJ_UINT32)opj_int_min(dn, sn - b), ++ opj_dwt_gamma); ++ opj_v8dwt_encode_step2(tmp + b * NB_ELTS_V8, ++ tmp + (a + 1) * NB_ELTS_V8, ++ (OPJ_UINT32)sn, ++ (OPJ_UINT32)opj_int_min(sn, dn - a), ++ opj_dwt_delta); ++ opj_v8dwt_encode_step1(tmp + b * NB_ELTS_V8, (OPJ_UINT32)dn, ++ opj_K); ++ opj_v8dwt_encode_step1(tmp + a * NB_ELTS_V8, (OPJ_UINT32)sn, ++ opj_invK); + +-/* +-========================================================== +- DWT interface +-========================================================== +-*/ ++ ++ if (cols == NB_ELTS_V8) { ++ opj_dwt_deinterleave_v_cols((OPJ_INT32*)tmp, ++ (OPJ_INT32*)array, ++ (OPJ_INT32)dn, (OPJ_INT32)sn, ++ stride_width, even ? 0 : 1, NB_ELTS_V8); ++ } else { ++ opj_dwt_deinterleave_v_cols((OPJ_INT32*)tmp, ++ (OPJ_INT32*)array, ++ (OPJ_INT32)dn, (OPJ_INT32)sn, ++ stride_width, even ? 0 : 1, cols); ++ } ++} + + + /* */ + /* Forward 5-3 wavelet transform in 2-D. */ + /* */ +-static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec, +- void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32)) ++static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp, ++ opj_tcd_tilecomp_t * tilec, ++ opj_encode_and_deinterleave_v_fnptr_type p_encode_and_deinterleave_v, ++ opj_encode_and_deinterleave_h_one_row_fnptr_type ++ p_encode_and_deinterleave_h_one_row) + { +- OPJ_INT32 i, j, k; +- OPJ_INT32 *a = 00; +- OPJ_INT32 *aj = 00; ++ OPJ_INT32 i; + OPJ_INT32 *bj = 00; +- OPJ_INT32 w, l; ++ OPJ_UINT32 w; ++ OPJ_INT32 l; + +- OPJ_INT32 rw; /* width of the resolution level computed */ +- OPJ_INT32 rh; /* height of the resolution level computed */ +- OPJ_SIZE_T l_data_count; + OPJ_SIZE_T l_data_size; + + opj_tcd_resolution_t * l_cur_res = 0; + opj_tcd_resolution_t * l_last_res = 0; ++ const int num_threads = opj_thread_pool_get_thread_count(tp); ++ OPJ_INT32 * OPJ_RESTRICT tiledp = tilec->data; + +- w = tilec->x1 - tilec->x0; ++ w = (OPJ_UINT32)(tilec->x1 - tilec->x0); + l = (OPJ_INT32)tilec->numresolutions - 1; +- a = tilec->data; + + l_cur_res = tilec->resolutions + l; + l_last_res = l_cur_res - 1; + +- l_data_count = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions); ++ l_data_size = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions); + /* overflow check */ +- if (l_data_count > (SIZE_MAX / sizeof(OPJ_INT32))) { ++ if (l_data_size > (SIZE_MAX / (NB_ELTS_V8 * sizeof(OPJ_INT32)))) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } +- l_data_size = l_data_count * sizeof(OPJ_INT32); +- bj = (OPJ_INT32*)opj_malloc(l_data_size); ++ l_data_size *= NB_ELTS_V8 * sizeof(OPJ_INT32); ++ bj = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size); + /* l_data_size is equal to 0 when numresolutions == 1 but bj is not used */ + /* in that case, so do not error out */ + if (l_data_size != 0 && ! bj) { +@@ -1151,43 +1759,135 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec + i = l; + + while (i--) { +- OPJ_INT32 rw1; /* width of the resolution level once lower than computed one */ +- OPJ_INT32 rh1; /* height of the resolution level once lower than computed one */ ++ OPJ_UINT32 j; ++ OPJ_UINT32 rw; /* width of the resolution level computed */ ++ OPJ_UINT32 rh; /* height of the resolution level computed */ ++ OPJ_UINT32 ++ rw1; /* width of the resolution level once lower than computed one */ ++ OPJ_UINT32 ++ rh1; /* height of the resolution level once lower than computed one */ + OPJ_INT32 cas_col; /* 0 = non inversion on horizontal filtering 1 = inversion between low-pass and high-pass filtering */ + OPJ_INT32 cas_row; /* 0 = non inversion on vertical filtering 1 = inversion between low-pass and high-pass filtering */ + OPJ_INT32 dn, sn; + +- rw = l_cur_res->x1 - l_cur_res->x0; +- rh = l_cur_res->y1 - l_cur_res->y0; +- rw1 = l_last_res->x1 - l_last_res->x0; +- rh1 = l_last_res->y1 - l_last_res->y0; ++ rw = (OPJ_UINT32)(l_cur_res->x1 - l_cur_res->x0); ++ rh = (OPJ_UINT32)(l_cur_res->y1 - l_cur_res->y0); ++ rw1 = (OPJ_UINT32)(l_last_res->x1 - l_last_res->x0); ++ rh1 = (OPJ_UINT32)(l_last_res->y1 - l_last_res->y0); + + cas_row = l_cur_res->x0 & 1; + cas_col = l_cur_res->y0 & 1; + +- sn = rh1; +- dn = rh - rh1; +- for (j = 0; j < rw; ++j) { +- aj = a + j; +- for (k = 0; k < rh; ++k) { +- bj[k] = aj[k * w]; ++ sn = (OPJ_INT32)rh1; ++ dn = (OPJ_INT32)(rh - rh1); ++ ++ /* Perform vertical pass */ ++ if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) { ++ for (j = 0; j + NB_ELTS_V8 - 1 < rw; j += NB_ELTS_V8) { ++ p_encode_and_deinterleave_v(tiledp + j, ++ bj, ++ rh, ++ cas_col == 0, ++ w, ++ NB_ELTS_V8); ++ } ++ if (j < rw) { ++ p_encode_and_deinterleave_v(tiledp + j, ++ bj, ++ rh, ++ cas_col == 0, ++ w, ++ rw - j); ++ } ++ } else { ++ OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; ++ OPJ_UINT32 step_j; ++ ++ if (rw < num_jobs) { ++ num_jobs = rw; + } ++ step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8; + +- (*p_function) (bj, l_data_count, dn, sn, cas_col); ++ for (j = 0; j < num_jobs; j++) { ++ opj_dwt_encode_v_job_t* job; + +- opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col); ++ job = (opj_dwt_encode_v_job_t*) opj_malloc(sizeof(opj_dwt_encode_v_job_t)); ++ if (!job) { ++ opj_thread_pool_wait_completion(tp, 0); ++ opj_aligned_free(bj); ++ return OPJ_FALSE; ++ } ++ job->v.mem = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size); ++ if (!job->v.mem) { ++ opj_thread_pool_wait_completion(tp, 0); ++ opj_free(job); ++ opj_aligned_free(bj); ++ return OPJ_FALSE; ++ } ++ job->v.dn = dn; ++ job->v.sn = sn; ++ job->v.cas = cas_col; ++ job->rh = rh; ++ job->w = w; ++ job->tiledp = tiledp; ++ job->min_j = j * step_j; ++ job->max_j = (j + 1 == num_jobs) ? rw : (j + 1) * step_j; ++ job->p_encode_and_deinterleave_v = p_encode_and_deinterleave_v; ++ opj_thread_pool_submit_job(tp, opj_dwt_encode_v_func, job); ++ } ++ opj_thread_pool_wait_completion(tp, 0); + } + +- sn = rw1; +- dn = rw - rw1; ++ sn = (OPJ_INT32)rw1; ++ dn = (OPJ_INT32)(rw - rw1); + +- for (j = 0; j < rh; j++) { +- aj = a + j * w; +- for (k = 0; k < rw; k++) { +- bj[k] = aj[k]; ++ /* Perform horizontal pass */ ++ if (num_threads <= 1 || rh <= 1) { ++ for (j = 0; j < rh; j++) { ++ OPJ_INT32* OPJ_RESTRICT aj = tiledp + j * w; ++ (*p_encode_and_deinterleave_h_one_row)(aj, bj, rw, ++ cas_row == 0 ? OPJ_TRUE : OPJ_FALSE); ++ } ++ } else { ++ OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; ++ OPJ_UINT32 step_j; ++ ++ if (rh < num_jobs) { ++ num_jobs = rh; ++ } ++ step_j = (rh / num_jobs); ++ ++ for (j = 0; j < num_jobs; j++) { ++ opj_dwt_encode_h_job_t* job; ++ ++ job = (opj_dwt_encode_h_job_t*) opj_malloc(sizeof(opj_dwt_encode_h_job_t)); ++ if (!job) { ++ opj_thread_pool_wait_completion(tp, 0); ++ opj_aligned_free(bj); ++ return OPJ_FALSE; ++ } ++ job->h.mem = (OPJ_INT32*)opj_aligned_32_malloc(l_data_size); ++ if (!job->h.mem) { ++ opj_thread_pool_wait_completion(tp, 0); ++ opj_free(job); ++ opj_aligned_free(bj); ++ return OPJ_FALSE; ++ } ++ job->h.dn = dn; ++ job->h.sn = sn; ++ job->h.cas = cas_row; ++ job->rw = rw; ++ job->w = w; ++ job->tiledp = tiledp; ++ job->min_j = j * step_j; ++ job->max_j = (j + 1U) * step_j; /* this can overflow */ ++ if (j == (num_jobs - 1U)) { /* this will take care of the overflow */ ++ job->max_j = rh; ++ } ++ job->p_function = p_encode_and_deinterleave_h_one_row; ++ opj_thread_pool_submit_job(tp, opj_dwt_encode_h_func, job); + } +- (*p_function) (bj, l_data_count, dn, sn, cas_row); +- opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row); ++ opj_thread_pool_wait_completion(tp, 0); + } + + l_cur_res = l_last_res; +@@ -1195,15 +1895,18 @@ static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec + --l_last_res; + } + +- opj_free(bj); ++ opj_aligned_free(bj); + return OPJ_TRUE; + } + + /* Forward 5-3 wavelet transform in 2-D. */ + /* */ +-OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec) ++OPJ_BOOL opj_dwt_encode(opj_tcd_t *p_tcd, ++ opj_tcd_tilecomp_t * tilec) + { +- return opj_dwt_encode_procedure(tilec, opj_dwt_encode_1); ++ return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec, ++ opj_dwt_encode_and_deinterleave_v, ++ opj_dwt_encode_and_deinterleave_h_one_row); + } + + /* */ +@@ -1219,21 +1922,6 @@ OPJ_BOOL opj_dwt_decode(opj_tcd_t *p_tcd, opj_tcd_tilecomp_t* tilec, + } + } + +- +-/* */ +-/* Get gain of 5-3 wavelet transform. */ +-/* */ +-OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) +-{ +- if (orient == 0) { +- return 0; +- } +- if (orient == 1 || orient == 2) { +- return 1; +- } +- return 2; +-} +- + /* */ + /* Get norm of 5-3 wavelet. */ + /* */ +@@ -1253,18 +1941,12 @@ OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient) + /* */ + /* Forward 9-7 wavelet transform in 2-D. */ + /* */ +-OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec) +-{ +- return opj_dwt_encode_procedure(tilec, opj_dwt_encode_1_real); +-} +- +-/* */ +-/* Get gain of 9-7 wavelet transform. */ +-/* */ +-OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient) ++OPJ_BOOL opj_dwt_encode_real(opj_tcd_t *p_tcd, ++ opj_tcd_tilecomp_t * tilec) + { +- (void)orient; +- return 0; ++ return opj_dwt_encode_procedure(p_tcd->thread_pool, tilec, ++ opj_dwt_encode_and_deinterleave_v_real, ++ opj_dwt_encode_and_deinterleave_h_one_row_real); + } + + /* */ +@@ -1299,7 +1981,7 @@ void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) + if (tccp->qntsty == J2K_CCP_QNTSTY_NOQNT) { + stepsize = 1.0; + } else { +- OPJ_FLOAT64 norm = opj_dwt_norms_real[orient][level]; ++ OPJ_FLOAT64 norm = opj_dwt_getnorm_real(level, orient); + stepsize = (1 << (gain)) / norm; + } + opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0), +@@ -1334,15 +2016,15 @@ typedef struct { + OPJ_INT32 * OPJ_RESTRICT tiledp; + OPJ_UINT32 min_j; + OPJ_UINT32 max_j; +-} opj_dwd_decode_h_job_t; ++} opj_dwt_decode_h_job_t; + + static void opj_dwt_decode_h_func(void* user_data, opj_tls_t* tls) + { + OPJ_UINT32 j; +- opj_dwd_decode_h_job_t* job; ++ opj_dwt_decode_h_job_t* job; + (void)tls; + +- job = (opj_dwd_decode_h_job_t*)user_data; ++ job = (opj_dwt_decode_h_job_t*)user_data; + for (j = job->min_j; j < job->max_j; j++) { + opj_idwt53_h(&job->h, &job->tiledp[j * job->w]); + } +@@ -1358,15 +2040,15 @@ typedef struct { + OPJ_INT32 * OPJ_RESTRICT tiledp; + OPJ_UINT32 min_j; + OPJ_UINT32 max_j; +-} opj_dwd_decode_v_job_t; ++} opj_dwt_decode_v_job_t; + + static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) + { + OPJ_UINT32 j; +- opj_dwd_decode_v_job_t* job; ++ opj_dwt_decode_v_job_t* job; + (void)tls; + +- job = (opj_dwd_decode_v_job_t*)user_data; ++ job = (opj_dwt_decode_v_job_t*)user_data; + for (j = job->min_j; j + PARALLEL_COLS_53 <= job->max_j; + j += PARALLEL_COLS_53) { + opj_idwt53_v(&job->v, &job->tiledp[j], (OPJ_SIZE_T)job->w, +@@ -1454,9 +2136,9 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, + step_j = (rh / num_jobs); + + for (j = 0; j < num_jobs; j++) { +- opj_dwd_decode_h_job_t* job; ++ opj_dwt_decode_h_job_t* job; + +- job = (opj_dwd_decode_h_job_t*) opj_malloc(sizeof(opj_dwd_decode_h_job_t)); ++ job = (opj_dwt_decode_h_job_t*) opj_malloc(sizeof(opj_dwt_decode_h_job_t)); + if (!job) { + /* It would be nice to fallback to single thread case, but */ + /* unfortunately some jobs may be launched and have modified */ +@@ -1509,9 +2191,9 @@ static OPJ_BOOL opj_dwt_decode_tile(opj_thread_pool_t* tp, + step_j = (rw / num_jobs); + + for (j = 0; j < num_jobs; j++) { +- opj_dwd_decode_v_job_t* job; ++ opj_dwt_decode_v_job_t* job; + +- job = (opj_dwd_decode_v_job_t*) opj_malloc(sizeof(opj_dwd_decode_v_job_t)); ++ job = (opj_dwt_decode_v_job_t*) opj_malloc(sizeof(opj_dwt_decode_v_job_t)); + if (!job) { + /* It would be nice to fallback to single thread case, but */ + /* unfortunately some jobs may be launched and have modified */ +@@ -2177,7 +2859,7 @@ static OPJ_BOOL opj_dwt_decode_partial_tile( + return OPJ_TRUE; + } + +-static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, ++static void opj_v8dwt_interleave_h(opj_v8dwt_t* OPJ_RESTRICT dwt, + OPJ_FLOAT32* OPJ_RESTRICT a, + OPJ_UINT32 width, + OPJ_UINT32 remaining_height) +@@ -2188,39 +2870,69 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, + OPJ_UINT32 x1 = dwt->win_l_x1; + + for (k = 0; k < 2; ++k) { +- if (remaining_height >= 4 && ((OPJ_SIZE_T) a & 0x0f) == 0 && +- ((OPJ_SIZE_T) bi & 0x0f) == 0 && (width & 0x0f) == 0) { ++ if (remaining_height >= NB_ELTS_V8 && ((OPJ_SIZE_T) a & 0x0f) == 0 && ++ ((OPJ_SIZE_T) bi & 0x0f) == 0) { + /* Fast code path */ + for (i = x0; i < x1; ++i) { + OPJ_UINT32 j = i; +- bi[i * 8 ] = a[j]; ++ OPJ_FLOAT32* OPJ_RESTRICT dst = bi + i * 2 * NB_ELTS_V8; ++ dst[0] = a[j]; + j += width; +- bi[i * 8 + 1] = a[j]; ++ dst[1] = a[j]; + j += width; +- bi[i * 8 + 2] = a[j]; ++ dst[2] = a[j]; + j += width; +- bi[i * 8 + 3] = a[j]; ++ dst[3] = a[j]; ++ j += width; ++ dst[4] = a[j]; ++ j += width; ++ dst[5] = a[j]; ++ j += width; ++ dst[6] = a[j]; ++ j += width; ++ dst[7] = a[j]; + } + } else { + /* Slow code path */ + for (i = x0; i < x1; ++i) { + OPJ_UINT32 j = i; +- bi[i * 8 ] = a[j]; ++ OPJ_FLOAT32* OPJ_RESTRICT dst = bi + i * 2 * NB_ELTS_V8; ++ dst[0] = a[j]; + j += width; + if (remaining_height == 1) { + continue; + } +- bi[i * 8 + 1] = a[j]; ++ dst[1] = a[j]; + j += width; + if (remaining_height == 2) { + continue; + } +- bi[i * 8 + 2] = a[j]; ++ dst[2] = a[j]; + j += width; + if (remaining_height == 3) { + continue; + } +- bi[i * 8 + 3] = a[j]; /* This one*/ ++ dst[3] = a[j]; ++ j += width; ++ if (remaining_height == 4) { ++ continue; ++ } ++ dst[4] = a[j]; ++ j += width; ++ if (remaining_height == 5) { ++ continue; ++ } ++ dst[5] = a[j]; ++ j += width; ++ if (remaining_height == 6) { ++ continue; ++ } ++ dst[6] = a[j]; ++ j += width; ++ if (remaining_height == 7) { ++ continue; ++ } ++ dst[7] = a[j]; + } + } + +@@ -2231,7 +2943,7 @@ static void opj_v4dwt_interleave_h(opj_v4dwt_t* OPJ_RESTRICT dwt, + } + } + +-static void opj_v4dwt_interleave_partial_h(opj_v4dwt_t* dwt, ++static void opj_v8dwt_interleave_partial_h(opj_v8dwt_t* dwt, + opj_sparse_array_int32_t* sa, + OPJ_UINT32 sa_line, + OPJ_UINT32 remaining_height) +@@ -2244,25 +2956,25 @@ static void opj_v4dwt_interleave_partial_h(opj_v4dwt_t* dwt, + dwt->win_l_x1, sa_line + i + 1, + /* Nasty cast from float* to int32* */ + (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0) + i, +- 8, 0, OPJ_TRUE); ++ 2 * NB_ELTS_V8, 0, OPJ_TRUE); + assert(ret); + ret = opj_sparse_array_int32_read(sa, + (OPJ_UINT32)dwt->sn + dwt->win_h_x0, sa_line + i, + (OPJ_UINT32)dwt->sn + dwt->win_h_x1, sa_line + i + 1, + /* Nasty cast from float* to int32* */ + (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0) + i, +- 8, 0, OPJ_TRUE); ++ 2 * NB_ELTS_V8, 0, OPJ_TRUE); + assert(ret); + OPJ_UNUSED(ret); + } + } + +-static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, +- OPJ_FLOAT32* OPJ_RESTRICT a, +- OPJ_UINT32 width, +- OPJ_UINT32 nb_elts_read) ++static INLINE void opj_v8dwt_interleave_v(opj_v8dwt_t* OPJ_RESTRICT dwt, ++ OPJ_FLOAT32* OPJ_RESTRICT a, ++ OPJ_UINT32 width, ++ OPJ_UINT32 nb_elts_read) + { +- opj_v4_t* OPJ_RESTRICT bi = dwt->wavelet + dwt->cas; ++ opj_v8_t* OPJ_RESTRICT bi = dwt->wavelet + dwt->cas; + OPJ_UINT32 i; + + for (i = dwt->win_l_x0; i < dwt->win_l_x1; ++i) { +@@ -2279,7 +2991,7 @@ static void opj_v4dwt_interleave_v(opj_v4dwt_t* OPJ_RESTRICT dwt, + } + } + +-static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt, ++static void opj_v8dwt_interleave_partial_v(opj_v8dwt_t* OPJ_RESTRICT dwt, + opj_sparse_array_int32_t* sa, + OPJ_UINT32 sa_col, + OPJ_UINT32 nb_elts_read) +@@ -2289,44 +3001,36 @@ static void opj_v4dwt_interleave_partial_v(opj_v4dwt_t* OPJ_RESTRICT dwt, + sa_col, dwt->win_l_x0, + sa_col + nb_elts_read, dwt->win_l_x1, + (OPJ_INT32*)(dwt->wavelet + dwt->cas + 2 * dwt->win_l_x0), +- 1, 8, OPJ_TRUE); ++ 1, 2 * NB_ELTS_V8, OPJ_TRUE); + assert(ret); + ret = opj_sparse_array_int32_read(sa, + sa_col, (OPJ_UINT32)dwt->sn + dwt->win_h_x0, + sa_col + nb_elts_read, (OPJ_UINT32)dwt->sn + dwt->win_h_x1, + (OPJ_INT32*)(dwt->wavelet + 1 - dwt->cas + 2 * dwt->win_h_x0), +- 1, 8, OPJ_TRUE); ++ 1, 2 * NB_ELTS_V8, OPJ_TRUE); + assert(ret); + OPJ_UNUSED(ret); + } + + #ifdef __SSE__ + +-static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, ++static void opj_v8dwt_decode_step1_sse(opj_v8_t* w, + OPJ_UINT32 start, + OPJ_UINT32 end, + const __m128 c) + { + __m128* OPJ_RESTRICT vw = (__m128*) w; +- OPJ_UINT32 i; +- /* 4x unrolled loop */ +- vw += 2 * start; +- for (i = start; i + 3 < end; i += 4, vw += 8) { +- __m128 xmm0 = _mm_mul_ps(vw[0], c); +- __m128 xmm2 = _mm_mul_ps(vw[2], c); +- __m128 xmm4 = _mm_mul_ps(vw[4], c); +- __m128 xmm6 = _mm_mul_ps(vw[6], c); +- vw[0] = xmm0; +- vw[2] = xmm2; +- vw[4] = xmm4; +- vw[6] = xmm6; +- } +- for (; i < end; ++i, vw += 2) { ++ OPJ_UINT32 i = start; ++ /* To be adapted if NB_ELTS_V8 changes */ ++ vw += 4 * start; ++ /* Note: attempt at loop unrolling x2 doesn't help */ ++ for (; i < end; ++i, vw += 4) { + vw[0] = _mm_mul_ps(vw[0], c); ++ vw[1] = _mm_mul_ps(vw[1], c); + } + } + +-static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, ++static void opj_v8dwt_decode_step2_sse(opj_v8_t* l, opj_v8_t* w, + OPJ_UINT32 start, + OPJ_UINT32 end, + OPJ_UINT32 m, +@@ -2334,74 +3038,58 @@ static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, + { + __m128* OPJ_RESTRICT vl = (__m128*) l; + __m128* OPJ_RESTRICT vw = (__m128*) w; ++ /* To be adapted if NB_ELTS_V8 changes */ + OPJ_UINT32 i; + OPJ_UINT32 imax = opj_uint_min(end, m); +- __m128 tmp1, tmp2, tmp3; + if (start == 0) { +- tmp1 = vl[0]; ++ if (imax >= 1) { ++ vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vl[0], vw[0]), c)); ++ vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vl[1], vw[1]), c)); ++ vw += 4; ++ start = 1; ++ } + } else { +- vw += start * 2; +- tmp1 = vw[-3]; ++ vw += start * 4; + } + + i = start; +- +- /* 4x loop unrolling */ +- for (; i + 3 < imax; i += 4) { +- __m128 tmp4, tmp5, tmp6, tmp7, tmp8, tmp9; +- tmp2 = vw[-1]; +- tmp3 = vw[ 0]; +- tmp4 = vw[ 1]; +- tmp5 = vw[ 2]; +- tmp6 = vw[ 3]; +- tmp7 = vw[ 4]; +- tmp8 = vw[ 5]; +- tmp9 = vw[ 6]; +- vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c)); +- vw[ 1] = _mm_add_ps(tmp4, _mm_mul_ps(_mm_add_ps(tmp3, tmp5), c)); +- vw[ 3] = _mm_add_ps(tmp6, _mm_mul_ps(_mm_add_ps(tmp5, tmp7), c)); +- vw[ 5] = _mm_add_ps(tmp8, _mm_mul_ps(_mm_add_ps(tmp7, tmp9), c)); +- tmp1 = tmp9; +- vw += 8; +- } +- ++ /* Note: attempt at loop unrolling x2 doesn't help */ + for (; i < imax; ++i) { +- tmp2 = vw[-1]; +- tmp3 = vw[ 0]; +- vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c)); +- tmp1 = tmp3; +- vw += 2; ++ vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(_mm_add_ps(vw[-4], vw[0]), c)); ++ vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(_mm_add_ps(vw[-3], vw[1]), c)); ++ vw += 4; + } + if (m < end) { + assert(m + 1 == end); + c = _mm_add_ps(c, c); +- c = _mm_mul_ps(c, vw[-2]); +- vw[-1] = _mm_add_ps(vw[-1], c); ++ vw[-2] = _mm_add_ps(vw[-2], _mm_mul_ps(c, vw[-4])); ++ vw[-1] = _mm_add_ps(vw[-1], _mm_mul_ps(c, vw[-3])); + } + } + + #else + +-static void opj_v4dwt_decode_step1(opj_v4_t* w, ++static void opj_v8dwt_decode_step1(opj_v8_t* w, + OPJ_UINT32 start, + OPJ_UINT32 end, + const OPJ_FLOAT32 c) + { + OPJ_FLOAT32* OPJ_RESTRICT fw = (OPJ_FLOAT32*) w; + OPJ_UINT32 i; ++ /* To be adapted if NB_ELTS_V8 changes */ + for (i = start; i < end; ++i) { +- OPJ_FLOAT32 tmp1 = fw[i * 8 ]; +- OPJ_FLOAT32 tmp2 = fw[i * 8 + 1]; +- OPJ_FLOAT32 tmp3 = fw[i * 8 + 2]; +- OPJ_FLOAT32 tmp4 = fw[i * 8 + 3]; +- fw[i * 8 ] = tmp1 * c; +- fw[i * 8 + 1] = tmp2 * c; +- fw[i * 8 + 2] = tmp3 * c; +- fw[i * 8 + 3] = tmp4 * c; ++ fw[i * 2 * 8 ] = fw[i * 2 * 8 ] * c; ++ fw[i * 2 * 8 + 1] = fw[i * 2 * 8 + 1] * c; ++ fw[i * 2 * 8 + 2] = fw[i * 2 * 8 + 2] * c; ++ fw[i * 2 * 8 + 3] = fw[i * 2 * 8 + 3] * c; ++ fw[i * 2 * 8 + 4] = fw[i * 2 * 8 + 4] * c; ++ fw[i * 2 * 8 + 5] = fw[i * 2 * 8 + 5] * c; ++ fw[i * 2 * 8 + 6] = fw[i * 2 * 8 + 6] * c; ++ fw[i * 2 * 8 + 7] = fw[i * 2 * 8 + 7] * c; + } + } + +-static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, ++static void opj_v8dwt_decode_step2(opj_v8_t* l, opj_v8_t* w, + OPJ_UINT32 start, + OPJ_UINT32 end, + OPJ_UINT32 m, +@@ -2412,36 +3100,33 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, + OPJ_UINT32 i; + OPJ_UINT32 imax = opj_uint_min(end, m); + if (start > 0) { +- fw += 8 * start; +- fl = fw - 8; ++ fw += 2 * NB_ELTS_V8 * start; ++ fl = fw - 2 * NB_ELTS_V8; + } ++ /* To be adapted if NB_ELTS_V8 changes */ + for (i = start; i < imax; ++i) { +- OPJ_FLOAT32 tmp1_1 = fl[0]; +- OPJ_FLOAT32 tmp1_2 = fl[1]; +- OPJ_FLOAT32 tmp1_3 = fl[2]; +- OPJ_FLOAT32 tmp1_4 = fl[3]; +- OPJ_FLOAT32 tmp2_1 = fw[-4]; +- OPJ_FLOAT32 tmp2_2 = fw[-3]; +- OPJ_FLOAT32 tmp2_3 = fw[-2]; +- OPJ_FLOAT32 tmp2_4 = fw[-1]; +- OPJ_FLOAT32 tmp3_1 = fw[0]; +- OPJ_FLOAT32 tmp3_2 = fw[1]; +- OPJ_FLOAT32 tmp3_3 = fw[2]; +- OPJ_FLOAT32 tmp3_4 = fw[3]; +- fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c); +- fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c); +- fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c); +- fw[-1] = tmp2_4 + ((tmp1_4 + tmp3_4) * c); ++ fw[-8] = fw[-8] + ((fl[0] + fw[0]) * c); ++ fw[-7] = fw[-7] + ((fl[1] + fw[1]) * c); ++ fw[-6] = fw[-6] + ((fl[2] + fw[2]) * c); ++ fw[-5] = fw[-5] + ((fl[3] + fw[3]) * c); ++ fw[-4] = fw[-4] + ((fl[4] + fw[4]) * c); ++ fw[-3] = fw[-3] + ((fl[5] + fw[5]) * c); ++ fw[-2] = fw[-2] + ((fl[6] + fw[6]) * c); ++ fw[-1] = fw[-1] + ((fl[7] + fw[7]) * c); + fl = fw; +- fw += 8; ++ fw += 2 * NB_ELTS_V8; + } + if (m < end) { + assert(m + 1 == end); + c += c; +- fw[-4] = fw[-4] + fl[0] * c; +- fw[-3] = fw[-3] + fl[1] * c; +- fw[-2] = fw[-2] + fl[2] * c; +- fw[-1] = fw[-1] + fl[3] * c; ++ fw[-8] = fw[-8] + fl[0] * c; ++ fw[-7] = fw[-7] + fl[1] * c; ++ fw[-6] = fw[-6] + fl[2] * c; ++ fw[-5] = fw[-5] + fl[3] * c; ++ fw[-4] = fw[-4] + fl[4] * c; ++ fw[-3] = fw[-3] + fl[5] * c; ++ fw[-2] = fw[-2] + fl[6] * c; ++ fw[-1] = fw[-1] + fl[7] * c; + } + } + +@@ -2450,9 +3135,17 @@ static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, + /* */ + /* Inverse 9-7 wavelet transform in 1-D. */ + /* */ +-static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt) ++static void opj_v8dwt_decode(opj_v8dwt_t* OPJ_RESTRICT dwt) + { + OPJ_INT32 a, b; ++ /* BUG_WEIRD_TWO_INVK (look for this identifier in tcd.c) */ ++ /* Historic value for 2 / opj_invK */ ++ /* Normally, we should use invK, but if we do so, we have failures in the */ ++ /* conformance test, due to MSE and peak errors significantly higher than */ ++ /* accepted value */ ++ /* Due to using two_invK instead of invK, we have to compensate in tcd.c */ ++ /* the computation of the stepsize for the non LL subbands */ ++ const float two_invK = 1.625732422f; + if (dwt->cas == 0) { + if (!((dwt->dn > 0) || (dwt->sn > 1))) { + return; +@@ -2467,60 +3160,147 @@ static void opj_v4dwt_decode(opj_v4dwt_t* OPJ_RESTRICT dwt) + b = 0; + } + #ifdef __SSE__ +- opj_v4dwt_decode_step1_sse(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, ++ opj_v8dwt_decode_step1_sse(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, + _mm_set1_ps(opj_K)); +- opj_v4dwt_decode_step1_sse(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, +- _mm_set1_ps(opj_c13318)); +- opj_v4dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, ++ opj_v8dwt_decode_step1_sse(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, ++ _mm_set1_ps(two_invK)); ++ opj_v8dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, + dwt->win_l_x0, dwt->win_l_x1, + (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), +- _mm_set1_ps(opj_dwt_delta)); +- opj_v4dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, ++ _mm_set1_ps(-opj_dwt_delta)); ++ opj_v8dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, + dwt->win_h_x0, dwt->win_h_x1, + (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b), +- _mm_set1_ps(opj_dwt_gamma)); +- opj_v4dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, ++ _mm_set1_ps(-opj_dwt_gamma)); ++ opj_v8dwt_decode_step2_sse(dwt->wavelet + b, dwt->wavelet + a + 1, + dwt->win_l_x0, dwt->win_l_x1, + (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), +- _mm_set1_ps(opj_dwt_beta)); +- opj_v4dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, ++ _mm_set1_ps(-opj_dwt_beta)); ++ opj_v8dwt_decode_step2_sse(dwt->wavelet + a, dwt->wavelet + b + 1, + dwt->win_h_x0, dwt->win_h_x1, + (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b), +- _mm_set1_ps(opj_dwt_alpha)); ++ _mm_set1_ps(-opj_dwt_alpha)); + #else +- opj_v4dwt_decode_step1(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, ++ opj_v8dwt_decode_step1(dwt->wavelet + a, dwt->win_l_x0, dwt->win_l_x1, + opj_K); +- opj_v4dwt_decode_step1(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, +- opj_c13318); +- opj_v4dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, ++ opj_v8dwt_decode_step1(dwt->wavelet + b, dwt->win_h_x0, dwt->win_h_x1, ++ two_invK); ++ opj_v8dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, + dwt->win_l_x0, dwt->win_l_x1, + (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), +- opj_dwt_delta); +- opj_v4dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, ++ -opj_dwt_delta); ++ opj_v8dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, + dwt->win_h_x0, dwt->win_h_x1, + (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b), +- opj_dwt_gamma); +- opj_v4dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, ++ -opj_dwt_gamma); ++ opj_v8dwt_decode_step2(dwt->wavelet + b, dwt->wavelet + a + 1, + dwt->win_l_x0, dwt->win_l_x1, + (OPJ_UINT32)opj_int_min(dwt->sn, dwt->dn - a), +- opj_dwt_beta); +- opj_v4dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, ++ -opj_dwt_beta); ++ opj_v8dwt_decode_step2(dwt->wavelet + a, dwt->wavelet + b + 1, + dwt->win_h_x0, dwt->win_h_x1, + (OPJ_UINT32)opj_int_min(dwt->dn, dwt->sn - b), +- opj_dwt_alpha); ++ -opj_dwt_alpha); + #endif + } + ++typedef struct { ++ opj_v8dwt_t h; ++ OPJ_UINT32 rw; ++ OPJ_UINT32 w; ++ OPJ_FLOAT32 * OPJ_RESTRICT aj; ++ OPJ_UINT32 nb_rows; ++} opj_dwt97_decode_h_job_t; ++ ++static void opj_dwt97_decode_h_func(void* user_data, opj_tls_t* tls) ++{ ++ OPJ_UINT32 j; ++ opj_dwt97_decode_h_job_t* job; ++ OPJ_FLOAT32 * OPJ_RESTRICT aj; ++ OPJ_UINT32 w; ++ (void)tls; ++ ++ job = (opj_dwt97_decode_h_job_t*)user_data; ++ w = job->w; ++ ++ assert((job->nb_rows % NB_ELTS_V8) == 0); ++ ++ aj = job->aj; ++ for (j = 0; j + NB_ELTS_V8 <= job->nb_rows; j += NB_ELTS_V8) { ++ OPJ_UINT32 k; ++ opj_v8dwt_interleave_h(&job->h, aj, job->w, NB_ELTS_V8); ++ opj_v8dwt_decode(&job->h); ++ ++ /* To be adapted if NB_ELTS_V8 changes */ ++ for (k = 0; k < job->rw; k++) { ++ aj[k ] = job->h.wavelet[k].f[0]; ++ aj[k + (OPJ_SIZE_T)w ] = job->h.wavelet[k].f[1]; ++ aj[k + (OPJ_SIZE_T)w * 2] = job->h.wavelet[k].f[2]; ++ aj[k + (OPJ_SIZE_T)w * 3] = job->h.wavelet[k].f[3]; ++ } ++ for (k = 0; k < job->rw; k++) { ++ aj[k + (OPJ_SIZE_T)w * 4] = job->h.wavelet[k].f[4]; ++ aj[k + (OPJ_SIZE_T)w * 5] = job->h.wavelet[k].f[5]; ++ aj[k + (OPJ_SIZE_T)w * 6] = job->h.wavelet[k].f[6]; ++ aj[k + (OPJ_SIZE_T)w * 7] = job->h.wavelet[k].f[7]; ++ } ++ ++ aj += w * NB_ELTS_V8; ++ } ++ ++ opj_aligned_free(job->h.wavelet); ++ opj_free(job); ++} ++ ++ ++typedef struct { ++ opj_v8dwt_t v; ++ OPJ_UINT32 rh; ++ OPJ_UINT32 w; ++ OPJ_FLOAT32 * OPJ_RESTRICT aj; ++ OPJ_UINT32 nb_columns; ++} opj_dwt97_decode_v_job_t; ++ ++static void opj_dwt97_decode_v_func(void* user_data, opj_tls_t* tls) ++{ ++ OPJ_UINT32 j; ++ opj_dwt97_decode_v_job_t* job; ++ OPJ_FLOAT32 * OPJ_RESTRICT aj; ++ (void)tls; ++ ++ job = (opj_dwt97_decode_v_job_t*)user_data; ++ ++ assert((job->nb_columns % NB_ELTS_V8) == 0); ++ ++ aj = job->aj; ++ for (j = 0; j + NB_ELTS_V8 <= job->nb_columns; j += NB_ELTS_V8) { ++ OPJ_UINT32 k; ++ ++ opj_v8dwt_interleave_v(&job->v, aj, job->w, NB_ELTS_V8); ++ opj_v8dwt_decode(&job->v); ++ ++ for (k = 0; k < job->rh; ++k) { ++ memcpy(&aj[k * (OPJ_SIZE_T)job->w], &job->v.wavelet[k], ++ NB_ELTS_V8 * sizeof(OPJ_FLOAT32)); ++ } ++ aj += NB_ELTS_V8; ++ } ++ ++ opj_aligned_free(job->v.wavelet); ++ opj_free(job); ++} ++ + + /* */ + /* Inverse 9-7 wavelet transform in 2-D. */ + /* */ + static +-OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, ++OPJ_BOOL opj_dwt_decode_tile_97(opj_thread_pool_t* tp, ++ opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, + OPJ_UINT32 numres) + { +- opj_v4dwt_t h; +- opj_v4dwt_t v; ++ opj_v8dwt_t h; ++ opj_v8dwt_t v; + + opj_tcd_resolution_t* res = tilec->resolutions; + +@@ -2534,20 +3314,19 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, + tilec->resolutions[tilec->minimum_num_resolutions - 1].x0); + + OPJ_SIZE_T l_data_size; ++ const int num_threads = opj_thread_pool_get_thread_count(tp); + +- l_data_size = opj_dwt_max_resolution(res, numres); +- /* overflow check */ +- if (l_data_size > (SIZE_MAX - 5U)) { +- /* FIXME event manager error callback */ +- return OPJ_FALSE; ++ if (numres == 1) { ++ return OPJ_TRUE; + } +- l_data_size += 5U; ++ ++ l_data_size = opj_dwt_max_resolution(res, numres); + /* overflow check */ +- if (l_data_size > (SIZE_MAX / sizeof(opj_v4_t))) { ++ if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) { + /* FIXME event manager error callback */ + return OPJ_FALSE; + } +- h.wavelet = (opj_v4_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v4_t)); ++ h.wavelet = (opj_v8_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); + if (!h.wavelet) { + /* FIXME event manager error callback */ + return OPJ_FALSE; +@@ -2575,35 +3354,80 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, + h.win_l_x1 = (OPJ_UINT32)h.sn; + h.win_h_x0 = 0; + h.win_h_x1 = (OPJ_UINT32)h.dn; +- for (j = 0; j + 3 < rh; j += 4) { +- OPJ_UINT32 k; +- opj_v4dwt_interleave_h(&h, aj, w, rh - j); +- opj_v4dwt_decode(&h); + +- for (k = 0; k < rw; k++) { +- aj[k ] = h.wavelet[k].f[0]; +- aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; +- aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; +- aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3]; ++ if (num_threads <= 1 || rh < 2 * NB_ELTS_V8) { ++ for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) { ++ OPJ_UINT32 k; ++ opj_v8dwt_interleave_h(&h, aj, w, NB_ELTS_V8); ++ opj_v8dwt_decode(&h); ++ ++ /* To be adapted if NB_ELTS_V8 changes */ ++ for (k = 0; k < rw; k++) { ++ aj[k ] = h.wavelet[k].f[0]; ++ aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; ++ aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; ++ aj[k + (OPJ_SIZE_T)w * 3] = h.wavelet[k].f[3]; ++ } ++ for (k = 0; k < rw; k++) { ++ aj[k + (OPJ_SIZE_T)w * 4] = h.wavelet[k].f[4]; ++ aj[k + (OPJ_SIZE_T)w * 5] = h.wavelet[k].f[5]; ++ aj[k + (OPJ_SIZE_T)w * 6] = h.wavelet[k].f[6]; ++ aj[k + (OPJ_SIZE_T)w * 7] = h.wavelet[k].f[7]; ++ } ++ ++ aj += w * NB_ELTS_V8; ++ } ++ } else { ++ OPJ_UINT32 num_jobs = (OPJ_UINT32)num_threads; ++ OPJ_UINT32 step_j; ++ ++ if ((rh / NB_ELTS_V8) < num_jobs) { ++ num_jobs = rh / NB_ELTS_V8; + } ++ step_j = ((rh / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8; ++ for (j = 0; j < num_jobs; j++) { ++ opj_dwt97_decode_h_job_t* job; + +- aj += w * 4; ++ job = (opj_dwt97_decode_h_job_t*) opj_malloc(sizeof(opj_dwt97_decode_h_job_t)); ++ if (!job) { ++ opj_thread_pool_wait_completion(tp, 0); ++ opj_aligned_free(h.wavelet); ++ return OPJ_FALSE; ++ } ++ job->h.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); ++ if (!job->h.wavelet) { ++ opj_thread_pool_wait_completion(tp, 0); ++ opj_free(job); ++ opj_aligned_free(h.wavelet); ++ return OPJ_FALSE; ++ } ++ job->h.dn = h.dn; ++ job->h.sn = h.sn; ++ job->h.cas = h.cas; ++ job->h.win_l_x0 = h.win_l_x0; ++ job->h.win_l_x1 = h.win_l_x1; ++ job->h.win_h_x0 = h.win_h_x0; ++ job->h.win_h_x1 = h.win_h_x1; ++ job->rw = rw; ++ job->w = w; ++ job->aj = aj; ++ job->nb_rows = (j + 1 == num_jobs) ? (rh & (OPJ_UINT32)~ ++ (NB_ELTS_V8 - 1)) - j * step_j : step_j; ++ aj += w * job->nb_rows; ++ opj_thread_pool_submit_job(tp, opj_dwt97_decode_h_func, job); ++ } ++ opj_thread_pool_wait_completion(tp, 0); ++ j = rh & (OPJ_UINT32)~(NB_ELTS_V8 - 1); + } + + if (j < rh) { + OPJ_UINT32 k; +- opj_v4dwt_interleave_h(&h, aj, w, rh - j); +- opj_v4dwt_decode(&h); ++ opj_v8dwt_interleave_h(&h, aj, w, rh - j); ++ opj_v8dwt_decode(&h); + for (k = 0; k < rw; k++) { +- switch (rh - j) { +- case 3: +- aj[k + (OPJ_SIZE_T)w * 2] = h.wavelet[k].f[2]; +- /* FALLTHRU */ +- case 2: +- aj[k + (OPJ_SIZE_T)w ] = h.wavelet[k].f[1]; +- /* FALLTHRU */ +- case 1: +- aj[k] = h.wavelet[k].f[0]; ++ OPJ_UINT32 l; ++ for (l = 0; l < rh - j; l++) { ++ aj[k + (OPJ_SIZE_T)w * l ] = h.wavelet[k].f[l]; + } + } + } +@@ -2616,25 +3440,71 @@ OPJ_BOOL opj_dwt_decode_tile_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, + v.win_h_x1 = (OPJ_UINT32)v.dn; + + aj = (OPJ_FLOAT32*) tilec->data; +- for (j = rw; j > 3; j -= 4) { +- OPJ_UINT32 k; ++ if (num_threads <= 1 || rw < 2 * NB_ELTS_V8) { ++ for (j = rw; j > (NB_ELTS_V8 - 1); j -= NB_ELTS_V8) { ++ OPJ_UINT32 k; + +- opj_v4dwt_interleave_v(&v, aj, w, 4); +- opj_v4dwt_decode(&v); ++ opj_v8dwt_interleave_v(&v, aj, w, NB_ELTS_V8); ++ opj_v8dwt_decode(&v); + +- for (k = 0; k < rh; ++k) { +- memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); ++ for (k = 0; k < rh; ++k) { ++ memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], NB_ELTS_V8 * sizeof(OPJ_FLOAT32)); ++ } ++ aj += NB_ELTS_V8; ++ } ++ } else { ++ /* "bench_dwt -I" shows that scaling is poor, likely due to RAM ++ transfer being the limiting factor. So limit the number of ++ threads. ++ */ ++ OPJ_UINT32 num_jobs = opj_uint_max((OPJ_UINT32)num_threads / 2, 2U); ++ OPJ_UINT32 step_j; ++ ++ if ((rw / NB_ELTS_V8) < num_jobs) { ++ num_jobs = rw / NB_ELTS_V8; ++ } ++ step_j = ((rw / num_jobs) / NB_ELTS_V8) * NB_ELTS_V8; ++ for (j = 0; j < num_jobs; j++) { ++ opj_dwt97_decode_v_job_t* job; ++ ++ job = (opj_dwt97_decode_v_job_t*) opj_malloc(sizeof(opj_dwt97_decode_v_job_t)); ++ if (!job) { ++ opj_thread_pool_wait_completion(tp, 0); ++ opj_aligned_free(h.wavelet); ++ return OPJ_FALSE; ++ } ++ job->v.wavelet = (opj_v8_t*)opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); ++ if (!job->v.wavelet) { ++ opj_thread_pool_wait_completion(tp, 0); ++ opj_free(job); ++ opj_aligned_free(h.wavelet); ++ return OPJ_FALSE; ++ } ++ job->v.dn = v.dn; ++ job->v.sn = v.sn; ++ job->v.cas = v.cas; ++ job->v.win_l_x0 = v.win_l_x0; ++ job->v.win_l_x1 = v.win_l_x1; ++ job->v.win_h_x0 = v.win_h_x0; ++ job->v.win_h_x1 = v.win_h_x1; ++ job->rh = rh; ++ job->w = w; ++ job->aj = aj; ++ job->nb_columns = (j + 1 == num_jobs) ? (rw & (OPJ_UINT32)~ ++ (NB_ELTS_V8 - 1)) - j * step_j : step_j; ++ aj += job->nb_columns; ++ opj_thread_pool_submit_job(tp, opj_dwt97_decode_v_func, job); + } +- aj += 4; ++ opj_thread_pool_wait_completion(tp, 0); + } + +- if (rw & 0x03) { ++ if (rw & (NB_ELTS_V8 - 1)) { + OPJ_UINT32 k; + +- j = rw & 0x03; ++ j = rw & (NB_ELTS_V8 - 1); + +- opj_v4dwt_interleave_v(&v, aj, w, j); +- opj_v4dwt_decode(&v); ++ opj_v8dwt_interleave_v(&v, aj, w, j); ++ opj_v8dwt_decode(&v); + + for (k = 0; k < rh; ++k) { + memcpy(&aj[k * (OPJ_SIZE_T)w], &v.wavelet[k], +@@ -2652,8 +3522,8 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, + OPJ_UINT32 numres) + { + opj_sparse_array_int32_t* sa; +- opj_v4dwt_t h; +- opj_v4dwt_t v; ++ opj_v8dwt_t h; ++ opj_v8dwt_t v; + OPJ_UINT32 resno; + /* This value matches the maximum left/right extension given in tables */ + /* F.2 and F.3 of the standard. Note: in opj_tcd_is_subband_area_of_interest() */ +@@ -2703,19 +3573,12 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, + + l_data_size = opj_dwt_max_resolution(tr, numres); + /* overflow check */ +- if (l_data_size > (SIZE_MAX - 5U)) { +- /* FIXME event manager error callback */ +- opj_sparse_array_int32_free(sa); +- return OPJ_FALSE; +- } +- l_data_size += 5U; +- /* overflow check */ +- if (l_data_size > (SIZE_MAX / sizeof(opj_v4_t))) { ++ if (l_data_size > (SIZE_MAX / sizeof(opj_v8_t))) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); + return OPJ_FALSE; + } +- h.wavelet = (opj_v4_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v4_t)); ++ h.wavelet = (opj_v8_t*) opj_aligned_malloc(l_data_size * sizeof(opj_v8_t)); + if (!h.wavelet) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); +@@ -2810,17 +3673,17 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, + h.win_l_x1 = win_ll_x1; + h.win_h_x0 = win_hl_x0; + h.win_h_x1 = win_hl_x1; +- for (j = 0; j + 3 < rh; j += 4) { +- if ((j + 3 >= win_ll_y0 && j < win_ll_y1) || +- (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && ++ for (j = 0; j + (NB_ELTS_V8 - 1) < rh; j += NB_ELTS_V8) { ++ if ((j + (NB_ELTS_V8 - 1) >= win_ll_y0 && j < win_ll_y1) || ++ (j + (NB_ELTS_V8 - 1) >= win_lh_y0 + (OPJ_UINT32)v.sn && + j < win_lh_y1 + (OPJ_UINT32)v.sn)) { +- opj_v4dwt_interleave_partial_h(&h, sa, j, opj_uint_min(4U, rh - j)); +- opj_v4dwt_decode(&h); ++ opj_v8dwt_interleave_partial_h(&h, sa, j, opj_uint_min(NB_ELTS_V8, rh - j)); ++ opj_v8dwt_decode(&h); + if (!opj_sparse_array_int32_write(sa, + win_tr_x0, j, +- win_tr_x1, j + 4, ++ win_tr_x1, j + NB_ELTS_V8, + (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0], +- 4, 1, OPJ_TRUE)) { ++ NB_ELTS_V8, 1, OPJ_TRUE)) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.wavelet); +@@ -2830,16 +3693,16 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, + } + + if (j < rh && +- ((j + 3 >= win_ll_y0 && j < win_ll_y1) || +- (j + 3 >= win_lh_y0 + (OPJ_UINT32)v.sn && ++ ((j + (NB_ELTS_V8 - 1) >= win_ll_y0 && j < win_ll_y1) || ++ (j + (NB_ELTS_V8 - 1) >= win_lh_y0 + (OPJ_UINT32)v.sn && + j < win_lh_y1 + (OPJ_UINT32)v.sn))) { +- opj_v4dwt_interleave_partial_h(&h, sa, j, rh - j); +- opj_v4dwt_decode(&h); ++ opj_v8dwt_interleave_partial_h(&h, sa, j, rh - j); ++ opj_v8dwt_decode(&h); + if (!opj_sparse_array_int32_write(sa, + win_tr_x0, j, + win_tr_x1, rh, + (OPJ_INT32*)&h.wavelet[win_tr_x0].f[0], +- 4, 1, OPJ_TRUE)) { ++ NB_ELTS_V8, 1, OPJ_TRUE)) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.wavelet); +@@ -2851,17 +3714,17 @@ OPJ_BOOL opj_dwt_decode_partial_97(opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, + v.win_l_x1 = win_ll_y1; + v.win_h_x0 = win_lh_y0; + v.win_h_x1 = win_lh_y1; +- for (j = win_tr_x0; j < win_tr_x1; j += 4) { +- OPJ_UINT32 nb_elts = opj_uint_min(4U, win_tr_x1 - j); ++ for (j = win_tr_x0; j < win_tr_x1; j += NB_ELTS_V8) { ++ OPJ_UINT32 nb_elts = opj_uint_min(NB_ELTS_V8, win_tr_x1 - j); + +- opj_v4dwt_interleave_partial_v(&v, sa, j, nb_elts); +- opj_v4dwt_decode(&v); ++ opj_v8dwt_interleave_partial_v(&v, sa, j, nb_elts); ++ opj_v8dwt_decode(&v); + + if (!opj_sparse_array_int32_write(sa, + j, win_tr_y0, + j + nb_elts, win_tr_y1, + (OPJ_INT32*)&h.wavelet[win_tr_y0].f[0], +- 1, 4, OPJ_TRUE)) { ++ 1, NB_ELTS_V8, OPJ_TRUE)) { + /* FIXME event manager error callback */ + opj_sparse_array_int32_free(sa); + opj_aligned_free(h.wavelet); +@@ -2894,7 +3757,7 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_t *p_tcd, + OPJ_UINT32 numres) + { + if (p_tcd->whole_tile_decoding) { +- return opj_dwt_decode_tile_97(tilec, numres); ++ return opj_dwt_decode_tile_97(p_tcd->thread_pool, tilec, numres); + } else { + return opj_dwt_decode_partial_97(tilec, numres); + } +diff --git a/third_party/libopenjpeg20/dwt.h b/third_party/libopenjpeg20/dwt.h +index 4f63e524a60fd75577e5b579438990cfbf6d540f..215061e6b9cf010da87b652b9a5f65f212e7f84b 100644 +--- a/third_party/libopenjpeg20/dwt.h ++++ b/third_party/libopenjpeg20/dwt.h +@@ -56,9 +56,11 @@ DWT.C are used by some function in TCD.C. + /** + Forward 5-3 wavelet transform in 2-D. + Apply a reversible DWT transform to a component of an image. ++@param p_tcd TCD handle + @param tilec Tile component information (current tile) + */ +-OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec); ++OPJ_BOOL opj_dwt_encode(opj_tcd_t *p_tcd, ++ opj_tcd_tilecomp_t * tilec); + + /** + Inverse 5-3 wavelet transform in 2-D. +@@ -71,12 +73,6 @@ OPJ_BOOL opj_dwt_decode(opj_tcd_t *p_tcd, + opj_tcd_tilecomp_t* tilec, + OPJ_UINT32 numres); + +-/** +-Get the gain of a subband for the reversible 5-3 DWT. +-@param orient Number that identifies the subband (0->LL, 1->HL, 2->LH, 3->HH) +-@return Returns 0 if orient = 0, returns 1 if orient = 1 or 2, returns 2 otherwise +-*/ +-OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) ; + /** + Get the norm of a wavelet function of a subband at a specified level for the reversible 5-3 DWT. + @param level Level of the wavelet function +@@ -87,9 +83,11 @@ OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient); + /** + Forward 9-7 wavelet transform in 2-D. + Apply an irreversible DWT transform to a component of an image. ++@param p_tcd TCD handle + @param tilec Tile component information (current tile) + */ +-OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec); ++OPJ_BOOL opj_dwt_encode_real(opj_tcd_t *p_tcd, ++ opj_tcd_tilecomp_t * tilec); + /** + Inverse 9-7 wavelet transform in 2-D. + Apply an irreversible inverse DWT transform to a component of an image. +@@ -101,12 +99,6 @@ OPJ_BOOL opj_dwt_decode_real(opj_tcd_t *p_tcd, + opj_tcd_tilecomp_t* OPJ_RESTRICT tilec, + OPJ_UINT32 numres); + +-/** +-Get the gain of a subband for the irreversible 9-7 DWT. +-@param orient Number that identifies the subband (0->LL, 1->HL, 2->LH, 3->HH) +-@return Returns the gain of the 9-7 wavelet transform +-*/ +-OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient); + /** + Get the norm of a wavelet function of a subband at a specified level for the irreversible 9-7 DWT + @param level Level of the wavelet function +diff --git a/third_party/libopenjpeg20/j2k.c b/third_party/libopenjpeg20/j2k.c +index c6473743df69839a1404adb59af4ca18035e02c6..711dd73e87d7a6888353a9790eb0741b6b06cc8d 100644 +--- a/third_party/libopenjpeg20/j2k.c ++++ b/third_party/libopenjpeg20/j2k.c +@@ -400,14 +400,14 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, + static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, + OPJ_BYTE * p_data, + OPJ_UINT32 * p_data_written, +- OPJ_UINT32 p_total_data_size, ++ OPJ_UINT32 total_data_size, + opj_stream_private_t *p_stream, + struct opj_event_mgr * p_manager); + + static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, + OPJ_BYTE * p_data, + OPJ_UINT32 * p_data_written, +- OPJ_UINT32 p_total_data_size, ++ OPJ_UINT32 total_data_size, + opj_stream_private_t *p_stream, + struct opj_event_mgr * p_manager); + +@@ -832,14 +832,14 @@ static OPJ_BOOL opj_j2k_write_tlm(opj_j2k_t *p_j2k, + * + * @param p_j2k J2K codec. + * @param p_data Output buffer +- * @param p_total_data_size Output buffer size ++ * @param total_data_size Output buffer size + * @param p_data_written Number of bytes written into stream + * @param p_stream the stream to write data to. + * @param p_manager the user event manager. + */ + static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k, + OPJ_BYTE * p_data, +- OPJ_UINT32 p_total_data_size, ++ OPJ_UINT32 total_data_size, + OPJ_UINT32 * p_data_written, + const opj_stream_private_t *p_stream, + opj_event_mgr_t * p_manager); +@@ -879,11 +879,13 @@ static OPJ_BOOL opj_j2k_read_sot(opj_j2k_t *p_j2k, + /** + * Writes the SOD marker (Start of data) + * ++ * This also writes optional PLT markers (before SOD) ++ * + * @param p_j2k J2K codec. + * @param p_tile_coder FIXME DOC + * @param p_data FIXME DOC + * @param p_data_written FIXME DOC +- * @param p_total_data_size FIXME DOC ++ * @param total_data_size FIXME DOC + * @param p_stream the stream to write data to. + * @param p_manager the user event manager. + */ +@@ -891,7 +893,7 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, + opj_tcd_t * p_tile_coder, + OPJ_BYTE * p_data, + OPJ_UINT32 * p_data_written, +- OPJ_UINT32 p_total_data_size, ++ OPJ_UINT32 total_data_size, + const opj_stream_private_t *p_stream, + opj_event_mgr_t * p_manager); + +@@ -1219,6 +1221,7 @@ static OPJ_BOOL opj_j2k_write_epc(opj_j2k_t *p_j2k, + * A nice message is outputted at errors. + * + * @param p_pocs the progression order changes. ++ * @param tileno the tile number of interest + * @param p_nb_pocs the number of progression order changes. + * @param p_nb_resolutions the number of resolutions. + * @param numcomps the number of components +@@ -1228,6 +1231,7 @@ static OPJ_BOOL opj_j2k_write_epc(opj_j2k_t *p_j2k, + * @return true if the pocs are valid. + */ + static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, ++ OPJ_UINT32 tileno, + OPJ_UINT32 p_nb_pocs, + OPJ_UINT32 p_nb_resolutions, + OPJ_UINT32 numcomps, +@@ -1282,6 +1286,13 @@ static void opj_j2k_set_cinema_parameters(opj_cparameters_t *parameters, + static OPJ_BOOL opj_j2k_is_cinema_compliant(opj_image_t *image, OPJ_UINT16 rsiz, + opj_event_mgr_t *p_manager); + ++static void opj_j2k_set_imf_parameters(opj_cparameters_t *parameters, ++ opj_image_t *image, opj_event_mgr_t *p_manager); ++ ++static OPJ_BOOL opj_j2k_is_imf_compliant(opj_cparameters_t *parameters, ++ opj_image_t *image, ++ opj_event_mgr_t *p_manager); ++ + /** + * Checks for invalid number of tile-parts in SOT marker (TPsot==TNsot). See issue 254. + * +@@ -1615,6 +1626,7 @@ const char *opj_j2k_convert_progression_order(OPJ_PROG_ORDER prg_order) + } + + static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, ++ OPJ_UINT32 tileno, + OPJ_UINT32 p_nb_pocs, + OPJ_UINT32 p_nb_resolutions, + OPJ_UINT32 p_num_comps, +@@ -1628,7 +1640,8 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, + OPJ_UINT32 step_r = p_num_comps * step_c; + OPJ_UINT32 step_l = p_nb_resolutions * step_r; + OPJ_BOOL loss = OPJ_FALSE; +- OPJ_UINT32 layno0 = 0; ++ ++ assert(p_nb_pocs > 0); + + packet_array = (OPJ_UINT32*) opj_calloc(step_l * p_num_layers, + sizeof(OPJ_UINT32)); +@@ -1638,63 +1651,37 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, + return OPJ_FALSE; + } + +- if (p_nb_pocs == 0) { +- opj_free(packet_array); +- return OPJ_TRUE; +- } ++ /* iterate through all the pocs that match our tile of interest. */ ++ for (i = 0; i < p_nb_pocs; ++i) { ++ const opj_poc_t *poc = &p_pocs[i]; ++ if (tileno + 1 == poc->tile) { ++ index = step_r * poc->resno0; + +- index = step_r * p_pocs->resno0; +- /* take each resolution for each poc */ +- for (resno = p_pocs->resno0 ; resno < p_pocs->resno1 ; ++resno) { +- OPJ_UINT32 res_index = index + p_pocs->compno0 * step_c; ++ /* take each resolution for each poc */ ++ for (resno = poc->resno0 ; ++ resno < opj_uint_min(poc->resno1, p_nb_resolutions); ++resno) { ++ OPJ_UINT32 res_index = index + poc->compno0 * step_c; + +- /* take each comp of each resolution for each poc */ +- for (compno = p_pocs->compno0 ; compno < p_pocs->compno1 ; ++compno) { +- OPJ_UINT32 comp_index = res_index + layno0 * step_l; +- +- /* and finally take each layer of each res of ... */ +- for (layno = layno0; layno < p_pocs->layno1 ; ++layno) { +- /*index = step_r * resno + step_c * compno + step_l * layno;*/ +- packet_array[comp_index] = 1; +- comp_index += step_l; +- } +- +- res_index += step_c; +- } +- +- index += step_r; +- } +- ++p_pocs; ++ /* take each comp of each resolution for each poc */ ++ for (compno = poc->compno0 ; ++ compno < opj_uint_min(poc->compno1, p_num_comps); ++compno) { ++ /* The layer index always starts at zero for every progression. */ ++ const OPJ_UINT32 layno0 = 0; ++ OPJ_UINT32 comp_index = res_index + layno0 * step_l; + +- /* iterate through all the pocs */ +- for (i = 1; i < p_nb_pocs ; ++i) { +- OPJ_UINT32 l_last_layno1 = (p_pocs - 1)->layno1 ; +- +- layno0 = (p_pocs->layno1 > l_last_layno1) ? l_last_layno1 : 0; +- index = step_r * p_pocs->resno0; +- +- /* take each resolution for each poc */ +- for (resno = p_pocs->resno0 ; resno < p_pocs->resno1 ; ++resno) { +- OPJ_UINT32 res_index = index + p_pocs->compno0 * step_c; +- +- /* take each comp of each resolution for each poc */ +- for (compno = p_pocs->compno0 ; compno < p_pocs->compno1 ; ++compno) { +- OPJ_UINT32 comp_index = res_index + layno0 * step_l; ++ /* and finally take each layer of each res of ... */ ++ for (layno = layno0; layno < opj_uint_min(poc->layno1, p_num_layers); ++ ++layno) { ++ packet_array[comp_index] = 1; ++ comp_index += step_l; ++ } + +- /* and finally take each layer of each res of ... */ +- for (layno = layno0; layno < p_pocs->layno1 ; ++layno) { +- /*index = step_r * resno + step_c * compno + step_l * layno;*/ +- packet_array[comp_index] = 1; +- comp_index += step_l; ++ res_index += step_c; + } + +- res_index += step_c; ++ index += step_r; + } +- +- index += step_r; + } +- +- ++p_pocs; + } + + index = 0; +@@ -1702,7 +1689,13 @@ static OPJ_BOOL opj_j2k_check_poc_val(const opj_poc_t *p_pocs, + for (resno = 0; resno < p_nb_resolutions; ++resno) { + for (compno = 0; compno < p_num_comps; ++compno) { + loss |= (packet_array[index] != 1); +- /*index = step_r * resno + step_c * compno + step_l * layno;*/ ++#ifdef DEBUG_VERBOSE ++ if (packet_array[index] != 1) { ++ fprintf(stderr, ++ "Missing packet in POC: layno=%d resno=%d compno=%d\n", ++ layno, resno, compno); ++ } ++#endif + index += step_c; + } + } +@@ -3454,6 +3447,28 @@ static OPJ_UINT32 opj_j2k_get_specific_header_sizes(opj_j2k_t *p_j2k) + + l_nb_bytes += opj_j2k_get_max_poc_size(p_j2k); + ++ if (p_j2k->m_specific_param.m_encoder.m_PLT) { ++ /* Reserve space for PLT markers */ ++ ++ OPJ_UINT32 i; ++ const opj_cp_t * l_cp = &(p_j2k->m_cp); ++ OPJ_UINT32 l_max_packet_count = 0; ++ for (i = 0; i < l_cp->th * l_cp->tw; ++i) { ++ l_max_packet_count = opj_uint_max(l_max_packet_count, ++ opj_get_encoding_packet_count(p_j2k->m_private_image, l_cp, i)); ++ } ++ /* Minimum 6 bytes per PLT marker, and at a minimum (taking a pessimistic */ ++ /* estimate of 4 bytes for a packet size), one can write */ ++ /* (65536-6) / 4 = 16382 paquet sizes per PLT marker */ ++ p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT = ++ 6 * opj_uint_ceildiv(l_max_packet_count, 16382); ++ /* Maximum 5 bytes per packet to encode a full UINT32 */ ++ p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT += ++ l_nb_bytes += 5 * l_max_packet_count; ++ p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT += 1; ++ l_nb_bytes += p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT; ++ } ++ + /*** DEVELOPER CORNER, Add room for your headers ***/ + + return l_nb_bytes; +@@ -4207,7 +4222,7 @@ static OPJ_BOOL opj_j2k_write_tlm(opj_j2k_t *p_j2k, + + static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k, + OPJ_BYTE * p_data, +- OPJ_UINT32 p_total_data_size, ++ OPJ_UINT32 total_data_size, + OPJ_UINT32 * p_data_written, + const opj_stream_private_t *p_stream, + opj_event_mgr_t * p_manager +@@ -4220,7 +4235,7 @@ static OPJ_BOOL opj_j2k_write_sot(opj_j2k_t *p_j2k, + + OPJ_UNUSED(p_stream); + +- if (p_total_data_size < 12) { ++ if (total_data_size < 12) { + opj_event_msg(p_manager, EVT_ERROR, + "Not enough bytes in output buffer to write SOT marker\n"); + return OPJ_FALSE; +@@ -4613,17 +4628,105 @@ static OPJ_BOOL opj_j2k_read_sot(opj_j2k_t *p_j2k, + return OPJ_TRUE; + } + ++/** ++ * Write one or more PLT markers in the provided buffer ++ */ ++static OPJ_BOOL opj_j2k_write_plt_in_memory(opj_j2k_t *p_j2k, ++ opj_tcd_marker_info_t* marker_info, ++ OPJ_BYTE * p_data, ++ OPJ_UINT32 * p_data_written, ++ opj_event_mgr_t * p_manager) ++{ ++ OPJ_BYTE Zplt = 0; ++ OPJ_UINT16 Lplt; ++ OPJ_BYTE* p_data_start = p_data; ++ OPJ_BYTE* p_data_Lplt = p_data + 2; ++ OPJ_UINT32 i; ++ ++ OPJ_UNUSED(p_j2k); ++ ++ opj_write_bytes(p_data, J2K_MS_PLT, 2); ++ p_data += 2; ++ ++ /* Reserve space for Lplt */ ++ p_data += 2; ++ ++ opj_write_bytes(p_data, Zplt, 1); ++ p_data += 1; ++ ++ Lplt = 3; ++ ++ for (i = 0; i < marker_info->packet_count; i++) { ++ OPJ_BYTE var_bytes[5]; ++ OPJ_UINT8 var_bytes_size = 0; ++ OPJ_UINT32 packet_size = marker_info->p_packet_size[i]; ++ ++ /* Packet size written in variable-length way, starting with LSB */ ++ var_bytes[var_bytes_size] = (OPJ_BYTE)(packet_size & 0x7f); ++ var_bytes_size ++; ++ packet_size >>= 7; ++ while (packet_size > 0) { ++ var_bytes[var_bytes_size] = (OPJ_BYTE)((packet_size & 0x7f) | 0x80); ++ var_bytes_size ++; ++ packet_size >>= 7; ++ } ++ ++ /* Check if that can fit in the current PLT marker. If not, finish */ ++ /* current one, and start a new one */ ++ if (Lplt + var_bytes_size > 65535) { ++ if (Zplt == 255) { ++ opj_event_msg(p_manager, EVT_ERROR, ++ "More than 255 PLT markers would be needed for current tile-part !\n"); ++ return OPJ_FALSE; ++ } ++ ++ /* Patch Lplt */ ++ opj_write_bytes(p_data_Lplt, Lplt, 2); ++ ++ /* Start new segment */ ++ opj_write_bytes(p_data, J2K_MS_PLT, 2); ++ p_data += 2; ++ ++ /* Reserve space for Lplt */ ++ p_data_Lplt = p_data; ++ p_data += 2; ++ ++ Zplt ++; ++ opj_write_bytes(p_data, Zplt, 1); ++ p_data += 1; ++ ++ Lplt = 3; ++ } ++ ++ Lplt = (OPJ_UINT16)(Lplt + var_bytes_size); ++ ++ /* Serialize variable-length packet size, starting with MSB */ ++ for (; var_bytes_size > 0; --var_bytes_size) { ++ opj_write_bytes(p_data, var_bytes[var_bytes_size - 1], 1); ++ p_data += 1; ++ } ++ } ++ ++ *p_data_written = (OPJ_UINT32)(p_data - p_data_start); ++ ++ /* Patch Lplt */ ++ opj_write_bytes(p_data_Lplt, Lplt, 2); ++ ++ return OPJ_TRUE; ++} ++ + static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, + opj_tcd_t * p_tile_coder, + OPJ_BYTE * p_data, + OPJ_UINT32 * p_data_written, +- OPJ_UINT32 p_total_data_size, ++ OPJ_UINT32 total_data_size, + const opj_stream_private_t *p_stream, + opj_event_mgr_t * p_manager + ) + { + opj_codestream_info_t *l_cstr_info = 00; + OPJ_UINT32 l_remaining_data; ++ opj_tcd_marker_info_t* marker_info = NULL; + + /* preconditions */ + assert(p_j2k != 00); +@@ -4632,7 +4735,7 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, + + OPJ_UNUSED(p_stream); + +- if (p_total_data_size < 4) { ++ if (total_data_size < 4) { + opj_event_msg(p_manager, EVT_ERROR, + "Not enough bytes in output buffer to write SOD marker\n"); + return OPJ_FALSE; +@@ -4640,10 +4743,9 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, + + opj_write_bytes(p_data, J2K_MS_SOD, + 2); /* SOD */ +- p_data += 2; + + /* make room for the EOF marker */ +- l_remaining_data = p_total_data_size - 4; ++ l_remaining_data = total_data_size - 4; + + /* update tile coder */ + p_tile_coder->tp_num = +@@ -4690,15 +4792,69 @@ static OPJ_BOOL opj_j2k_write_sod(opj_j2k_t *p_j2k, + + *p_data_written = 0; + +- if (! opj_tcd_encode_tile(p_tile_coder, p_j2k->m_current_tile_number, p_data, ++ if (p_j2k->m_specific_param.m_encoder.m_PLT) { ++ marker_info = opj_tcd_marker_info_create( ++ p_j2k->m_specific_param.m_encoder.m_PLT); ++ if (marker_info == NULL) { ++ opj_event_msg(p_manager, EVT_ERROR, ++ "Cannot encode tile: opj_tcd_marker_info_create() failed\n"); ++ return OPJ_FALSE; ++ } ++ } ++ ++ if (l_remaining_data < ++ p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT) { ++ opj_event_msg(p_manager, EVT_ERROR, ++ "Not enough bytes in output buffer to write SOD marker\n"); ++ opj_tcd_marker_info_destroy(marker_info); ++ return OPJ_FALSE; ++ } ++ l_remaining_data -= p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT; ++ ++ if (! opj_tcd_encode_tile(p_tile_coder, p_j2k->m_current_tile_number, ++ p_data + 2, + p_data_written, l_remaining_data, l_cstr_info, ++ marker_info, + p_manager)) { + opj_event_msg(p_manager, EVT_ERROR, "Cannot encode tile\n"); ++ opj_tcd_marker_info_destroy(marker_info); + return OPJ_FALSE; + } + ++ /* For SOD */ + *p_data_written += 2; + ++ if (p_j2k->m_specific_param.m_encoder.m_PLT) { ++ OPJ_UINT32 l_data_written_PLT = 0; ++ OPJ_BYTE* p_PLT_buffer = (OPJ_BYTE*)opj_malloc( ++ p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT); ++ if (!p_PLT_buffer) { ++ opj_event_msg(p_manager, EVT_ERROR, "Cannot allocate memory\n"); ++ opj_tcd_marker_info_destroy(marker_info); ++ return OPJ_FALSE; ++ } ++ if (!opj_j2k_write_plt_in_memory(p_j2k, ++ marker_info, ++ p_PLT_buffer, ++ &l_data_written_PLT, ++ p_manager)) { ++ opj_tcd_marker_info_destroy(marker_info); ++ opj_free(p_PLT_buffer); ++ return OPJ_FALSE; ++ } ++ ++ assert(l_data_written_PLT <= ++ p_j2k->m_specific_param.m_encoder.m_reserved_bytes_for_PLT); ++ ++ /* Move PLT marker(s) before SOD */ ++ memmove(p_data + l_data_written_PLT, p_data, *p_data_written); ++ memcpy(p_data, p_PLT_buffer, l_data_written_PLT); ++ opj_free(p_PLT_buffer); ++ *p_data_written += l_data_written_PLT; ++ } ++ ++ opj_tcd_marker_info_destroy(marker_info); ++ + return OPJ_TRUE; + } + +@@ -5048,7 +5204,7 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, + OPJ_FLOAT32 * l_rates = 0; + OPJ_FLOAT32 l_sot_remove; + OPJ_UINT32 l_bits_empty, l_size_pixel; +- OPJ_UINT32 l_tile_size = 0; ++ OPJ_UINT64 l_tile_size = 0; + OPJ_UINT32 l_last_res; + OPJ_FLOAT32(* l_tp_stride_func)(opj_tcp_t *) = 00; + +@@ -5092,25 +5248,12 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, + l_rates = l_tcp->rates; + + /* Modification of the RATE >> */ +- if (*l_rates > 0.0f) { +- *l_rates = (((OPJ_FLOAT32)(l_size_pixel * (OPJ_UINT32)(l_x1 - l_x0) * +- (OPJ_UINT32)(l_y1 - l_y0))) +- / +- ((*l_rates) * (OPJ_FLOAT32)l_bits_empty) +- ) +- - +- l_offset; +- } +- +- ++l_rates; +- +- for (k = 1; k < l_tcp->numlayers; ++k) { ++ for (k = 0; k < l_tcp->numlayers; ++k) { + if (*l_rates > 0.0f) { +- *l_rates = (((OPJ_FLOAT32)(l_size_pixel * (OPJ_UINT32)(l_x1 - l_x0) * +- (OPJ_UINT32)(l_y1 - l_y0))) +- / +- ((*l_rates) * (OPJ_FLOAT32)l_bits_empty) +- ) ++ *l_rates = (OPJ_FLOAT32)(((OPJ_FLOAT64)l_size_pixel * (OPJ_UINT32)( ++ l_x1 - l_x0) * ++ (OPJ_UINT32)(l_y1 - l_y0)) ++ / ((*l_rates) * (OPJ_FLOAT32)l_bits_empty)) + - + l_offset; + } +@@ -5170,12 +5313,11 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, + l_tile_size = 0; + + for (i = 0; i < l_image->numcomps; ++i) { +- l_tile_size += (opj_uint_ceildiv(l_cp->tdx, l_img_comp->dx) +- * +- opj_uint_ceildiv(l_cp->tdy, l_img_comp->dy) +- * +- l_img_comp->prec +- ); ++ l_tile_size += (OPJ_UINT64)opj_uint_ceildiv(l_cp->tdx, l_img_comp->dx) ++ * ++ opj_uint_ceildiv(l_cp->tdy, l_img_comp->dy) ++ * ++ l_img_comp->prec; + + ++l_img_comp; + } +@@ -5186,7 +5328,7 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, + /* bin/test_tile_encoder 1 256 256 32 32 8 0 reversible_with_precinct.j2k 4 4 3 0 0 1 16 16 */ + /* TODO revise this to take into account the overhead linked to the */ + /* number of packets and number of code blocks in packets */ +- l_tile_size = (OPJ_UINT32)(l_tile_size * 1.4 / 8); ++ l_tile_size = (OPJ_UINT64)((double)l_tile_size * 1.4 / 8); + + /* Arbitrary amount to make the following work: */ + /* bin/test_tile_encoder 1 256 256 17 16 8 0 reversible_no_precinct.j2k 4 4 3 0 0 1 */ +@@ -5194,14 +5336,21 @@ static OPJ_BOOL opj_j2k_update_rates(opj_j2k_t *p_j2k, + + l_tile_size += opj_j2k_get_specific_header_sizes(p_j2k); + +- p_j2k->m_specific_param.m_encoder.m_encoded_tile_size = l_tile_size; ++ if (l_tile_size > UINT_MAX) { ++ l_tile_size = UINT_MAX; ++ } ++ ++ p_j2k->m_specific_param.m_encoder.m_encoded_tile_size = (OPJ_UINT32)l_tile_size; + p_j2k->m_specific_param.m_encoder.m_encoded_tile_data = + (OPJ_BYTE *) opj_malloc(p_j2k->m_specific_param.m_encoder.m_encoded_tile_size); + if (p_j2k->m_specific_param.m_encoder.m_encoded_tile_data == 00) { ++ opj_event_msg(p_manager, EVT_ERROR, ++ "Not enough memory to allocate m_encoded_tile_data. %u MB required\n", ++ (OPJ_UINT32)(l_tile_size / 1024 / 1024)); + return OPJ_FALSE; + } + +- if (OPJ_IS_CINEMA(l_cp->rsiz)) { ++ if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { + p_j2k->m_specific_param.m_encoder.m_tlm_sot_offsets_buffer = + (OPJ_BYTE *) opj_malloc(5 * + p_j2k->m_specific_param.m_encoder.m_total_tile_parts); +@@ -6633,7 +6782,7 @@ static void opj_j2k_set_cinema_parameters(opj_cparameters_t *parameters, + } + + /* Precincts */ +- parameters->csty |= 0x01; ++ parameters->csty |= J2K_CP_CSTY_PRT; + if (parameters->numresolution == 1) { + parameters->res_spec = 1; + parameters->prcw_init[0] = 128; +@@ -6759,6 +6908,589 @@ static OPJ_BOOL opj_j2k_is_cinema_compliant(opj_image_t *image, OPJ_UINT16 rsiz, + return OPJ_TRUE; + } + ++static int opj_j2k_get_imf_max_NL(opj_cparameters_t *parameters, ++ opj_image_t *image) ++{ ++ /* Decomposition levels */ ++ const OPJ_UINT16 rsiz = parameters->rsiz; ++ const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz); ++ const OPJ_UINT32 XTsiz = parameters->tile_size_on ? (OPJ_UINT32) ++ parameters->cp_tdx : image->x1; ++ switch (profile) { ++ case OPJ_PROFILE_IMF_2K: ++ return 5; ++ case OPJ_PROFILE_IMF_4K: ++ return 6; ++ case OPJ_PROFILE_IMF_8K: ++ return 7; ++ case OPJ_PROFILE_IMF_2K_R: { ++ if (XTsiz >= 2048) { ++ return 5; ++ } else if (XTsiz >= 1024) { ++ return 4; ++ } ++ break; ++ } ++ case OPJ_PROFILE_IMF_4K_R: { ++ if (XTsiz >= 4096) { ++ return 6; ++ } else if (XTsiz >= 2048) { ++ return 5; ++ } else if (XTsiz >= 1024) { ++ return 4; ++ } ++ break; ++ } ++ case OPJ_PROFILE_IMF_8K_R: { ++ if (XTsiz >= 8192) { ++ return 7; ++ } else if (XTsiz >= 4096) { ++ return 6; ++ } else if (XTsiz >= 2048) { ++ return 5; ++ } else if (XTsiz >= 1024) { ++ return 4; ++ } ++ break; ++ } ++ default: ++ break; ++ } ++ return -1; ++} ++ ++static void opj_j2k_set_imf_parameters(opj_cparameters_t *parameters, ++ opj_image_t *image, opj_event_mgr_t *p_manager) ++{ ++ const OPJ_UINT16 rsiz = parameters->rsiz; ++ const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz); ++ ++ OPJ_UNUSED(p_manager); ++ ++ /* Override defaults set by opj_set_default_encoder_parameters */ ++ if (parameters->cblockw_init == OPJ_COMP_PARAM_DEFAULT_CBLOCKW && ++ parameters->cblockh_init == OPJ_COMP_PARAM_DEFAULT_CBLOCKH) { ++ parameters->cblockw_init = 32; ++ parameters->cblockh_init = 32; ++ } ++ ++ /* One tile part for each component */ ++ parameters->tp_flag = 'C'; ++ parameters->tp_on = 1; ++ ++ if (parameters->prog_order == OPJ_COMP_PARAM_DEFAULT_PROG_ORDER) { ++ parameters->prog_order = OPJ_CPRL; ++ } ++ ++ if (profile == OPJ_PROFILE_IMF_2K || ++ profile == OPJ_PROFILE_IMF_4K || ++ profile == OPJ_PROFILE_IMF_8K) { ++ /* 9-7 transform */ ++ parameters->irreversible = 1; ++ } ++ ++ /* Adjust the number of resolutions if set to its defaults */ ++ if (parameters->numresolution == OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION && ++ image->x0 == 0 && ++ image->y0 == 0) { ++ const int max_NL = opj_j2k_get_imf_max_NL(parameters, image); ++ if (max_NL >= 0 && parameters->numresolution > max_NL) { ++ parameters->numresolution = max_NL + 1; ++ } ++ ++ /* Note: below is generic logic */ ++ if (!parameters->tile_size_on) { ++ while (parameters->numresolution > 0) { ++ if (image->x1 < (1U << ((OPJ_UINT32)parameters->numresolution - 1U))) { ++ parameters->numresolution --; ++ continue; ++ } ++ if (image->y1 < (1U << ((OPJ_UINT32)parameters->numresolution - 1U))) { ++ parameters->numresolution --; ++ continue; ++ } ++ break; ++ } ++ } ++ } ++ ++ /* Set defaults precincts */ ++ if (parameters->csty == 0) { ++ parameters->csty |= J2K_CP_CSTY_PRT; ++ if (parameters->numresolution == 1) { ++ parameters->res_spec = 1; ++ parameters->prcw_init[0] = 128; ++ parameters->prch_init[0] = 128; ++ } else { ++ int i; ++ parameters->res_spec = parameters->numresolution - 1; ++ for (i = 0; i < parameters->res_spec; i++) { ++ parameters->prcw_init[i] = 256; ++ parameters->prch_init[i] = 256; ++ } ++ } ++ } ++} ++ ++/* Table A.53 from JPEG2000 standard */ ++static const OPJ_UINT16 tabMaxSubLevelFromMainLevel[] = { ++ 15, /* unspecified */ ++ 1, ++ 1, ++ 1, ++ 2, ++ 3, ++ 4, ++ 5, ++ 6, ++ 7, ++ 8, ++ 9 ++}; ++ ++static OPJ_BOOL opj_j2k_is_imf_compliant(opj_cparameters_t *parameters, ++ opj_image_t *image, ++ opj_event_mgr_t *p_manager) ++{ ++ OPJ_UINT32 i; ++ const OPJ_UINT16 rsiz = parameters->rsiz; ++ const OPJ_UINT16 profile = OPJ_GET_IMF_PROFILE(rsiz); ++ const OPJ_UINT16 mainlevel = OPJ_GET_IMF_MAINLEVEL(rsiz); ++ const OPJ_UINT16 sublevel = OPJ_GET_IMF_SUBLEVEL(rsiz); ++ const int NL = parameters->numresolution - 1; ++ const OPJ_UINT32 XTsiz = parameters->tile_size_on ? (OPJ_UINT32) ++ parameters->cp_tdx : image->x1; ++ OPJ_BOOL ret = OPJ_TRUE; ++ ++ /* Validate mainlevel */ ++ if (mainlevel > OPJ_IMF_MAINLEVEL_MAX) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profile require mainlevel <= 11.\n" ++ "-> %d is thus not compliant\n" ++ "-> Non-IMF codestream will be generated\n", ++ mainlevel); ++ ret = OPJ_FALSE; ++ } ++ ++ /* Validate sublevel */ ++ assert(sizeof(tabMaxSubLevelFromMainLevel) == ++ (OPJ_IMF_MAINLEVEL_MAX + 1) * sizeof(tabMaxSubLevelFromMainLevel[0])); ++ if (sublevel > tabMaxSubLevelFromMainLevel[mainlevel]) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profile require sublevel <= %d for mainlevel = %d.\n" ++ "-> %d is thus not compliant\n" ++ "-> Non-IMF codestream will be generated\n", ++ tabMaxSubLevelFromMainLevel[mainlevel], ++ mainlevel, ++ sublevel); ++ ret = OPJ_FALSE; ++ } ++ ++ /* Number of components */ ++ if (image->numcomps > 3) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profiles require at most 3 components.\n" ++ "-> Number of components of input image (%d) is not compliant\n" ++ "-> Non-IMF codestream will be generated\n", ++ image->numcomps); ++ ret = OPJ_FALSE; ++ } ++ ++ if (image->x0 != 0 || image->y0 != 0) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profiles require image origin to be at 0,0.\n" ++ "-> %d,%d is not compliant\n" ++ "-> Non-IMF codestream will be generated\n", ++ image->x0, image->y0 != 0); ++ ret = OPJ_FALSE; ++ } ++ ++ if (parameters->cp_tx0 != 0 || parameters->cp_ty0 != 0) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profiles require tile origin to be at 0,0.\n" ++ "-> %d,%d is not compliant\n" ++ "-> Non-IMF codestream will be generated\n", ++ parameters->cp_tx0, parameters->cp_ty0); ++ ret = OPJ_FALSE; ++ } ++ ++ if (parameters->tile_size_on) { ++ if (profile == OPJ_PROFILE_IMF_2K || ++ profile == OPJ_PROFILE_IMF_4K || ++ profile == OPJ_PROFILE_IMF_8K) { ++ if ((OPJ_UINT32)parameters->cp_tdx < image->x1 || ++ (OPJ_UINT32)parameters->cp_tdy < image->y1) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 2K/4K/8K single tile profiles require tile to be greater or equal to image size.\n" ++ "-> %d,%d is lesser than %d,%d\n" ++ "-> Non-IMF codestream will be generated\n", ++ parameters->cp_tdx, ++ parameters->cp_tdy, ++ image->x1, ++ image->y1); ++ ret = OPJ_FALSE; ++ } ++ } else { ++ if ((OPJ_UINT32)parameters->cp_tdx >= image->x1 && ++ (OPJ_UINT32)parameters->cp_tdy >= image->y1) { ++ /* ok */ ++ } else if (parameters->cp_tdx == 1024 && ++ parameters->cp_tdy == 1024) { ++ /* ok */ ++ } else if (parameters->cp_tdx == 2048 && ++ parameters->cp_tdy == 2048 && ++ (profile == OPJ_PROFILE_IMF_4K || ++ profile == OPJ_PROFILE_IMF_8K)) { ++ /* ok */ ++ } else if (parameters->cp_tdx == 4096 && ++ parameters->cp_tdy == 4096 && ++ profile == OPJ_PROFILE_IMF_8K) { ++ /* ok */ ++ } else { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 2K_R/4K_R/8K_R single/multiple tile profiles " ++ "require tile to be greater or equal to image size,\n" ++ "or to be (1024,1024), or (2048,2048) for 4K_R/8K_R " ++ "or (4096,4096) for 8K_R.\n" ++ "-> %d,%d is non conformant\n" ++ "-> Non-IMF codestream will be generated\n", ++ parameters->cp_tdx, ++ parameters->cp_tdy); ++ ret = OPJ_FALSE; ++ } ++ } ++ } ++ ++ /* Bitdepth */ ++ for (i = 0; i < image->numcomps; i++) { ++ if (!(image->comps[i].bpp >= 8 && image->comps[i].bpp <= 16) || ++ (image->comps[i].sgnd)) { ++ char signed_str[] = "signed"; ++ char unsigned_str[] = "unsigned"; ++ char *tmp_str = image->comps[i].sgnd ? signed_str : unsigned_str; ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profiles require precision of each component to b in [8-16] bits unsigned" ++ "-> At least component %d of input image (%d bits, %s) is not compliant\n" ++ "-> Non-IMF codestream will be generated\n", ++ i, image->comps[i].bpp, tmp_str); ++ ret = OPJ_FALSE; ++ } ++ } ++ ++ /* Sub-sampling */ ++ for (i = 0; i < image->numcomps; i++) { ++ if (i == 0 && image->comps[i].dx != 1) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profiles require XRSiz1 == 1. Here it is set to %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ image->comps[i].dx); ++ ret = OPJ_FALSE; ++ } ++ if (i == 1 && image->comps[i].dx != 1 && image->comps[i].dx != 2) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profiles require XRSiz2 == 1 or 2. Here it is set to %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ image->comps[i].dx); ++ ret = OPJ_FALSE; ++ } ++ if (i > 1 && image->comps[i].dx != image->comps[i - 1].dx) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profiles require XRSiz%d to be the same as XRSiz2. " ++ "Here it is set to %d instead of %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ i + 1, image->comps[i].dx, image->comps[i - 1].dx); ++ ret = OPJ_FALSE; ++ } ++ if (image->comps[i].dy != 1) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profiles require YRsiz == 1. " ++ "Here it is set to %d for component i.\n" ++ "-> Non-IMF codestream will be generated\n", ++ image->comps[i].dy, i); ++ ret = OPJ_FALSE; ++ } ++ } ++ ++ /* Image size */ ++ switch (profile) { ++ case OPJ_PROFILE_IMF_2K: ++ case OPJ_PROFILE_IMF_2K_R: ++ if (((image->comps[0].w > 2048) | (image->comps[0].h > 1556))) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 2K/2K_R profile require:\n" ++ "width <= 2048 and height <= 1556\n" ++ "-> Input image size %d x %d is not compliant\n" ++ "-> Non-IMF codestream will be generated\n", ++ image->comps[0].w, image->comps[0].h); ++ ret = OPJ_FALSE; ++ } ++ break; ++ case OPJ_PROFILE_IMF_4K: ++ case OPJ_PROFILE_IMF_4K_R: ++ if (((image->comps[0].w > 4096) | (image->comps[0].h > 3112))) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 4K/4K_R profile require:\n" ++ "width <= 4096 and height <= 3112\n" ++ "-> Input image size %d x %d is not compliant\n" ++ "-> Non-IMF codestream will be generated\n", ++ image->comps[0].w, image->comps[0].h); ++ ret = OPJ_FALSE; ++ } ++ break; ++ case OPJ_PROFILE_IMF_8K: ++ case OPJ_PROFILE_IMF_8K_R: ++ if (((image->comps[0].w > 8192) | (image->comps[0].h > 6224))) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 8K/8K_R profile require:\n" ++ "width <= 8192 and height <= 6224\n" ++ "-> Input image size %d x %d is not compliant\n" ++ "-> Non-IMF codestream will be generated\n", ++ image->comps[0].w, image->comps[0].h); ++ ret = OPJ_FALSE; ++ } ++ break; ++ default : ++ assert(0); ++ return OPJ_FALSE; ++ } ++ ++ if (parameters->roi_compno != -1) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profile forbid RGN / region of interest marker.\n" ++ "-> Compression parameters specify a ROI\n" ++ "-> Non-IMF codestream will be generated\n"); ++ ret = OPJ_FALSE; ++ } ++ ++ if (parameters->cblockw_init != 32 || parameters->cblockh_init != 32) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profile require code block size to be 32x32.\n" ++ "-> Compression parameters set it to %dx%d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ parameters->cblockw_init, ++ parameters->cblockh_init); ++ ret = OPJ_FALSE; ++ } ++ ++ if (parameters->prog_order != OPJ_CPRL) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profile require progression order to be CPRL.\n" ++ "-> Compression parameters set it to %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ parameters->prog_order); ++ ret = OPJ_FALSE; ++ } ++ ++ if (parameters->numpocs != 0) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profile forbid POC markers.\n" ++ "-> Compression parameters set %d POC.\n" ++ "-> Non-IMF codestream will be generated\n", ++ parameters->numpocs); ++ ret = OPJ_FALSE; ++ } ++ ++ /* Codeblock style: no mode switch enabled */ ++ if (parameters->mode != 0) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profile forbid mode switch in code block style.\n" ++ "-> Compression parameters set code block style to %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ parameters->mode); ++ ret = OPJ_FALSE; ++ } ++ ++ if (profile == OPJ_PROFILE_IMF_2K || ++ profile == OPJ_PROFILE_IMF_4K || ++ profile == OPJ_PROFILE_IMF_8K) { ++ /* Expect 9-7 transform */ ++ if (parameters->irreversible != 1) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 2K/4K/8K profiles require 9-7 Irreversible Transform.\n" ++ "-> Compression parameters set it to reversible.\n" ++ "-> Non-IMF codestream will be generated\n"); ++ ret = OPJ_FALSE; ++ } ++ } else { ++ /* Expect 5-3 transform */ ++ if (parameters->irreversible != 0) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 2K/4K/8K profiles require 5-3 reversible Transform.\n" ++ "-> Compression parameters set it to irreversible.\n" ++ "-> Non-IMF codestream will be generated\n"); ++ ret = OPJ_FALSE; ++ } ++ } ++ ++ /* Number of layers */ ++ if (parameters->tcp_numlayers != 1) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 2K/4K/8K profiles require 1 single quality layer.\n" ++ "-> Number of layers is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ parameters->tcp_numlayers); ++ ret = OPJ_FALSE; ++ } ++ ++ /* Decomposition levels */ ++ switch (profile) { ++ case OPJ_PROFILE_IMF_2K: ++ if (!(NL >= 1 && NL <= 5)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 2K profile requires 1 <= NL <= 5:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ break; ++ case OPJ_PROFILE_IMF_4K: ++ if (!(NL >= 1 && NL <= 6)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 4K profile requires 1 <= NL <= 6:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ break; ++ case OPJ_PROFILE_IMF_8K: ++ if (!(NL >= 1 && NL <= 7)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 8K profile requires 1 <= NL <= 7:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ break; ++ case OPJ_PROFILE_IMF_2K_R: { ++ if (XTsiz >= 2048) { ++ if (!(NL >= 1 && NL <= 5)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 2K_R profile requires 1 <= NL <= 5 for XTsiz >= 2048:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ } else if (XTsiz >= 1024) { ++ if (!(NL >= 1 && NL <= 4)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 2K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ } ++ break; ++ } ++ case OPJ_PROFILE_IMF_4K_R: { ++ if (XTsiz >= 4096) { ++ if (!(NL >= 1 && NL <= 6)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 4K_R profile requires 1 <= NL <= 6 for XTsiz >= 4096:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ } else if (XTsiz >= 2048) { ++ if (!(NL >= 1 && NL <= 5)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 4K_R profile requires 1 <= NL <= 5 for XTsiz in [2048,4096[:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ } else if (XTsiz >= 1024) { ++ if (!(NL >= 1 && NL <= 4)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 4K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ } ++ break; ++ } ++ case OPJ_PROFILE_IMF_8K_R: { ++ if (XTsiz >= 8192) { ++ if (!(NL >= 1 && NL <= 7)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 4K_R profile requires 1 <= NL <= 7 for XTsiz >= 8192:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ } else if (XTsiz >= 4096) { ++ if (!(NL >= 1 && NL <= 6)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 4K_R profile requires 1 <= NL <= 6 for XTsiz in [4096,8192[:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ } else if (XTsiz >= 2048) { ++ if (!(NL >= 1 && NL <= 5)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 4K_R profile requires 1 <= NL <= 5 for XTsiz in [2048,4096[:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ } else if (XTsiz >= 1024) { ++ if (!(NL >= 1 && NL <= 4)) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF 4K_R profile requires 1 <= NL <= 4 for XTsiz in [1024,2048[:\n" ++ "-> Number of decomposition levels is %d.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ } ++ break; ++ } ++ default: ++ break; ++ } ++ ++ if (parameters->numresolution == 1) { ++ if (parameters->res_spec != 1 || ++ parameters->prcw_init[0] != 128 || ++ parameters->prch_init[0] != 128) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profiles require PPx = PPy = 7 for NLLL band, else 8.\n" ++ "-> Supplied values are different from that.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ } else { ++ int i; ++ for (i = 0; i < parameters->res_spec; i++) { ++ if (parameters->prcw_init[i] != 256 || ++ parameters->prch_init[i] != 256) { ++ opj_event_msg(p_manager, EVT_WARNING, ++ "IMF profiles require PPx = PPy = 7 for NLLL band, else 8.\n" ++ "-> Supplied values are different from that.\n" ++ "-> Non-IMF codestream will be generated\n", ++ NL); ++ ret = OPJ_FALSE; ++ } ++ } ++ } ++ ++ return ret; ++} ++ ++ + OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, + opj_cparameters_t *parameters, + opj_image_t *image, +@@ -6951,6 +7683,15 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, + } else { + OPJ_FLOAT32 temp_rate; + OPJ_BOOL cap = OPJ_FALSE; ++ ++ if (OPJ_IS_IMF(parameters->rsiz) && parameters->max_cs_size > 0 && ++ parameters->tcp_numlayers == 1 && parameters->tcp_rates[0] == 0) { ++ parameters->tcp_rates[0] = (OPJ_FLOAT32)(image->numcomps * image->comps[0].w * ++ image->comps[0].h * image->comps[0].prec) / ++ (OPJ_FLOAT32)(((OPJ_UINT32)parameters->max_cs_size) * 8 * image->comps[0].dx * ++ image->comps[0].dy); ++ } ++ + temp_rate = (OPJ_FLOAT32)(((double)image->numcomps * image->comps[0].w * + image->comps[0].h * image->comps[0].prec) / + (((double)parameters->max_cs_size) * 8 * image->comps[0].dx * +@@ -6991,9 +7732,10 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, + "JPEG 2000 Broadcast profiles not yet supported\n"); + parameters->rsiz = OPJ_PROFILE_NONE; + } else if (OPJ_IS_IMF(parameters->rsiz)) { +- opj_event_msg(p_manager, EVT_WARNING, +- "JPEG 2000 IMF profiles not yet supported\n"); +- parameters->rsiz = OPJ_PROFILE_NONE; ++ opj_j2k_set_imf_parameters(parameters, image, p_manager); ++ if (!opj_j2k_is_imf_compliant(parameters, image, p_manager)) { ++ parameters->rsiz = OPJ_PROFILE_NONE; ++ } + } else if (OPJ_IS_PART2(parameters->rsiz)) { + if (parameters->rsiz == ((OPJ_PROFILE_PART2) | (OPJ_EXTENSION_NONE))) { + opj_event_msg(p_manager, EVT_WARNING, +@@ -7085,6 +7827,14 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, + */ + + if (parameters->tile_size_on) { ++ if (cp->tdx == 0) { ++ opj_event_msg(p_manager, EVT_ERROR, "Invalid tile width\n"); ++ return OPJ_FALSE; ++ } ++ if (cp->tdy == 0) { ++ opj_event_msg(p_manager, EVT_ERROR, "Invalid tile height\n"); ++ return OPJ_FALSE; ++ } + cp->tw = opj_uint_ceildiv(image->x1 - cp->tx0, cp->tdx); + cp->th = opj_uint_ceildiv(image->y1 - cp->ty0, cp->tdy); + } else { +@@ -7161,20 +7911,13 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, + "Not enough memory to allocate tile coding parameters\n"); + return OPJ_FALSE; + } +- if (parameters->numpocs) { +- /* initialisation of POC */ +- opj_j2k_check_poc_val(parameters->POC, parameters->numpocs, +- (OPJ_UINT32)parameters->numresolution, image->numcomps, +- (OPJ_UINT32)parameters->tcp_numlayers, p_manager); +- /* TODO MSD use the return value*/ +- } + + for (tileno = 0; tileno < cp->tw * cp->th; tileno++) { + opj_tcp_t *tcp = &cp->tcps[tileno]; + tcp->numlayers = (OPJ_UINT32)parameters->tcp_numlayers; + + for (j = 0; j < tcp->numlayers; j++) { +- if (OPJ_IS_CINEMA(cp->rsiz)) { ++ if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) { + if (cp->m_specific_param.m_enc.m_fixed_quality) { + tcp->distoratio[j] = parameters->tcp_distoratio[j]; + } +@@ -7201,16 +7944,22 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, + + if (parameters->numpocs) { + /* initialisation of POC */ +- tcp->POC = 1; + for (i = 0; i < parameters->numpocs; i++) { + if (tileno + 1 == parameters->POC[i].tile) { + opj_poc_t *tcp_poc = &tcp->pocs[numpocs_tile]; + ++ if (parameters->POC[numpocs_tile].compno0 >= image->numcomps) { ++ opj_event_msg(p_manager, EVT_ERROR, ++ "Invalid compno0 for POC %d\n", i); ++ return OPJ_FALSE; ++ } ++ + tcp_poc->resno0 = parameters->POC[numpocs_tile].resno0; + tcp_poc->compno0 = parameters->POC[numpocs_tile].compno0; + tcp_poc->layno1 = parameters->POC[numpocs_tile].layno1; + tcp_poc->resno1 = parameters->POC[numpocs_tile].resno1; +- tcp_poc->compno1 = parameters->POC[numpocs_tile].compno1; ++ tcp_poc->compno1 = opj_uint_min(parameters->POC[numpocs_tile].compno1, ++ image->numcomps); + tcp_poc->prg1 = parameters->POC[numpocs_tile].prg1; + tcp_poc->tile = parameters->POC[numpocs_tile].tile; + +@@ -7218,7 +7967,16 @@ OPJ_BOOL opj_j2k_setup_encoder(opj_j2k_t *p_j2k, + } + } + +- tcp->numpocs = numpocs_tile - 1 ; ++ if (numpocs_tile) { ++ ++ /* TODO MSD use the return value*/ ++ opj_j2k_check_poc_val(parameters->POC, tileno, parameters->numpocs, ++ (OPJ_UINT32)parameters->numresolution, image->numcomps, ++ (OPJ_UINT32)parameters->tcp_numlayers, p_manager); ++ ++ tcp->POC = 1; ++ tcp->numpocs = numpocs_tile - 1 ; ++ } + } else { + tcp->numpocs = 0; + } +@@ -7546,6 +8304,8 @@ OPJ_BOOL opj_j2k_read_header(opj_stream_private_t *p_stream, + + /*Allocate and initialize some elements of codestrem index*/ + if (!opj_j2k_allocate_tile_element_cstr_index(p_j2k)) { ++ opj_image_destroy(*p_image); ++ *p_image = NULL; + return OPJ_FALSE; + } + +@@ -8632,6 +9392,7 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, + OPJ_UINT32 l_marker_size; + const opj_dec_memory_marker_handler_t * l_marker_handler = 00; + opj_tcp_t * l_tcp = NULL; ++ const OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.tw * p_j2k->m_cp.th; + + /* preconditions */ + assert(p_stream != 00); +@@ -8807,7 +9568,6 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, + return OPJ_FALSE; + } + if (l_correction_needed) { +- OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.tw * p_j2k->m_cp.th; + OPJ_UINT32 l_tile_no; + + p_j2k->m_specific_param.m_decoder.m_can_decode = 0; +@@ -8822,27 +9582,42 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, + "Non conformant codestream TPsot==TNsot.\n"); + } + } +- if (! p_j2k->m_specific_param.m_decoder.m_can_decode) { +- /* Try to read 2 bytes (the next marker ID) from stream and copy them into the buffer */ +- if (opj_stream_read_data(p_stream, +- p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) { +- opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n"); +- return OPJ_FALSE; +- } +- +- /* Read 2 bytes from buffer as the new marker ID */ +- opj_read_bytes(p_j2k->m_specific_param.m_decoder.m_header_data, +- &l_current_marker, 2); +- } + } else { + /* Indicate we will try to read a new tile-part header*/ + p_j2k->m_specific_param.m_decoder.m_skip_data = 0; + p_j2k->m_specific_param.m_decoder.m_can_decode = 0; + p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_TPHSOT; ++ } + ++ if (! p_j2k->m_specific_param.m_decoder.m_can_decode) { + /* Try to read 2 bytes (the next marker ID) from stream and copy them into the buffer */ + if (opj_stream_read_data(p_stream, + p_j2k->m_specific_param.m_decoder.m_header_data, 2, p_manager) != 2) { ++ ++ /* Deal with likely non conformant SPOT6 files, where the last */ ++ /* row of tiles have TPsot == 0 and TNsot == 0, and missing EOC, */ ++ /* but no other tile-parts were found. */ ++ if (p_j2k->m_current_tile_number + 1 == l_nb_tiles) { ++ OPJ_UINT32 l_tile_no; ++ for (l_tile_no = 0U; l_tile_no < l_nb_tiles; ++l_tile_no) { ++ if (p_j2k->m_cp.tcps[l_tile_no].m_current_tile_part_number == 0 && ++ p_j2k->m_cp.tcps[l_tile_no].m_nb_tile_parts == 0) { ++ break; ++ } ++ } ++ if (l_tile_no < l_nb_tiles) { ++ opj_event_msg(p_manager, EVT_INFO, ++ "Tile %u has TPsot == 0 and TNsot == 0, " ++ "but no other tile-parts were found. " ++ "EOC is also missing.\n", ++ l_tile_no); ++ p_j2k->m_current_tile_number = l_tile_no; ++ l_current_marker = J2K_MS_EOC; ++ p_j2k->m_specific_param.m_decoder.m_state = J2K_STATE_EOC; ++ break; ++ } ++ } ++ + opj_event_msg(p_manager, EVT_ERROR, "Stream too short\n"); + return OPJ_FALSE; + } +@@ -8861,9 +9636,8 @@ OPJ_BOOL opj_j2k_read_tile_header(opj_j2k_t * p_j2k, + } + } + +- /* FIXME DOC ???*/ ++ /* Deal with tiles that have a single tile-part with TPsot == 0 and TNsot == 0 */ + if (! p_j2k->m_specific_param.m_decoder.m_can_decode) { +- OPJ_UINT32 l_nb_tiles = p_j2k->m_cp.th * p_j2k->m_cp.tw; + l_tcp = p_j2k->m_cp.tcps + p_j2k->m_current_tile_number; + + while ((p_j2k->m_current_tile_number < l_nb_tiles) && (l_tcp->m_data == 00)) { +@@ -9245,30 +10019,40 @@ static OPJ_BOOL opj_j2k_update_image_dimensions(opj_image_t* p_image, + + l_img_comp = p_image->comps; + for (it_comp = 0; it_comp < p_image->numcomps; ++it_comp) { ++ OPJ_INT32 l_h, l_w; ++ if (p_image->x0 > (OPJ_UINT32)INT_MAX || ++ p_image->y0 > (OPJ_UINT32)INT_MAX || ++ p_image->x1 > (OPJ_UINT32)INT_MAX || ++ p_image->y1 > (OPJ_UINT32)INT_MAX) { ++ opj_event_msg(p_manager, EVT_ERROR, ++ "Image coordinates above INT_MAX are not supported\n"); ++ return OPJ_FALSE; ++ } ++ + l_img_comp->x0 = opj_uint_ceildiv(p_image->x0, l_img_comp->dx); + l_img_comp->y0 = opj_uint_ceildiv(p_image->y0, l_img_comp->dy); + l_comp_x1 = opj_int_ceildiv((OPJ_INT32)p_image->x1, (OPJ_INT32)l_img_comp->dx); + l_comp_y1 = opj_int_ceildiv((OPJ_INT32)p_image->y1, (OPJ_INT32)l_img_comp->dy); + +- OPJ_INT32 l_1 = opj_int_ceildivpow2(l_comp_x1, (OPJ_INT32)l_img_comp->factor); +- OPJ_INT32 l_2 = opj_int_ceildivpow2((OPJ_INT32)l_img_comp->x0, (OPJ_INT32)l_img_comp->factor); +- if (l_1 < l_2) { ++ l_w = opj_int_ceildivpow2(l_comp_x1, (OPJ_INT32)l_img_comp->factor) ++ - opj_int_ceildivpow2((OPJ_INT32)l_img_comp->x0, (OPJ_INT32)l_img_comp->factor); ++ if (l_w < 0) { + opj_event_msg(p_manager, EVT_ERROR, +- "Size x of the decoded component image is incorrect (comp[%d].w<0).\n", +- it_comp); ++ "Size x of the decoded component image is incorrect (comp[%d].w=%d).\n", ++ it_comp, l_w); + return OPJ_FALSE; + } +- l_img_comp->w = (OPJ_UINT32)(l_1-l_2); ++ l_img_comp->w = (OPJ_UINT32)l_w; + +- l_1 = opj_int_ceildivpow2(l_comp_y1, (OPJ_INT32)l_img_comp->factor); +- l_2 = opj_int_ceildivpow2((OPJ_INT32)l_img_comp->y0, (OPJ_INT32)l_img_comp->factor); +- if (l_1 < l_2) { ++ l_h = opj_int_ceildivpow2(l_comp_y1, (OPJ_INT32)l_img_comp->factor) ++ - opj_int_ceildivpow2((OPJ_INT32)l_img_comp->y0, (OPJ_INT32)l_img_comp->factor); ++ if (l_h < 0) { + opj_event_msg(p_manager, EVT_ERROR, +- "Size y of the decoded component image is incorrect (comp[%d].h<0).\n", +- it_comp); ++ "Size y of the decoded component image is incorrect (comp[%d].h=%d).\n", ++ it_comp, l_h); + return OPJ_FALSE; + } +- l_img_comp->h = (OPJ_UINT32)(l_1-l_2); ++ l_img_comp->h = (OPJ_UINT32)l_h; + + l_img_comp++; + } +@@ -9764,9 +10548,9 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, + return OPJ_FALSE; + } + +- opj_read_bytes(l_current_ptr, &l_tccp->numresolutions, +- 1); /* SPcox (D) */ +- ++l_tccp->numresolutions; /* tccp->numresolutions = read() + 1 */ ++ /* SPcod (D) / SPcoc (A) */ ++ opj_read_bytes(l_current_ptr, &l_tccp->numresolutions, 1); ++ ++l_tccp->numresolutions; /* tccp->numresolutions = read() + 1 */ + if (l_tccp->numresolutions > OPJ_J2K_MAXRLVLS) { + opj_event_msg(p_manager, EVT_ERROR, + "Invalid value for numresolutions : %d, max value is set in openjpeg.h at %d\n", +@@ -9787,11 +10571,13 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, + return OPJ_FALSE; + } + +- opj_read_bytes(l_current_ptr, &l_tccp->cblkw, 1); /* SPcoc (E) */ ++ /* SPcod (E) / SPcoc (B) */ ++ opj_read_bytes(l_current_ptr, &l_tccp->cblkw, 1); + ++l_current_ptr; + l_tccp->cblkw += 2; + +- opj_read_bytes(l_current_ptr, &l_tccp->cblkh, 1); /* SPcoc (F) */ ++ /* SPcod (F) / SPcoc (C) */ ++ opj_read_bytes(l_current_ptr, &l_tccp->cblkh, 1); + ++l_current_ptr; + l_tccp->cblkh += 2; + +@@ -9802,8 +10588,8 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, + return OPJ_FALSE; + } + +- +- opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1); /* SPcoc (G) */ ++ /* SPcod (G) / SPcoc (D) */ ++ opj_read_bytes(l_current_ptr, &l_tccp->cblksty, 1); + ++l_current_ptr; + if (l_tccp->cblksty & 0xC0U) { /* 2 msb are reserved, assume we can't read */ + opj_event_msg(p_manager, EVT_ERROR, +@@ -9811,7 +10597,8 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, + return OPJ_FALSE; + } + +- opj_read_bytes(l_current_ptr, &l_tccp->qmfbid, 1); /* SPcoc (H) */ ++ /* SPcod (H) / SPcoc (E) */ ++ opj_read_bytes(l_current_ptr, &l_tccp->qmfbid, 1); + ++l_current_ptr; + + if (l_tccp->qmfbid > 1) { +@@ -9829,8 +10616,9 @@ static OPJ_BOOL opj_j2k_read_SPCod_SPCoc(opj_j2k_t *p_j2k, + return OPJ_FALSE; + } + ++ /* SPcod (I_i) / SPcoc (F_i) */ + for (i = 0; i < l_tccp->numresolutions; ++i) { +- opj_read_bytes(l_current_ptr, &l_tmp, 1); /* SPcoc (I_i) */ ++ opj_read_bytes(l_current_ptr, &l_tmp, 1); + ++l_current_ptr; + /* Precinct exponent 0 is only allowed for lowest resolution level (Table A.21) */ + if ((i != 0) && (((l_tmp & 0xf) == 0) || ((l_tmp >> 4) == 0))) { +@@ -10675,6 +11463,42 @@ static OPJ_BOOL opj_j2k_allocate_tile_element_cstr_index(opj_j2k_t *p_j2k) + return OPJ_TRUE; + } + ++static OPJ_BOOL opj_j2k_are_all_used_components_decoded(opj_j2k_t *p_j2k, ++ opj_event_mgr_t * p_manager) ++{ ++ OPJ_UINT32 compno; ++ OPJ_BOOL decoded_all_used_components = OPJ_TRUE; ++ ++ if (p_j2k->m_specific_param.m_decoder.m_numcomps_to_decode) { ++ for (compno = 0; ++ compno < p_j2k->m_specific_param.m_decoder.m_numcomps_to_decode; compno++) { ++ OPJ_UINT32 dec_compno = ++ p_j2k->m_specific_param.m_decoder.m_comps_indices_to_decode[compno]; ++ if (p_j2k->m_output_image->comps[dec_compno].data == NULL) { ++ opj_event_msg(p_manager, EVT_WARNING, "Failed to decode component %d\n", ++ dec_compno); ++ decoded_all_used_components = OPJ_FALSE; ++ } ++ } ++ } else { ++ for (compno = 0; compno < p_j2k->m_output_image->numcomps; compno++) { ++ if (p_j2k->m_output_image->comps[compno].data == NULL) { ++ opj_event_msg(p_manager, EVT_WARNING, "Failed to decode component %d\n", ++ compno); ++ decoded_all_used_components = OPJ_FALSE; ++ } ++ } ++ } ++ ++ if (decoded_all_used_components == OPJ_FALSE) { ++ opj_event_msg(p_manager, EVT_ERROR, "Failed to decode all used components\n"); ++ return OPJ_FALSE; ++ } ++ ++ return OPJ_TRUE; ++} ++ ++ + static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, + opj_stream_private_t *p_stream, + opj_event_mgr_t * p_manager) +@@ -10786,6 +11610,10 @@ static OPJ_BOOL opj_j2k_decode_tiles(opj_j2k_t *p_j2k, + } + } + ++ if (! opj_j2k_are_all_used_components_decoded(p_j2k, p_manager)) { ++ return OPJ_FALSE; ++ } ++ + return OPJ_TRUE; + } + +@@ -10914,6 +11742,10 @@ static OPJ_BOOL opj_j2k_decode_one_tile(opj_j2k_t *p_j2k, + + } + ++ if (! opj_j2k_are_all_used_components_decoded(p_j2k, p_manager)) { ++ return OPJ_FALSE; ++ } ++ + return OPJ_TRUE; + } + +@@ -11198,6 +12030,42 @@ OPJ_BOOL opj_j2k_set_decoded_resolution_factor(opj_j2k_t *p_j2k, + return OPJ_FALSE; + } + ++/* ----------------------------------------------------------------------- */ ++ ++OPJ_BOOL opj_j2k_encoder_set_extra_options( ++ opj_j2k_t *p_j2k, ++ const char* const* p_options, ++ opj_event_mgr_t * p_manager) ++{ ++ const char* const* p_option_iter; ++ ++ if (p_options == NULL) { ++ return OPJ_TRUE; ++ } ++ ++ for (p_option_iter = p_options; *p_option_iter != NULL; ++p_option_iter) { ++ if (strncmp(*p_option_iter, "PLT=", 4) == 0) { ++ if (strcmp(*p_option_iter, "PLT=YES") == 0) { ++ p_j2k->m_specific_param.m_encoder.m_PLT = OPJ_TRUE; ++ } else if (strcmp(*p_option_iter, "PLT=NO") == 0) { ++ p_j2k->m_specific_param.m_encoder.m_PLT = OPJ_FALSE; ++ } else { ++ opj_event_msg(p_manager, EVT_ERROR, ++ "Invalid value for option: %s.\n", *p_option_iter); ++ return OPJ_FALSE; ++ } ++ } else { ++ opj_event_msg(p_manager, EVT_ERROR, ++ "Invalid option: %s.\n", *p_option_iter); ++ return OPJ_FALSE; ++ } ++ } ++ ++ return OPJ_TRUE; ++} ++ ++/* ----------------------------------------------------------------------- */ ++ + OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k, + opj_stream_private_t *p_stream, + opj_event_mgr_t * p_manager) +@@ -11255,7 +12123,7 @@ OPJ_BOOL opj_j2k_encode(opj_j2k_t * p_j2k, + } + } + } +- l_current_tile_size = opj_tcd_get_encoded_tile_size(p_j2k->m_tcd); ++ l_current_tile_size = opj_tcd_get_encoder_input_buffer_size(p_j2k->m_tcd); + if (!l_reuse_data) { + if (l_current_tile_size > l_max_tile_size) { + OPJ_BYTE *l_new_current_data = (OPJ_BYTE *) opj_realloc(l_current_data, +@@ -11581,7 +12449,7 @@ static OPJ_BOOL opj_j2k_setup_end_compress(opj_j2k_t *p_j2k, + return OPJ_FALSE; + } + +- if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz)) { ++ if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz) || OPJ_IS_IMF(p_j2k->m_cp.rsiz)) { + if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list, + (opj_procedure)opj_j2k_write_updated_tlm, p_manager)) { + return OPJ_FALSE; +@@ -11664,7 +12532,7 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, + return OPJ_FALSE; + } + +- if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz)) { ++ if (OPJ_IS_CINEMA(p_j2k->m_cp.rsiz) || OPJ_IS_IMF(p_j2k->m_cp.rsiz)) { + if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list, + (opj_procedure)opj_j2k_write_tlm, p_manager)) { + return OPJ_FALSE; +@@ -11691,7 +12559,8 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, + } + + /* DEVELOPER CORNER, insert your custom procedures */ +- if (p_j2k->m_cp.rsiz & OPJ_EXTENSION_MCT) { ++ if ((p_j2k->m_cp.rsiz & (OPJ_PROFILE_PART2 | OPJ_EXTENSION_MCT)) == ++ (OPJ_PROFILE_PART2 | OPJ_EXTENSION_MCT)) { + if (! opj_procedure_list_add_procedure(p_j2k->m_procedure_list, + (opj_procedure)opj_j2k_write_mct_data_group, p_manager)) { + return OPJ_FALSE; +@@ -11721,7 +12590,7 @@ static OPJ_BOOL opj_j2k_setup_header_writing(opj_j2k_t *p_j2k, + static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, + OPJ_BYTE * p_data, + OPJ_UINT32 * p_data_written, +- OPJ_UINT32 p_total_data_size, ++ OPJ_UINT32 total_data_size, + opj_stream_private_t *p_stream, + struct opj_event_mgr * p_manager) + { +@@ -11745,7 +12614,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, + + l_current_nb_bytes_written = 0; + l_begin_data = p_data; +- if (! opj_j2k_write_sot(p_j2k, p_data, p_total_data_size, ++ if (! opj_j2k_write_sot(p_j2k, p_data, total_data_size, + &l_current_nb_bytes_written, p_stream, + p_manager)) { + return OPJ_FALSE; +@@ -11753,7 +12622,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, + + l_nb_bytes_written += l_current_nb_bytes_written; + p_data += l_current_nb_bytes_written; +- p_total_data_size -= l_current_nb_bytes_written; ++ total_data_size -= l_current_nb_bytes_written; + + if (!OPJ_IS_CINEMA(l_cp->rsiz)) { + #if 0 +@@ -11763,29 +12632,29 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, + p_manager); + l_nb_bytes_written += l_current_nb_bytes_written; + p_data += l_current_nb_bytes_written; +- p_total_data_size -= l_current_nb_bytes_written; ++ total_data_size -= l_current_nb_bytes_written; + + l_current_nb_bytes_written = 0; + opj_j2k_write_qcc_in_memory(p_j2k, compno, p_data, &l_current_nb_bytes_written, + p_manager); + l_nb_bytes_written += l_current_nb_bytes_written; + p_data += l_current_nb_bytes_written; +- p_total_data_size -= l_current_nb_bytes_written; ++ total_data_size -= l_current_nb_bytes_written; + } + #endif +- if (l_cp->tcps[p_j2k->m_current_tile_number].numpocs) { ++ if (l_cp->tcps[p_j2k->m_current_tile_number].POC) { + l_current_nb_bytes_written = 0; + opj_j2k_write_poc_in_memory(p_j2k, p_data, &l_current_nb_bytes_written, + p_manager); + l_nb_bytes_written += l_current_nb_bytes_written; + p_data += l_current_nb_bytes_written; +- p_total_data_size -= l_current_nb_bytes_written; ++ total_data_size -= l_current_nb_bytes_written; + } + } + + l_current_nb_bytes_written = 0; + if (! opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written, +- p_total_data_size, p_stream, p_manager)) { ++ total_data_size, p_stream, p_manager)) { + return OPJ_FALSE; + } + +@@ -11796,7 +12665,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, + opj_write_bytes(l_begin_data + 6, l_nb_bytes_written, + 4); /* PSOT */ + +- if (OPJ_IS_CINEMA(l_cp->rsiz)) { ++ if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { + opj_j2k_update_tlm(p_j2k, l_nb_bytes_written); + } + +@@ -11806,7 +12675,7 @@ static OPJ_BOOL opj_j2k_write_first_tile_part(opj_j2k_t *p_j2k, + static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, + OPJ_BYTE * p_data, + OPJ_UINT32 * p_data_written, +- OPJ_UINT32 p_total_data_size, ++ OPJ_UINT32 total_data_size, + opj_stream_private_t *p_stream, + struct opj_event_mgr * p_manager + ) +@@ -11839,7 +12708,7 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, + l_begin_data = p_data; + + if (! opj_j2k_write_sot(p_j2k, p_data, +- p_total_data_size, ++ total_data_size, + &l_current_nb_bytes_written, + p_stream, + p_manager)) { +@@ -11848,25 +12717,25 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, + + l_nb_bytes_written += l_current_nb_bytes_written; + p_data += l_current_nb_bytes_written; +- p_total_data_size -= l_current_nb_bytes_written; ++ total_data_size -= l_current_nb_bytes_written; + l_part_tile_size += l_current_nb_bytes_written; + + l_current_nb_bytes_written = 0; + if (! opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written, +- p_total_data_size, p_stream, p_manager)) { ++ total_data_size, p_stream, p_manager)) { + return OPJ_FALSE; + } + + p_data += l_current_nb_bytes_written; + l_nb_bytes_written += l_current_nb_bytes_written; +- p_total_data_size -= l_current_nb_bytes_written; ++ total_data_size -= l_current_nb_bytes_written; + l_part_tile_size += l_current_nb_bytes_written; + + /* Writing Psot in SOT marker */ + opj_write_bytes(l_begin_data + 6, l_part_tile_size, + 4); /* PSOT */ + +- if (OPJ_IS_CINEMA(l_cp->rsiz)) { ++ if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { + opj_j2k_update_tlm(p_j2k, l_part_tile_size); + } + +@@ -11885,7 +12754,7 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, + l_begin_data = p_data; + + if (! opj_j2k_write_sot(p_j2k, p_data, +- p_total_data_size, ++ total_data_size, + &l_current_nb_bytes_written, p_stream, + p_manager)) { + return OPJ_FALSE; +@@ -11893,26 +12762,26 @@ static OPJ_BOOL opj_j2k_write_all_tile_parts(opj_j2k_t *p_j2k, + + l_nb_bytes_written += l_current_nb_bytes_written; + p_data += l_current_nb_bytes_written; +- p_total_data_size -= l_current_nb_bytes_written; ++ total_data_size -= l_current_nb_bytes_written; + l_part_tile_size += l_current_nb_bytes_written; + + l_current_nb_bytes_written = 0; + + if (! opj_j2k_write_sod(p_j2k, l_tcd, p_data, &l_current_nb_bytes_written, +- p_total_data_size, p_stream, p_manager)) { ++ total_data_size, p_stream, p_manager)) { + return OPJ_FALSE; + } + + l_nb_bytes_written += l_current_nb_bytes_written; + p_data += l_current_nb_bytes_written; +- p_total_data_size -= l_current_nb_bytes_written; ++ total_data_size -= l_current_nb_bytes_written; + l_part_tile_size += l_current_nb_bytes_written; + + /* Writing Psot in SOT marker */ + opj_write_bytes(l_begin_data + 6, l_part_tile_size, + 4); /* PSOT */ + +- if (OPJ_IS_CINEMA(l_cp->rsiz)) { ++ if (OPJ_IS_CINEMA(l_cp->rsiz) || OPJ_IS_IMF(l_cp->rsiz)) { + opj_j2k_update_tlm(p_j2k, l_part_tile_size); + } + +diff --git a/third_party/libopenjpeg20/j2k.h b/third_party/libopenjpeg20/j2k.h +index 5d393c98130f27af8ee128175e504e580e4e4e13..9eb50b50da6977500a95d7a64d20b675c1754b50 100644 +--- a/third_party/libopenjpeg20/j2k.h ++++ b/third_party/libopenjpeg20/j2k.h +@@ -531,8 +531,14 @@ typedef struct opj_j2k_enc { + OPJ_BYTE * m_header_tile_data; + + /* size of the encoded_data */ ++ + OPJ_UINT32 m_header_tile_data_size; + ++ /* whether to generate PLT markers */ ++ OPJ_BOOL m_PLT; ++ ++ /* reserved bytes in m_encoded_tile_size for PLT markers */ ++ OPJ_UINT32 m_reserved_bytes_for_PLT; + + } opj_j2k_enc_t; + +@@ -577,15 +583,16 @@ typedef struct opj_j2k { + /** the current tile coder/decoder **/ + struct opj_tcd * m_tcd; + +- /** Number of threads to use */ +- int m_num_threads; +- + /** Thread pool */ + opj_thread_pool_t* m_tp; + ++ /** Image width coming from JP2 IHDR box. 0 from a pure codestream */ + OPJ_UINT32 ihdr_w; ++ ++ /** Image height coming from JP2 IHDR box. 0 from a pure codestream */ + OPJ_UINT32 ihdr_h; +- OPJ_UINT32 enumcs; ++ ++ /** Set to 1 by the decoder initialization if OPJ_DPARAMETERS_DUMP_FLAG is set */ + unsigned int dump_state; + } + opj_j2k_t; +@@ -827,6 +834,19 @@ OPJ_BOOL opj_j2k_set_decoded_resolution_factor(opj_j2k_t *p_j2k, + OPJ_UINT32 res_factor, + opj_event_mgr_t * p_manager); + ++/** ++ * Specify extra options for the encoder. ++ * ++ * @param p_j2k the jpeg2000 codec. ++ * @param p_options options ++ * @param p_manager the user event manager ++ * ++ * @see opj_encoder_set_extra_options() for more details. ++ */ ++OPJ_BOOL opj_j2k_encoder_set_extra_options( ++ opj_j2k_t *p_j2k, ++ const char* const* p_options, ++ opj_event_mgr_t * p_manager); + + /** + * Writes a tile. +diff --git a/third_party/libopenjpeg20/jp2.c b/third_party/libopenjpeg20/jp2.c +index 1f61a23e6bc4ecc06cbff0666efc1ac476bbf87e..02f3d04c747833283f476912c7dfdc3b13b6bfa3 100644 +--- a/third_party/libopenjpeg20/jp2.c ++++ b/third_party/libopenjpeg20/jp2.c +@@ -586,6 +586,12 @@ static OPJ_BOOL opj_jp2_read_ihdr(opj_jp2_t *jp2, + opj_read_bytes(p_image_header_data, &(jp2->numcomps), 2); /* NC */ + p_image_header_data += 2; + ++ if (jp2->h < 1 || jp2->w < 1 || jp2->numcomps < 1) { ++ opj_event_msg(p_manager, EVT_ERROR, ++ "Wrong values for: w(%d) h(%d) numcomps(%d) (ihdr)\n", ++ jp2->w, jp2->h, jp2->numcomps); ++ return OPJ_FALSE; ++ } + if ((jp2->numcomps - 1U) >= + 16384U) { /* unsigned underflow is well defined: 1U <= jp2->numcomps <= 16384U */ + opj_event_msg(p_manager, EVT_ERROR, "Invalid number of components (ihdr)\n"); +@@ -1317,7 +1323,7 @@ static OPJ_BOOL opj_jp2_read_cmap(opj_jp2_t * jp2, + + + for (i = 0; i < nr_channels; ++i) { +- opj_read_bytes_BE(p_cmap_header_data, &l_value, 2); /* CMP^i */ ++ opj_read_bytes_BE(p_cmap_header_data, &l_value, 2); /* CMP^i */ + p_cmap_header_data += 2; + cmap[i].cmp = (OPJ_UINT16) l_value; + +@@ -1599,9 +1605,7 @@ static OPJ_BOOL opj_jp2_read_colr(opj_jp2_t *jp2, + "COLR BOX meth value is not a regular value (%d), " + "so we will ignore the entire Colour Specification box. \n", jp2->meth); + } +- if (jp2->color.jp2_has_colr) { +- jp2->j2k->enumcs = jp2->enumcs; +- } ++ + return OPJ_TRUE; + } + +@@ -3252,6 +3256,18 @@ OPJ_BOOL opj_jp2_set_decoded_resolution_factor(opj_jp2_t *p_jp2, + return opj_j2k_set_decoded_resolution_factor(p_jp2->j2k, res_factor, p_manager); + } + ++/* ----------------------------------------------------------------------- */ ++ ++OPJ_BOOL opj_jp2_encoder_set_extra_options( ++ opj_jp2_t *p_jp2, ++ const char* const* p_options, ++ opj_event_mgr_t * p_manager) ++{ ++ return opj_j2k_encoder_set_extra_options(p_jp2->j2k, p_options, p_manager); ++} ++ ++/* ----------------------------------------------------------------------- */ ++ + /* JPIP specific */ + + #ifdef USE_JPIP +diff --git a/third_party/libopenjpeg20/jp2.h b/third_party/libopenjpeg20/jp2.h +index 34abd5118e3740a02c5692ee92aba3f1a7004431..9e7fa56674cd45d8133518b2b6ebffb0e0a7b348 100644 +--- a/third_party/libopenjpeg20/jp2.h ++++ b/third_party/libopenjpeg20/jp2.h +@@ -459,6 +459,20 @@ OPJ_BOOL opj_jp2_set_decoded_resolution_factor(opj_jp2_t *p_jp2, + OPJ_UINT32 res_factor, + opj_event_mgr_t * p_manager); + ++/** ++ * Specify extra options for the encoder. ++ * ++ * @param p_jp2 the jpeg2000 codec. ++ * @param p_options options ++ * @param p_manager the user event manager ++ * ++ * @see opj_encoder_set_extra_options() for more details. ++ */ ++OPJ_BOOL opj_jp2_encoder_set_extra_options( ++ opj_jp2_t *p_jp2, ++ const char* const* p_options, ++ opj_event_mgr_t * p_manager); ++ + + /* TODO MSD: clean these 3 functions */ + /** +diff --git a/third_party/libopenjpeg20/mct.c b/third_party/libopenjpeg20/mct.c +index 81ec223d85a755807ddbba281c3dfc4e14a21a78..c4c2e732e6897d46998e5a666b62df1d49f19445 100644 +--- a/third_party/libopenjpeg20/mct.c ++++ b/third_party/libopenjpeg20/mct.c +@@ -46,7 +46,6 @@ + #include + #endif + #if defined(__SSE4_1__) && !defined(_M_IX86) && !defined(__i386) +-#define USE_SSE4 + #include + #endif + +@@ -186,7 +185,7 @@ void opj_mct_decode( + OPJ_INT32* OPJ_RESTRICT c2, + OPJ_SIZE_T n) + { +- OPJ_UINT32 i; ++ OPJ_SIZE_T i; + for (i = 0; i < n; ++i) { + OPJ_INT32 y = c0[i]; + OPJ_INT32 u = c1[i]; +@@ -212,175 +211,72 @@ OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) + /* */ + /* Forward irreversible MCT. */ + /* */ +-#ifdef USE_SSE4 + void opj_mct_encode_real( +- OPJ_INT32* OPJ_RESTRICT c0, +- OPJ_INT32* OPJ_RESTRICT c1, +- OPJ_INT32* OPJ_RESTRICT c2, ++ OPJ_FLOAT32* OPJ_RESTRICT c0, ++ OPJ_FLOAT32* OPJ_RESTRICT c1, ++ OPJ_FLOAT32* OPJ_RESTRICT c2, + OPJ_SIZE_T n) + { + OPJ_SIZE_T i; +- const OPJ_SIZE_T len = n; +- +- const __m128i ry = _mm_set1_epi32(2449); +- const __m128i gy = _mm_set1_epi32(4809); +- const __m128i by = _mm_set1_epi32(934); +- const __m128i ru = _mm_set1_epi32(1382); +- const __m128i gu = _mm_set1_epi32(2714); +- /* const __m128i bu = _mm_set1_epi32(4096); */ +- /* const __m128i rv = _mm_set1_epi32(4096); */ +- const __m128i gv = _mm_set1_epi32(3430); +- const __m128i bv = _mm_set1_epi32(666); +- const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096), +- _MM_SHUFFLE(1, 0, 1, 0)); +- +- for (i = 0; i < (len & ~3U); i += 4) { +- __m128i lo, hi; +- __m128i y, u, v; +- __m128i r = _mm_load_si128((const __m128i *) & (c0[i])); +- __m128i g = _mm_load_si128((const __m128i *) & (c1[i])); +- __m128i b = _mm_load_si128((const __m128i *) & (c2[i])); +- +- lo = r; +- hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); +- lo = _mm_mul_epi32(lo, ry); +- hi = _mm_mul_epi32(hi, ry); +- lo = _mm_add_epi64(lo, mulround); +- hi = _mm_add_epi64(hi, mulround); +- lo = _mm_srli_epi64(lo, 13); +- hi = _mm_slli_epi64(hi, 32 - 13); +- y = _mm_blend_epi16(lo, hi, 0xCC); +- +- lo = g; +- hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); +- lo = _mm_mul_epi32(lo, gy); +- hi = _mm_mul_epi32(hi, gy); +- lo = _mm_add_epi64(lo, mulround); +- hi = _mm_add_epi64(hi, mulround); +- lo = _mm_srli_epi64(lo, 13); +- hi = _mm_slli_epi64(hi, 32 - 13); +- y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC)); +- +- lo = b; +- hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); +- lo = _mm_mul_epi32(lo, by); +- hi = _mm_mul_epi32(hi, by); +- lo = _mm_add_epi64(lo, mulround); +- hi = _mm_add_epi64(hi, mulround); +- lo = _mm_srli_epi64(lo, 13); +- hi = _mm_slli_epi64(hi, 32 - 13); +- y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC)); +- _mm_store_si128((__m128i *) & (c0[i]), y); +- +- /*lo = b; +- hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); +- lo = _mm_mul_epi32(lo, mulround); +- hi = _mm_mul_epi32(hi, mulround);*/ +- lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0))); +- hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1))); +- lo = _mm_slli_epi64(lo, 12); +- hi = _mm_slli_epi64(hi, 12); +- lo = _mm_add_epi64(lo, mulround); +- hi = _mm_add_epi64(hi, mulround); +- lo = _mm_srli_epi64(lo, 13); +- hi = _mm_slli_epi64(hi, 32 - 13); +- u = _mm_blend_epi16(lo, hi, 0xCC); +- +- lo = r; +- hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); +- lo = _mm_mul_epi32(lo, ru); +- hi = _mm_mul_epi32(hi, ru); +- lo = _mm_add_epi64(lo, mulround); +- hi = _mm_add_epi64(hi, mulround); +- lo = _mm_srli_epi64(lo, 13); +- hi = _mm_slli_epi64(hi, 32 - 13); +- u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC)); +- +- lo = g; +- hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); +- lo = _mm_mul_epi32(lo, gu); +- hi = _mm_mul_epi32(hi, gu); +- lo = _mm_add_epi64(lo, mulround); +- hi = _mm_add_epi64(hi, mulround); +- lo = _mm_srli_epi64(lo, 13); +- hi = _mm_slli_epi64(hi, 32 - 13); +- u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC)); +- _mm_store_si128((__m128i *) & (c1[i]), u); ++#ifdef USE_SSE ++ const __m128 YR = _mm_set1_ps(0.299f); ++ const __m128 YG = _mm_set1_ps(0.587f); ++ const __m128 YB = _mm_set1_ps(0.114f); ++ const __m128 UR = _mm_set1_ps(-0.16875f); ++ const __m128 UG = _mm_set1_ps(-0.331260f); ++ const __m128 UB = _mm_set1_ps(0.5f); ++ const __m128 VR = _mm_set1_ps(0.5f); ++ const __m128 VG = _mm_set1_ps(-0.41869f); ++ const __m128 VB = _mm_set1_ps(-0.08131f); ++ for (i = 0; i < (n >> 3); i ++) { ++ __m128 r, g, b, y, u, v; ++ ++ r = _mm_load_ps(c0); ++ g = _mm_load_ps(c1); ++ b = _mm_load_ps(c2); ++ y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, YR), _mm_mul_ps(g, YG)), ++ _mm_mul_ps(b, YB)); ++ u = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, UR), _mm_mul_ps(g, UG)), ++ _mm_mul_ps(b, UB)); ++ v = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, VR), _mm_mul_ps(g, VG)), ++ _mm_mul_ps(b, VB)); ++ _mm_store_ps(c0, y); ++ _mm_store_ps(c1, u); ++ _mm_store_ps(c2, v); ++ c0 += 4; ++ c1 += 4; ++ c2 += 4; + +- /*lo = r; +- hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); +- lo = _mm_mul_epi32(lo, mulround); +- hi = _mm_mul_epi32(hi, mulround);*/ +- lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0))); +- hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1))); +- lo = _mm_slli_epi64(lo, 12); +- hi = _mm_slli_epi64(hi, 12); +- lo = _mm_add_epi64(lo, mulround); +- hi = _mm_add_epi64(hi, mulround); +- lo = _mm_srli_epi64(lo, 13); +- hi = _mm_slli_epi64(hi, 32 - 13); +- v = _mm_blend_epi16(lo, hi, 0xCC); +- +- lo = g; +- hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); +- lo = _mm_mul_epi32(lo, gv); +- hi = _mm_mul_epi32(hi, gv); +- lo = _mm_add_epi64(lo, mulround); +- hi = _mm_add_epi64(hi, mulround); +- lo = _mm_srli_epi64(lo, 13); +- hi = _mm_slli_epi64(hi, 32 - 13); +- v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC)); +- +- lo = b; +- hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); +- lo = _mm_mul_epi32(lo, bv); +- hi = _mm_mul_epi32(hi, bv); +- lo = _mm_add_epi64(lo, mulround); +- hi = _mm_add_epi64(hi, mulround); +- lo = _mm_srli_epi64(lo, 13); +- hi = _mm_slli_epi64(hi, 32 - 13); +- v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC)); +- _mm_store_si128((__m128i *) & (c2[i]), v); +- } +- for (; i < len; ++i) { +- OPJ_INT32 r = c0[i]; +- OPJ_INT32 g = c1[i]; +- OPJ_INT32 b = c2[i]; +- OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, +- 4809) + opj_int_fix_mul(b, 934); +- OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, +- 2714) + opj_int_fix_mul(b, 4096); +- OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, +- 3430) - opj_int_fix_mul(b, 666); +- c0[i] = y; +- c1[i] = u; +- c2[i] = v; ++ r = _mm_load_ps(c0); ++ g = _mm_load_ps(c1); ++ b = _mm_load_ps(c2); ++ y = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, YR), _mm_mul_ps(g, YG)), ++ _mm_mul_ps(b, YB)); ++ u = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, UR), _mm_mul_ps(g, UG)), ++ _mm_mul_ps(b, UB)); ++ v = _mm_add_ps(_mm_add_ps(_mm_mul_ps(r, VR), _mm_mul_ps(g, VG)), ++ _mm_mul_ps(b, VB)); ++ _mm_store_ps(c0, y); ++ _mm_store_ps(c1, u); ++ _mm_store_ps(c2, v); ++ c0 += 4; ++ c1 += 4; ++ c2 += 4; + } +-} +-#else +-void opj_mct_encode_real( +- OPJ_INT32* OPJ_RESTRICT c0, +- OPJ_INT32* OPJ_RESTRICT c1, +- OPJ_INT32* OPJ_RESTRICT c2, +- OPJ_SIZE_T n) +-{ +- OPJ_UINT32 i; ++ n &= 7; ++#endif + for (i = 0; i < n; ++i) { +- OPJ_INT32 r = c0[i]; +- OPJ_INT32 g = c1[i]; +- OPJ_INT32 b = c2[i]; +- OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, +- 4809) + opj_int_fix_mul(b, 934); +- OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, +- 2714) + opj_int_fix_mul(b, 4096); +- OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, +- 3430) - opj_int_fix_mul(b, 666); ++ OPJ_FLOAT32 r = c0[i]; ++ OPJ_FLOAT32 g = c1[i]; ++ OPJ_FLOAT32 b = c2[i]; ++ OPJ_FLOAT32 y = 0.299f * r + 0.587f * g + 0.114f * b; ++ OPJ_FLOAT32 u = -0.16875f * r - 0.331260f * g + 0.5f * b; ++ OPJ_FLOAT32 v = 0.5f * r - 0.41869f * g - 0.08131f * b; + c0[i] = y; + c1[i] = u; + c2[i] = v; + } + } +-#endif + + /* */ + /* Inverse irreversible MCT. */ +@@ -391,7 +287,7 @@ void opj_mct_decode_real( + OPJ_FLOAT32* OPJ_RESTRICT c2, + OPJ_SIZE_T n) + { +- OPJ_UINT32 i; ++ OPJ_SIZE_T i; + #ifdef USE_SSE + __m128 vrv, vgu, vgv, vbu; + vrv = _mm_set1_ps(1.402f); +diff --git a/third_party/libopenjpeg20/mct.h b/third_party/libopenjpeg20/mct.h +index 2e37ce7333f85083d683e510e2107b92b73f5d06..3e1f5e4946cf798b8f2e5c6ea098afb8df7dc23f 100644 +--- a/third_party/libopenjpeg20/mct.h ++++ b/third_party/libopenjpeg20/mct.h +@@ -85,8 +85,9 @@ Apply an irreversible multi-component transform to an image + @param c2 Samples blue component + @param n Number of samples for each component + */ +-void opj_mct_encode_real(OPJ_INT32* OPJ_RESTRICT c0, OPJ_INT32* OPJ_RESTRICT c1, +- OPJ_INT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); ++void opj_mct_encode_real(OPJ_FLOAT32* OPJ_RESTRICT c0, ++ OPJ_FLOAT32* OPJ_RESTRICT c1, ++ OPJ_FLOAT32* OPJ_RESTRICT c2, OPJ_SIZE_T n); + /** + Apply an irreversible multi-component inverse transform to an image + @param c0 Samples for luminance component +diff --git a/third_party/libopenjpeg20/mqc.c b/third_party/libopenjpeg20/mqc.c +index 6299b171d8788ffb32bd13c36027eb3f10b6983a..3caab9e7c4411d6dc21cd3908f44ca1f84b8c8b1 100644 +--- a/third_party/libopenjpeg20/mqc.c ++++ b/third_party/libopenjpeg20/mqc.c +@@ -46,27 +46,6 @@ + /** @name Local static functions */ + /*@{*/ + +-/** +-Output a byte, doing bit-stuffing if necessary. +-After a 0xff byte, the next byte must be smaller than 0x90. +-@param mqc MQC handle +-*/ +-static void opj_mqc_byteout(opj_mqc_t *mqc); +-/** +-Renormalize mqc->a and mqc->c while encoding, so that mqc->a stays between 0x8000 and 0x10000 +-@param mqc MQC handle +-*/ +-static void opj_mqc_renorme(opj_mqc_t *mqc); +-/** +-Encode the most probable symbol +-@param mqc MQC handle +-*/ +-static void opj_mqc_codemps(opj_mqc_t *mqc); +-/** +-Encode the most least symbol +-@param mqc MQC handle +-*/ +-static void opj_mqc_codelps(opj_mqc_t *mqc); + /** + Fill mqc->c with 1's for flushing + @param mqc MQC handle +@@ -182,80 +161,6 @@ static const opj_mqc_state_t mqc_states[47 * 2] = { + ========================================================== + */ + +-static void opj_mqc_byteout(opj_mqc_t *mqc) +-{ +- /* bp is initialized to start - 1 in opj_mqc_init_enc() */ +- /* but this is safe, see opj_tcd_code_block_enc_allocate_data() */ +- assert(mqc->bp >= mqc->start - 1); +- if (*mqc->bp == 0xff) { +- mqc->bp++; +- *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); +- mqc->c &= 0xfffff; +- mqc->ct = 7; +- } else { +- if ((mqc->c & 0x8000000) == 0) { +- mqc->bp++; +- *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); +- mqc->c &= 0x7ffff; +- mqc->ct = 8; +- } else { +- (*mqc->bp)++; +- if (*mqc->bp == 0xff) { +- mqc->c &= 0x7ffffff; +- mqc->bp++; +- *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); +- mqc->c &= 0xfffff; +- mqc->ct = 7; +- } else { +- mqc->bp++; +- *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); +- mqc->c &= 0x7ffff; +- mqc->ct = 8; +- } +- } +- } +-} +- +-static void opj_mqc_renorme(opj_mqc_t *mqc) +-{ +- do { +- mqc->a <<= 1; +- mqc->c <<= 1; +- mqc->ct--; +- if (mqc->ct == 0) { +- opj_mqc_byteout(mqc); +- } +- } while ((mqc->a & 0x8000) == 0); +-} +- +-static void opj_mqc_codemps(opj_mqc_t *mqc) +-{ +- mqc->a -= (*mqc->curctx)->qeval; +- if ((mqc->a & 0x8000) == 0) { +- if (mqc->a < (*mqc->curctx)->qeval) { +- mqc->a = (*mqc->curctx)->qeval; +- } else { +- mqc->c += (*mqc->curctx)->qeval; +- } +- *mqc->curctx = (*mqc->curctx)->nmps; +- opj_mqc_renorme(mqc); +- } else { +- mqc->c += (*mqc->curctx)->qeval; +- } +-} +- +-static void opj_mqc_codelps(opj_mqc_t *mqc) +-{ +- mqc->a -= (*mqc->curctx)->qeval; +- if (mqc->a < (*mqc->curctx)->qeval) { +- mqc->c += (*mqc->curctx)->qeval; +- } else { +- mqc->a = (*mqc->curctx)->qeval; +- } +- *mqc->curctx = (*mqc->curctx)->nlps; +- opj_mqc_renorme(mqc); +-} +- + static void opj_mqc_setbits(opj_mqc_t *mqc) + { + OPJ_UINT32 tempc = mqc->c + mqc->a; +@@ -303,14 +208,6 @@ void opj_mqc_init_enc(opj_mqc_t *mqc, OPJ_BYTE *bp) + mqc->end_of_byte_stream_counter = 0; + } + +-void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d) +-{ +- if ((*mqc->curctx)->mps == d) { +- opj_mqc_codemps(mqc); +- } else { +- opj_mqc_codelps(mqc); +- } +-} + + void opj_mqc_flush(opj_mqc_t *mqc) + { +@@ -329,8 +226,6 @@ void opj_mqc_flush(opj_mqc_t *mqc) + } + } + +-#define BYPASS_CT_INIT 0xDEADBEEF +- + void opj_mqc_bypass_init_enc(opj_mqc_t *mqc) + { + /* This function is normally called after at least one opj_mqc_flush() */ +@@ -475,6 +370,38 @@ void opj_mqc_erterm_enc(opj_mqc_t *mqc) + } + } + ++/** ++Encode the most probable symbol ++@param mqc MQC handle ++*/ ++static INLINE void opj_mqc_codemps(opj_mqc_t *mqc) ++{ ++ opj_mqc_codemps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct); ++} ++ ++/** ++Encode the most least symbol ++@param mqc MQC handle ++*/ ++static INLINE void opj_mqc_codelps(opj_mqc_t *mqc) ++{ ++ opj_mqc_codelps_macro(mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct); ++} ++ ++/** ++Encode a symbol using the MQ-coder ++@param mqc MQC handle ++@param d The symbol to be encoded (0 or 1) ++*/ ++static INLINE void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d) ++{ ++ if ((*mqc->curctx)->mps == d) { ++ opj_mqc_codemps(mqc); ++ } else { ++ opj_mqc_codelps(mqc); ++ } ++} ++ + void opj_mqc_segmark_enc(opj_mqc_t *mqc) + { + OPJ_UINT32 i; +@@ -557,4 +484,36 @@ void opj_mqc_setstate(opj_mqc_t *mqc, OPJ_UINT32 ctxno, OPJ_UINT32 msb, + mqc->ctxs[ctxno] = &mqc_states[msb + (OPJ_UINT32)(prob << 1)]; + } + +- ++void opj_mqc_byteout(opj_mqc_t *mqc) ++{ ++ /* bp is initialized to start - 1 in opj_mqc_init_enc() */ ++ /* but this is safe, see opj_tcd_code_block_enc_allocate_data() */ ++ assert(mqc->bp >= mqc->start - 1); ++ if (*mqc->bp == 0xff) { ++ mqc->bp++; ++ *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); ++ mqc->c &= 0xfffff; ++ mqc->ct = 7; ++ } else { ++ if ((mqc->c & 0x8000000) == 0) { ++ mqc->bp++; ++ *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); ++ mqc->c &= 0x7ffff; ++ mqc->ct = 8; ++ } else { ++ (*mqc->bp)++; ++ if (*mqc->bp == 0xff) { ++ mqc->c &= 0x7ffffff; ++ mqc->bp++; ++ *mqc->bp = (OPJ_BYTE)(mqc->c >> 20); ++ mqc->c &= 0xfffff; ++ mqc->ct = 7; ++ } else { ++ mqc->bp++; ++ *mqc->bp = (OPJ_BYTE)(mqc->c >> 19); ++ mqc->c &= 0x7ffff; ++ mqc->ct = 8; ++ } ++ } ++ } ++} +\ No newline at end of file +diff --git a/third_party/libopenjpeg20/mqc.h b/third_party/libopenjpeg20/mqc.h +index 69a2a79dc06d68b1a973e9aac915d13c6b0f566f..9850fed03161701cb2abee0d130b14186b94dd66 100644 +--- a/third_party/libopenjpeg20/mqc.h ++++ b/third_party/libopenjpeg20/mqc.h +@@ -96,6 +96,8 @@ typedef struct opj_mqc { + OPJ_BYTE backup[OPJ_COMMON_CBLK_DATA_EXTRA]; + } opj_mqc_t; + ++#define BYPASS_CT_INIT 0xDEADBEEF ++ + #include "mqc_inl.h" + + /** @name Exported functions */ +@@ -135,12 +137,7 @@ Set the current context used for coding/decoding + @param ctxno Number that identifies the context + */ + #define opj_mqc_setcurctx(mqc, ctxno) (mqc)->curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)] +-/** +-Encode a symbol using the MQ-coder +-@param mqc MQC handle +-@param d The symbol to be encoded (0 or 1) +-*/ +-void opj_mqc_encode(opj_mqc_t *mqc, OPJ_UINT32 d); ++ + /** + Flush the encoder, so that all remaining data is written + @param mqc MQC handle +diff --git a/third_party/libopenjpeg20/mqc_inl.h b/third_party/libopenjpeg20/mqc_inl.h +index 310a3287fd918dce3ab66e49e9f4e2706ccfa2de..0031b94be318aef6d34dcbe9b2c936169007204a 100644 +--- a/third_party/libopenjpeg20/mqc_inl.h ++++ b/third_party/libopenjpeg20/mqc_inl.h +@@ -156,13 +156,13 @@ static INLINE OPJ_UINT32 opj_mqc_raw_decode(opj_mqc_t *mqc) + } \ + } + +-#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \ ++#define DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \ + register const opj_mqc_state_t **curctx = mqc->curctx; \ + register OPJ_UINT32 c = mqc->c; \ + register OPJ_UINT32 a = mqc->a; \ + register OPJ_UINT32 ct = mqc->ct + +-#define UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct) \ ++#define UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct) \ + mqc->curctx = curctx; \ + mqc->c = c; \ + mqc->a = a; \ +@@ -193,4 +193,90 @@ Decode a symbol + #define opj_mqc_decode(d, mqc) \ + opj_mqc_decode_macro(d, mqc, mqc->curctx, mqc->a, mqc->c, mqc->ct) + ++/** ++Output a byte, doing bit-stuffing if necessary. ++After a 0xff byte, the next byte must be smaller than 0x90. ++@param mqc MQC handle ++*/ ++void opj_mqc_byteout(opj_mqc_t *mqc); ++ ++/** ++Renormalize mqc->a and mqc->c while encoding, so that mqc->a stays between 0x8000 and 0x10000 ++@param mqc MQC handle ++@param a_ value of mqc->a ++@param c_ value of mqc->c_ ++@param ct_ value of mqc->ct_ ++*/ ++#define opj_mqc_renorme_macro(mqc, a_, c_, ct_) \ ++{ \ ++ do { \ ++ a_ <<= 1; \ ++ c_ <<= 1; \ ++ ct_--; \ ++ if (ct_ == 0) { \ ++ mqc->c = c_; \ ++ opj_mqc_byteout(mqc); \ ++ c_ = mqc->c; \ ++ ct_ = mqc->ct; \ ++ } \ ++ } while( (a_ & 0x8000) == 0); \ ++} ++ ++#define opj_mqc_codemps_macro(mqc, curctx, a, c, ct) \ ++{ \ ++ a -= (*curctx)->qeval; \ ++ if ((a & 0x8000) == 0) { \ ++ if (a < (*curctx)->qeval) { \ ++ a = (*curctx)->qeval; \ ++ } else { \ ++ c += (*curctx)->qeval; \ ++ } \ ++ *curctx = (*curctx)->nmps; \ ++ opj_mqc_renorme_macro(mqc, a, c, ct); \ ++ } else { \ ++ c += (*curctx)->qeval; \ ++ } \ ++} ++ ++#define opj_mqc_codelps_macro(mqc, curctx, a, c, ct) \ ++{ \ ++ a -= (*curctx)->qeval; \ ++ if (a < (*curctx)->qeval) { \ ++ c += (*curctx)->qeval; \ ++ } else { \ ++ a = (*curctx)->qeval; \ ++ } \ ++ *curctx = (*curctx)->nlps; \ ++ opj_mqc_renorme_macro(mqc, a, c, ct); \ ++} ++ ++#define opj_mqc_encode_macro(mqc, curctx, a, c, ct, d) \ ++{ \ ++ if ((*curctx)->mps == (d)) { \ ++ opj_mqc_codemps_macro(mqc, curctx, a, c, ct); \ ++ } else { \ ++ opj_mqc_codelps_macro(mqc, curctx, a, c, ct); \ ++ } \ ++} ++ ++ ++#define opj_mqc_bypass_enc_macro(mqc, c, ct, d) \ ++{\ ++ if (ct == BYPASS_CT_INIT) {\ ++ ct = 8;\ ++ }\ ++ ct--;\ ++ c = c + ((d) << ct);\ ++ if (ct == 0) {\ ++ *mqc->bp = (OPJ_BYTE)c;\ ++ ct = 8;\ ++ /* If the previous byte was 0xff, make sure that the next msb is 0 */ \ ++ if (*mqc->bp == 0xff) {\ ++ ct = 7;\ ++ }\ ++ mqc->bp++;\ ++ c = 0;\ ++ }\ ++} ++ + #endif /* OPJ_MQC_INL_H */ +diff --git a/third_party/libopenjpeg20/openjpeg.c b/third_party/libopenjpeg20/openjpeg.c +index 7b12303423b7ad79cf50b6f259a682dcdab25bb4..9c9b6eb0c0ac24f51646993840512190ed443ca6 100644 +--- a/third_party/libopenjpeg20/openjpeg.c ++++ b/third_party/libopenjpeg20/openjpeg.c +@@ -652,6 +652,14 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format) + struct opj_image *, + struct opj_event_mgr *)) opj_j2k_setup_encoder; + ++ l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options = (OPJ_BOOL( ++ *)(void *, ++ const char* const*, ++ struct opj_event_mgr *)) opj_j2k_encoder_set_extra_options; ++ ++ l_codec->opj_set_threads = ++ (OPJ_BOOL(*)(void * p_codec, OPJ_UINT32 num_threads)) opj_j2k_set_threads; ++ + l_codec->m_codec = opj_j2k_create_compress(); + if (! l_codec->m_codec) { + opj_free(l_codec); +@@ -690,6 +698,14 @@ opj_codec_t* OPJ_CALLCONV opj_create_compress(OPJ_CODEC_FORMAT p_format) + struct opj_image *, + struct opj_event_mgr *)) opj_jp2_setup_encoder; + ++ l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options = (OPJ_BOOL( ++ *)(void *, ++ const char* const*, ++ struct opj_event_mgr *)) opj_jp2_encoder_set_extra_options; ++ ++ l_codec->opj_set_threads = ++ (OPJ_BOOL(*)(void * p_codec, OPJ_UINT32 num_threads)) opj_jp2_set_threads; ++ + l_codec->m_codec = opj_jp2_create(OPJ_FALSE); + if (! l_codec->m_codec) { + opj_free(l_codec); +@@ -718,11 +734,11 @@ void OPJ_CALLCONV opj_set_default_encoder_parameters(opj_cparameters_t + parameters->cp_cinema = OPJ_OFF; /* DEPRECATED */ + parameters->rsiz = OPJ_PROFILE_NONE; + parameters->max_comp_size = 0; +- parameters->numresolution = 6; ++ parameters->numresolution = OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION; + parameters->cp_rsiz = OPJ_STD_RSIZ; /* DEPRECATED */ +- parameters->cblockw_init = 64; +- parameters->cblockh_init = 64; +- parameters->prog_order = OPJ_LRCP; ++ parameters->cblockw_init = OPJ_COMP_PARAM_DEFAULT_CBLOCKW; ++ parameters->cblockh_init = OPJ_COMP_PARAM_DEFAULT_CBLOCKH; ++ parameters->prog_order = OPJ_COMP_PARAM_DEFAULT_PROG_ORDER; + parameters->roi_compno = -1; /* no ROI */ + parameters->subsampling_dx = 1; + parameters->subsampling_dy = 1; +@@ -788,6 +804,27 @@ OPJ_BOOL OPJ_CALLCONV opj_setup_encoder(opj_codec_t *p_codec, + return OPJ_FALSE; + } + ++/* ----------------------------------------------------------------------- */ ++ ++OPJ_BOOL OPJ_CALLCONV opj_encoder_set_extra_options(opj_codec_t *p_codec, ++ const char* const* options) ++{ ++ if (p_codec) { ++ opj_codec_private_t * l_codec = (opj_codec_private_t *) p_codec; ++ ++ if (! l_codec->is_decompressor) { ++ return l_codec->m_codec_data.m_compression.opj_encoder_set_extra_options( ++ l_codec->m_codec, ++ options, ++ &(l_codec->m_event_mgr)); ++ } ++ } ++ ++ return OPJ_FALSE; ++} ++ ++/* ----------------------------------------------------------------------- */ ++ + OPJ_BOOL OPJ_CALLCONV opj_start_compress(opj_codec_t *p_codec, + opj_image_t * p_image, + opj_stream_t *p_stream) +diff --git a/third_party/libopenjpeg20/openjpeg.h b/third_party/libopenjpeg20/openjpeg.h +index 53a0e10c54b3312cd6722b57ad66b93f69905f67..269ac329ae097fc938b54f8469cf3bd7eeb93a57 100644 +--- a/third_party/libopenjpeg20/openjpeg.h ++++ b/third_party/libopenjpeg20/openjpeg.h +@@ -78,7 +78,7 @@ Most compilers implement their own version of this keyword ... + + #if defined(OPJ_STATIC) || !defined(_WIN32) + /* http://gcc.gnu.org/wiki/Visibility */ +-# if __GNUC__ >= 4 ++# if !defined(_WIN32) && __GNUC__ >= 4 + # if defined(OPJ_STATIC) /* static library uses "hidden" */ + # define OPJ_API __attribute__ ((visibility ("hidden"))) + # else +@@ -204,11 +204,11 @@ typedef size_t OPJ_SIZE_T; + #define OPJ_PROFILE_BC_MULTI 0x0200 /** Multi Tile Broadcast profile defined in 15444-1 AMD3 */ + #define OPJ_PROFILE_BC_MULTI_R 0x0300 /** Multi Tile Reversible Broadcast profile defined in 15444-1 AMD3 */ + #define OPJ_PROFILE_IMF_2K 0x0400 /** 2K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ +-#define OPJ_PROFILE_IMF_4K 0x0401 /** 4K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ +-#define OPJ_PROFILE_IMF_8K 0x0402 /** 8K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ +-#define OPJ_PROFILE_IMF_2K_R 0x0403 /** 2K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ ++#define OPJ_PROFILE_IMF_4K 0x0500 /** 4K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ ++#define OPJ_PROFILE_IMF_8K 0x0600 /** 8K Single Tile Lossy IMF profile defined in 15444-1 AMD 8 */ ++#define OPJ_PROFILE_IMF_2K_R 0x0700 /** 2K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ + #define OPJ_PROFILE_IMF_4K_R 0x0800 /** 4K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ +-#define OPJ_PROFILE_IMF_8K_R 0x0801 /** 8K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ ++#define OPJ_PROFILE_IMF_8K_R 0x0900 /** 8K Single/Multi Tile Reversible IMF profile defined in 15444-1 AMD 8 */ + + /** + * JPEG 2000 Part-2 extensions +@@ -225,6 +225,36 @@ typedef size_t OPJ_SIZE_T; + #define OPJ_IS_IMF(v) (((v) >= OPJ_PROFILE_IMF_2K)&&((v) <= ((OPJ_PROFILE_IMF_8K_R) | (0x009b)))) + #define OPJ_IS_PART2(v) ((v) & OPJ_PROFILE_PART2) + ++#define OPJ_GET_IMF_PROFILE(v) ((v) & 0xff00) /** Extract IMF profile without mainlevel/sublevel */ ++#define OPJ_GET_IMF_MAINLEVEL(v) ((v) & 0xf) /** Extract IMF main level */ ++#define OPJ_GET_IMF_SUBLEVEL(v) (((v) >> 4) & 0xf) /** Extract IMF sub level */ ++ ++#define OPJ_IMF_MAINLEVEL_MAX 11 /** Maximum main level */ ++ ++/** Max. Components Sampling Rate (MSamples/sec) per IMF main level */ ++#define OPJ_IMF_MAINLEVEL_1_MSAMPLESEC 65 /** MSamples/sec for IMF main level 1 */ ++#define OPJ_IMF_MAINLEVEL_2_MSAMPLESEC 130 /** MSamples/sec for IMF main level 2 */ ++#define OPJ_IMF_MAINLEVEL_3_MSAMPLESEC 195 /** MSamples/sec for IMF main level 3 */ ++#define OPJ_IMF_MAINLEVEL_4_MSAMPLESEC 260 /** MSamples/sec for IMF main level 4 */ ++#define OPJ_IMF_MAINLEVEL_5_MSAMPLESEC 520 /** MSamples/sec for IMF main level 5 */ ++#define OPJ_IMF_MAINLEVEL_6_MSAMPLESEC 1200 /** MSamples/sec for IMF main level 6 */ ++#define OPJ_IMF_MAINLEVEL_7_MSAMPLESEC 2400 /** MSamples/sec for IMF main level 7 */ ++#define OPJ_IMF_MAINLEVEL_8_MSAMPLESEC 4800 /** MSamples/sec for IMF main level 8 */ ++#define OPJ_IMF_MAINLEVEL_9_MSAMPLESEC 9600 /** MSamples/sec for IMF main level 9 */ ++#define OPJ_IMF_MAINLEVEL_10_MSAMPLESEC 19200 /** MSamples/sec for IMF main level 10 */ ++#define OPJ_IMF_MAINLEVEL_11_MSAMPLESEC 38400 /** MSamples/sec for IMF main level 11 */ ++ ++/** Max. compressed Bit Rate (Mbits/s) per IMF sub level */ ++#define OPJ_IMF_SUBLEVEL_1_MBITSSEC 200 /** Mbits/s for IMF sub level 1 */ ++#define OPJ_IMF_SUBLEVEL_2_MBITSSEC 400 /** Mbits/s for IMF sub level 2 */ ++#define OPJ_IMF_SUBLEVEL_3_MBITSSEC 800 /** Mbits/s for IMF sub level 3 */ ++#define OPJ_IMF_SUBLEVEL_4_MBITSSEC 1600 /** Mbits/s for IMF sub level 4 */ ++#define OPJ_IMF_SUBLEVEL_5_MBITSSEC 3200 /** Mbits/s for IMF sub level 5 */ ++#define OPJ_IMF_SUBLEVEL_6_MBITSSEC 6400 /** Mbits/s for IMF sub level 6 */ ++#define OPJ_IMF_SUBLEVEL_7_MBITSSEC 12800 /** Mbits/s for IMF sub level 7 */ ++#define OPJ_IMF_SUBLEVEL_8_MBITSSEC 25600 /** Mbits/s for IMF sub level 8 */ ++#define OPJ_IMF_SUBLEVEL_9_MBITSSEC 51200 /** Mbits/s for IMF sub level 9 */ ++ + /** + * JPEG 2000 codestream and component size limits in cinema profiles + * */ +@@ -318,6 +348,10 @@ typedef void (*opj_msg_callback)(const char *msg, void *client_data); + ========================================================== + */ + ++#ifndef OPJ_UINT32_SEMANTICALLY_BUT_INT32 ++#define OPJ_UINT32_SEMANTICALLY_BUT_INT32 OPJ_INT32 ++#endif ++ + /** + * Progression order changes + * +@@ -333,10 +367,10 @@ typedef struct opj_poc { + OPJ_PROG_ORDER prg1, prg; + /** Progression order string*/ + OPJ_CHAR progorder[5]; +- /** Tile number */ ++ /** Tile number (starting at 1) */ + OPJ_UINT32 tile; + /** Start and end values for Tile width and height*/ +- OPJ_INT32 tx0, tx1, ty0, ty1; ++ OPJ_UINT32_SEMANTICALLY_BUT_INT32 tx0, tx1, ty0, ty1; + /** Start value, initialised in pi_initialise_encode*/ + OPJ_UINT32 layS, resS, compS, prcS; + /** End value, initialised in pi_initialise_encode */ +@@ -1314,15 +1348,14 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_setup_decoder(opj_codec_t *p_codec, + * number, or "ALL_CPUS". If OPJ_NUM_THREADS is set and this function is called, + * this function will override the behaviour of the environment variable. + * +- * Currently this function must be called after opj_setup_decoder() and +- * before opj_read_header(). +- * +- * Note: currently only has effect on the decompressor. ++ * This function must be called after opj_setup_decoder() and ++ * before opj_read_header() for the decoding side, or after opj_setup_encoder() ++ * and before opj_start_compress() for the encoding side. + * +- * @param p_codec decompressor handler ++ * @param p_codec decompressor or compressor handler + * @param num_threads number of threads. + * +- * @return OPJ_TRUE if the decoder is correctly set ++ * @return OPJ_TRUE if the function is successful. + */ + OPJ_API OPJ_BOOL OPJ_CALLCONV opj_codec_set_threads(opj_codec_t *p_codec, + int num_threads); +@@ -1546,6 +1579,33 @@ OPJ_API OPJ_BOOL OPJ_CALLCONV opj_setup_encoder(opj_codec_t *p_codec, + opj_cparameters_t *parameters, + opj_image_t *image); + ++ ++/** ++ * Specify extra options for the encoder. ++ * ++ * This may be called after opj_setup_encoder() and before opj_start_compress() ++ * ++ * This is the way to add new options in a fully ABI compatible way, without ++ * extending the opj_cparameters_t structure. ++ * ++ * Currently supported options are: ++ *
    ++ *
  • PLT=YES/NO. Defaults to NO. If set to YES, PLT marker segments, ++ * indicating the length of each packet in the tile-part header, will be ++ * written. Since 2.3.2
  • ++ *
++ * ++ * @param p_codec Compressor handle ++ * @param p_options Compression options. This should be a NULL terminated ++ * array of strings. Each string is of the form KEY=VALUE. ++ * ++ * @return OPJ_TRUE in case of success. ++ * @since 2.3.2 ++ */ ++OPJ_API OPJ_BOOL OPJ_CALLCONV opj_encoder_set_extra_options( ++ opj_codec_t *p_codec, ++ const char* const* p_options); ++ + /** + * Start to compress the current image. + * @param p_codec Compressor handle +diff --git a/third_party/libopenjpeg20/opj_codec.h b/third_party/libopenjpeg20/opj_codec.h +index b962b121633e1fb10c65a8b2bee5b5d99c8cdee3..8a8af9119e385033a404ad3a347814de39912589 100644 +--- a/third_party/libopenjpeg20/opj_codec.h ++++ b/third_party/libopenjpeg20/opj_codec.h +@@ -148,6 +148,11 @@ typedef struct opj_codec_private { + opj_cparameters_t * p_param, + struct opj_image * p_image, + struct opj_event_mgr * p_manager); ++ ++ OPJ_BOOL(* opj_encoder_set_extra_options)(void * p_codec, ++ const char* const* p_options, ++ struct opj_event_mgr * p_manager); ++ + } m_compression; + } m_codec_data; + /** FIXME DOC*/ +diff --git a/third_party/libopenjpeg20/opj_common.h b/third_party/libopenjpeg20/opj_common.h +index a051339154b69e295bddf24a0ca539cf7f773df8..ee8adf4725c4bcaf9e1b8489abdc33c2b04b1264 100644 +--- a/third_party/libopenjpeg20/opj_common.h ++++ b/third_party/libopenjpeg20/opj_common.h +@@ -38,4 +38,10 @@ + */ + #define OPJ_COMMON_CBLK_DATA_EXTRA 2 /**< Margin for a fake FFFF marker */ + ++ ++#define OPJ_COMP_PARAM_DEFAULT_CBLOCKW 64 ++#define OPJ_COMP_PARAM_DEFAULT_CBLOCKH 64 ++#define OPJ_COMP_PARAM_DEFAULT_PROG_ORDER OPJ_LRCP ++#define OPJ_COMP_PARAM_DEFAULT_NUMRESOLUTION 6 ++ + #endif /* OPJ_COMMMON_H */ +diff --git a/third_party/libopenjpeg20/opj_config.h b/third_party/libopenjpeg20/opj_config.h +index fda1f641308f11ee2a6fb7f5bbad3fae2d845fac..e5d82be4347e6c83c5aee7eaea481fc34d443138 100644 +--- a/third_party/libopenjpeg20/opj_config.h ++++ b/third_party/libopenjpeg20/opj_config.h +@@ -12,5 +12,5 @@ + + /* Version number. */ + #define OPJ_VERSION_MAJOR 2 +-#define OPJ_VERSION_MINOR 3 +-#define OPJ_VERSION_BUILD 1 ++#define OPJ_VERSION_MINOR 4 ++#define OPJ_VERSION_BUILD 0 +diff --git a/third_party/libopenjpeg20/opj_config_private.h b/third_party/libopenjpeg20/opj_config_private.h +index b6986f9320cf4afdeab240dfb65c2e66d67c65f3..ee96ceefb407894b8e1f616cd70df87d71a0859b 100644 +--- a/third_party/libopenjpeg20/opj_config_private.h ++++ b/third_party/libopenjpeg20/opj_config_private.h +@@ -7,7 +7,7 @@ + /* create opj_config_private.h for CMake */ + #define OPJ_HAVE_INTTYPES_H 1 + +-#define OPJ_PACKAGE_VERSION "2.3.1" ++#define OPJ_PACKAGE_VERSION "2.4.0" + + /* Not used by openjp2*/ + /*#define HAVE_MEMORY_H 1*/ +diff --git a/third_party/libopenjpeg20/opj_intmath.h b/third_party/libopenjpeg20/opj_intmath.h +index 754b5512ff2cf64640359c21d23259076fab7159..afe69d90c072574aa735aa0e9a1947602f66a1d4 100644 +--- a/third_party/libopenjpeg20/opj_intmath.h ++++ b/third_party/libopenjpeg20/opj_intmath.h +@@ -208,6 +208,16 @@ static INLINE OPJ_INT32 opj_int_floordivpow2(OPJ_INT32 a, OPJ_INT32 b) + { + return a >> b; + } ++ ++/** ++Divide an integer by a power of 2 and round downwards ++@return Returns a divided by 2^b ++*/ ++static INLINE OPJ_UINT32 opj_uint_floordivpow2(OPJ_UINT32 a, OPJ_UINT32 b) ++{ ++ return a >> b; ++} ++ + /** + Get logarithm of an integer and round downwards + @return Returns log2(a) +diff --git a/third_party/libopenjpeg20/pi.c b/third_party/libopenjpeg20/pi.c +index 5f3d9ec1222e7230d694a1cb35b22bedcae6b5ec..1430d12a9bf64a74f581bb9ad0df92e03ee538cf 100644 +--- a/third_party/libopenjpeg20/pi.c ++++ b/third_party/libopenjpeg20/pi.c +@@ -36,6 +36,8 @@ + * POSSIBILITY OF SUCH DAMAGE. + */ + ++#define OPJ_UINT32_SEMANTICALLY_BUT_INT32 OPJ_UINT32 ++ + #include "opj_includes.h" + + /** @defgroup PI PI - Implementation of a packet iterator */ +@@ -91,10 +93,10 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi); + */ + static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, + OPJ_UINT32 p_tileno, +- OPJ_INT32 p_tx0, +- OPJ_INT32 p_tx1, +- OPJ_INT32 p_ty0, +- OPJ_INT32 p_ty1, ++ OPJ_UINT32 p_tx0, ++ OPJ_UINT32 p_tx1, ++ OPJ_UINT32 p_ty0, ++ OPJ_UINT32 p_ty1, + OPJ_UINT32 p_max_prec, + OPJ_UINT32 p_max_res, + OPJ_UINT32 p_dx_min, +@@ -118,10 +120,10 @@ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, + static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, + OPJ_UINT32 p_num_comps, + OPJ_UINT32 p_tileno, +- OPJ_INT32 p_tx0, +- OPJ_INT32 p_tx1, +- OPJ_INT32 p_ty0, +- OPJ_INT32 p_ty1, ++ OPJ_UINT32 p_tx0, ++ OPJ_UINT32 p_tx1, ++ OPJ_UINT32 p_ty0, ++ OPJ_UINT32 p_ty1, + OPJ_UINT32 p_max_prec, + OPJ_UINT32 p_max_res, + OPJ_UINT32 p_dx_min, +@@ -144,10 +146,10 @@ static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, + static void opj_get_encoding_parameters(const opj_image_t *p_image, + const opj_cp_t *p_cp, + OPJ_UINT32 tileno, +- OPJ_INT32 * p_tx0, +- OPJ_INT32 * p_tx1, +- OPJ_INT32 * p_ty0, +- OPJ_INT32 * p_ty1, ++ OPJ_UINT32 * p_tx0, ++ OPJ_UINT32 * p_tx1, ++ OPJ_UINT32 * p_ty0, ++ OPJ_UINT32 * p_ty1, + OPJ_UINT32 * p_dx_min, + OPJ_UINT32 * p_dy_min, + OPJ_UINT32 * p_max_prec, +@@ -176,10 +178,10 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, + static void opj_get_all_encoding_parameters(const opj_image_t *p_image, + const opj_cp_t *p_cp, + OPJ_UINT32 tileno, +- OPJ_INT32 * p_tx0, +- OPJ_INT32 * p_tx1, +- OPJ_INT32 * p_ty0, +- OPJ_INT32 * p_ty1, ++ OPJ_UINT32 * p_tx0, ++ OPJ_UINT32 * p_tx1, ++ OPJ_UINT32 * p_ty0, ++ OPJ_UINT32 * p_ty1, + OPJ_UINT32 * p_dx_min, + OPJ_UINT32 * p_dy_min, + OPJ_UINT32 * p_max_prec, +@@ -192,10 +194,12 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, + * @param p_image the image used to initialize the packet iterator (in fact only the number of components is relevant. + * @param p_cp the coding parameters. + * @param tileno the index of the tile from which creating the packet iterator. ++ * @param manager Event manager + */ + static opj_pi_iterator_t * opj_pi_create(const opj_image_t *p_image, + const opj_cp_t *p_cp, +- OPJ_UINT32 tileno); ++ OPJ_UINT32 tileno, ++ opj_event_mgr_t* manager); + /** + * FIXME DOC + */ +@@ -230,18 +234,19 @@ static OPJ_BOOL opj_pi_check_next_level(OPJ_INT32 pos, + ========================================================== + */ + +-static void opj_pi_emit_error(opj_pi_iterator_t * pi, const char* msg) +-{ +- (void)pi; +- (void)msg; +-} +- + static OPJ_BOOL opj_pi_next_lrcp(opj_pi_iterator_t * pi) + { + opj_pi_comp_t *comp = NULL; + opj_pi_resolution_t *res = NULL; + OPJ_UINT32 index = 0; + ++ if (pi->poc.compno0 >= pi->numcomps || ++ pi->poc.compno1 >= pi->numcomps + 1) { ++ opj_event_msg(pi->manager, EVT_ERROR, ++ "opj_pi_next_lrcp(): invalid compno0/compno1\n"); ++ return OPJ_FALSE; ++ } ++ + if (!pi->first) { + comp = &pi->comps[pi->compno]; + res = &comp->resolutions[pi->resno]; +@@ -272,7 +277,7 @@ static OPJ_BOOL opj_pi_next_lrcp(opj_pi_iterator_t * pi) + /* include should be resized when a POC arises, or */ + /* the POC should be rejected */ + if (index >= pi->include_size) { +- opj_pi_emit_error(pi, "Invalid access to pi->include"); ++ opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); + return OPJ_FALSE; + } + if (!pi->include[index]) { +@@ -295,6 +300,13 @@ static OPJ_BOOL opj_pi_next_rlcp(opj_pi_iterator_t * pi) + opj_pi_resolution_t *res = NULL; + OPJ_UINT32 index = 0; + ++ if (pi->poc.compno0 >= pi->numcomps || ++ pi->poc.compno1 >= pi->numcomps + 1) { ++ opj_event_msg(pi->manager, EVT_ERROR, ++ "opj_pi_next_rlcp(): invalid compno0/compno1\n"); ++ return OPJ_FALSE; ++ } ++ + if (!pi->first) { + comp = &pi->comps[pi->compno]; + res = &comp->resolutions[pi->resno]; +@@ -318,7 +330,7 @@ static OPJ_BOOL opj_pi_next_rlcp(opj_pi_iterator_t * pi) + index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * + pi->step_c + pi->precno * pi->step_p; + if (index >= pi->include_size) { +- opj_pi_emit_error(pi, "Invalid access to pi->include"); ++ opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); + return OPJ_FALSE; + } + if (!pi->include[index]) { +@@ -341,6 +353,13 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) + opj_pi_resolution_t *res = NULL; + OPJ_UINT32 index = 0; + ++ if (pi->poc.compno0 >= pi->numcomps || ++ pi->poc.compno1 >= pi->numcomps + 1) { ++ opj_event_msg(pi->manager, EVT_ERROR, ++ "opj_pi_next_rpcl(): invalid compno0/compno1\n"); ++ return OPJ_FALSE; ++ } ++ + if (!pi->first) { + goto LABEL_SKIP; + } else { +@@ -376,16 +395,16 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) + pi->poc.tx1 = pi->tx1; + } + for (pi->resno = pi->poc.resno0; pi->resno < pi->poc.resno1; pi->resno++) { +- for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1; +- pi->y += (OPJ_INT32)(pi->dy - (OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) { +- for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1; +- pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) { ++ for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1; ++ pi->y += (pi->dy - (pi->y % pi->dy))) { ++ for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1; ++ pi->x += (pi->dx - (pi->x % pi->dx))) { + for (pi->compno = pi->poc.compno0; pi->compno < pi->poc.compno1; pi->compno++) { + OPJ_UINT32 levelno; +- OPJ_INT32 trx0, try0; +- OPJ_INT32 trx1, try1; ++ OPJ_UINT32 trx0, try0; ++ OPJ_UINT32 trx1, try1; + OPJ_UINT32 rpx, rpy; +- OPJ_INT32 prci, prcj; ++ OPJ_UINT32 prci, prcj; + comp = &pi->comps[pi->compno]; + if (pi->resno >= comp->numresolutions) { + continue; +@@ -404,10 +423,10 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) + (comp->dy << levelno) > INT_MAX) { + continue; + } +- trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno)); +- try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno)); +- trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno)); +- try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno)); ++ trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno)); ++ try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno)); ++ trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno)); ++ try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno)); + rpx = res->pdx + levelno; + rpy = res->pdy + levelno; + +@@ -421,12 +440,12 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) + } + + /* See ISO-15441. B.12.1.3 Resolution level-position-component-layer progression */ +- if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && +- ((try0 << levelno) % (1 << rpy))))) { ++ if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && ++ ((try0 << levelno) % (1U << rpy))))) { + continue; + } +- if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && +- ((trx0 << levelno) % (1 << rpx))))) { ++ if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && ++ ((trx0 << levelno) % (1U << rpx))))) { + continue; + } + +@@ -438,13 +457,13 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) + continue; + } + +- prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x, +- (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx) +- - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx); +- prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y, +- (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) +- - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); +- pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); ++ prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x, ++ (comp->dx << levelno)), res->pdx) ++ - opj_uint_floordivpow2(trx0, res->pdx); ++ prcj = opj_uint_floordivpow2(opj_uint_ceildiv(pi->y, ++ (comp->dy << levelno)), res->pdy) ++ - opj_uint_floordivpow2(try0, res->pdy); ++ pi->precno = prci + prcj * res->pw; + if (pi->precno >= res->pw * res->ph) { + return OPJ_FALSE; + } +@@ -452,7 +471,7 @@ static OPJ_BOOL opj_pi_next_rpcl(opj_pi_iterator_t * pi) + index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * + pi->step_c + pi->precno * pi->step_p; + if (index >= pi->include_size) { +- opj_pi_emit_error(pi, "Invalid access to pi->include"); ++ opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); + return OPJ_FALSE; + } + if (!pi->include[index]) { +@@ -476,6 +495,13 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) + opj_pi_resolution_t *res = NULL; + OPJ_UINT32 index = 0; + ++ if (pi->poc.compno0 >= pi->numcomps || ++ pi->poc.compno1 >= pi->numcomps + 1) { ++ opj_event_msg(pi->manager, EVT_ERROR, ++ "opj_pi_next_pcrl(): invalid compno0/compno1\n"); ++ return OPJ_FALSE; ++ } ++ + if (!pi->first) { + comp = &pi->comps[pi->compno]; + goto LABEL_SKIP; +@@ -511,19 +537,19 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) + pi->poc.ty1 = pi->ty1; + pi->poc.tx1 = pi->tx1; + } +- for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1; +- pi->y += (OPJ_INT32)(pi->dy - (OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) { +- for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1; +- pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) { ++ for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1; ++ pi->y += (pi->dy - (pi->y % pi->dy))) { ++ for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1; ++ pi->x += (pi->dx - (pi->x % pi->dx))) { + for (pi->compno = pi->poc.compno0; pi->compno < pi->poc.compno1; pi->compno++) { + comp = &pi->comps[pi->compno]; + for (pi->resno = pi->poc.resno0; + pi->resno < opj_uint_min(pi->poc.resno1, comp->numresolutions); pi->resno++) { + OPJ_UINT32 levelno; +- OPJ_INT32 trx0, try0; +- OPJ_INT32 trx1, try1; ++ OPJ_UINT32 trx0, try0; ++ OPJ_UINT32 trx1, try1; + OPJ_UINT32 rpx, rpy; +- OPJ_INT32 prci, prcj; ++ OPJ_UINT32 prci, prcj; + res = &comp->resolutions[pi->resno]; + levelno = comp->numresolutions - 1 - pi->resno; + /* Avoids division by zero */ +@@ -538,10 +564,10 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) + (comp->dy << levelno) > INT_MAX) { + continue; + } +- trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno)); +- try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno)); +- trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno)); +- try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno)); ++ trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno)); ++ try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno)); ++ trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno)); ++ try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno)); + rpx = res->pdx + levelno; + rpy = res->pdy + levelno; + +@@ -555,12 +581,12 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) + } + + /* See ISO-15441. B.12.1.4 Position-component-resolution level-layer progression */ +- if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && +- ((try0 << levelno) % (1 << rpy))))) { ++ if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && ++ ((try0 << levelno) % (1U << rpy))))) { + continue; + } +- if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && +- ((trx0 << levelno) % (1 << rpx))))) { ++ if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && ++ ((trx0 << levelno) % (1U << rpx))))) { + continue; + } + +@@ -572,13 +598,13 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) + continue; + } + +- prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x, +- (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx) +- - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx); +- prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y, +- (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) +- - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); +- pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); ++ prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x, ++ (comp->dx << levelno)), res->pdx) ++ - opj_uint_floordivpow2(trx0, res->pdx); ++ prcj = opj_uint_floordivpow2(opj_uint_ceildiv(pi->y, ++ (comp->dy << levelno)), res->pdy) ++ - opj_uint_floordivpow2(try0, res->pdy); ++ pi->precno = prci + prcj * res->pw; + if (pi->precno >= res->pw * res->ph) { + return OPJ_FALSE; + } +@@ -586,7 +612,7 @@ static OPJ_BOOL opj_pi_next_pcrl(opj_pi_iterator_t * pi) + index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * + pi->step_c + pi->precno * pi->step_p; + if (index >= pi->include_size) { +- opj_pi_emit_error(pi, "Invalid access to pi->include"); ++ opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); + return OPJ_FALSE; + } + if (!pi->include[index]) { +@@ -610,6 +636,13 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) + opj_pi_resolution_t *res = NULL; + OPJ_UINT32 index = 0; + ++ if (pi->poc.compno0 >= pi->numcomps || ++ pi->poc.compno1 >= pi->numcomps + 1) { ++ opj_event_msg(pi->manager, EVT_ERROR, ++ "opj_pi_next_cprl(): invalid compno0/compno1\n"); ++ return OPJ_FALSE; ++ } ++ + if (!pi->first) { + comp = &pi->comps[pi->compno]; + goto LABEL_SKIP; +@@ -645,17 +678,17 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) + pi->poc.ty1 = pi->ty1; + pi->poc.tx1 = pi->tx1; + } +- for (pi->y = pi->poc.ty0; pi->y < pi->poc.ty1; +- pi->y += (OPJ_INT32)(pi->dy - (OPJ_UINT32)(pi->y % (OPJ_INT32)pi->dy))) { +- for (pi->x = pi->poc.tx0; pi->x < pi->poc.tx1; +- pi->x += (OPJ_INT32)(pi->dx - (OPJ_UINT32)(pi->x % (OPJ_INT32)pi->dx))) { ++ for (pi->y = (OPJ_UINT32)pi->poc.ty0; pi->y < (OPJ_UINT32)pi->poc.ty1; ++ pi->y += (pi->dy - (pi->y % pi->dy))) { ++ for (pi->x = (OPJ_UINT32)pi->poc.tx0; pi->x < (OPJ_UINT32)pi->poc.tx1; ++ pi->x += (pi->dx - (pi->x % pi->dx))) { + for (pi->resno = pi->poc.resno0; + pi->resno < opj_uint_min(pi->poc.resno1, comp->numresolutions); pi->resno++) { + OPJ_UINT32 levelno; +- OPJ_INT32 trx0, try0; +- OPJ_INT32 trx1, try1; ++ OPJ_UINT32 trx0, try0; ++ OPJ_UINT32 trx1, try1; + OPJ_UINT32 rpx, rpy; +- OPJ_INT32 prci, prcj; ++ OPJ_UINT32 prci, prcj; + res = &comp->resolutions[pi->resno]; + levelno = comp->numresolutions - 1 - pi->resno; + /* Avoids division by zero on id_000004,sig_06,src_000679,op_arith8,pos_49,val_-17 */ +@@ -669,10 +702,10 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) + (comp->dy << levelno) > INT_MAX) { + continue; + } +- trx0 = opj_int_ceildiv(pi->tx0, (OPJ_INT32)(comp->dx << levelno)); +- try0 = opj_int_ceildiv(pi->ty0, (OPJ_INT32)(comp->dy << levelno)); +- trx1 = opj_int_ceildiv(pi->tx1, (OPJ_INT32)(comp->dx << levelno)); +- try1 = opj_int_ceildiv(pi->ty1, (OPJ_INT32)(comp->dy << levelno)); ++ trx0 = opj_uint_ceildiv(pi->tx0, (comp->dx << levelno)); ++ try0 = opj_uint_ceildiv(pi->ty0, (comp->dy << levelno)); ++ trx1 = opj_uint_ceildiv(pi->tx1, (comp->dx << levelno)); ++ try1 = opj_uint_ceildiv(pi->ty1, (comp->dy << levelno)); + rpx = res->pdx + levelno; + rpy = res->pdy + levelno; + +@@ -686,12 +719,12 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) + } + + /* See ISO-15441. B.12.1.5 Component-position-resolution level-layer progression */ +- if (!((pi->y % (OPJ_INT32)(comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && +- ((try0 << levelno) % (1 << rpy))))) { ++ if (!((pi->y % (comp->dy << rpy) == 0) || ((pi->y == pi->ty0) && ++ ((try0 << levelno) % (1U << rpy))))) { + continue; + } +- if (!((pi->x % (OPJ_INT32)(comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && +- ((trx0 << levelno) % (1 << rpx))))) { ++ if (!((pi->x % (comp->dx << rpx) == 0) || ((pi->x == pi->tx0) && ++ ((trx0 << levelno) % (1U << rpx))))) { + continue; + } + +@@ -703,13 +736,13 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) + continue; + } + +- prci = opj_int_floordivpow2(opj_int_ceildiv(pi->x, +- (OPJ_INT32)(comp->dx << levelno)), (OPJ_INT32)res->pdx) +- - opj_int_floordivpow2(trx0, (OPJ_INT32)res->pdx); +- prcj = opj_int_floordivpow2(opj_int_ceildiv(pi->y, +- (OPJ_INT32)(comp->dy << levelno)), (OPJ_INT32)res->pdy) +- - opj_int_floordivpow2(try0, (OPJ_INT32)res->pdy); +- pi->precno = (OPJ_UINT32)(prci + prcj * (OPJ_INT32)res->pw); ++ prci = opj_uint_floordivpow2(opj_uint_ceildiv(pi->x, ++ (comp->dx << levelno)), res->pdx) ++ - opj_uint_floordivpow2(trx0, res->pdx); ++ prcj = opj_uint_floordivpow2(opj_uint_ceildiv(pi->y, ++ (comp->dy << levelno)), res->pdy) ++ - opj_uint_floordivpow2(try0, res->pdy); ++ pi->precno = (OPJ_UINT32)(prci + prcj * res->pw); + if (pi->precno >= res->pw * res->ph) { + return OPJ_FALSE; + } +@@ -717,7 +750,7 @@ static OPJ_BOOL opj_pi_next_cprl(opj_pi_iterator_t * pi) + index = pi->layno * pi->step_l + pi->resno * pi->step_r + pi->compno * + pi->step_c + pi->precno * pi->step_p; + if (index >= pi->include_size) { +- opj_pi_emit_error(pi, "Invalid access to pi->include"); ++ opj_event_msg(pi->manager, EVT_ERROR, "Invalid access to pi->include"); + return OPJ_FALSE; + } + if (!pi->include[index]) { +@@ -738,10 +771,10 @@ LABEL_SKIP: + static void opj_get_encoding_parameters(const opj_image_t *p_image, + const opj_cp_t *p_cp, + OPJ_UINT32 p_tileno, +- OPJ_INT32 * p_tx0, +- OPJ_INT32 * p_tx1, +- OPJ_INT32 * p_ty0, +- OPJ_INT32 * p_ty1, ++ OPJ_UINT32 * p_tx0, ++ OPJ_UINT32 * p_tx1, ++ OPJ_UINT32 * p_ty0, ++ OPJ_UINT32 * p_ty1, + OPJ_UINT32 * p_dx_min, + OPJ_UINT32 * p_dy_min, + OPJ_UINT32 * p_max_prec, +@@ -777,12 +810,12 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, + /* find extent of tile */ + l_tx0 = p_cp->tx0 + p * + p_cp->tdx; /* can't be greater than p_image->x1 so won't overflow */ +- *p_tx0 = (OPJ_INT32)opj_uint_max(l_tx0, p_image->x0); +- *p_tx1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); ++ *p_tx0 = opj_uint_max(l_tx0, p_image->x0); ++ *p_tx1 = opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); + l_ty0 = p_cp->ty0 + q * + p_cp->tdy; /* can't be greater than p_image->y1 so won't overflow */ +- *p_ty0 = (OPJ_INT32)opj_uint_max(l_ty0, p_image->y0); +- *p_ty1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); ++ *p_ty0 = opj_uint_max(l_ty0, p_image->y0); ++ *p_ty1 = opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); + + /* max precision is 0 (can only grow) */ + *p_max_prec = 0; +@@ -795,17 +828,17 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, + for (compno = 0; compno < p_image->numcomps; ++compno) { + /* arithmetic variables to calculate */ + OPJ_UINT32 l_level_no; +- OPJ_INT32 l_rx0, l_ry0, l_rx1, l_ry1; +- OPJ_INT32 l_px0, l_py0, l_px1, py1; ++ OPJ_UINT32 l_rx0, l_ry0, l_rx1, l_ry1; ++ OPJ_UINT32 l_px0, l_py0, l_px1, py1; + OPJ_UINT32 l_pdx, l_pdy; + OPJ_UINT32 l_pw, l_ph; + OPJ_UINT32 l_product; +- OPJ_INT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; ++ OPJ_UINT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; + +- l_tcx0 = opj_int_ceildiv(*p_tx0, (OPJ_INT32)l_img_comp->dx); +- l_tcy0 = opj_int_ceildiv(*p_ty0, (OPJ_INT32)l_img_comp->dy); +- l_tcx1 = opj_int_ceildiv(*p_tx1, (OPJ_INT32)l_img_comp->dx); +- l_tcy1 = opj_int_ceildiv(*p_ty1, (OPJ_INT32)l_img_comp->dy); ++ l_tcx0 = opj_uint_ceildiv(*p_tx0, l_img_comp->dx); ++ l_tcy0 = opj_uint_ceildiv(*p_ty0, l_img_comp->dy); ++ l_tcx1 = opj_uint_ceildiv(*p_tx1, l_img_comp->dx); ++ l_tcy1 = opj_uint_ceildiv(*p_ty1, l_img_comp->dy); + + if (l_tccp->numresolutions > *p_max_res) { + *p_max_res = l_tccp->numresolutions; +@@ -829,19 +862,19 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, + /* various calculations of extents */ + l_level_no = l_tccp->numresolutions - 1 - resno; + +- l_rx0 = opj_int_ceildivpow2(l_tcx0, (OPJ_INT32)l_level_no); +- l_ry0 = opj_int_ceildivpow2(l_tcy0, (OPJ_INT32)l_level_no); +- l_rx1 = opj_int_ceildivpow2(l_tcx1, (OPJ_INT32)l_level_no); +- l_ry1 = opj_int_ceildivpow2(l_tcy1, (OPJ_INT32)l_level_no); ++ l_rx0 = opj_uint_ceildivpow2(l_tcx0, l_level_no); ++ l_ry0 = opj_uint_ceildivpow2(l_tcy0, l_level_no); ++ l_rx1 = opj_uint_ceildivpow2(l_tcx1, l_level_no); ++ l_ry1 = opj_uint_ceildivpow2(l_tcy1, l_level_no); + +- l_px0 = opj_int_floordivpow2(l_rx0, (OPJ_INT32)l_pdx) << l_pdx; +- l_py0 = opj_int_floordivpow2(l_ry0, (OPJ_INT32)l_pdy) << l_pdy; +- l_px1 = opj_int_ceildivpow2(l_rx1, (OPJ_INT32)l_pdx) << l_pdx; ++ l_px0 = opj_uint_floordivpow2(l_rx0, l_pdx) << l_pdx; ++ l_py0 = opj_uint_floordivpow2(l_ry0, l_pdy) << l_pdy; ++ l_px1 = opj_uint_ceildivpow2(l_rx1, l_pdx) << l_pdx; + +- py1 = opj_int_ceildivpow2(l_ry1, (OPJ_INT32)l_pdy) << l_pdy; ++ py1 = opj_uint_ceildivpow2(l_ry1, l_pdy) << l_pdy; + +- l_pw = (l_rx0 == l_rx1) ? 0 : (OPJ_UINT32)((l_px1 - l_px0) >> l_pdx); +- l_ph = (l_ry0 == l_ry1) ? 0 : (OPJ_UINT32)((py1 - l_py0) >> l_pdy); ++ l_pw = (l_rx0 == l_rx1) ? 0 : ((l_px1 - l_px0) >> l_pdx); ++ l_ph = (l_ry0 == l_ry1) ? 0 : ((py1 - l_py0) >> l_pdy); + + l_product = l_pw * l_ph; + +@@ -859,10 +892,10 @@ static void opj_get_encoding_parameters(const opj_image_t *p_image, + static void opj_get_all_encoding_parameters(const opj_image_t *p_image, + const opj_cp_t *p_cp, + OPJ_UINT32 tileno, +- OPJ_INT32 * p_tx0, +- OPJ_INT32 * p_tx1, +- OPJ_INT32 * p_ty0, +- OPJ_INT32 * p_ty1, ++ OPJ_UINT32 * p_tx0, ++ OPJ_UINT32 * p_tx1, ++ OPJ_UINT32 * p_ty0, ++ OPJ_UINT32 * p_ty1, + OPJ_UINT32 * p_dx_min, + OPJ_UINT32 * p_dy_min, + OPJ_UINT32 * p_max_prec, +@@ -903,12 +936,12 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, + /* here calculation of tx0, tx1, ty0, ty1, maxprec, l_dx and l_dy */ + l_tx0 = p_cp->tx0 + p * + p_cp->tdx; /* can't be greater than p_image->x1 so won't overflow */ +- *p_tx0 = (OPJ_INT32)opj_uint_max(l_tx0, p_image->x0); +- *p_tx1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); ++ *p_tx0 = opj_uint_max(l_tx0, p_image->x0); ++ *p_tx1 = opj_uint_min(opj_uint_adds(l_tx0, p_cp->tdx), p_image->x1); + l_ty0 = p_cp->ty0 + q * + p_cp->tdy; /* can't be greater than p_image->y1 so won't overflow */ +- *p_ty0 = (OPJ_INT32)opj_uint_max(l_ty0, p_image->y0); +- *p_ty1 = (OPJ_INT32)opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); ++ *p_ty0 = opj_uint_max(l_ty0, p_image->y0); ++ *p_ty1 = opj_uint_min(opj_uint_adds(l_ty0, p_cp->tdy), p_image->y1); + + /* max precision and resolution is 0 (can only grow)*/ + *p_max_prec = 0; +@@ -921,18 +954,18 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, + for (compno = 0; compno < p_image->numcomps; ++compno) { + /* aritmetic variables to calculate*/ + OPJ_UINT32 l_level_no; +- OPJ_INT32 l_rx0, l_ry0, l_rx1, l_ry1; +- OPJ_INT32 l_px0, l_py0, l_px1, py1; ++ OPJ_UINT32 l_rx0, l_ry0, l_rx1, l_ry1; ++ OPJ_UINT32 l_px0, l_py0, l_px1, py1; + OPJ_UINT32 l_product; +- OPJ_INT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; ++ OPJ_UINT32 l_tcx0, l_tcy0, l_tcx1, l_tcy1; + OPJ_UINT32 l_pdx, l_pdy, l_pw, l_ph; + +- lResolutionPtr = p_resolutions[compno]; ++ lResolutionPtr = p_resolutions ? p_resolutions[compno] : NULL; + +- l_tcx0 = opj_int_ceildiv(*p_tx0, (OPJ_INT32)l_img_comp->dx); +- l_tcy0 = opj_int_ceildiv(*p_ty0, (OPJ_INT32)l_img_comp->dy); +- l_tcx1 = opj_int_ceildiv(*p_tx1, (OPJ_INT32)l_img_comp->dx); +- l_tcy1 = opj_int_ceildiv(*p_ty1, (OPJ_INT32)l_img_comp->dy); ++ l_tcx0 = opj_uint_ceildiv(*p_tx0, l_img_comp->dx); ++ l_tcy0 = opj_uint_ceildiv(*p_ty0, l_img_comp->dy); ++ l_tcx1 = opj_uint_ceildiv(*p_tx1, l_img_comp->dx); ++ l_tcy1 = opj_uint_ceildiv(*p_ty1, l_img_comp->dy); + + if (l_tccp->numresolutions > *p_max_res) { + *p_max_res = l_tccp->numresolutions; +@@ -948,33 +981,37 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, + /* precinct width and height*/ + l_pdx = l_tccp->prcw[resno]; + l_pdy = l_tccp->prch[resno]; +- *lResolutionPtr++ = l_pdx; +- *lResolutionPtr++ = l_pdy; ++ if (lResolutionPtr) { ++ *lResolutionPtr++ = l_pdx; ++ *lResolutionPtr++ = l_pdy; ++ } + if (l_pdx + l_level_no < 32 && + l_img_comp->dx <= UINT_MAX / (1u << (l_pdx + l_level_no))) { + l_dx = l_img_comp->dx * (1u << (l_pdx + l_level_no)); + /* take the minimum size for l_dx for each comp and resolution*/ +- *p_dx_min = (OPJ_UINT32)opj_int_min((OPJ_INT32) * p_dx_min, (OPJ_INT32)l_dx); ++ *p_dx_min = opj_uint_min(*p_dx_min, l_dx); + } + if (l_pdy + l_level_no < 32 && + l_img_comp->dy <= UINT_MAX / (1u << (l_pdy + l_level_no))) { + l_dy = l_img_comp->dy * (1u << (l_pdy + l_level_no)); +- *p_dy_min = (OPJ_UINT32)opj_int_min((OPJ_INT32) * p_dy_min, (OPJ_INT32)l_dy); ++ *p_dy_min = opj_uint_min(*p_dy_min, l_dy); + } + + /* various calculations of extents*/ +- l_rx0 = opj_int_ceildivpow2(l_tcx0, (OPJ_INT32)l_level_no); +- l_ry0 = opj_int_ceildivpow2(l_tcy0, (OPJ_INT32)l_level_no); +- l_rx1 = opj_int_ceildivpow2(l_tcx1, (OPJ_INT32)l_level_no); +- l_ry1 = opj_int_ceildivpow2(l_tcy1, (OPJ_INT32)l_level_no); +- l_px0 = opj_int_floordivpow2(l_rx0, (OPJ_INT32)l_pdx) << l_pdx; +- l_py0 = opj_int_floordivpow2(l_ry0, (OPJ_INT32)l_pdy) << l_pdy; +- l_px1 = opj_int_ceildivpow2(l_rx1, (OPJ_INT32)l_pdx) << l_pdx; +- py1 = opj_int_ceildivpow2(l_ry1, (OPJ_INT32)l_pdy) << l_pdy; +- l_pw = (l_rx0 == l_rx1) ? 0 : (OPJ_UINT32)((l_px1 - l_px0) >> l_pdx); +- l_ph = (l_ry0 == l_ry1) ? 0 : (OPJ_UINT32)((py1 - l_py0) >> l_pdy); +- *lResolutionPtr++ = l_pw; +- *lResolutionPtr++ = l_ph; ++ l_rx0 = opj_uint_ceildivpow2(l_tcx0, l_level_no); ++ l_ry0 = opj_uint_ceildivpow2(l_tcy0, l_level_no); ++ l_rx1 = opj_uint_ceildivpow2(l_tcx1, l_level_no); ++ l_ry1 = opj_uint_ceildivpow2(l_tcy1, l_level_no); ++ l_px0 = opj_uint_floordivpow2(l_rx0, l_pdx) << l_pdx; ++ l_py0 = opj_uint_floordivpow2(l_ry0, l_pdy) << l_pdy; ++ l_px1 = opj_uint_ceildivpow2(l_rx1, l_pdx) << l_pdx; ++ py1 = opj_uint_ceildivpow2(l_ry1, l_pdy) << l_pdy; ++ l_pw = (l_rx0 == l_rx1) ? 0 : ((l_px1 - l_px0) >> l_pdx); ++ l_ph = (l_ry0 == l_ry1) ? 0 : ((py1 - l_py0) >> l_pdy); ++ if (lResolutionPtr) { ++ *lResolutionPtr++ = l_pw; ++ *lResolutionPtr++ = l_ph; ++ } + l_product = l_pw * l_ph; + + /* update precision*/ +@@ -990,7 +1027,8 @@ static void opj_get_all_encoding_parameters(const opj_image_t *p_image, + + static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image, + const opj_cp_t *cp, +- OPJ_UINT32 tileno) ++ OPJ_UINT32 tileno, ++ opj_event_mgr_t* manager) + { + /* loop*/ + OPJ_UINT32 pino, compno; +@@ -1024,6 +1062,8 @@ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image, + l_current_pi = l_pi; + for (pino = 0; pino < l_poc_bound ; ++pino) { + ++ l_current_pi->manager = manager; ++ + l_current_pi->comps = (opj_pi_comp_t*) opj_calloc(image->numcomps, + sizeof(opj_pi_comp_t)); + if (! l_current_pi->comps) { +@@ -1054,10 +1094,10 @@ static opj_pi_iterator_t * opj_pi_create(const opj_image_t *image, + + static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, + OPJ_UINT32 p_tileno, +- OPJ_INT32 p_tx0, +- OPJ_INT32 p_tx1, +- OPJ_INT32 p_ty0, +- OPJ_INT32 p_ty1, ++ OPJ_UINT32 p_tx0, ++ OPJ_UINT32 p_tx1, ++ OPJ_UINT32 p_ty0, ++ OPJ_UINT32 p_ty1, + OPJ_UINT32 p_max_prec, + OPJ_UINT32 p_max_res, + OPJ_UINT32 p_dx_min, +@@ -1134,10 +1174,10 @@ static void opj_pi_update_encode_poc_and_final(opj_cp_t *p_cp, + static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, + OPJ_UINT32 p_num_comps, + OPJ_UINT32 p_tileno, +- OPJ_INT32 p_tx0, +- OPJ_INT32 p_tx1, +- OPJ_INT32 p_ty0, +- OPJ_INT32 p_ty1, ++ OPJ_UINT32 p_tx0, ++ OPJ_UINT32 p_tx1, ++ OPJ_UINT32 p_ty0, ++ OPJ_UINT32 p_ty1, + OPJ_UINT32 p_max_prec, + OPJ_UINT32 p_max_res, + OPJ_UINT32 p_dx_min, +@@ -1176,10 +1216,10 @@ static void opj_pi_update_encode_not_poc(opj_cp_t *p_cp, + l_current_poc->prg = l_tcp->prg; + l_current_poc->prcS = 0; + l_current_poc->prcE = p_max_prec; +- l_current_poc->txS = (OPJ_UINT32)p_tx0; +- l_current_poc->txE = (OPJ_UINT32)p_tx1; +- l_current_poc->tyS = (OPJ_UINT32)p_ty0; +- l_current_poc->tyE = (OPJ_UINT32)p_ty1; ++ l_current_poc->txS = p_tx0; ++ l_current_poc->txE = p_tx1; ++ l_current_poc->tyS = p_ty0; ++ l_current_poc->tyE = p_ty1; + l_current_poc->dx = p_dx_min; + l_current_poc->dy = p_dy_min; + ++ l_current_poc; +@@ -1361,7 +1401,8 @@ static OPJ_BOOL opj_pi_check_next_level(OPJ_INT32 pos, + */ + opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, + opj_cp_t *p_cp, +- OPJ_UINT32 p_tile_no) ++ OPJ_UINT32 p_tile_no, ++ opj_event_mgr_t* manager) + { + OPJ_UINT32 numcomps = p_image->numcomps; + +@@ -1376,7 +1417,7 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, + /* encoding prameters to set */ + OPJ_UINT32 l_max_res; + OPJ_UINT32 l_max_prec; +- OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1; ++ OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; + OPJ_UINT32 l_dx_min, l_dy_min; + OPJ_UINT32 l_bound; + OPJ_UINT32 l_step_p, l_step_c, l_step_r, l_step_l ; +@@ -1416,7 +1457,7 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, + } + + /* memory allocation for pi */ +- l_pi = opj_pi_create(p_image, p_cp, p_tile_no); ++ l_pi = opj_pi_create(p_image, p_cp, p_tile_no, manager); + if (!l_pi) { + opj_free(l_tmp_data); + opj_free(l_tmp_ptr); +@@ -1557,11 +1598,34 @@ opj_pi_iterator_t *opj_pi_create_decode(opj_image_t *p_image, + } + + ++OPJ_UINT32 opj_get_encoding_packet_count(const opj_image_t *p_image, ++ const opj_cp_t *p_cp, ++ OPJ_UINT32 p_tile_no) ++{ ++ OPJ_UINT32 l_max_res; ++ OPJ_UINT32 l_max_prec; ++ OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; ++ OPJ_UINT32 l_dx_min, l_dy_min; ++ ++ /* preconditions in debug*/ ++ assert(p_cp != 00); ++ assert(p_image != 00); ++ assert(p_tile_no < p_cp->tw * p_cp->th); ++ ++ /* get encoding parameters*/ ++ opj_get_all_encoding_parameters(p_image, p_cp, p_tile_no, &l_tx0, &l_tx1, ++ &l_ty0, &l_ty1, &l_dx_min, &l_dy_min, &l_max_prec, &l_max_res, NULL); ++ ++ return p_cp->tcps[p_tile_no].numlayers * l_max_prec * p_image->numcomps * ++ l_max_res; ++} ++ + + opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image, + opj_cp_t *p_cp, + OPJ_UINT32 p_tile_no, +- J2K_T2_MODE p_t2_mode) ++ J2K_T2_MODE p_t2_mode, ++ opj_event_mgr_t* manager) + { + OPJ_UINT32 numcomps = p_image->numcomps; + +@@ -1576,7 +1640,7 @@ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image, + /* encoding prameters to set*/ + OPJ_UINT32 l_max_res; + OPJ_UINT32 l_max_prec; +- OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1; ++ OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; + OPJ_UINT32 l_dx_min, l_dy_min; + OPJ_UINT32 l_bound; + OPJ_UINT32 l_step_p, l_step_c, l_step_r, l_step_l ; +@@ -1615,7 +1679,7 @@ opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *p_image, + } + + /* memory allocation for pi*/ +- l_pi = opj_pi_create(p_image, p_cp, p_tile_no); ++ l_pi = opj_pi_create(p_image, p_cp, p_tile_no, manager); + if (!l_pi) { + opj_free(l_tmp_data); + opj_free(l_tmp_ptr); +@@ -1770,7 +1834,8 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, + pi[pino].poc.prg = tcp->prg; + + if (!(cp->m_specific_param.m_enc.m_tp_on && ((!OPJ_IS_CINEMA(cp->rsiz) && +- (t2_mode == FINAL_PASS)) || OPJ_IS_CINEMA(cp->rsiz)))) { ++ !OPJ_IS_IMF(cp->rsiz) && ++ (t2_mode == FINAL_PASS)) || OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)))) { + pi[pino].poc.resno0 = tcp->resS; + pi[pino].poc.resno1 = tcp->resE; + pi[pino].poc.compno0 = tcp->compS; +@@ -1779,10 +1844,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, + pi[pino].poc.layno1 = tcp->layE; + pi[pino].poc.precno0 = tcp->prcS; + pi[pino].poc.precno1 = tcp->prcE; +- pi[pino].poc.tx0 = (OPJ_INT32)tcp->txS; +- pi[pino].poc.ty0 = (OPJ_INT32)tcp->tyS; +- pi[pino].poc.tx1 = (OPJ_INT32)tcp->txE; +- pi[pino].poc.ty1 = (OPJ_INT32)tcp->tyE; ++ pi[pino].poc.tx0 = tcp->txS; ++ pi[pino].poc.ty0 = tcp->tyS; ++ pi[pino].poc.tx1 = tcp->txE; ++ pi[pino].poc.ty1 = tcp->tyE; + } else { + for (i = tppos + 1; i < 4; i++) { + switch (prog[i]) { +@@ -1806,10 +1871,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, + pi[pino].poc.precno1 = tcp->prcE; + break; + default: +- pi[pino].poc.tx0 = (OPJ_INT32)tcp->txS; +- pi[pino].poc.ty0 = (OPJ_INT32)tcp->tyS; +- pi[pino].poc.tx1 = (OPJ_INT32)tcp->txE; +- pi[pino].poc.ty1 = (OPJ_INT32)tcp->tyE; ++ pi[pino].poc.tx0 = tcp->txS; ++ pi[pino].poc.ty0 = tcp->tyS; ++ pi[pino].poc.tx1 = tcp->txE; ++ pi[pino].poc.ty1 = tcp->tyE; + break; + } + break; +@@ -1849,10 +1914,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, + default: + tcp->tx0_t = tcp->txS; + tcp->ty0_t = tcp->tyS; +- pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t; +- pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx)); +- pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t; +- pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy)); ++ pi[pino].poc.tx0 = tcp->tx0_t; ++ pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx); ++ pi[pino].poc.ty0 = tcp->ty0_t; ++ pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy); + tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1; + tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1; + break; +@@ -1884,10 +1949,10 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, + pi[pino].poc.precno1 = tcp->prc_t; + break; + default: +- pi[pino].poc.tx0 = (OPJ_INT32)(tcp->tx0_t - tcp->dx - (tcp->tx0_t % tcp->dx)); +- pi[pino].poc.tx1 = (OPJ_INT32)tcp->tx0_t ; +- pi[pino].poc.ty0 = (OPJ_INT32)(tcp->ty0_t - tcp->dy - (tcp->ty0_t % tcp->dy)); +- pi[pino].poc.ty1 = (OPJ_INT32)tcp->ty0_t ; ++ pi[pino].poc.tx0 = tcp->tx0_t - tcp->dx - (tcp->tx0_t % tcp->dx); ++ pi[pino].poc.tx1 = tcp->tx0_t ; ++ pi[pino].poc.ty0 = tcp->ty0_t - tcp->dy - (tcp->ty0_t % tcp->dy); ++ pi[pino].poc.ty1 = tcp->ty0_t ; + break; + } + break; +@@ -1974,8 +2039,8 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, + if (tcp->ty0_t >= tcp->tyE) { + if (opj_pi_check_next_level(i - 1, cp, tileno, pino, prog)) { + tcp->ty0_t = tcp->tyS; +- pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t; +- pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy)); ++ pi[pino].poc.ty0 = tcp->ty0_t; ++ pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy); + tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1; + incr_top = 1; + resetX = 1; +@@ -1984,21 +2049,21 @@ void opj_pi_create_encode(opj_pi_iterator_t *pi, + resetX = 0; + } + } else { +- pi[pino].poc.ty0 = (OPJ_INT32)tcp->ty0_t; +- pi[pino].poc.ty1 = (OPJ_INT32)(tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy)); ++ pi[pino].poc.ty0 = tcp->ty0_t; ++ pi[pino].poc.ty1 = tcp->ty0_t + tcp->dy - (tcp->ty0_t % tcp->dy); + tcp->ty0_t = (OPJ_UINT32)pi[pino].poc.ty1; + incr_top = 0; + resetX = 1; + } + if (resetX == 1) { + tcp->tx0_t = tcp->txS; +- pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t; +- pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx)); ++ pi[pino].poc.tx0 = tcp->tx0_t; ++ pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx); + tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1; + } + } else { +- pi[pino].poc.tx0 = (OPJ_INT32)tcp->tx0_t; +- pi[pino].poc.tx1 = (OPJ_INT32)(tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx)); ++ pi[pino].poc.tx0 = tcp->tx0_t; ++ pi[pino].poc.tx1 = tcp->tx0_t + tcp->dx - (tcp->tx0_t % tcp->dx); + tcp->tx0_t = (OPJ_UINT32)pi[pino].poc.tx1; + incr_top = 0; + } +@@ -2051,7 +2116,7 @@ void opj_pi_update_encoding_parameters(const opj_image_t *p_image, + /* encoding parameters to set */ + OPJ_UINT32 l_max_res; + OPJ_UINT32 l_max_prec; +- OPJ_INT32 l_tx0, l_tx1, l_ty0, l_ty1; ++ OPJ_UINT32 l_tx0, l_tx1, l_ty0, l_ty1; + OPJ_UINT32 l_dx_min, l_dy_min; + + /* pointers */ +diff --git a/third_party/libopenjpeg20/pi.h b/third_party/libopenjpeg20/pi.h +index 8c0dc25c19dd962bf9e76994609a37ee41fc0cca..0320523b7693376d6e57d417ba86da358bcc7747 100644 +--- a/third_party/libopenjpeg20/pi.h ++++ b/third_party/libopenjpeg20/pi.h +@@ -102,11 +102,13 @@ typedef struct opj_pi_iterator { + /** Components*/ + opj_pi_comp_t *comps; + /** FIXME DOC*/ +- OPJ_INT32 tx0, ty0, tx1, ty1; ++ OPJ_UINT32 tx0, ty0, tx1, ty1; + /** FIXME DOC*/ +- OPJ_INT32 x, y; ++ OPJ_UINT32 x, y; + /** FIXME DOC*/ + OPJ_UINT32 dx, dy; ++ /** event manager */ ++ opj_event_mgr_t* manager; + } opj_pi_iterator_t; + + /** @name Exported functions */ +@@ -119,13 +121,15 @@ typedef struct opj_pi_iterator { + * @param cp the coding parameters. + * @param tileno index of the tile being encoded. + * @param t2_mode the type of pass for generating the packet iterator ++ * @param manager Event manager + * + * @return a list of packet iterator that points to the first packet of the tile (not true). + */ + opj_pi_iterator_t *opj_pi_initialise_encode(const opj_image_t *image, + opj_cp_t *cp, + OPJ_UINT32 tileno, +- J2K_T2_MODE t2_mode); ++ J2K_T2_MODE t2_mode, ++ opj_event_mgr_t* manager); + + /** + * Updates the encoding parameters of the codec. +@@ -161,12 +165,14 @@ Create a packet iterator for Decoder + @param image Raw image for which the packets will be listed + @param cp Coding parameters + @param tileno Number that identifies the tile for which to list the packets ++@param manager Event manager + @return Returns a packet iterator that points to the first packet of the tile + @see opj_pi_destroy + */ + opj_pi_iterator_t *opj_pi_create_decode(opj_image_t * image, + opj_cp_t * cp, +- OPJ_UINT32 tileno); ++ OPJ_UINT32 tileno, ++ opj_event_mgr_t* manager); + /** + * Destroys a packet iterator array. + * +@@ -182,6 +188,17 @@ Modify the packet iterator to point to the next packet + @return Returns false if pi pointed to the last packet or else returns true + */ + OPJ_BOOL opj_pi_next(opj_pi_iterator_t * pi); ++ ++/** ++ * Return the number of packets in the tile. ++ * @param image the image being encoded. ++ * @param cp Coding parameters ++ * @param tileno Number that identifies the tile. ++ */ ++OPJ_UINT32 opj_get_encoding_packet_count(const opj_image_t *p_image, ++ const opj_cp_t *p_cp, ++ OPJ_UINT32 p_tile_no); ++ + /* ----------------------------------------------------------------------- */ + /*@}*/ + +diff --git a/third_party/libopenjpeg20/t1.c b/third_party/libopenjpeg20/t1.c +index f6f7671190cd5bc5a40a8ccac9b349abc0489e43..1bea54b0d518d04cb76c05a274ec040d0a2eeb9d 100644 +--- a/third_party/libopenjpeg20/t1.c ++++ b/third_party/libopenjpeg20/t1.c +@@ -61,6 +61,13 @@ + + #define opj_t1_setcurctx(curctx, ctxno) curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)] + ++/* Macros to deal with signed integer with just MSB bit set for ++ * negative values (smr = signed magnitude representation) */ ++#define opj_smr_abs(x) (((OPJ_UINT32)(x)) & 0x7FFFFFFFU) ++#define opj_smr_sign(x) (((OPJ_UINT32)(x)) >> 31) ++#define opj_to_smr(x) ((x) >= 0 ? (OPJ_UINT32)(x) : ((OPJ_UINT32)(-x) | 0x80000000U)) ++ ++ + /** @name Local static functions */ + /*@{*/ + +@@ -177,18 +184,18 @@ static OPJ_FLOAT64 opj_t1_getwmsedec( + const OPJ_FLOAT64 * mct_norms, + OPJ_UINT32 mct_numcomps); + +-static void opj_t1_encode_cblk(opj_t1_t *t1, +- opj_tcd_cblk_enc_t* cblk, +- OPJ_UINT32 orient, +- OPJ_UINT32 compno, +- OPJ_UINT32 level, +- OPJ_UINT32 qmfbid, +- OPJ_FLOAT64 stepsize, +- OPJ_UINT32 cblksty, +- OPJ_UINT32 numcomps, +- opj_tcd_tile_t * tile, +- const OPJ_FLOAT64 * mct_norms, +- OPJ_UINT32 mct_numcomps); ++/** Return "cumwmsedec" that should be used to increase tile->distotile */ ++static double opj_t1_encode_cblk(opj_t1_t *t1, ++ opj_tcd_cblk_enc_t* cblk, ++ OPJ_UINT32 orient, ++ OPJ_UINT32 compno, ++ OPJ_UINT32 level, ++ OPJ_UINT32 qmfbid, ++ OPJ_FLOAT64 stepsize, ++ OPJ_UINT32 cblksty, ++ OPJ_UINT32 numcomps, ++ const OPJ_FLOAT64 * mct_norms, ++ OPJ_UINT32 mct_numcomps); + + /** + Decode 1 code-block +@@ -329,61 +336,53 @@ static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci, + /** + Encode significant pass + */ +-static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, +- opj_flag_t *flagsp, +- OPJ_INT32 *datap, +- OPJ_INT32 bpno, +- OPJ_INT32 one, +- OPJ_INT32 *nmsedec, +- OPJ_BYTE type, +- OPJ_UINT32 ci, +- OPJ_UINT32 vsc) +-{ +- OPJ_UINT32 v; +- +- opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ +- +- OPJ_UINT32 const flags = *flagsp; +- +- if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && +- (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { +- OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); +- v = (opj_int_abs(*datap) & one) ? 1 : 0; +-#ifdef DEBUG_ENC_SIG +- fprintf(stderr, " ctxt1=%d\n", ctxt1); +-#endif +- opj_mqc_setcurctx(mqc, ctxt1); +- if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ +- opj_mqc_bypass_enc(mqc, v); +- } else { +- opj_mqc_encode(mqc, v); +- } +- if (v) { +- OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( +- *flagsp, +- flagsp[-1], flagsp[1], +- ci); +- OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); +- v = *datap < 0 ? 1U : 0U; +- *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), +- (OPJ_UINT32)bpno); +-#ifdef DEBUG_ENC_SIG +- fprintf(stderr, " ctxt2=%d\n", ctxt2); +-#endif +- opj_mqc_setcurctx(mqc, ctxt2); +- if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ +- opj_mqc_bypass_enc(mqc, v); +- } else { +- OPJ_UINT32 spb = opj_t1_getspb(lu); +-#ifdef DEBUG_ENC_SIG +- fprintf(stderr, " spb=%d\n", spb); +-#endif +- opj_mqc_encode(mqc, v ^ spb); +- } +- opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); +- } +- *flagsp |= T1_PI_THIS << (ci * 3U); +- } ++#define opj_t1_enc_sigpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, type, ciIn, vscIn) \ ++{ \ ++ OPJ_UINT32 v; \ ++ const OPJ_UINT32 ci = (ciIn); \ ++ const OPJ_UINT32 vsc = (vscIn); \ ++ const OPJ_INT32* l_datap = (datapIn); \ ++ opj_flag_t* flagsp = (flagspIn); \ ++ OPJ_UINT32 const flags = *flagsp; \ ++ if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \ ++ (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \ ++ OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \ ++ v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \ ++/* #ifdef DEBUG_ENC_SIG */ \ ++/* fprintf(stderr, " ctxt1=%d\n", ctxt1); */ \ ++/* #endif */ \ ++ opj_t1_setcurctx(curctx, ctxt1); \ ++ if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \ ++ opj_mqc_bypass_enc_macro(mqc, c, ct, v); \ ++ } else { \ ++ opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \ ++ } \ ++ if (v) { \ ++ OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \ ++ *flagsp, \ ++ flagsp[-1], flagsp[1], \ ++ ci); \ ++ OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \ ++ v = opj_smr_sign(*l_datap); \ ++ *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \ ++ (OPJ_UINT32)bpno); \ ++/* #ifdef DEBUG_ENC_SIG */ \ ++/* fprintf(stderr, " ctxt2=%d\n", ctxt2); */ \ ++/* #endif */ \ ++ opj_t1_setcurctx(curctx, ctxt2); \ ++ if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \ ++ opj_mqc_bypass_enc_macro(mqc, c, ct, v); \ ++ } else { \ ++ OPJ_UINT32 spb = opj_t1_getspb(lu); \ ++/* #ifdef DEBUG_ENC_SIG */ \ ++/* fprintf(stderr, " spb=%d\n", spb); */ \ ++/* #endif */ \ ++ opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \ ++ } \ ++ opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc); \ ++ } \ ++ *flagsp |= T1_PI_THIS << (ci * 3U); \ ++ } \ + } + + static INLINE void opj_t1_dec_sigpass_step_raw( +@@ -464,63 +463,64 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, + OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS); + opj_flag_t* f = &T1_FLAGS(0, 0); + OPJ_UINT32 const extra = 2; ++ opj_mqc_t* mqc = &(t1->mqc); ++ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); ++ const OPJ_INT32* datap = t1->data; + + *nmsedec = 0; + #ifdef DEBUG_ENC_SIG + fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno); + #endif +- for (k = 0; k < (t1->h & ~3U); k += 4) { ++ for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) { ++ const OPJ_UINT32 w = t1->w; + #ifdef DEBUG_ENC_SIG + fprintf(stderr, " k=%d\n", k); + #endif +- for (i = 0; i < t1->w; ++i) { ++ for (i = 0; i < w; ++i, ++f, datap += 4) { + #ifdef DEBUG_ENC_SIG + fprintf(stderr, " i=%d\n", i); + #endif + if (*f == 0U) { + /* Nothing to do for any of the 4 data points */ +- f++; + continue; + } +- opj_t1_enc_sigpass_step( +- t1, ++ opj_t1_enc_sigpass_step_macro( ++ mqc, curctx, a, c, ct, + f, +- &t1->data[((k + 0) * t1->data_stride) + i], ++ &datap[0], + bpno, + one, + nmsedec, + type, + 0, cblksty & J2K_CCP_CBLKSTY_VSC); +- opj_t1_enc_sigpass_step( +- t1, ++ opj_t1_enc_sigpass_step_macro( ++ mqc, curctx, a, c, ct, + f, +- &t1->data[((k + 1) * t1->data_stride) + i], ++ &datap[1], + bpno, + one, + nmsedec, + type, + 1, 0); +- opj_t1_enc_sigpass_step( +- t1, ++ opj_t1_enc_sigpass_step_macro( ++ mqc, curctx, a, c, ct, + f, +- &t1->data[((k + 2) * t1->data_stride) + i], ++ &datap[2], + bpno, + one, + nmsedec, + type, + 2, 0); +- opj_t1_enc_sigpass_step( +- t1, ++ opj_t1_enc_sigpass_step_macro( ++ mqc, curctx, a, c, ct, + f, +- &t1->data[((k + 3) * t1->data_stride) + i], ++ &datap[3], + bpno, + one, + nmsedec, + type, + 3, 0); +- ++f; + } +- f += extra; + } + + if (k < t1->h) { +@@ -528,20 +528,20 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, + #ifdef DEBUG_ENC_SIG + fprintf(stderr, " k=%d\n", k); + #endif +- for (i = 0; i < t1->w; ++i) { ++ for (i = 0; i < t1->w; ++i, ++f) { + #ifdef DEBUG_ENC_SIG + fprintf(stderr, " i=%d\n", i); + #endif + if (*f == 0U) { + /* Nothing to do for any of the 4 data points */ +- f++; ++ datap += (t1->h - k); + continue; + } +- for (j = k; j < t1->h; ++j) { +- opj_t1_enc_sigpass_step( +- t1, ++ for (j = k; j < t1->h; ++j, ++datap) { ++ opj_t1_enc_sigpass_step_macro( ++ mqc, curctx, a, c, ct, + f, +- &t1->data[(j * t1->data_stride) + i], ++ &datap[0], + bpno, + one, + nmsedec, +@@ -549,9 +549,10 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1, + j - k, + (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0)); + } +- ++f; + } + } ++ ++ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); + } + + static void opj_t1_dec_sigpass_raw( +@@ -626,7 +627,7 @@ static void opj_t1_dec_sigpass_raw( + register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \ + const OPJ_UINT32 l_w = w; \ + opj_mqc_t* mqc = &(t1->mqc); \ +- DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ ++ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ + register OPJ_UINT32 v; \ + one = 1 << bpno; \ + half = one >> 1; \ +@@ -651,7 +652,7 @@ static void opj_t1_dec_sigpass_raw( + } \ + } \ + } \ +- UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ ++ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ + if( k < h ) { \ + for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ + for (j = 0; j < h - k; ++j) { \ +@@ -715,38 +716,27 @@ static void opj_t1_dec_sigpass_mqc( + /** + Encode refinement pass step + */ +-static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, +- opj_flag_t *flagsp, +- OPJ_INT32 *datap, +- OPJ_INT32 bpno, +- OPJ_INT32 one, +- OPJ_INT32 *nmsedec, +- OPJ_BYTE type, +- OPJ_UINT32 ci) +-{ +- OPJ_UINT32 v; +- +- opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ +- +- OPJ_UINT32 const shift_flags = +- (*flagsp >> (ci * 3U)); +- +- if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) { +- OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); +- *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap), +- (OPJ_UINT32)bpno); +- v = (opj_int_abs(*datap) & one) ? 1 : 0; +-#ifdef DEBUG_ENC_REF +- fprintf(stderr, " ctxt=%d\n", ctxt); +-#endif +- opj_mqc_setcurctx(mqc, ctxt); +- if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ +- opj_mqc_bypass_enc(mqc, v); +- } else { +- opj_mqc_encode(mqc, v); +- } +- *flagsp |= T1_MU_THIS << (ci * 3U); +- } ++#define opj_t1_enc_refpass_step_macro(mqc, curctx, a, c, ct, flags, flagsUpdated, datap, bpno, one, nmsedec, type, ci) \ ++{\ ++ OPJ_UINT32 v; \ ++ if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == (T1_SIGMA_THIS << ((ci) * 3U))) { \ ++ const OPJ_UINT32 shift_flags = (flags >> ((ci) * 3U)); \ ++ OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); \ ++ OPJ_UINT32 abs_data = opj_smr_abs(*datap); \ ++ *nmsedec += opj_t1_getnmsedec_ref(abs_data, \ ++ (OPJ_UINT32)bpno); \ ++ v = ((OPJ_INT32)abs_data & one) ? 1 : 0; \ ++/* #ifdef DEBUG_ENC_REF */ \ ++/* fprintf(stderr, " ctxt=%d\n", ctxt); */ \ ++/* #endif */ \ ++ opj_t1_setcurctx(curctx, ctxt); \ ++ if (type == T1_TYPE_RAW) { /* BYPASS/LAZY MODE */ \ ++ opj_mqc_bypass_enc_macro(mqc, c, ct, v); \ ++ } else { \ ++ opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \ ++ } \ ++ flagsUpdated |= T1_MU_THIS << ((ci) * 3U); \ ++ } \ + } + + +@@ -807,100 +797,104 @@ static void opj_t1_enc_refpass( + const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); + opj_flag_t* f = &T1_FLAGS(0, 0); + const OPJ_UINT32 extra = 2U; ++ opj_mqc_t* mqc = &(t1->mqc); ++ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); ++ const OPJ_INT32* datap = t1->data; + + *nmsedec = 0; + #ifdef DEBUG_ENC_REF + fprintf(stderr, "enc_refpass: bpno=%d\n", bpno); + #endif +- for (k = 0; k < (t1->h & ~3U); k += 4) { ++ for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) { + #ifdef DEBUG_ENC_REF + fprintf(stderr, " k=%d\n", k); + #endif +- for (i = 0; i < t1->w; ++i) { ++ for (i = 0; i < t1->w; ++i, f++, datap += 4) { ++ const OPJ_UINT32 flags = *f; ++ OPJ_UINT32 flagsUpdated = flags; + #ifdef DEBUG_ENC_REF + fprintf(stderr, " i=%d\n", i); + #endif +- if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { ++ if ((flags & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { + /* none significant */ +- f++; + continue; + } +- if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) == ++ if ((flags & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) == + (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) { + /* all processed by sigpass */ +- f++; + continue; + } + +- opj_t1_enc_refpass_step( +- t1, +- f, +- &t1->data[((k + 0) * t1->data_stride) + i], ++ opj_t1_enc_refpass_step_macro( ++ mqc, curctx, a, c, ct, ++ flags, flagsUpdated, ++ &datap[0], + bpno, + one, + nmsedec, + type, + 0); +- opj_t1_enc_refpass_step( +- t1, +- f, +- &t1->data[((k + 1) * t1->data_stride) + i], ++ opj_t1_enc_refpass_step_macro( ++ mqc, curctx, a, c, ct, ++ flags, flagsUpdated, ++ &datap[1], + bpno, + one, + nmsedec, + type, + 1); +- opj_t1_enc_refpass_step( +- t1, +- f, +- &t1->data[((k + 2) * t1->data_stride) + i], ++ opj_t1_enc_refpass_step_macro( ++ mqc, curctx, a, c, ct, ++ flags, flagsUpdated, ++ &datap[2], + bpno, + one, + nmsedec, + type, + 2); +- opj_t1_enc_refpass_step( +- t1, +- f, +- &t1->data[((k + 3) * t1->data_stride) + i], ++ opj_t1_enc_refpass_step_macro( ++ mqc, curctx, a, c, ct, ++ flags, flagsUpdated, ++ &datap[3], + bpno, + one, + nmsedec, + type, + 3); +- ++f; ++ *f = flagsUpdated; + } +- f += extra; + } + + if (k < t1->h) { + OPJ_UINT32 j; ++ const OPJ_UINT32 remaining_lines = t1->h - k; + #ifdef DEBUG_ENC_REF + fprintf(stderr, " k=%d\n", k); + #endif +- for (i = 0; i < t1->w; ++i) { ++ for (i = 0; i < t1->w; ++i, ++f) { + #ifdef DEBUG_ENC_REF + fprintf(stderr, " i=%d\n", i); + #endif + if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) { + /* none significant */ +- f++; ++ datap += remaining_lines; + continue; + } +- for (j = k; j < t1->h; ++j) { +- opj_t1_enc_refpass_step( +- t1, +- f, +- &t1->data[(j * t1->data_stride) + i], ++ for (j = 0; j < remaining_lines; ++j, datap ++) { ++ opj_t1_enc_refpass_step_macro( ++ mqc, curctx, a, c, ct, ++ *f, *f, ++ &datap[0], + bpno, + one, + nmsedec, + type, +- j - k); ++ j); + } +- ++f; + } + } ++ ++ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); + } + + +@@ -968,7 +962,7 @@ static void opj_t1_dec_refpass_raw( + register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ + const OPJ_UINT32 l_w = w; \ + opj_mqc_t* mqc = &(t1->mqc); \ +- DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ ++ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ + register OPJ_UINT32 v; \ + one = 1 << bpno; \ + poshalf = one >> 1; \ +@@ -992,7 +986,7 @@ static void opj_t1_dec_refpass_raw( + } \ + } \ + } \ +- UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ ++ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ + if( k < h ) { \ + for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \ + for (j = 0; j < h - k; ++j) { \ +@@ -1030,86 +1024,71 @@ static void opj_t1_dec_refpass_mqc( + /** + Encode clean-up pass step + */ +-static void opj_t1_enc_clnpass_step( +- opj_t1_t *t1, +- opj_flag_t *flagsp, +- OPJ_INT32 *datap, +- OPJ_INT32 bpno, +- OPJ_INT32 one, +- OPJ_INT32 *nmsedec, +- OPJ_UINT32 agg, +- OPJ_UINT32 runlen, +- OPJ_UINT32 lim, +- OPJ_UINT32 cblksty) +-{ +- OPJ_UINT32 v; +- OPJ_UINT32 ci; +- opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ +- +- const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | +- T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); +- +- if ((*flagsp & check) == check) { +- if (runlen == 0) { +- *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); +- } else if (runlen == 1) { +- *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); +- } else if (runlen == 2) { +- *flagsp &= ~(T1_PI_2 | T1_PI_3); +- } else if (runlen == 3) { +- *flagsp &= ~(T1_PI_3); +- } +- return; +- } +- +- for (ci = runlen; ci < lim; ++ci) { +- OPJ_UINT32 vsc; +- opj_flag_t flags; +- OPJ_UINT32 ctxt1; +- +- flags = *flagsp; +- +- if ((agg != 0) && (ci == runlen)) { +- goto LABEL_PARTIAL; +- } +- +- if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { +- ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); +-#ifdef DEBUG_ENC_CLN +- printf(" ctxt1=%d\n", ctxt1); +-#endif +- opj_mqc_setcurctx(mqc, ctxt1); +- v = (opj_int_abs(*datap) & one) ? 1 : 0; +- opj_mqc_encode(mqc, v); +- if (v) { +- OPJ_UINT32 ctxt2, spb; +- OPJ_UINT32 lu; +-LABEL_PARTIAL: +- lu = opj_t1_getctxtno_sc_or_spb_index( +- *flagsp, +- flagsp[-1], flagsp[1], +- ci); +- *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap), +- (OPJ_UINT32)bpno); +- ctxt2 = opj_t1_getctxno_sc(lu); +-#ifdef DEBUG_ENC_CLN +- printf(" ctxt2=%d\n", ctxt2); +-#endif +- opj_mqc_setcurctx(mqc, ctxt2); +- +- v = *datap < 0 ? 1U : 0U; +- spb = opj_t1_getspb(lu); +-#ifdef DEBUG_ENC_CLN +- printf(" spb=%d\n", spb); +-#endif +- opj_mqc_encode(mqc, v ^ spb); +- vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; +- opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); +- } +- } +- *flagsp &= ~(T1_PI_THIS << (3U * ci)); +- datap += t1->data_stride; +- } ++#define opj_t1_enc_clnpass_step_macro(mqc, curctx, a, c, ct, flagspIn, datapIn, bpno, one, nmsedec, agg, runlen, lim, cblksty) \ ++{ \ ++ OPJ_UINT32 v; \ ++ OPJ_UINT32 ci; \ ++ opj_flag_t* const flagsp = (flagspIn); \ ++ const OPJ_INT32* l_datap = (datapIn); \ ++ const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 | \ ++ T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ ++ \ ++ if ((*flagsp & check) == check) { \ ++ if (runlen == 0) { \ ++ *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ ++ } else if (runlen == 1) { \ ++ *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3); \ ++ } else if (runlen == 2) { \ ++ *flagsp &= ~(T1_PI_2 | T1_PI_3); \ ++ } else if (runlen == 3) { \ ++ *flagsp &= ~(T1_PI_3); \ ++ } \ ++ } \ ++ else \ ++ for (ci = runlen; ci < lim; ++ci) { \ ++ OPJ_BOOL goto_PARTIAL = OPJ_FALSE; \ ++ if ((agg != 0) && (ci == runlen)) { \ ++ goto_PARTIAL = OPJ_TRUE; \ ++ } \ ++ else if (!(*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { \ ++ OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, *flagsp >> (ci * 3U)); \ ++/* #ifdef DEBUG_ENC_CLN */ \ ++/* printf(" ctxt1=%d\n", ctxt1); */ \ ++/* #endif */ \ ++ opj_t1_setcurctx(curctx, ctxt1); \ ++ v = (opj_smr_abs(*l_datap) & (OPJ_UINT32)one) ? 1 : 0; \ ++ opj_mqc_encode_macro(mqc, curctx, a, c, ct, v); \ ++ if (v) { \ ++ goto_PARTIAL = OPJ_TRUE; \ ++ } \ ++ } \ ++ if( goto_PARTIAL ) { \ ++ OPJ_UINT32 vsc; \ ++ OPJ_UINT32 ctxt2, spb; \ ++ OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \ ++ *flagsp, \ ++ flagsp[-1], flagsp[1], \ ++ ci); \ ++ *nmsedec += opj_t1_getnmsedec_sig(opj_smr_abs(*l_datap), \ ++ (OPJ_UINT32)bpno); \ ++ ctxt2 = opj_t1_getctxno_sc(lu); \ ++/* #ifdef DEBUG_ENC_CLN */ \ ++/* printf(" ctxt2=%d\n", ctxt2); */ \ ++/* #endif */ \ ++ opj_t1_setcurctx(curctx, ctxt2); \ ++ \ ++ v = opj_smr_sign(*l_datap); \ ++ spb = opj_t1_getspb(lu); \ ++/* #ifdef DEBUG_ENC_CLN */ \ ++/* printf(" spb=%d\n", spb); */\ ++/* #endif */ \ ++ opj_mqc_encode_macro(mqc, curctx, a, c, ct, v ^ spb); \ ++ vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0; \ ++ opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc); \ ++ } \ ++ *flagsp &= ~(T1_PI_THIS << (3U * ci)); \ ++ l_datap ++; \ ++ } \ + } + + #define opj_t1_dec_clnpass_step_macro(check_flags, partial, \ +@@ -1165,47 +1144,50 @@ static void opj_t1_enc_clnpass( + { + OPJ_UINT32 i, k; + const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS); +- OPJ_UINT32 agg, runlen; +- +- opj_mqc_t *mqc = &(t1->mqc); /* MQC component */ ++ opj_mqc_t* mqc = &(t1->mqc); ++ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); ++ const OPJ_INT32* datap = t1->data; ++ opj_flag_t *f = &T1_FLAGS(0, 0); ++ const OPJ_UINT32 extra = 2U; + + *nmsedec = 0; + #ifdef DEBUG_ENC_CLN + printf("enc_clnpass: bpno=%d\n", bpno); + #endif +- for (k = 0; k < (t1->h & ~3U); k += 4) { ++ for (k = 0; k < (t1->h & ~3U); k += 4, f += extra) { + #ifdef DEBUG_ENC_CLN + printf(" k=%d\n", k); + #endif +- for (i = 0; i < t1->w; ++i) { ++ for (i = 0; i < t1->w; ++i, f++) { ++ OPJ_UINT32 agg, runlen; + #ifdef DEBUG_ENC_CLN + printf(" i=%d\n", i); + #endif +- agg = !(T1_FLAGS(i, k)); ++ agg = !*f; + #ifdef DEBUG_ENC_CLN + printf(" agg=%d\n", agg); + #endif + if (agg) { +- for (runlen = 0; runlen < 4; ++runlen) { +- if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) { ++ for (runlen = 0; runlen < 4; ++runlen, ++datap) { ++ if (opj_smr_abs(*datap) & (OPJ_UINT32)one) { + break; + } + } +- opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); +- opj_mqc_encode(mqc, runlen != 4); ++ opj_t1_setcurctx(curctx, T1_CTXNO_AGG); ++ opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen != 4); + if (runlen == 4) { + continue; + } +- opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); +- opj_mqc_encode(mqc, runlen >> 1); +- opj_mqc_encode(mqc, runlen & 1); ++ opj_t1_setcurctx(curctx, T1_CTXNO_UNI); ++ opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen >> 1); ++ opj_mqc_encode_macro(mqc, curctx, a, c, ct, runlen & 1); + } else { + runlen = 0; + } +- opj_t1_enc_clnpass_step( +- t1, +- &T1_FLAGS(i, k), +- &t1->data[((k + runlen) * t1->data_stride) + i], ++ opj_t1_enc_clnpass_step_macro( ++ mqc, curctx, a, c, ct, ++ f, ++ datap, + bpno, + one, + nmsedec, +@@ -1213,23 +1195,24 @@ static void opj_t1_enc_clnpass( + runlen, + 4U, + cblksty); ++ datap += 4 - runlen; + } + } + if (k < t1->h) { +- agg = 0; +- runlen = 0; ++ const OPJ_UINT32 agg = 0; ++ const OPJ_UINT32 runlen = 0; + #ifdef DEBUG_ENC_CLN + printf(" k=%d\n", k); + #endif +- for (i = 0; i < t1->w; ++i) { ++ for (i = 0; i < t1->w; ++i, f++) { + #ifdef DEBUG_ENC_CLN + printf(" i=%d\n", i); + printf(" agg=%d\n", agg); + #endif +- opj_t1_enc_clnpass_step( +- t1, +- &T1_FLAGS(i, k), +- &t1->data[((k + runlen) * t1->data_stride) + i], ++ opj_t1_enc_clnpass_step_macro( ++ mqc, curctx, a, c, ct, ++ f, ++ datap, + bpno, + one, + nmsedec, +@@ -1237,8 +1220,11 @@ static void opj_t1_enc_clnpass( + runlen, + t1->h - k, + cblksty); ++ datap += t1->h - k; + } + } ++ ++ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); + } + + #define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \ +@@ -1250,7 +1236,7 @@ static void opj_t1_enc_clnpass( + opj_mqc_t* mqc = &(t1->mqc); \ + register OPJ_INT32 *data = t1->data; \ + register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \ +- DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ ++ DOWNLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ + register OPJ_UINT32 v; \ + one = 1 << bpno; \ + half = one >> 1; \ +@@ -1319,7 +1305,7 @@ static void opj_t1_enc_clnpass( + *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \ + } \ + } \ +- UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \ ++ UPLOAD_MQC_VARIABLES(mqc, curctx, a, c, ct); \ + if( k < h ) { \ + for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \ + for (j = 0; j < h - k; ++j) { \ +@@ -1426,7 +1412,11 @@ static OPJ_FLOAT64 opj_t1_getwmsedec( + if (qmfbid == 1) { + w2 = opj_dwt_getnorm(level, orient); + } else { /* if (qmfbid == 0) */ ++ const OPJ_INT32 log2_gain = (orient == 0) ? 0 : ++ (orient == 3) ? 2 : 1; + w2 = opj_dwt_getnorm_real(level, orient); ++ /* Not sure this is right. But preserves past behaviour */ ++ stepsize /= (1 << log2_gain); + } + + wmsedec = w1 * w2 * stepsize * (1 << bpno); +@@ -1450,7 +1440,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( + assert(w * h <= 4096); + + /* encoder uses tile buffer, so no need to allocate */ +- if (!t1->encoder) { ++ { + OPJ_UINT32 datasize = w * h; + + if (datasize > t1->datasize) { +@@ -1560,8 +1550,7 @@ void opj_t1_destroy(opj_t1_t *p_t1) + return; + } + +- /* encoder uses tile buffer, so no need to free */ +- if (!p_t1->encoder && p_t1->data) { ++ if (p_t1->data) { + opj_aligned_free(p_t1->data); + p_t1->data = 00; + } +@@ -1658,7 +1647,21 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) + t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1); + if (t1 == NULL) { + t1 = opj_t1_create(OPJ_FALSE); +- opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper); ++ if (t1 == NULL) { ++ opj_event_msg(job->p_manager, EVT_ERROR, ++ "Cannot allocate Tier 1 handle\n"); ++ *(job->pret) = OPJ_FALSE; ++ opj_free(job); ++ return; ++ } ++ if (!opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper)) { ++ opj_event_msg(job->p_manager, EVT_ERROR, ++ "Unable to set t1 handle as TLS\n"); ++ opj_t1_destroy(t1); ++ *(job->pret) = OPJ_FALSE; ++ opj_free(job); ++ return; ++ } + } + t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer; + +@@ -1725,10 +1728,11 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) + datap[i] /= 2; + } + } else { /* if (tccp->qmfbid == 0) */ ++ const float stepsize = 0.5f * band->stepsize; + i = 0; + #ifdef __SSE2__ + { +- const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize); ++ const __m128 xmm_stepsize = _mm_set1_ps(stepsize); + for (; i < (cblk_size & ~15U); i += 16) { + __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( + datap + 0))); +@@ -1747,7 +1751,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) + } + #endif + for (; i < cblk_size; ++i) { +- OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize; ++ OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize; + memcpy(datap, &tmp, sizeof(tmp)); + datap++; + } +@@ -1773,12 +1777,13 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) + } + } + } else { /* if (tccp->qmfbid == 0) */ ++ const float stepsize = 0.5f * band->stepsize; + OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y * + tile_w + (OPJ_SIZE_T)x]; + for (j = 0; j < cblk_h; ++j) { + OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp; + for (i = 0; i < cblk_w; ++i) { +- OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * band->stepsize; ++ OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize; + *tiledp2 = tmp; + datap++; + tiledp2++; +@@ -2100,124 +2105,232 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, + } + + ++typedef struct { ++ OPJ_UINT32 compno; ++ OPJ_UINT32 resno; ++ opj_tcd_cblk_enc_t* cblk; ++ opj_tcd_tile_t *tile; ++ opj_tcd_band_t* band; ++ opj_tcd_tilecomp_t* tilec; ++ opj_tccp_t* tccp; ++ const OPJ_FLOAT64 * mct_norms; ++ OPJ_UINT32 mct_numcomps; ++ volatile OPJ_BOOL* pret; ++ opj_mutex_t* mutex; ++} opj_t1_cblk_encode_processing_job_t; ++ ++/** Procedure to deal with a asynchronous code-block encoding job. ++ * ++ * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure ++ * @param tls TLS handle. ++ */ ++static void opj_t1_cblk_encode_processor(void* user_data, opj_tls_t* tls) ++{ ++ opj_t1_cblk_encode_processing_job_t* job = ++ (opj_t1_cblk_encode_processing_job_t*)user_data; ++ opj_tcd_cblk_enc_t* cblk = job->cblk; ++ const opj_tcd_band_t* band = job->band; ++ const opj_tcd_tilecomp_t* tilec = job->tilec; ++ const opj_tccp_t* tccp = job->tccp; ++ const OPJ_UINT32 resno = job->resno; ++ opj_t1_t* t1; ++ const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); ++ ++ OPJ_INT32* OPJ_RESTRICT tiledp; ++ OPJ_UINT32 cblk_w; ++ OPJ_UINT32 cblk_h; ++ OPJ_UINT32 i, j; ++ ++ OPJ_INT32 x = cblk->x0 - band->x0; ++ OPJ_INT32 y = cblk->y0 - band->y0; ++ ++ if (!*(job->pret)) { ++ opj_free(job); ++ return; ++ } ++ ++ t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1); ++ if (t1 == NULL) { ++ t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */ ++ opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper); ++ } ++ ++ if (band->bandno & 1) { ++ opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; ++ x += pres->x1 - pres->x0; ++ } ++ if (band->bandno & 2) { ++ opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; ++ y += pres->y1 - pres->y0; ++ } ++ ++ if (!opj_t1_allocate_buffers( ++ t1, ++ (OPJ_UINT32)(cblk->x1 - cblk->x0), ++ (OPJ_UINT32)(cblk->y1 - cblk->y0))) { ++ *(job->pret) = OPJ_FALSE; ++ opj_free(job); ++ return; ++ } ++ ++ cblk_w = t1->w; ++ cblk_h = t1->h; ++ ++ tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; ++ ++ if (tccp->qmfbid == 1) { ++ /* Do multiplication on unsigned type, even if the ++ * underlying type is signed, to avoid potential ++ * int overflow on large value (the output will be ++ * incorrect in such situation, but whatever...) ++ * This assumes complement-to-2 signed integer ++ * representation ++ * Fixes https://github.com/uclouvain/openjpeg/issues/1053 ++ */ ++ OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp; ++ OPJ_UINT32* OPJ_RESTRICT t1data = (OPJ_UINT32*) t1->data; ++ /* Change from "natural" order to "zigzag" order of T1 passes */ ++ for (j = 0; j < (cblk_h & ~3U); j += 4) { ++ for (i = 0; i < cblk_w; ++i) { ++ t1data[0] = tiledp_u[(j + 0) * tile_w + i] << T1_NMSEDEC_FRACBITS; ++ t1data[1] = tiledp_u[(j + 1) * tile_w + i] << T1_NMSEDEC_FRACBITS; ++ t1data[2] = tiledp_u[(j + 2) * tile_w + i] << T1_NMSEDEC_FRACBITS; ++ t1data[3] = tiledp_u[(j + 3) * tile_w + i] << T1_NMSEDEC_FRACBITS; ++ t1data += 4; ++ } ++ } ++ if (j < cblk_h) { ++ for (i = 0; i < cblk_w; ++i) { ++ OPJ_UINT32 k; ++ for (k = j; k < cblk_h; k++) { ++ t1data[0] = tiledp_u[k * tile_w + i] << T1_NMSEDEC_FRACBITS; ++ t1data ++; ++ } ++ } ++ } ++ } else { /* if (tccp->qmfbid == 0) */ ++ OPJ_FLOAT32* OPJ_RESTRICT tiledp_f = (OPJ_FLOAT32*) tiledp; ++ OPJ_INT32* OPJ_RESTRICT t1data = t1->data; ++ /* Change from "natural" order to "zigzag" order of T1 passes */ ++ for (j = 0; j < (cblk_h & ~3U); j += 4) { ++ for (i = 0; i < cblk_w; ++i) { ++ t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 0) * tile_w + i] / ++ band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); ++ t1data[1] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 1) * tile_w + i] / ++ band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); ++ t1data[2] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 2) * tile_w + i] / ++ band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); ++ t1data[3] = (OPJ_INT32)opj_lrintf((tiledp_f[(j + 3) * tile_w + i] / ++ band->stepsize) * (1 << T1_NMSEDEC_FRACBITS)); ++ t1data += 4; ++ } ++ } ++ if (j < cblk_h) { ++ for (i = 0; i < cblk_w; ++i) { ++ OPJ_UINT32 k; ++ for (k = j; k < cblk_h; k++) { ++ t1data[0] = (OPJ_INT32)opj_lrintf((tiledp_f[k * tile_w + i] / band->stepsize) ++ * (1 << T1_NMSEDEC_FRACBITS)); ++ t1data ++; ++ } ++ } ++ } ++ } ++ ++ { ++ OPJ_FLOAT64 cumwmsedec = ++ opj_t1_encode_cblk( ++ t1, ++ cblk, ++ band->bandno, ++ job->compno, ++ tilec->numresolutions - 1 - resno, ++ tccp->qmfbid, ++ band->stepsize, ++ tccp->cblksty, ++ job->tile->numcomps, ++ job->mct_norms, ++ job->mct_numcomps); ++ if (job->mutex) { ++ opj_mutex_lock(job->mutex); ++ } ++ job->tile->distotile += cumwmsedec; ++ if (job->mutex) { ++ opj_mutex_unlock(job->mutex); ++ } ++ } ++ ++ opj_free(job); ++} + + +-OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, ++OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd, + opj_tcd_tile_t *tile, + opj_tcp_t *tcp, + const OPJ_FLOAT64 * mct_norms, + OPJ_UINT32 mct_numcomps + ) + { ++ volatile OPJ_BOOL ret = OPJ_TRUE; ++ opj_thread_pool_t* tp = tcd->thread_pool; + OPJ_UINT32 compno, resno, bandno, precno, cblkno; ++ opj_mutex_t* mutex = opj_mutex_create(); + + tile->distotile = 0; /* fixed_quality */ + + for (compno = 0; compno < tile->numcomps; ++compno) { + opj_tcd_tilecomp_t* tilec = &tile->comps[compno]; + opj_tccp_t* tccp = &tcp->tccps[compno]; +- OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); + + for (resno = 0; resno < tilec->numresolutions; ++resno) { + opj_tcd_resolution_t *res = &tilec->resolutions[resno]; + + for (bandno = 0; bandno < res->numbands; ++bandno) { + opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno]; +- OPJ_INT32 bandconst; + + /* Skip empty bands */ + if (opj_tcd_is_band_empty(band)) { + continue; + } +- +- bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192)); + for (precno = 0; precno < res->pw * res->ph; ++precno) { + opj_tcd_precinct_t *prc = &band->precincts[precno]; + + for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) { + opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno]; +- OPJ_INT32* OPJ_RESTRICT tiledp; +- OPJ_UINT32 cblk_w; +- OPJ_UINT32 cblk_h; +- OPJ_UINT32 i, j, tileLineAdvance; +- OPJ_SIZE_T tileIndex = 0; +- +- OPJ_INT32 x = cblk->x0 - band->x0; +- OPJ_INT32 y = cblk->y0 - band->y0; +- if (band->bandno & 1) { +- opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; +- x += pres->x1 - pres->x0; +- } +- if (band->bandno & 2) { +- opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1]; +- y += pres->y1 - pres->y0; +- } +- +- if (!opj_t1_allocate_buffers( +- t1, +- (OPJ_UINT32)(cblk->x1 - cblk->x0), +- (OPJ_UINT32)(cblk->y1 - cblk->y0))) { +- return OPJ_FALSE; +- } + +- cblk_w = t1->w; +- cblk_h = t1->h; +- tileLineAdvance = tile_w - cblk_w; +- +- tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; +- t1->data = tiledp; +- t1->data_stride = tile_w; +- if (tccp->qmfbid == 1) { +- /* Do multiplication on unsigned type, even if the +- * underlying type is signed, to avoid potential +- * int overflow on large value (the output will be +- * incorrect in such situation, but whatever...) +- * This assumes complement-to-2 signed integer +- * representation +- * Fixes https://github.com/uclouvain/openjpeg/issues/1053 +- */ +- OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp; +- for (j = 0; j < cblk_h; ++j) { +- for (i = 0; i < cblk_w; ++i) { +- tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS; +- tileIndex++; +- } +- tileIndex += tileLineAdvance; +- } +- } else { /* if (tccp->qmfbid == 0) */ +- for (j = 0; j < cblk_h; ++j) { +- for (i = 0; i < cblk_w; ++i) { +- OPJ_INT32 tmp = tiledp[tileIndex]; +- tiledp[tileIndex] = +- opj_int_fix_mul_t1( +- tmp, +- bandconst); +- tileIndex++; +- } +- tileIndex += tileLineAdvance; +- } ++ opj_t1_cblk_encode_processing_job_t* job = ++ (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1, ++ sizeof(opj_t1_cblk_encode_processing_job_t)); ++ if (!job) { ++ ret = OPJ_FALSE; ++ goto end; + } +- +- opj_t1_encode_cblk( +- t1, +- cblk, +- band->bandno, +- compno, +- tilec->numresolutions - 1 - resno, +- tccp->qmfbid, +- band->stepsize, +- tccp->cblksty, +- tile->numcomps, +- tile, +- mct_norms, +- mct_numcomps); ++ job->compno = compno; ++ job->tile = tile; ++ job->resno = resno; ++ job->cblk = cblk; ++ job->band = band; ++ job->tilec = tilec; ++ job->tccp = tccp; ++ job->mct_norms = mct_norms; ++ job->mct_numcomps = mct_numcomps; ++ job->pret = &ret; ++ job->mutex = mutex; ++ opj_thread_pool_submit_job(tp, opj_t1_cblk_encode_processor, job); + + } /* cblkno */ + } /* precno */ + } /* bandno */ + } /* resno */ + } /* compno */ +- return OPJ_TRUE; ++ ++end: ++ opj_thread_pool_wait_completion(tcd->thread_pool, 0); ++ if (mutex) { ++ opj_mutex_destroy(mutex); ++ } ++ ++ return ret; + } + + /* Returns whether the pass (bpno, passtype) is terminated */ +@@ -2252,18 +2365,17 @@ static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk, + + + /** mod fixed_quality */ +-static void opj_t1_encode_cblk(opj_t1_t *t1, +- opj_tcd_cblk_enc_t* cblk, +- OPJ_UINT32 orient, +- OPJ_UINT32 compno, +- OPJ_UINT32 level, +- OPJ_UINT32 qmfbid, +- OPJ_FLOAT64 stepsize, +- OPJ_UINT32 cblksty, +- OPJ_UINT32 numcomps, +- opj_tcd_tile_t * tile, +- const OPJ_FLOAT64 * mct_norms, +- OPJ_UINT32 mct_numcomps) ++static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1, ++ opj_tcd_cblk_enc_t* cblk, ++ OPJ_UINT32 orient, ++ OPJ_UINT32 compno, ++ OPJ_UINT32 level, ++ OPJ_UINT32 qmfbid, ++ OPJ_FLOAT64 stepsize, ++ OPJ_UINT32 cblksty, ++ OPJ_UINT32 numcomps, ++ const OPJ_FLOAT64 * mct_norms, ++ OPJ_UINT32 mct_numcomps) + { + OPJ_FLOAT64 cumwmsedec = 0.0; + +@@ -2277,6 +2389,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, + OPJ_UINT32 i, j; + OPJ_BYTE type = T1_TYPE_MQ; + OPJ_FLOAT64 tempwmsedec; ++ OPJ_INT32* datap; + + #ifdef EXTRA_DEBUG + printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n", +@@ -2286,10 +2399,19 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, + mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9); + + max = 0; +- for (i = 0; i < t1->w; ++i) { +- for (j = 0; j < t1->h; ++j) { +- OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]); +- max = opj_int_max(max, tmp); ++ datap = t1->data; ++ for (j = 0; j < t1->h; ++j) { ++ const OPJ_UINT32 w = t1->w; ++ for (i = 0; i < w; ++i, ++datap) { ++ OPJ_INT32 tmp = *datap; ++ if (tmp < 0) { ++ OPJ_UINT32 tmp_unsigned; ++ max = opj_int_max(max, -tmp); ++ tmp_unsigned = opj_to_smr(tmp); ++ memcpy(datap, &tmp_unsigned, sizeof(OPJ_INT32)); ++ } else { ++ max = opj_int_max(max, tmp); ++ } + } + } + +@@ -2297,7 +2419,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, + T1_NMSEDEC_FRACBITS) : 0; + if (cblk->numbps == 0) { + cblk->totalpasses = 0; +- return; ++ return cumwmsedec; + } + + bpno = (OPJ_INT32)(cblk->numbps - 1); +@@ -2343,7 +2465,6 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, + tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid, + stepsize, numcomps, mct_norms, mct_numcomps) ; + cumwmsedec += tempwmsedec; +- tile->distotile += tempwmsedec; + pass->distortiondec = cumwmsedec; + + if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) { +@@ -2425,4 +2546,6 @@ static void opj_t1_encode_cblk(opj_t1_t *t1, + } + } + #endif ++ ++ return cumwmsedec; + } +diff --git a/third_party/libopenjpeg20/t1.h b/third_party/libopenjpeg20/t1.h +index 171dfb0a7ae57e5f874f74c6967d80b628b6316e..81ad0d00f17d11a7a33d6c1a02222d3ab47faf14 100644 +--- a/third_party/libopenjpeg20/t1.h ++++ b/third_party/libopenjpeg20/t1.h +@@ -198,7 +198,6 @@ typedef struct opj_t1 { + OPJ_UINT32 h; + OPJ_UINT32 datasize; + OPJ_UINT32 flagssize; +- OPJ_UINT32 data_stride; + OPJ_BOOL encoder; + + /* Thre 3 variables below are only used by the decoder */ +@@ -216,13 +215,13 @@ typedef struct opj_t1 { + + /** + Encode the code-blocks of a tile +-@param t1 T1 handle ++@param tcd TCD handle + @param tile The tile to encode + @param tcp Tile coding parameters + @param mct_norms FIXME DOC + @param mct_numcomps Number of components used for MCT + */ +-OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, ++OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd, + opj_tcd_tile_t *tile, + opj_tcp_t *tcp, + const OPJ_FLOAT64 * mct_norms, +diff --git a/third_party/libopenjpeg20/t2.c b/third_party/libopenjpeg20/t2.c +index 9825118cfd7350d091a675522c165b66cff76b1d..1481e16f461968adca4ede901b7a3af6de162165 100644 +--- a/third_party/libopenjpeg20/t2.c ++++ b/third_party/libopenjpeg20/t2.c +@@ -224,6 +224,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, + OPJ_UINT32 * p_data_written, + OPJ_UINT32 p_max_len, + opj_codestream_info_t *cstr_info, ++ opj_tcd_marker_info_t* p_marker_info, + OPJ_UINT32 p_tp_num, + OPJ_INT32 p_tp_pos, + OPJ_UINT32 p_pino, +@@ -244,7 +245,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, + l_image->numcomps : 1; + OPJ_UINT32 l_nb_pocs = l_tcp->numpocs + 1; + +- l_pi = opj_pi_initialise_encode(l_image, l_cp, p_tile_no, p_t2_mode); ++ l_pi = opj_pi_initialise_encode(l_image, l_cp, p_tile_no, p_t2_mode, p_manager); + if (!l_pi) { + return OPJ_FALSE; + } +@@ -310,6 +311,20 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, + opj_pi_destroy(l_pi, l_nb_pocs); + return OPJ_FALSE; + } ++ ++ if (p_marker_info && p_marker_info->need_PLT) { ++ /* One time use intended */ ++ assert(p_marker_info->packet_count == 0); ++ assert(p_marker_info->p_packet_size == NULL); ++ ++ p_marker_info->p_packet_size = (OPJ_UINT32*) opj_malloc( ++ opj_get_encoding_packet_count(l_image, l_cp, p_tile_no) * sizeof(OPJ_UINT32)); ++ if (p_marker_info->p_packet_size == NULL) { ++ opj_pi_destroy(l_pi, l_nb_pocs); ++ return OPJ_FALSE; ++ } ++ } ++ + while (opj_pi_next(l_current_pi)) { + if (l_current_pi->layno < p_maxlayers) { + l_nb_bytes = 0; +@@ -326,6 +341,11 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* p_t2, + + * p_data_written += l_nb_bytes; + ++ if (p_marker_info && p_marker_info->need_PLT) { ++ p_marker_info->p_packet_size[p_marker_info->packet_count] = l_nb_bytes; ++ p_marker_info->packet_count ++; ++ } ++ + /* INDEX >> */ + if (cstr_info) { + if (cstr_info->index_write) { +@@ -405,7 +425,7 @@ OPJ_BOOL opj_t2_decode_packets(opj_tcd_t* tcd, + #endif + + /* create a packet iterator */ +- l_pi = opj_pi_create_decode(l_image, l_cp, p_tile_no); ++ l_pi = opj_pi_create_decode(l_image, l_cp, p_tile_no, p_manager); + if (!l_pi) { + return OPJ_FALSE; + } +@@ -673,6 +693,14 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno, + OPJ_BOOL packet_empty = OPJ_FALSE; + #endif + ++#ifdef DEBUG_VERBOSE ++ if (p_t2_mode == FINAL_PASS) { ++ fprintf(stderr, ++ "encode packet compono=%d, resno=%d, precno=%d, layno=%d\n", ++ compno, resno, precno, layno); ++ } ++#endif ++ + /* */ + if (tcp->csty & J2K_CP_CSTY_SOP) { + if (length < 6) { +@@ -711,6 +739,15 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno, + continue; + } + ++ /* Avoid out of bounds access of https://github.com/uclouvain/openjpeg/issues/1294 */ ++ /* but likely not a proper fix. */ ++ if (precno >= res->pw * res->ph) { ++ opj_event_msg(p_manager, EVT_ERROR, ++ "opj_t2_encode_packet(): accessing precno=%u >= %u\n", ++ precno, res->pw * res->ph); ++ return OPJ_FALSE; ++ } ++ + prc = &band->precincts[precno]; + opj_tgt_reset(prc->incltree); + opj_tgt_reset(prc->imsbtree); +@@ -778,6 +815,15 @@ static OPJ_BOOL opj_t2_encode_packet(OPJ_UINT32 tileno, + continue; + } + ++ /* Avoid out of bounds access of https://github.com/uclouvain/openjpeg/issues/1297 */ ++ /* but likely not a proper fix. */ ++ if (precno >= res->pw * res->ph) { ++ opj_event_msg(p_manager, EVT_ERROR, ++ "opj_t2_encode_packet(): accessing precno=%u >= %u\n", ++ precno, res->pw * res->ph); ++ return OPJ_FALSE; ++ } ++ + prc = &band->precincts[precno]; + l_nb_blocks = prc->cw * prc->ch; + cblk = prc->cblks.enc; +diff --git a/third_party/libopenjpeg20/t2.h b/third_party/libopenjpeg20/t2.h +index 66500b1699334d7752f9ad86eec672379028105b..becfa91a4deef924839953e0d1a2145e34f34bc1 100644 +--- a/third_party/libopenjpeg20/t2.h ++++ b/third_party/libopenjpeg20/t2.h +@@ -73,6 +73,7 @@ Encode the packets of a tile to a destination buffer + @param p_data_written FIXME DOC + @param len the length of the destination buffer + @param cstr_info Codestream information structure ++@param p_marker_info Marker information structure + @param tpnum Tile part number of the current tile + @param tppos The position of the tile part flag in the progression order + @param pino FIXME DOC +@@ -87,6 +88,7 @@ OPJ_BOOL opj_t2_encode_packets(opj_t2_t* t2, + OPJ_UINT32 * p_data_written, + OPJ_UINT32 len, + opj_codestream_info_t *cstr_info, ++ opj_tcd_marker_info_t* p_marker_info, + OPJ_UINT32 tpnum, + OPJ_INT32 tppos, + OPJ_UINT32 pino, +diff --git a/third_party/libopenjpeg20/tcd.c b/third_party/libopenjpeg20/tcd.c +index 9e98f04ab8bb8b008e812c9b1ef73ead49a49d7a..b9f571410b9ecd3f4c8b20c3144907f9d33d6f9e 100644 +--- a/third_party/libopenjpeg20/tcd.c ++++ b/third_party/libopenjpeg20/tcd.c +@@ -112,7 +112,7 @@ void tcd_dump(FILE *fd, opj_tcd_t *tcd, opj_tcd_image_t * img) + * Initializes tile coding/decoding + */ + static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, +- OPJ_BOOL isEncoder, OPJ_FLOAT32 fraction, OPJ_SIZE_T sizeof_block, ++ OPJ_BOOL isEncoder, OPJ_SIZE_T sizeof_block, + opj_event_mgr_t* manager); + + /** +@@ -182,6 +182,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd, + OPJ_UINT32 * p_data_written, + OPJ_UINT32 p_max_dest_size, + opj_codestream_info_t *p_cstr_info, ++ opj_tcd_marker_info_t* p_marker_info, + opj_event_mgr_t *p_manager); + + static OPJ_BOOL opj_tcd_rate_allocate_encode(opj_tcd_t *p_tcd, +@@ -573,9 +574,10 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd, + opj_tcd_makelayer(tcd, layno, thresh, 0); + + if (cp->m_specific_param.m_enc.m_fixed_quality) { /* fixed_quality */ +- if (OPJ_IS_CINEMA(cp->rsiz)) { ++ if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) { + if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest, +- p_data_written, maxlen, cstr_info, tcd->cur_tp_num, tcd->tp_pos, tcd->cur_pino, ++ p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos, ++ tcd->cur_pino, + THRESH_CALC, p_manager)) { + + lo = thresh; +@@ -605,7 +607,8 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd, + } + } else { + if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest, +- p_data_written, maxlen, cstr_info, tcd->cur_tp_num, tcd->tp_pos, tcd->cur_pino, ++ p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos, ++ tcd->cur_pino, + THRESH_CALC, p_manager)) { + /* TODO: what to do with l ??? seek / tell ??? */ + /* opj_event_msg(tcd->cinfo, EVT_INFO, "rate alloc: len=%d, max=%d\n", l, maxlen); */ +@@ -718,10 +721,9 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec) + /* ----------------------------------------------------------------------- */ + + static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, +- OPJ_BOOL isEncoder, OPJ_FLOAT32 fraction, OPJ_SIZE_T sizeof_block, ++ OPJ_BOOL isEncoder, OPJ_SIZE_T sizeof_block, + opj_event_mgr_t* manager) + { +- OPJ_UINT32(*l_gain_ptr)(OPJ_UINT32) = 00; + OPJ_UINT32 compno, resno, bandno, precno, cblkno; + opj_tcp_t * l_tcp = 00; + opj_cp_t * l_cp = 00; +@@ -737,7 +739,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, + OPJ_UINT32 p, q; + OPJ_UINT32 l_level_no; + OPJ_UINT32 l_pdx, l_pdy; +- OPJ_UINT32 l_gain; + OPJ_INT32 l_x0b, l_y0b; + OPJ_UINT32 l_tx0, l_ty0; + /* extent of precincts , top left, bottom right**/ +@@ -881,11 +882,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, + l_level_no = l_tilec->numresolutions; + l_res = l_tilec->resolutions; + l_step_size = l_tccp->stepsizes; +- if (l_tccp->qmfbid == 0) { +- l_gain_ptr = &opj_dwt_getgain_real; +- } else { +- l_gain_ptr = &opj_dwt_getgain; +- } + /*fprintf(stderr, "\tlevel_no=%d\n",l_level_no);*/ + + for (resno = 0; resno < l_tilec->numresolutions; ++resno) { +@@ -972,7 +968,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, + l_band = l_res->bands; + + for (bandno = 0; bandno < l_res->numbands; ++bandno, ++l_band, ++l_step_size) { +- OPJ_INT32 numbps; + /*fprintf(stderr, "\t\t\tband_no=%d/%d\n", bandno, l_res->numbands );*/ + + if (resno == 0) { +@@ -1008,11 +1003,24 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, + } + } + +- /** avoid an if with storing function pointer */ +- l_gain = (*l_gain_ptr)(l_band->bandno); +- numbps = (OPJ_INT32)(l_image_comp->prec + l_gain); +- l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0, +- (OPJ_INT32)(numbps - l_step_size->expn)))) * fraction; ++ { ++ /* Table E-1 - Sub-band gains */ ++ /* BUG_WEIRD_TWO_INVK (look for this identifier in dwt.c): */ ++ /* the test (!isEncoder && l_tccp->qmfbid == 0) is strongly */ ++ /* linked to the use of two_invK instead of invK */ ++ const OPJ_INT32 log2_gain = (!isEncoder && ++ l_tccp->qmfbid == 0) ? 0 : (l_band->bandno == 0) ? 0 : ++ (l_band->bandno == 3) ? 2 : 1; ++ ++ /* Nominal dynamic range. Equation E-4 */ ++ const OPJ_INT32 Rb = (OPJ_INT32)l_image_comp->prec + log2_gain; ++ ++ /* Delta_b value of Equation E-3 in "E.1 Inverse quantization ++ * procedure" of the standard */ ++ l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0, ++ (OPJ_INT32)(Rb - l_step_size->expn)))); ++ } ++ + /* Mb value of Equation E-2 in "E.1 Inverse quantization + * procedure" of the standard */ + l_band->numbps = l_step_size->expn + (OPJ_INT32)l_tccp->numgbits - +@@ -1198,14 +1206,14 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, + OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, + opj_event_mgr_t* p_manager) + { +- return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_TRUE, 1.0F, ++ return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_TRUE, + sizeof(opj_tcd_cblk_enc_t), p_manager); + } + + OPJ_BOOL opj_tcd_init_decode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no, + opj_event_mgr_t* p_manager) + { +- return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_FALSE, 0.5F, ++ return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_FALSE, + sizeof(opj_tcd_cblk_dec_t), p_manager); + } + +@@ -1243,10 +1251,16 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t * + + /* +1 is needed for https://github.com/uclouvain/openjpeg/issues/835 */ + /* and actually +2 required for https://github.com/uclouvain/openjpeg/issues/982 */ ++ /* and +7 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 3) */ ++ /* and +26 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 7) */ ++ /* and +28 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 44) */ ++ /* and +33 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4) */ ++ /* and +63 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4 -IMF 2K) */ ++ /* and +74 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4 -n 8 -s 7,7 -I) */ + /* TODO: is there a theoretical upper-bound for the compressed code */ + /* block size ? */ +- l_data_size = 2 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) * +- (p_code_block->y1 - p_code_block->y0) * (OPJ_INT32)sizeof(OPJ_UINT32)); ++ l_data_size = 74 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) * ++ (p_code_block->y1 - p_code_block->y0) * (OPJ_INT32)sizeof(OPJ_UINT32)); + + if (l_data_size > p_code_block->data_size) { + if (p_code_block->data) { +@@ -1378,6 +1392,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, + OPJ_UINT32 * p_data_written, + OPJ_UINT32 p_max_length, + opj_codestream_info_t *p_cstr_info, ++ opj_tcd_marker_info_t* p_marker_info, + opj_event_mgr_t *p_manager) + { + +@@ -1457,7 +1472,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, + /* FIXME _ProfStart(PGROUP_T2); */ + + if (! opj_tcd_t2_encode(p_tcd, p_dest, p_data_written, p_max_length, +- p_cstr_info, p_manager)) { ++ p_cstr_info, p_marker_info, p_manager)) { + return OPJ_FALSE; + } + /* FIXME _ProfStop(PGROUP_T2); */ +@@ -2041,7 +2056,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) + opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles; + opj_tcp_t * l_tcp = p_tcd->tcp; + opj_tcd_tilecomp_t * l_tile_comp = l_tile->comps; +- OPJ_UINT32 l_samples, i; ++ OPJ_SIZE_T l_samples; ++ OPJ_UINT32 i; + + if (l_tcp->mct == 0 || p_tcd->used_component != NULL) { + return OPJ_TRUE; +@@ -2054,8 +2070,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) + /* A bit inefficient: we process more data than needed if */ + /* resno_decoded < l_tile_comp->minimum_num_resolutions-1, */ + /* but we would need to take into account a stride then */ +- l_samples = (OPJ_UINT32)((res_comp0->x1 - res_comp0->x0) * +- (res_comp0->y1 - res_comp0->y0)); ++ l_samples = (OPJ_SIZE_T)(res_comp0->x1 - res_comp0->x0) * ++ (OPJ_SIZE_T)(res_comp0->y1 - res_comp0->y0); + if (l_tile->numcomps >= 3) { + if (l_tile_comp->minimum_num_resolutions != + l_tile->comps[1].minimum_num_resolutions || +@@ -2089,8 +2105,8 @@ static OPJ_BOOL opj_tcd_mct_decode(opj_tcd_t *p_tcd, opj_event_mgr_t *p_manager) + opj_tcd_resolution_t* res_comp0 = l_tile->comps[0].resolutions + + p_tcd->image->comps[0].resno_decoded; + +- l_samples = (res_comp0->win_x1 - res_comp0->win_x0) * +- (res_comp0->win_y1 - res_comp0->win_y0); ++ l_samples = (OPJ_SIZE_T)(res_comp0->win_x1 - res_comp0->win_x0) * ++ (OPJ_SIZE_T)(res_comp0->win_y1 - res_comp0->win_y0); + if (l_tile->numcomps >= 3) { + opj_tcd_resolution_t* res_comp1 = l_tile->comps[1].resolutions + + p_tcd->image->comps[1].resno_decoded; +@@ -2356,7 +2372,7 @@ static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct) + } + } + +-OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd) ++OPJ_SIZE_T opj_tcd_get_encoder_input_buffer_size(opj_tcd_t *p_tcd) + { + OPJ_UINT32 i; + OPJ_SIZE_T l_data_size = 0; +@@ -2414,7 +2430,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd) + } + } else { + for (i = 0; i < l_nb_elem; ++i) { +- *l_current_ptr = (*l_current_ptr - l_tccp->m_dc_level_shift) * (1 << 11); ++ *((OPJ_FLOAT32 *) l_current_ptr) = (OPJ_FLOAT32)(*l_current_ptr - ++ l_tccp->m_dc_level_shift); + ++l_current_ptr; + } + } +@@ -2472,8 +2489,11 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd) + + opj_free(l_data); + } else if (l_tcp->tccps->qmfbid == 0) { +- opj_mct_encode_real(l_tile->comps[0].data, l_tile->comps[1].data, +- l_tile->comps[2].data, samples); ++ opj_mct_encode_real( ++ (OPJ_FLOAT32*)l_tile->comps[0].data, ++ (OPJ_FLOAT32*)l_tile->comps[1].data, ++ (OPJ_FLOAT32*)l_tile->comps[2].data, ++ samples); + } else { + opj_mct_encode(l_tile->comps[0].data, l_tile->comps[1].data, + l_tile->comps[2].data, samples); +@@ -2491,11 +2511,11 @@ static OPJ_BOOL opj_tcd_dwt_encode(opj_tcd_t *p_tcd) + + for (compno = 0; compno < l_tile->numcomps; ++compno) { + if (l_tccp->qmfbid == 1) { +- if (! opj_dwt_encode(l_tile_comp)) { ++ if (! opj_dwt_encode(p_tcd, l_tile_comp)) { + return OPJ_FALSE; + } + } else if (l_tccp->qmfbid == 0) { +- if (! opj_dwt_encode_real(l_tile_comp)) { ++ if (! opj_dwt_encode_real(p_tcd, l_tile_comp)) { + return OPJ_FALSE; + } + } +@@ -2509,16 +2529,10 @@ static OPJ_BOOL opj_tcd_dwt_encode(opj_tcd_t *p_tcd) + + static OPJ_BOOL opj_tcd_t1_encode(opj_tcd_t *p_tcd) + { +- opj_t1_t * l_t1; + const OPJ_FLOAT64 * l_mct_norms; + OPJ_UINT32 l_mct_numcomps = 0U; + opj_tcp_t * l_tcp = p_tcd->tcp; + +- l_t1 = opj_t1_create(OPJ_TRUE); +- if (l_t1 == 00) { +- return OPJ_FALSE; +- } +- + if (l_tcp->mct == 1) { + l_mct_numcomps = 3U; + /* irreversible encoding */ +@@ -2532,13 +2546,9 @@ static OPJ_BOOL opj_tcd_t1_encode(opj_tcd_t *p_tcd) + l_mct_norms = (const OPJ_FLOAT64 *)(l_tcp->mct_norms); + } + +- if (! opj_t1_encode_cblks(l_t1, p_tcd->tcd_image->tiles, l_tcp, l_mct_norms, +- l_mct_numcomps)) { +- opj_t1_destroy(l_t1); +- return OPJ_FALSE; +- } +- +- opj_t1_destroy(l_t1); ++ return opj_t1_encode_cblks(p_tcd, ++ p_tcd->tcd_image->tiles, l_tcp, l_mct_norms, ++ l_mct_numcomps); + + return OPJ_TRUE; + } +@@ -2548,6 +2558,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd, + OPJ_UINT32 * p_data_written, + OPJ_UINT32 p_max_dest_size, + opj_codestream_info_t *p_cstr_info, ++ opj_tcd_marker_info_t* p_marker_info, + opj_event_mgr_t *p_manager) + { + opj_t2_t * l_t2; +@@ -2566,6 +2577,7 @@ static OPJ_BOOL opj_tcd_t2_encode(opj_tcd_t *p_tcd, + p_data_written, + p_max_dest_size, + p_cstr_info, ++ p_marker_info, + p_tcd->tp_num, + p_tcd->tp_pos, + p_tcd->cur_pino, +@@ -2624,7 +2636,7 @@ OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, + OPJ_UINT32 l_size_comp, l_remaining; + OPJ_SIZE_T l_nb_elem; + +- l_data_size = opj_tcd_get_encoded_tile_size(p_tcd); ++ l_data_size = opj_tcd_get_encoder_input_buffer_size(p_tcd); + if (l_data_size != p_src_length) { + return OPJ_FALSE; + } +@@ -2826,3 +2838,30 @@ static OPJ_BOOL opj_tcd_is_whole_tilecomp_decoding(opj_tcd_t *p_tcd, + (((OPJ_UINT32)tilec->x1 - tcx1) >> shift) == 0 && + (((OPJ_UINT32)tilec->y1 - tcy1) >> shift) == 0))); + } ++ ++/* ----------------------------------------------------------------------- */ ++ ++opj_tcd_marker_info_t* opj_tcd_marker_info_create(OPJ_BOOL need_PLT) ++{ ++ opj_tcd_marker_info_t *l_tcd_marker_info = ++ (opj_tcd_marker_info_t*) opj_calloc(1, sizeof(opj_tcd_marker_info_t)); ++ if (!l_tcd_marker_info) { ++ return NULL; ++ } ++ ++ l_tcd_marker_info->need_PLT = need_PLT; ++ ++ return l_tcd_marker_info; ++} ++ ++/* ----------------------------------------------------------------------- */ ++ ++void opj_tcd_marker_info_destroy(opj_tcd_marker_info_t *p_tcd_marker_info) ++{ ++ if (p_tcd_marker_info) { ++ opj_free(p_tcd_marker_info->p_packet_size); ++ opj_free(p_tcd_marker_info); ++ } ++} ++ ++/* ----------------------------------------------------------------------- */ +diff --git a/third_party/libopenjpeg20/tcd.h b/third_party/libopenjpeg20/tcd.h +index e3214c1d982ad9b4ce57d17d007d6bd562883fe2..f1b52b8dac6e7115cd65580ae89e387100d946d3 100644 +--- a/third_party/libopenjpeg20/tcd.h ++++ b/third_party/libopenjpeg20/tcd.h +@@ -284,6 +284,22 @@ typedef struct opj_tcd { + OPJ_BOOL* used_component; + } opj_tcd_t; + ++/** ++ * Structure to hold information needed to generate some markers. ++ * Used by encoder. ++ */ ++typedef struct opj_tcd_marker_info { ++ /** In: Whether information to generate PLT markers in needed */ ++ OPJ_BOOL need_PLT; ++ ++ /** OUT: Number of elements in p_packet_size[] array */ ++ OPJ_UINT32 packet_count; ++ ++ /** OUT: Array of size packet_count, such that p_packet_size[i] is ++ * the size in bytes of the ith packet */ ++ OPJ_UINT32* p_packet_size; ++} opj_tcd_marker_info_t; ++ + /** @name Exported functions */ + /*@{*/ + /* ----------------------------------------------------------------------- */ +@@ -306,6 +322,21 @@ Destroy a previously created TCD handle + */ + void opj_tcd_destroy(opj_tcd_t *tcd); + ++ ++/** ++ * Create a new opj_tcd_marker_info_t* structure ++ * @param need_PLT Whether information is needed to generate PLT markers. ++ */ ++opj_tcd_marker_info_t* opj_tcd_marker_info_create(OPJ_BOOL need_PLT); ++ ++ ++/** ++Destroy a previously created opj_tcd_marker_info_t* structure ++@param p_tcd_marker_info Structure to destroy ++*/ ++void opj_tcd_marker_info_destroy(opj_tcd_marker_info_t *p_tcd_marker_info); ++ ++ + /** + * Initialize the tile coder and may reuse some memory. + * @param p_tcd TCD handle. +@@ -364,6 +395,7 @@ OPJ_UINT32 opj_tcd_get_decoded_tile_size(opj_tcd_t *p_tcd, + * @param p_data_written pointer to an int that is incremented by the number of bytes really written on p_dest + * @param p_len Maximum length of the destination buffer + * @param p_cstr_info Codestream information structure ++ * @param p_marker_info Marker information structure + * @param p_manager the user event manager + * @return true if the coding is successful. + */ +@@ -373,6 +405,7 @@ OPJ_BOOL opj_tcd_encode_tile(opj_tcd_t *p_tcd, + OPJ_UINT32 * p_data_written, + OPJ_UINT32 p_len, + struct opj_codestream_info *p_cstr_info, ++ opj_tcd_marker_info_t* p_marker_info, + opj_event_mgr_t *p_manager); + + +@@ -415,9 +448,11 @@ OPJ_BOOL opj_tcd_update_tile_data(opj_tcd_t *p_tcd, + OPJ_UINT32 p_dest_length); + + /** +- * ++ * Get the size in bytes of the input buffer provided before encoded. ++ * This must be the size provided to the p_src_length argument of ++ * opj_tcd_copy_tile_data() + */ +-OPJ_SIZE_T opj_tcd_get_encoded_tile_size(opj_tcd_t *p_tcd); ++OPJ_SIZE_T opj_tcd_get_encoder_input_buffer_size(opj_tcd_t *p_tcd); + + /** + * Initialize the tile coder and may reuse some meory. +@@ -433,6 +468,8 @@ OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd, + + /** + * Copies tile data from the given memory block onto the system. ++ * ++ * p_src_length must be equal to opj_tcd_get_encoder_input_buffer_size() + */ + OPJ_BOOL opj_tcd_copy_tile_data(opj_tcd_t *p_tcd, + OPJ_BYTE * p_src,