Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
perian/ColorConversions.c
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
864 lines (728 sloc)
21.9 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* ColorConversions.c | |
* Created by Alexander Strange on 1/10/07. | |
* | |
* This file is part of Perian. | |
* | |
* This library is free software; you can redistribute it and/or | |
* modify it under the terms of the GNU Lesser General Public | |
* License as published by the Free Software Foundation; either | |
* version 2.1 of the License, or (at your option) any later version. | |
* | |
* This library is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
* Lesser General Public License for more details. | |
* | |
* You should have received a copy of the GNU Lesser General Public | |
* License along with FFmpeg; if not, write to the Free Software | |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
*/ | |
#include <QuickTime/QuickTime.h> | |
#include "ColorConversions.h" | |
#include "Codecprintf.h" | |
#include "CommonUtils.h" | |
#include <libswscale/swscale.h> | |
#include <libavutil/opt.h> | |
/* | |
Converts (without resampling) from ffmpeg pixel formats to the ones QT accepts | |
Todo: | |
- rewrite everything in asm (or C with all loop optimization opportunities removed) | |
- add a version with bilinear resampling | |
- handle YUV 4:2:0 with odd width | |
*/ | |
//#define ENABLE_SWSCALE | |
/*
 * Compiler hint helpers.
 *
 * likely(x) / unlikely(x): branch-prediction hints; both evaluate to the
 *     truth value of x.
 * impossible(x): tells the optimizer that x never holds. Reaching it with x
 *     true is undefined behaviour on GNU-compatible compilers, so only use it
 *     for conditions that callers really exclude.
 * always_inline: function specifier forcing inlining where supported.
 *
 * Arguments are parenthesized and impossible() is wrapped in do/while(0) so
 * the macros behave like ordinary expressions/statements, e.g. inside an
 * unbraced if/else.
 */
#ifdef __GNUC__
#define unlikely(x)   __builtin_expect(!!(x), 0)
#define likely(x)     __builtin_expect(!!(x), 1)
#define impossible(x) do { if (x) __builtin_unreachable(); } while (0)
#define always_inline inline __attribute__((always_inline))
#else
#define unlikely(x)   (!!(x))
#define likely(x)     (!!(x))
#define impossible(x) ((void)0)
#define always_inline inline
#endif
#pragma mark Simple conversion functions | |
/*
 * Interleaves a single row of planar samples into packed U, Y, V, Y order.
 *
 * o          output row; receives 4 bytes per chroma sample
 * yc         luma row; 2 samples are consumed per chroma sample
 * uc, vc     chroma rows; halfwidth samples are consumed from each
 * halfwidth  number of chroma samples (pixel pairs) to emit
 */
static always_inline void Y420toY422_lastrow(UInt8 * __restrict o, UInt8 * __restrict yc, UInt8 * __restrict uc, UInt8 * __restrict vc, int halfwidth)
{
    int pair = 0;

    while (pair < halfwidth) {
        UInt8 *dst = o + pair * 4;
        UInt8 *luma = yc + pair * 2;

        dst[0] = uc[pair];
        dst[1] = luma[0];
        dst[2] = vc[pair];
        dst[3] = luma[1];
        pair++;
    }
}
//Y420 Planar to Y422 Packed | |
#include <emmintrin.h> | |
/*
 * Converts planar 4:2:0 (picture->data[0..2]) to packed U, Y, V, Y output,
 * handling two picture rows per iteration so both rows share one chroma row.
 *
 * Each vector iteration consumes 32 luma samples per row and 16 U + 16 V
 * samples, producing 64 output bytes per row. Columns left over after the
 * vector loop (width not a multiple of 32) are handled by the scalar tail, and
 * an odd final row is handled by Y420toY422_lastrow().
 *
 * The luma loads and the streaming stores use aligned instructions; chroma is
 * loaded unaligned.
 * NOTE(review): the caller in this file only checks inLineSizes[0] for
 * 16-byte alignment before selecting this function - confirm that the output
 * buffer and luma base pointer are aligned as well.
 *
 * ctx      supplies width, height, outLineSize and inLineSizes[0..1]
 * picture  source planes
 * o        destination buffer
 */
static FASTCALL void Y420toY422_sse2(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    UInt8 * __restrict yc = picture->data[0], * __restrict u = picture->data[1], * __restrict v = picture->data[2];
    int rY = ctx->inLineSizes[0], rUV = ctx->inLineSizes[1];
    impossible(width <= 1 || height <= 1 || outRB <= 0 || rY <= 0 || rUV <= 0);
    int halfwidth = width >> 1, halfheight = height >> 1;
    int vWidth = width >> 5; /* number of 32-pixel vector iterations per row */
    int x, y;

    for (y = 0; y < halfheight; y++) {
        UInt8 *o2 = o + outRB, *yc2 = yc + rY;
        __m128i *ov = (__m128i*)o, *ov2 = (__m128i*)o2, *yv = (__m128i*)yc, *yv2 = (__m128i*)yc2;
        __m128i *uv = (__m128i*)u, *vv = (__m128i*)v;

#ifdef __i386__
        int vWidth_ = vWidth;

        /*
         * The asm loop is a do/while (decl + jnz): entering it with a zero
         * count would wrap the counter and run far past the buffers, so skip
         * it entirely for rows narrower than 32 pixels.
         */
        if (vWidth_ > 0) {
            asm volatile(
                "\n0: \n\t"
                "movdqa (%2), %%xmm0 \n\t"
                "movdqa 16(%2), %%xmm2 \n\t"
                "movdqa (%3), %%xmm1 \n\t"
                "movdqa 16(%3), %%xmm3 \n\t"
                "movdqu (%4), %%xmm4 \n\t"
                "movdqu (%5), %%xmm5 \n\t"
                "addl $32, %2 \n\t"
                "addl $32, %3 \n\t"
                "addl $16, %4 \n\t"
                "addl $16, %5 \n\t"
                "movdqa %%xmm4, %%xmm6 \n\t"
                "punpcklbw %%xmm5, %%xmm4 \n\t" /*chroma_l*/
                "punpckhbw %%xmm5, %%xmm6 \n\t" /*chroma_h*/
                "movdqa %%xmm4, %%xmm5 \n\t"
                "punpcklbw %%xmm0, %%xmm5 \n\t"
                "movntdq %%xmm5, (%0) \n\t" /*ov[x4]*/
                "movdqa %%xmm4, %%xmm5 \n\t"
                "punpckhbw %%xmm0, %%xmm5 \n\t"
                "movntdq %%xmm5, 16(%0) \n\t" /*ov[x4+1]*/
                "movdqa %%xmm6, %%xmm5 \n\t"
                "punpcklbw %%xmm2, %%xmm5 \n\t"
                "movntdq %%xmm5, 32(%0) \n\t" /*ov[x4+2]*/
                "movdqa %%xmm6, %%xmm5 \n\t"
                "punpckhbw %%xmm2, %%xmm5 \n\t"
                "movntdq %%xmm5, 48(%0) \n\t" /*ov[x4+3]*/
                "addl $64, %0 \n\t"
                "movdqa %%xmm4, %%xmm5 \n\t"
                "punpcklbw %%xmm1, %%xmm5 \n\t"
                "movntdq %%xmm5, (%1) \n\t" /*ov2[x4]*/
                "punpckhbw %%xmm1, %%xmm4 \n\t"
                "movntdq %%xmm4, 16(%1) \n\t" /*ov2[x4+1]*/
                "movdqa %%xmm6, %%xmm5 \n\t"
                "punpcklbw %%xmm3, %%xmm5 \n\t"
                "movntdq %%xmm5, 32(%1) \n\t" /*ov2[x4+2]*/
                "punpckhbw %%xmm3, %%xmm6 \n\t"
                "movntdq %%xmm6, 48(%1) \n\t" /*ov2[x4+3]*/
                "addl $64, %1 \n\t"
                "decl %6 \n\t"
                "jnz 0b \n\t"
                : "+r" (ov), "+r" (ov2), "+r" (yv),
                  "+r" (yv2), "+r" (uv), "+r" (vv), "+m"(vWidth_)
                :
                /* xmm0 is written by the first load, so it must be listed too */
                : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
            );
        }
#else
        for (x = 0; x < vWidth; x++) {
            int x2 = x*2, x4 = x*4;
            __m128i tmp_y = yv[x2], tmp_y3 = yv[x2+1],
                    tmp_y2 = yv2[x2], tmp_y4 = yv2[x2+1],
                    tmp_u = _mm_loadu_si128(&uv[x]), tmp_v = _mm_loadu_si128(&vv[x]),
                    chroma_l = _mm_unpacklo_epi8(tmp_u, tmp_v),
                    chroma_h = _mm_unpackhi_epi8(tmp_u, tmp_v);

            _mm_stream_si128(&ov[x4],   _mm_unpacklo_epi8(chroma_l, tmp_y));
            _mm_stream_si128(&ov[x4+1], _mm_unpackhi_epi8(chroma_l, tmp_y));
            _mm_stream_si128(&ov[x4+2], _mm_unpacklo_epi8(chroma_h, tmp_y3));
            _mm_stream_si128(&ov[x4+3], _mm_unpackhi_epi8(chroma_h, tmp_y3));
            _mm_stream_si128(&ov2[x4],  _mm_unpacklo_epi8(chroma_l, tmp_y2));
            _mm_stream_si128(&ov2[x4+1],_mm_unpackhi_epi8(chroma_l, tmp_y2));
            _mm_stream_si128(&ov2[x4+2],_mm_unpacklo_epi8(chroma_h, tmp_y4));
            _mm_stream_si128(&ov2[x4+3],_mm_unpackhi_epi8(chroma_h, tmp_y4));
        }
#endif

        /* Scalar tail: each vector iteration covered 16 chroma samples. */
        for (x = vWidth * 16; x < halfwidth; x++) {
            int x4 = x*4, x2 = x*2;
            o2[x4] = o[x4] = u[x];
            o [x4 + 1] = yc[x2];
            o2[x4 + 1] = yc2[x2];
            o2[x4 + 2] = o[x4 + 2] = v[x];
            o [x4 + 3] = yc[x2 + 1];
            o2[x4 + 3] = yc2[x2 + 1];
        }

        o += outRB*2;
        yc += rY*2;
        u += rUV;
        v += rUV;
    }

    if (unlikely(height & 1))
        Y420toY422_lastrow(o, yc, u, v, halfwidth);
}
/*
 * Plain C conversion from planar 4:2:0 to packed U, Y, V, Y.
 *
 * Two output rows are produced per iteration; both reuse the same chroma row.
 * If the height is odd, the remaining row is emitted by Y420toY422_lastrow().
 * Only width >> 1 pixel pairs are written per row.
 */
static FASTCALL void Y420toY422_x86_scalar(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    UInt8 * __restrict yc = picture->data[0], * __restrict u = picture->data[1], * __restrict v = picture->data[2];
    int rY = ctx->inLineSizes[0], rUV = ctx->inLineSizes[1];
    impossible(width <= 1 || height <= 1 || outRB <= 0 || rY <= 0 || rUV <= 0);
    int rowPairs = height >> 1, pixelPairs = width >> 1;
    int rowPair, pair;

    for (rowPair = 0; rowPair < rowPairs; rowPair++) {
        UInt8 *lowerOut = o + outRB;
        UInt8 *lowerY = yc + rY;

        for (pair = 0; pair < pixelPairs; pair++) {
            int outIdx = pair * 4;
            int lumaIdx = pair * 2;
            UInt8 cb = u[pair];
            UInt8 cr = v[pair];

            /* upper row */
            o[outIdx]     = cb;
            o[outIdx + 1] = yc[lumaIdx];
            o[outIdx + 2] = cr;
            o[outIdx + 3] = yc[lumaIdx + 1];

            /* lower row, same chroma */
            lowerOut[outIdx]     = cb;
            lowerOut[outIdx + 1] = lowerY[lumaIdx];
            lowerOut[outIdx + 2] = cr;
            lowerOut[outIdx + 3] = lowerY[lumaIdx + 1];
        }

        o  += outRB * 2;
        yc += rY * 2;
        u  += rUV;
        v  += rUV;
    }

    if (unlikely(height & 1))
        Y420toY422_lastrow(o, yc, u, v, pixelPairs);
}
/*
 * Converts planar 4:2:0 stored as 16-bit samples to 8-bit packed U, Y, V, Y.
 *
 * Every sample is read as a native UInt16 and shifted right by `shift` before
 * being stored as a byte. Input line sizes are in bytes and are halved to get
 * strides in samples. Rows are processed in pairs that share a chroma row; an
 * odd final row is converted on its own.
 *
 * shift  number of low-order bits dropped from each sample
 */
static always_inline void Y420_xtoY422_8(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o, int shift)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    UInt16 * __restrict yc = (UInt16*)picture->data[0], * __restrict u = (UInt16*)picture->data[1], * __restrict v = (UInt16*)picture->data[2];
    int rY = ctx->inLineSizes[0]>>1, rUV = ctx->inLineSizes[1]>>1;
    impossible(width <= 1 || height <= 1 || outRB <= 0 || rY <= 0 || rUV <= 0);
    int rowPairs = height >> 1, pixelPairs = width >> 1;
    int rowPair, pair;

    for (rowPair = 0; rowPair < rowPairs; rowPair++) {
        UInt8 *lowerOut = o + outRB;
        UInt16 *lowerY = yc + rY;

        for (pair = 0; pair < pixelPairs; pair++) {
            int outIdx = pair * 4;
            int lumaIdx = pair * 2;
            UInt8 cb = u[pair] >> shift;
            UInt8 cr = v[pair] >> shift;

            o[outIdx]     = cb;
            o[outIdx + 1] = yc[lumaIdx] >> shift;
            o[outIdx + 2] = cr;
            o[outIdx + 3] = yc[lumaIdx + 1] >> shift;

            lowerOut[outIdx]     = cb;
            lowerOut[outIdx + 1] = lowerY[lumaIdx] >> shift;
            lowerOut[outIdx + 2] = cr;
            lowerOut[outIdx + 3] = lowerY[lumaIdx + 1] >> shift;
        }

        o  += outRB * 2;
        yc += rY * 2;
        u  += rUV;
        v  += rUV;
    }

    /* An odd height leaves one unpaired row to convert. */
    if (unlikely(height & 1)) {
        for (pair = 0; pair < pixelPairs; pair++) {
            UInt8 *dst = o + pair * 4;

            dst[0] = u[pair] >> shift;
            dst[1] = yc[pair * 2] >> shift;
            dst[2] = v[pair] >> shift;
            dst[3] = yc[pair * 2 + 1] >> shift;
        }
    }
}
// 9-bit planar 4:2:0 to 8-bit packed: drop the lowest bit of every sample.
static FASTCALL void Y420_9toY422_8(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o)
{
    const int droppedBits = 9 - 8;

    Y420_xtoY422_8(ctx, picture, o, droppedBits);
}
// 10-bit planar 4:2:0 to 8-bit packed: drop the two lowest bits of every sample.
static FASTCALL void Y420_10toY422_8(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o)
{
    const int droppedBits = 10 - 8;

    Y420_xtoY422_8(ctx, picture, o, droppedBits);
}
// 16-bit planar 4:2:0 to 8-bit packed: keep only the high byte of every sample.
static FASTCALL void Y420_16toY422_8(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o)
{
    const int droppedBits = 16 - 8;

    Y420_xtoY422_8(ctx, picture, o, droppedBits);
}
//Y420+Alpha Planar to V408 (YUV 4:4:4+Alpha 32-bit packed) | |
//Could be fully unrolled to avoid x/2 | |
/*
 * Packs planar Y, U, V and alpha planes into 4 bytes per pixel in
 * U, Y, V, A order.
 *
 * Chroma is sampled at x/2 horizontally and the chroma row advances after
 * every odd-numbered row. The alpha plane is stepped with the same line size
 * as the luma plane (inLineSizes[0]).
 */
static FASTCALL void YA420toV408(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    UInt8 * __restrict yc = picture->data[0], * __restrict u = picture->data[1], * __restrict v = picture->data[2], * __restrict a = picture->data[3];
    int rYA = ctx->inLineSizes[0], rUV = ctx->inLineSizes[1];
    int row, col;
    impossible(width <= 0 || height <= 0 || outRB <= 0 || rYA <= 0 || rUV <= 0);

    for (row = 0; row < height; row++) {
        UInt8 *px = o;

        for (col = 0; col < width; col++, px += 4) {
            int chromaCol = col >> 1;

            px[0] = u[chromaCol];
            px[1] = yc[col];
            px[2] = v[chromaCol];
            px[3] = a[col];
        }

        o  += outRB;
        yc += rYA;
        a  += rYA;

        /* one chroma row serves two picture rows */
        if ((row & 1) != 0) {
            u += rUV;
            v += rUV;
        }
    }
}
/*
 * Copies a 3-byte-per-pixel image while swapping the first and third byte of
 * every pixel (B,G,R -> R,G,B).
 */
static FASTCALL void BGR24toRGB24(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict baseAddr)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    UInt8 * __restrict srcPtr = picture->data[0];
    int srcRB = ctx->inLineSizes[0];
    int row, col;
    impossible(width <= 0 || height <= 0 || outRB <= 0 || srcRB <= 0);

    for (row = 0; row < height; row++, baseAddr += outRB, srcPtr += srcRB) {
        for (col = 0; col < width; col++) {
            UInt8 *dst = baseAddr + col * 3;
            UInt8 *src = srcPtr + col * 3;

            dst[0] = src[2];
            dst[1] = src[1];
            dst[2] = src[0];
        }
    }
}
/*
 * Row-by-row copy of a packed image whose pixel layout needs no change.
 *
 * bytesPerPixel  size of one packed pixel; width * bytesPerPixel bytes are
 *                copied per row, so any row padding is not copied.
 *
 * Only non-positive dimensions are declared impossible: CCIsInvalidImage()
 * does not reject 1-pixel-wide or 1-pixel-tall RGB images, so a stronger hint
 * would be undefined behaviour for such input.
 */
static FASTCALL void RGBtoRGB(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict baseAddr, int bytesPerPixel)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    UInt8 * __restrict srcPtr = picture->data[0];
    int srcRB = ctx->inLineSizes[0];
    int y;
    impossible(width <= 0 || height <= 0 || outRB <= 0 || srcRB <= 0);

    for (y = 0; y < height; y++) {
        memcpy(baseAddr, srcPtr, width * bytesPerPixel);
        baseAddr += outRB;
        srcPtr += srcRB;
    }
}
//Big-endian XRGB32 to big-endian XRGB32 | |
// Straight copy of a 4-byte-per-pixel image.
static FASTCALL void RGB32toRGB32Copy(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o)
{
    const int bytesPerPixel = 4;

    RGBtoRGB(ctx, picture, o, bytesPerPixel);
}
// Straight copy of a 3-byte-per-pixel image.
static FASTCALL void RGB24toRGB24(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o)
{
    const int bytesPerPixel = 3;

    RGBtoRGB(ctx, picture, o, bytesPerPixel);
}
// Straight copy of a 2-byte-per-pixel image.
static FASTCALL void RGB16toRGB16(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o)
{
    const int bytesPerPixel = 2;

    RGBtoRGB(ctx, picture, o, bytesPerPixel);
}
//Little-endian XRGB32 to big-endian XRGB32 | |
/*
 * Copies a 32-bit-per-pixel image, converting every pixel word from
 * little-endian to big-endian with EndianU32_LtoB().
 *
 * Only non-positive dimensions are declared impossible: CCIsInvalidImage()
 * does not reject 1-pixel-wide or 1-pixel-tall RGB images, so a stronger hint
 * would be undefined behaviour for such input.
 */
static FASTCALL void RGB32toRGB32Swap(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict baseAddr)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    UInt8 * __restrict srcPtr = picture->data[0];
    int srcRB = ctx->inLineSizes[0];
    int x, y;
    impossible(width <= 0 || height <= 0 || outRB <= 0 || srcRB <= 0);

    for (y = 0; y < height; y++) {
        UInt32 *oRow = (UInt32 *)baseAddr, *iRow = (UInt32 *)srcPtr;

        for (x = 0; x < width; x++)
            oRow[x] = EndianU32_LtoB(iRow[x]);

        baseAddr += outRB;
        srcPtr += srcRB;
    }
}
/*
 * Copies a 16-bit-per-pixel image, converting every pixel word from
 * little-endian to big-endian with EndianU16_LtoB().
 *
 * Only non-positive dimensions are declared impossible: CCIsInvalidImage()
 * does not reject 1-pixel-wide or 1-pixel-tall RGB images, so a stronger hint
 * would be undefined behaviour for such input.
 */
static FASTCALL void RGB16toRGB16Swap(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict baseAddr)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    UInt8 * __restrict srcPtr = picture->data[0];
    int srcRB = ctx->inLineSizes[0];
    int x, y;
    impossible(width <= 0 || height <= 0 || outRB <= 0 || srcRB <= 0);

    for (y = 0; y < height; y++) {
        UInt16 *oRow = (UInt16 *)baseAddr, *iRow = (UInt16 *)srcPtr;

        for (x = 0; x < width; x++)
            oRow[x] = EndianU16_LtoB(iRow[x]);

        baseAddr += outRB;
        srcPtr += srcRB;
    }
}
/*
 * Interleaves planar 4:2:2 (one chroma row per luma row) into packed
 * U, Y, V, Y. Only width >> 1 pixel pairs are written per row.
 */
static FASTCALL void Y422toY422(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    UInt8 * __restrict yc = picture->data[0], * __restrict u = picture->data[1], * __restrict v = picture->data[2];
    int rY = ctx->inLineSizes[0], rUV = ctx->inLineSizes[1];
    impossible(width <= 0 || height <= 1 || outRB <= 0 || rY <= 0 || rUV <= 0);
    int pixelPairs = width >> 1;
    int row, pair;

    for (row = 0; row < height; row++) {
        for (pair = 0; pair < pixelPairs; pair++) {
            UInt8 *dst = o + pair * 4;
            UInt8 *luma = yc + pair * 2;

            dst[0] = u[pair];
            dst[1] = luma[0];
            dst[2] = v[pair];
            dst[3] = luma[1];
        }

        o  += outRB;
        yc += rY;
        u  += rUV;
        v  += rUV;
    }
}
/*
 * Expands planar 4:1:0 to packed U, Y, V, Y by replication.
 *
 * Each chroma sample is reused for two consecutive pixel pairs (four pixels)
 * horizontally, and each chroma row is reused for four picture rows.
 */
static FASTCALL void Y410toY422(const CCConverterContext *ctx, const AVPicture *picture, UInt8 * __restrict o)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    UInt8 * __restrict yc = picture->data[0], * __restrict u = picture->data[1], * __restrict v = picture->data[2];
    int rY = ctx->inLineSizes[0], rUV = ctx->inLineSizes[1];
    int row, pair, pixelPairs = width >> 1;

    for (row = 0; row < height; row++) {
        for (pair = 0; pair < pixelPairs; pair++) {
            UInt8 *dst = o + pair * 4;
            int chromaIdx = pair >> 1;

            dst[0] = u[chromaIdx];
            dst[1] = yc[pair * 2];
            dst[2] = v[chromaIdx];
            dst[3] = yc[pair * 2 + 1];
        }

        o  += outRB;
        yc += rY;

        /* move to the next chroma row after every fourth picture row */
        if ((row & 3) == 3) {
            u += rUV;
            v += rUV;
        }
    }
}
#pragma mark Picture clearing functions | |
/*
 * Zero-fills the visible part of each output row (width * bytesPerPixel
 * bytes); any row padding up to outLineSize is left untouched.
 */
static void ClearRGB(const CCConverterContext *ctx, UInt8 * __restrict baseAddr, int bytesPerPixel)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    UInt8 *row = baseAddr;
    int rowsLeft;

    for (rowsLeft = height; rowsLeft > 0; rowsLeft--) {
        memset(row, 0, width * bytesPerPixel);
        row += outRB;
    }
}
// Zero-fill a 4-byte-per-pixel output picture.
static FASTCALL void ClearRGB32(const CCConverterContext *ctx, UInt8 * __restrict o)
{
    const int bytesPerPixel = 4;

    ClearRGB(ctx, o, bytesPerPixel);
}
// Zero-fill a 3-byte-per-pixel output picture.
static FASTCALL void ClearRGB24(const CCConverterContext *ctx, UInt8 * __restrict o)
{
    const int bytesPerPixel = 3;

    ClearRGB(ctx, o, bytesPerPixel);
}
// Zero-fill a 2-byte-per-pixel output picture.
static FASTCALL void ClearRGB16(const CCConverterContext *ctx, UInt8 * __restrict o)
{
    const int bytesPerPixel = 2;

    ClearRGB(ctx, o, bytesPerPixel);
}
/*
 * Fills a 4-byte-per-pixel U, Y, V, A picture with the constant pixel
 * (0x80, 0x10, 0x80, 0xEB): zero chroma, black luma, opaque alpha.
 */
static FASTCALL void ClearV408(const CCConverterContext *ctx, UInt8 * __restrict baseAddr)
{
    static const UInt8 kBlackPixel[4] = {
        0x80, //zero chroma
        0x10, //black
        0x80,
        0xEB  //opaque
    };
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    int row, col, comp;

    for (row = 0; row < height; row++, baseAddr += outRB) {
        UInt8 *px = baseAddr;

        for (col = 0; col < width; col++, px += 4) {
            for (comp = 0; comp < 4; comp++)
                px[comp] = kBlackPixel[comp];
        }
    }
}
/*
 * Fills a 2-byte-per-pixel packed picture with alternating chroma/luma bytes
 * 0x80 (zero chroma) and 0x10 (black).
 */
static FASTCALL void ClearY422(const CCConverterContext *ctx, UInt8 * __restrict baseAddr)
{
    short width = ctx->width, height = ctx->height;
    int outRB = ctx->outLineSize;
    int row, col;

    for (row = 0; row < height; row++, baseAddr += outRB) {
        UInt8 *px = baseAddr;

        for (col = 0; col < width; col++) {
            *px++ = 0x80; //zero chroma
            *px++ = 0x10; //black
        }
    }
}
#pragma mark Simple converter | |
static enum PixelFormat CCSimplePixFmtForInput(enum PixelFormat inPixFmt) | |
{ | |
enum PixelFormat outPixFmt; | |
switch (inPixFmt) { | |
case PIX_FMT_RGB555LE: | |
case PIX_FMT_RGB555BE: | |
outPixFmt = PIX_FMT_RGB555BE; | |
break; | |
case PIX_FMT_BGR24: | |
case PIX_FMT_RGB24: | |
outPixFmt = PIX_FMT_RGB24; | |
break; | |
case PIX_FMT_ARGB: | |
case PIX_FMT_BGRA: | |
outPixFmt = PIX_FMT_ARGB; | |
break; | |
case PIX_FMT_YUV410P: | |
case PIX_FMT_YUVJ420P: | |
case PIX_FMT_YUV420P: | |
case PIX_FMT_YUV422P: | |
outPixFmt = PIX_FMT_YUV422P; | |
break; | |
case PIX_FMT_YUVA420P: | |
outPixFmt = PIX_FMT_YUV444P; // not quite... | |
break; | |
case PIX_FMT_YUV420P9LE: | |
case PIX_FMT_YUV420P10LE: | |
case PIX_FMT_YUV420P16LE: | |
outPixFmt = PIX_FMT_YUV422P; | |
break; | |
default: | |
Codecprintf(NULL, "Unknown input pix fmt %d\n", inPixFmt); | |
outPixFmt = -1; | |
} | |
return outPixFmt; | |
} | |
// Let's just not decode 1x1 images, saves time | |
static bool CCIsInvalidImage(const CCConverterContext *ctx) | |
{ | |
switch (ctx->inPixFmt) { | |
case PIX_FMT_YUVJ420P: | |
case PIX_FMT_YUV420P: | |
case PIX_FMT_YUVA420P: | |
if (ctx->width < 2 || ctx->height < 2) return true; | |
case PIX_FMT_YUV422P: | |
if (ctx->height < 2) return true; | |
default: | |
; | |
} | |
return false; | |
} | |
// Converter entry point: reads the planes of `picture`, writes the packed result to `o`.
typedef void (*ConvertFunc)(const CCConverterContext *ctx,
                            const AVPicture *picture,
                            UInt8 * __restrict o) FASTCALL;

// Clear entry point: fills the output picture at `baseAddr` with a constant pattern.
typedef void (*ClearFunc)(const CCConverterContext *ctx,
                          UInt8 * __restrict baseAddr) FASTCALL;
static ClearFunc CCSimpleClearForPixFmt(enum PixelFormat pixFmt) | |
{ | |
ClearFunc clear = NULL; | |
switch (pixFmt) { | |
case PIX_FMT_YUVJ420P: | |
case PIX_FMT_YUV420P: | |
case PIX_FMT_YUV420P9LE: | |
case PIX_FMT_YUV420P10LE: | |
case PIX_FMT_YUV420P16LE: | |
case PIX_FMT_YUV410P: | |
case PIX_FMT_YUV422P: | |
clear = ClearY422; | |
break; | |
case PIX_FMT_BGR24: | |
clear = ClearRGB24; | |
break; | |
case PIX_FMT_ARGB: | |
case PIX_FMT_BGRA: | |
clear = ClearRGB32; | |
break; | |
case PIX_FMT_RGB24: | |
clear = ClearRGB24; | |
break; | |
case PIX_FMT_RGB555LE: | |
case PIX_FMT_RGB555BE: | |
clear = ClearRGB16; | |
break; | |
case PIX_FMT_YUVA420P: | |
clear = ClearV408; | |
break; | |
default: | |
; | |
} | |
return clear; | |
} | |
/*
 * Selects the conversion routine for ctx->inPixFmt and stores a heap copy of
 * a block that invokes it in ctx->convert.
 *
 * The block captures the ctx pointer and the selected function pointer, so
 * ctx must outlive the block. If the input format has no converter, a message
 * is logged and ctx->convert is left untouched instead of capturing an
 * uninitialized function pointer.
 */
static void CCOpenSimpleConverter(CCConverterContext *ctx)
{
    ConvertFunc convert = NULL;

    switch (ctx->inPixFmt) {
        case PIX_FMT_YUVJ420P:
        case PIX_FMT_YUV420P:
            /* the SSE2 path is only chosen when the luma line size is a multiple of 16 */
            convert = unlikely(ctx->inLineSizes[0]&15) ? Y420toY422_x86_scalar : Y420toY422_sse2;
            break;
        case PIX_FMT_YUV420P9LE:
            convert = Y420_9toY422_8;
            break;
        case PIX_FMT_YUV420P10LE:
            convert = Y420_10toY422_8;
            break;
        case PIX_FMT_YUV420P16LE:
            convert = Y420_16toY422_8;
            break;
        case PIX_FMT_BGR24:
            convert = BGR24toRGB24;
            break;
        case PIX_FMT_ARGB:
#ifdef __BIG_ENDIAN__
            convert = RGB32toRGB32Swap;
#else
            convert = RGB32toRGB32Copy;
#endif
            break;
        case PIX_FMT_BGRA:
#ifdef __BIG_ENDIAN__
            convert = RGB32toRGB32Copy;
#else
            convert = RGB32toRGB32Swap;
#endif
            break;
        case PIX_FMT_RGB24:
            convert = RGB24toRGB24;
            break;
        case PIX_FMT_RGB555LE:
            convert = RGB16toRGB16Swap;
            break;
        case PIX_FMT_RGB555BE:
            convert = RGB16toRGB16;
            break;
        case PIX_FMT_YUV410P:
            convert = Y410toY422;
            break;
        case PIX_FMT_YUV422P:
            convert = Y422toY422;
            break;
        case PIX_FMT_YUVA420P:
            convert = YA420toV408;
            break;
        default:
            break;
    }

    if (!convert) {
        Codecprintf(NULL, "No simple converter for pix fmt %d\n", ctx->inPixFmt);
        return;
    }

    void (^convertBlock)(AVPicture*, uint8_t*) FASTCALL = ^(AVPicture *inPicture, UInt8 *outPicture) FASTCALL {
        convert(ctx, inPicture, outPicture);
    };

    ctx->convert = Block_copy(convertBlock);
}
#pragma mark SWS converter | |
/*
 * Creates a libswscale context converting from ctx->inPixFmt to
 * ctx->outPixFmt at unchanged size, stores it in ctx->opaque and installs a
 * ctx->convert block that runs sws_scale() over the whole picture.
 *
 * Colour coefficients follow ctx->inColorSpace (unspecified input uses 709
 * for heights above 576, 601 otherwise) and the source chroma shift follows
 * ctx->inChromaLocation.
 *
 * On failure (context allocation or initialisation) the error is logged,
 * ctx->opaque is set to NULL and ctx->convert is left untouched.
 */
static void CCOpenSwscaleConverter(CCConverterContext *ctx)
{
    struct SwsContext *sws;
    int swsRange = ctx->inColorRange == AVCOL_RANGE_JPEG ? 1 : 0;
    int swsCoeffCode;
    float hShift=0, vShift=0;
    const int *swsCoeff;

    switch (ctx->inColorSpace) {
        case AVCOL_SPC_SMPTE170M:
        case AVCOL_SPC_SMPTE240M:
        default:
            swsCoeffCode = SWS_CS_ITU601;
            break;
        case AVCOL_SPC_BT709:
            swsCoeffCode = SWS_CS_ITU709;
            break;
        case AVCOL_SPC_UNSPECIFIED:
            swsCoeffCode = ctx->height > 576 ? SWS_CS_ITU709 : SWS_CS_ITU601;
            break;
    }

    // TODO: should left be shifted .5, or should center be shifted -.5?
    switch (ctx->inChromaLocation) {
        case AVCHROMA_LOC_LEFT:
            hShift = .5;
            break;
        case AVCHROMA_LOC_UNSPECIFIED:
        case AVCHROMA_LOC_CENTER:
        default:
            break;
        case AVCHROMA_LOC_TOPLEFT:
            hShift = .5; vShift = .5;
            break;
        case AVCHROMA_LOC_TOP:
            vShift = .5;
            break;
        case AVCHROMA_LOC_BOTTOMLEFT:
            hShift = .5; vShift = -.5;
            break;
        case AVCHROMA_LOC_BOTTOM:
            vShift = -.5;
            break;
    }

    Codecprintf(NULL, "Color space %d/%d, chroma loc %d (%f %f)\n", ctx->inColorSpace, swsCoeffCode, ctx->inChromaLocation, hShift, vShift);

    swsCoeff = sws_getCoefficients(swsCoeffCode);
    sws = sws_alloc_context();
    if (!sws) {
        Codecprintf(NULL, "Failed to allocate swscale context\n");
        ctx->opaque = NULL;
        return;
    }

    av_opt_set_int(sws, "srcw", ctx->width, 0);
    av_opt_set_int(sws, "srch", ctx->height, 0);
    av_opt_set_int(sws, "dstw", ctx->width, 0);
    av_opt_set_int(sws, "dsth", ctx->height, 0);
    av_opt_set_int(sws, "src_format", ctx->inPixFmt, 0);
    av_opt_set_int(sws, "dst_format", ctx->outPixFmt, 0);
    av_opt_set_int(sws, "src_range", swsRange, 0);
    av_opt_set(sws, "sws_flags", "bicubic+full_chroma_int", 0);

    sws_setColorspaceDetails(sws, swsCoeff, swsRange, swsCoeff, 0, 0, 1<<16, 1<<16);

    SwsFilter *srcFilter = sws_getDefaultFilter(0, .5, 0, 0, hShift, vShift, 0);
    SwsFilter *dstFilter = sws_getDefaultFilter(0, 0, 0, 0, 0, 0, 0);
    int err = sws_init_context(sws, srcFilter, dstFilter);

    // NOTE(review): assumes libswscale only reads the filters during
    // sws_init_context() and keeps no reference to them - confirm against the
    // libswscale version in use.
    sws_freeFilter(srcFilter);
    sws_freeFilter(dstFilter);

    if (err < 0) {
        Codecprintf(NULL, "Failed to initialize swscale context (%d)\n", err);
        sws_freeContext(sws);
        ctx->opaque = NULL;
        return;
    }

    ctx->opaque = sws;
    ctx->convert = Block_copy(^(AVPicture *inPicture, uint8_t *outPicture) FASTCALL {
        uint8_t * const outdata[4] = {outPicture};
        int outlinesize[4] = {ctx->outLineSize};

        sws_scale(ctx->opaque,
                  (const uint8_t*const*)inPicture->data,
                  inPicture->linesize,
                  0, ctx->height,
                  outdata, outlinesize);
    });
}
/*
 * Frees the swscale context stored in ctx->opaque and clears the pointer so
 * a repeated close does not free it twice.
 */
static void CCCloseSwscaleConverter(CCConverterContext *ctx)
{
    sws_freeContext(ctx->opaque);
    ctx->opaque = NULL;
}
#pragma mark Color converter API | |
// Back ends a converter context can use; the value is stored in ctx->type.
enum CCConverterType {
    kCCConverterSimple  = 0, // hand-written per-format routines in this file
    kCCConverterSwscale = 1, // libswscale
    kCCConverterOpenCL  = 2  // no implementation in this file
};
// Back end chosen at build time: swscale when ENABLE_SWSCALE is defined, otherwise the simple converters.
static const enum CCConverterType kConverterType =
#ifdef ENABLE_SWSCALE
    kCCConverterSwscale;
#else
    kCCConverterSimple;
#endif
enum PixelFormat CCOutputPixFmtForInput(enum PixelFormat inPixFmt) | |
{ | |
switch (kConverterType) { | |
case kCCConverterSimple: | |
default: | |
return CCSimplePixFmtForInput(inPixFmt); | |
case kCCConverterSwscale: | |
return PIX_FMT_RGB24; | |
} | |
} | |
/*
 * Blanks the output picture using the clear routine that matches
 * ctx->inPixFmt. Formats without a clear routine are left untouched rather
 * than calling through a NULL function pointer.
 */
void CCClearPicture(CCConverterContext *ctx, uint8_t *outPicture)
{
    ClearFunc clear = CCSimpleClearForPixFmt(ctx->inPixFmt);

    if (!clear)
        return;

    clear(ctx, outPicture);
}
/*
 * Prepares ctx for conversion with the build-time selected back end.
 *
 * Returns without touching ctx when the image is rejected by
 * CCIsInvalidImage(). For the simple back end, it also returns without
 * installing a converter when the input format is unknown (outPixFmt == -1).
 */
void CCOpenConverter(CCConverterContext *ctx)
{
    if (CCIsInvalidImage(ctx))
        return;

    ctx->type = kConverterType;

    if (ctx->type == kCCConverterSimple) {
        ctx->outPixFmt = CCSimplePixFmtForInput(ctx->inPixFmt);
        if (ctx->outPixFmt != -1)
            CCOpenSimpleConverter(ctx);
    }
#ifdef ENABLE_SWSCALE
    else if (ctx->type == kCCConverterSwscale) {
        ctx->outPixFmt = PIX_FMT_RGB24;
        CCOpenSwscaleConverter(ctx);
    }
#endif
    else if (ctx->type == kCCConverterOpenCL) {
        //CCOpenCLConverter(ctx);
    }
}
/*
 * Releases whatever CCOpenConverter() set up for ctx->type.
 *
 * The convert block is released only when present, and the pointer is cleared
 * afterwards so closing the same context twice does not release it again.
 */
void CCCloseConverter(CCConverterContext *ctx)
{
    switch (ctx->type) {
        case kCCConverterSimple:
            if (!ctx->convert) return;
            Block_release(ctx->convert);
            ctx->convert = NULL;
            break;
#ifdef ENABLE_SWSCALE
        case kCCConverterSwscale:
            CCCloseSwscaleConverter(ctx);
            if (ctx->convert) {
                Block_release(ctx->convert);
                ctx->convert = NULL;
            }
            break;
#endif
        case kCCConverterOpenCL:
            break;
    }
}